[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[Git][snapshot-team/snapshot][master] 4 commits: cache 302 redirects to farm much longer



Title: GitLab

Baptiste Beauplat pushed to branch master at snapshot / snapshot

Commits:

  • 49ec1bea
    by Felix Moessbauer at 2024-11-11T08:13:47+01:00
    cache 302 redirects to farm much longer
    
    Currently the redirects from the archive to the farm are cached for just
    600 seconds, putting a lot of load on the flask app. The artifacts the
    redirects point to are cached much longer, though. By that, we still see
    a lot of load on the flask app, which finally needs to be rate-limited.
    
    By introducing a new parameter CACHE_TIMEOUT_ARCHIVE_REDIRECT to control
    the max-age of the redirects, we can make these requests also cacheable.
    Currently we use a max-age of 1 day, but this can be fine-tuned later
    on.
    
    Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
    
  • 1aaf5d08
    by Felix Moessbauer at 2024-11-13T21:13:02+01:00
    web: preserve file names where possible
    
    Depending on the configuration, the web frontend directly returns
    artifacts or redirects to the farm and returns artifacts by name. In
    both cases, the artifact name (from the DB) is not preserved, leading to
    hashed download filenames. While this is not a problem for apt itself,
    it is problematic when browsing the archive with a browser or wget.
    
    Now, you can set the option REVERSE_NAME_LOOKUP which performs a DB
    lookup of the hash to get the original file name.
    
    Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
    
  • c02ad5e5
    by Baptiste Beauplat at 2024-11-17T14:03:25+01:00
    Merge branch 'snapshot-fm/cache-redirect'
    
    Signed-off-by: Baptiste Beauplat <lyknode@debian.org>
    
  • 1e3fdb21
    by Baptiste Beauplat at 2024-11-17T14:05:39+01:00
    Merge branch 'snapshot-fm/filenames'
    
    Signed-off-by: Baptiste Beauplat <lyknode@debian.org>
    

6 changed files:

Changes:

  • web/app/snapshot/controllers/archive.py
    ... ... @@ -197,7 +197,8 @@ class ArchiveFile():
    197 197
                     visiblepath is not None:
    
    198 198
                 raise ArchiveRedirect(f'/file/{digest}')
    
    199 199
     
    
    200
    -        self.realpath = get_snapshot_model().get_filepath(digest)
    
    200
    +        model = get_snapshot_model()
    
    201
    +        self.realpath = model.get_filepath(digest)
    
    201 202
             if not exists(self.realpath):
    
    202 203
                 raise ArchiveError('Ooops, we do not have a file with '
    
    203 204
                                    f'digest {digest} even tho we should. '
    
    ... ... @@ -207,9 +208,20 @@ class ArchiveFile():
    207 208
                     f'Ooops, cannot read file with digest {digest}. Maybe this '
    
    208 209
                     'file is not redistributable and this was done on purpose. '
    
    209 210
                     'If in doubt report this.')
    
    211
    +        self.orig_name = None
    
    212
    +        if current_app.config['REVERSE_NAME_LOOKUP']:
    
    213
    +            try:
    
    214
    +                file_infos = model.packages_get_file_info([digest])[digest]
    
    215
    +                if file_infos:
    
    216
    +                    self.orig_name = file_infos[0]['name']
    
    217
    +            except KeyError:
    
    218
    +                pass
    
    210 219
     
    
    211 220
         def get_dir(self):
    
    212 221
             return dirname(self.realpath)
    
    213 222
     
    
    214 223
         def get_filename(self):
    
    215 224
             return basename(self.realpath)
    
    225
    +
    
    226
    +    def get_orig_name(self):
    
    227
    +        return self.orig_name or self.get_filename()

  • web/app/snapshot/lib/helpers.py
    ... ... @@ -23,6 +23,7 @@
    23 23
     from re import sub
    
    24 24
     
    
    25 25
     from markupsafe import Markup
    
    26
    +from flask import __version__ as flask_version
    
    26 27
     
    
    27 28
     
    
    28 29
     def debian_bugs_markup(text):
    
    ... ... @@ -30,3 +31,15 @@ def debian_bugs_markup(text):
    30 31
         html = sub(r'#([0-9]+)', r'<a href="https://bugs.debian.org/\1">#\1</a>',
    
    31 32
                    safe)
    
    32 33
         return Markup(html)
    
    34
    +
    
    35
    +
    
    36
    +def set_download_name(name, params=None):
    
    37
    +    if not params:
    
    38
    +        params = {}
    
    39
    +    if not name:
    
    40
    +        return params
    
    41
    +    if flask_version < '2.2.0':
    
    42
    +        params['attachment_filename'] = name
    
    43
    +    else:
    
    44
    +        params['download_name'] = name
    
    45
    +    return params

  • web/app/snapshot/settings/common.py
    ... ... @@ -33,6 +33,8 @@ POOL_CONN_MAX = 10
    33 33
     
    
    34 34
     # Redirect or serve files
    
    35 35
     REDIRECT_TO_FARM = False
    
    36
    +# Lookup file names when serving by hash
    
    37
    +REVERSE_NAME_LOOKUP = True
    
    36 38
     
    
    37 39
     # Cache timeout
    
    38 40
     
    
    ... ... @@ -52,6 +54,7 @@ CACHE_TIMEOUT_PACKAGE_VERSION = CACHE_TIMEOUT_DEFAULT
    52 54
     
    
    53 55
     CACHE_TIMEOUT_ARCHIVE_INDEX = CACHE_TIMEOUT_DEFAULT
    
    54 56
     CACHE_TIMEOUT_ARCHIVE_DIR = CACHE_TIMEOUT_DEFAULT
    
    57
    +CACHE_TIMEOUT_ARCHIVE_REDIRECT = CACHE_TIMEOUT_DEFAULT
    
    55 58
     
    
    56 59
     CACHE_TIMEOUT_ROOT = CACHE_TIMEOUT_DEFAULT
    
    57 60
     CACHE_TIMEOUT_ROOT_NEWS = CACHE_TIMEOUT_DEFAULT
    

  • web/app/snapshot/settings/prod.py
    ... ... @@ -36,3 +36,7 @@ MAIL_ADMINS = [
    36 36
     ]
    
    37 37
     
    
    38 38
     REDIRECT_TO_FARM = True
    
    39
    +REVERSE_NAME_LOOKUP = False
    
    40
    +
    
    41
    +# 1 day
    
    42
    +CACHE_TIMEOUT_ARCHIVE_REDIRECT = 86400

  • web/app/snapshot/views/archive.py
    ... ... @@ -30,6 +30,7 @@ from snapshot.models.snapshot import get_snapshot_model
    30 30
     from snapshot.controllers.archive import ArchiveController, ArchiveError, \
    
    31 31
         ArchiveDir, ArchiveRedirect
    
    32 32
     from snapshot.lib.control_helpers import build_url_archive, get_domain
    
    33
    +from snapshot.lib.helpers import set_download_name
    
    33 34
     
    
    34 35
     log = getLogger(__name__)
    
    35 36
     router = Blueprint("archive", __name__, url_prefix="/archive")
    
    ... ... @@ -93,6 +94,9 @@ def archive_dir(archive, date, path='/'):
    93 94
         except ArchiveError as e:
    
    94 95
             abort(404, str(e))
    
    95 96
         except ArchiveRedirect as e:
    
    97
    +        archive_dir.cache_timeout = current_app.config[
    
    98
    +            'CACHE_TIMEOUT_ARCHIVE_REDIRECT'
    
    99
    +        ]
    
    96 100
             return redirect(str(e))
    
    97 101
     
    
    98 102
         if isinstance(node, ArchiveDir):
    
    ... ... @@ -109,7 +113,9 @@ def archive_dir(archive, date, path='/'):
    109 113
                                    **node.info)
    
    110 114
     
    
    111 115
         # node is an ArchiveFile
    
    112
    -    send_file = send_from_directory(node.get_dir(), node.get_filename())
    
    116
    +    send_args = set_download_name(node.get_orig_name())
    
    117
    +    send_file = send_from_directory(node.get_dir(), node.get_filename(),
    
    118
    +                                    **send_args)
    
    113 119
     
    
    114 120
         # Remove content type to mimic current snapshot behavior
    
    115 121
         response = make_response(send_file)
    

  • web/app/snapshot/views/file.py
    ... ... @@ -25,6 +25,7 @@ from flask import Blueprint, abort, send_from_directory, make_response
    25 25
     
    
    26 26
     from snapshot.controllers.archive import ArchiveController, ArchiveError, \
    
    27 27
         ArchiveDeniedError
    
    28
    +from snapshot.lib.helpers import set_download_name
    
    28 29
     
    
    29 30
     log = getLogger(__name__)
    
    30 31
     router = Blueprint("file", __name__, url_prefix="/file")
    
    ... ... @@ -39,7 +40,9 @@ def file_index(digest):
    39 40
         except ArchiveError as e:
    
    40 41
             abort(404, str(e))
    
    41 42
     
    
    42
    -    send_file = send_from_directory(node.get_dir(), node.get_filename())
    
    43
    +    send_args = set_download_name(node.orig_name)
    
    44
    +    send_file = send_from_directory(node.get_dir(), node.get_filename(),
    
    45
    +                                    **send_args)
    
    43 46
     
    
    44 47
         # Remove content type to mimic current snapshot behavior
    
    45 48
         response = make_response(send_file)
    


  • Reply to: