[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[Git][snapshot-team/snapshot][master] 4 commits: web: always lookup filename when serving artifact



Title: GitLab

Baptiste Beauplat pushed to branch master at snapshot / snapshot

Commits:

  • e64ffe15
    by Felix Moessbauer at 2025-02-02T14:17:11+01:00
    web: always lookup filename when serving artifact
    
    In 1aaf5d08f5 we added support to lookup the filename of an artifact
    that is accessed by hash. This was hidden behind the feature flag
    REVERSE_NAME_LOOKUP. To simplify things, we make this lookup
    unconditional. In case REDIRECT_TO_FARM is enabled and the farm is
    directly served by Apache, no additional cost is generated. Otherwise,
    we have a single (cheap) lookup but can avoid handling untrusted user
    input.
    
    Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
    
  • 41945e0b
    by Felix Moessbauer at 2025-02-02T14:17:11+01:00
    web: append filename of artifact to file endpoint
    
    In 1aaf5d08f support was added to lookup the filename when accessing the
    /file endpoint and set the Content-Dispatch response header accordingly.
    However, that is hidden behind a feature flag as it requires a
    potentially costly db lookup. Further, it does not help in prod setups
    where the files are served directly from the farm by the webserver.
    
    To solve this, we now append the url-encoded filename to the path (after
    the digest), so the webserver can decode this and set the
    Content-Dispatch header accordingly.
    
    Proposed-by: Philipp Kern <pkern@debian.org>
    Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
    
  • feac6eed
    by Felix Moessbauer at 2025-02-02T14:17:11+01:00
    web: add test for filename variant of file endpoint
    
    The new endpoint /file/<digest>/<filename> needs to be tested as well,
    in addition to the old /file/<digest> endpoint. We add a test to check
    if the file is delivered under the requested name.
    
    Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
    
  • 5343fe8a
    by Baptiste Beauplat at 2025-02-02T15:04:00+01:00
    Merge branch 'snapshot-fm/filenames'
    
    Signed-off-by: Baptiste Beauplat <lyknode@debian.org>
    

8 changed files:

Changes:

  • API
    ... ... @@ -170,6 +170,10 @@ URL: /file/<hash>
    170 170
     http status codes: 200 500 403 404 451 304
    
    171 171
     [return the file]
    
    172 172
     
    
    173
    +URL: /file/<hash>/<filename>
    
    174
    +http status codes: 200 500 403 400 404 451 304
    
    175
    +[return the file as <filename>]
    
    176
    +
    
    173 177
     URL: /mr/file/<hash>/info
    
    174 178
     http status codes: 200 500 404 304
    
    175 179
     {   '_comment': "yadayadayda",
    

  • etc/apache.conf
    ... ... @@ -41,10 +41,12 @@ WSGIDaemonProcess snapshot.debian.org user=nobody group=nogroup home=/ processes
    41 41
         </Files>
    
    42 42
       </Directory>
    
    43 43
     
    
    44
    -  AliasMatch "^/file/([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{36})$" "/srv/snapshot.debian.org/farm/$1/$2/$1$2$3"
    
    44
    +  AliasMatch "^/file/([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{36})/?([^/]+)?$" "/srv/snapshot.debian.org/farm/$1/$2/$1$2$3"
    
    45 45
       <Directory /srv/snapshot.debian.org/farm>
    
    46 46
         Require all granted
    
    47
    +    SetEnvIf Request_URI "^/file/[0-9a-f]{2}[0-9a-f]{2}[0-9a-f]{36}/([^/]+)$" FILENAME=$1
    
    47 48
         Header set Cache-Control "max-age=31536000, public"
    
    49
    +    Header set Content-Disposition "attachment; filename=%{FILENAME}e" env=FILENAME
    
    48 50
       </Directory>
    
    49 51
     
    
    50 52
       WSGIScriptAlias / /srv/snapshot.debian.org/bin/snapshot.wsgi
    

  • web/app/snapshot/controllers/archive.py
    ... ... @@ -195,7 +195,8 @@ class ArchiveFile():
    195 195
             # (not all apt clients do http redirects)
    
    196 196
             if current_app.config['REDIRECT_TO_FARM'] and \
    
    197 197
                     visiblepath is not None:
    
    198
    -            raise ArchiveRedirect(f'/file/{digest}')
    
    198
    +            filename = basename(visiblepath)
    
    199
    +            raise ArchiveRedirect(f'/file/{digest}/{quote(filename)}')
    
    199 200
     
    
    200 201
             model = get_snapshot_model()
    
    201 202
             self.realpath = model.get_filepath(digest)
    
    ... ... @@ -209,13 +210,12 @@ class ArchiveFile():
    209 210
                     'file is not redistributable and this was done on purpose. '
    
    210 211
                     'If in doubt report this.')
    
    211 212
             self.orig_name = None
    
    212
    -        if current_app.config['REVERSE_NAME_LOOKUP']:
    
    213
    -            try:
    
    214
    -                file_infos = model.packages_get_file_info([digest])[digest]
    
    215
    -                if file_infos:
    
    216
    -                    self.orig_name = file_infos[0]['name']
    
    217
    -            except KeyError:
    
    218
    -                pass
    
    213
    +        try:
    
    214
    +            file_infos = model.packages_get_file_info([digest])[digest]
    
    215
    +            if file_infos:
    
    216
    +                self.orig_name = file_infos[0]['name']
    
    217
    +        except KeyError:
    
    218
    +            pass
    
    219 219
     
    
    220 220
         def get_dir(self):
    
    221 221
             return dirname(self.realpath)
    

  • web/app/snapshot/lib/helpers.py
    ... ... @@ -38,6 +38,7 @@ def set_download_name(name, params=None):
    38 38
             params = {}
    
    39 39
         if not name:
    
    40 40
             return params
    
    41
    +    params['as_attachment'] = True
    
    41 42
         if flask_version < '2.2.0':
    
    42 43
             params['attachment_filename'] = name
    
    43 44
         else:
    

  • web/app/snapshot/settings/common.py
    ... ... @@ -33,8 +33,6 @@ POOL_CONN_MAX = 10
    33 33
     
    
    34 34
     # Redirect or serve files
    
    35 35
     REDIRECT_TO_FARM = False
    
    36
    -# Lookup file names when serving by hash
    
    37
    -REVERSE_NAME_LOOKUP = True
    
    38 36
     
    
    39 37
     # Cache timeout
    
    40 38
     
    

  • web/app/snapshot/settings/prod.py
    ... ... @@ -36,7 +36,6 @@ MAIL_ADMINS = [
    36 36
     ]
    
    37 37
     
    
    38 38
     REDIRECT_TO_FARM = True
    
    39
    -REVERSE_NAME_LOOKUP = False
    
    40 39
     
    
    41 40
     # 1 day
    
    42 41
     CACHE_TIMEOUT_ARCHIVE_REDIRECT = 86400

  • web/app/snapshot/views/file.py
    ... ... @@ -32,7 +32,8 @@ router = Blueprint("file", __name__, url_prefix="/file")
    32 32
     
    
    33 33
     
    
    34 34
     @router.route("/<digest>")
    
    35
    -def file_index(digest):
    
    35
    +@router.route("/<digest>/<filename>")
    
    36
    +def file_index(digest, filename=""):
    
    36 37
         try:
    
    37 38
             node = ArchiveController.get_node_by_hash(digest)
    
    38 39
         except ArchiveDeniedError as e:
    
    ... ... @@ -40,7 +41,7 @@ def file_index(digest):
    40 41
         except ArchiveError as e:
    
    41 42
             abort(404, str(e))
    
    42 43
     
    
    43
    -    send_args = set_download_name(node.orig_name)
    
    44
    +    send_args = set_download_name(node.get_orig_name())
    
    44 45
         send_file = send_from_directory(node.get_dir(), node.get_filename(),
    
    45 46
                                         **send_args)
    
    46 47
     
    

  • web/app/tests/unit/views/test_file.py
    ... ... @@ -40,15 +40,23 @@ def test_file_bad(client, path, error):
    40 40
             unescape(response.data.decode())
    
    41 41
     
    
    42 42
     
    
    43
    -@mark.parametrize('content,access,code,message', (
    
    44
    -    ('file content', True, 200, 'file content'),
    
    45
    -    ('another content', False, 403, 'Ooops, cannot read file with digest')
    
    43
    +@mark.parametrize('content,access,code,message,filename', (
    
    44
    +    ('file content', True, 200, 'file content', ''),
    
    45
    +    ('file content', True, 200, 'file content', 'file.txt'),
    
    46
    +    ('another content', False, 403, 'Ooops, cannot read file with digest', '')
    
    46 47
     ))
    
    47
    -def test_file(client, file_ctrl, content, access, code, message):
    
    48
    +def test_file(client, file_ctrl, content, access, code, message, filename):
    
    48 49
         digest = file_ctrl.create_file(content, access)
    
    49
    -    response = client.get(f'/file/{digest}')
    
    50
    +    file_path = f'/{filename}' if filename else ''
    
    51
    +    response = client.get(f'/file/{digest}{file_path}')
    
    50 52
     
    
    51 53
         assert response.status_code == code
    
    52 54
         assert message in response.data.decode()
    
    55
    +    if filename:
    
    56
    +        # the file controller does not register the file in the DB,
    
    57
    +        # hence the file is just served as-is
    
    58
    +        filename = digest
    
    59
    +        assert f'filename={filename}' in \
    
    60
    +            response.headers['Content-Disposition']
    
    53 61
     
    
    54 62
         file_ctrl.cleanup_file(digest)


  • Reply to: