Baptiste Beauplat pushed to branch master at snapshot / snapshot
Commits:
-
e64ffe15
by Felix Moessbauer at 2025-02-02T14:17:11+01:00
web: always lookup filename when serving artifact
In 1aaf5d08f5 we added support to lookup the filename of an artifact
that is accessed by hash. This was hidden behind the feature flag
REVERSE_NAME_LOOKUP. To simplify things, we make this lookup
unconditional. In case REDIRECT_TO_FARM is enabled and the farm is
directly served by Apache, no additional cost is generated. Otherwise,
we have a single (cheap) lookup but can avoid handling untrusted user
input.
Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
-
41945e0b
by Felix Moessbauer at 2025-02-02T14:17:11+01:00
web: append filename of artifact to file endpoint
In 1aaf5d08f support was added to lookup the filename when accessing the
/file endpoint and set the Content-Dispatch response header accordingly.
However, that is hidden behind a feature flag as it requires a
potentially costly db lookup. Further, it does not help in prod setups
where the files are served directly from the farm by the webserver.
To solve this, we now append the url-encoded filename to the path (after
the digest), so the webserver can decode this and set the
Content-Dispatch header accordingly.
Proposed-by: Philipp Kern <pkern@debian.org>
Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
-
feac6eed
by Felix Moessbauer at 2025-02-02T14:17:11+01:00
web: add test for filename variant of file endpoint
The new endpoint /file/<digest>/<filename> needs to be tested as well,
in addition to the old /file/<digest> endpoint. We add a test to check
if the file is delivered under the requested name.
Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
-
5343fe8a
by Baptiste Beauplat at 2025-02-02T15:04:00+01:00
Merge branch 'snapshot-fm/filenames'
Signed-off-by: Baptiste Beauplat <lyknode@debian.org>
8 changed files:
Changes:
API
... |
... |
@@ -170,6 +170,10 @@ URL: /file/<hash> |
170
|
170
|
http status codes: 200 500 403 404 451 304
|
171
|
171
|
[return the file]
|
172
|
172
|
|
|
173
|
+URL: /file/<hash>/<filename>
|
|
174
|
+http status codes: 200 500 403 400 404 451 304
|
|
175
|
+[return the file as <filename>]
|
|
176
|
+
|
173
|
177
|
URL: /mr/file/<hash>/info
|
174
|
178
|
http status codes: 200 500 404 304
|
175
|
179
|
{ '_comment': "yadayadayda",
|
etc/apache.conf
... |
... |
@@ -41,10 +41,12 @@ WSGIDaemonProcess snapshot.debian.org user=nobody group=nogroup home=/ processes |
41
|
41
|
</Files>
|
42
|
42
|
</Directory>
|
43
|
43
|
|
44
|
|
- AliasMatch "^/file/([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{36})$" "/srv/snapshot.debian.org/farm/$1/$2/$1$2$3"
|
|
44
|
+ AliasMatch "^/file/([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{36})/?([^/]+)?$" "/srv/snapshot.debian.org/farm/$1/$2/$1$2$3"
|
45
|
45
|
<Directory /srv/snapshot.debian.org/farm>
|
46
|
46
|
Require all granted
|
|
47
|
+ SetEnvIf Request_URI "^/file/[0-9a-f]{2}[0-9a-f]{2}[0-9a-f]{36}/([^/]+)$" FILENAME=$1
|
47
|
48
|
Header set Cache-Control "max-age=31536000, public"
|
|
49
|
+ Header set Content-Disposition "attachment; filename=%{FILENAME}e" env=FILENAME
|
48
|
50
|
</Directory>
|
49
|
51
|
|
50
|
52
|
WSGIScriptAlias / /srv/snapshot.debian.org/bin/snapshot.wsgi
|
web/app/snapshot/controllers/archive.py
... |
... |
@@ -195,7 +195,8 @@ class ArchiveFile(): |
195
|
195
|
# (not all apt clients do http redirects)
|
196
|
196
|
if current_app.config['REDIRECT_TO_FARM'] and \
|
197
|
197
|
visiblepath is not None:
|
198
|
|
- raise ArchiveRedirect(f'/file/{digest}')
|
|
198
|
+ filename = basename(visiblepath)
|
|
199
|
+ raise ArchiveRedirect(f'/file/{digest}/{quote(filename)}')
|
199
|
200
|
|
200
|
201
|
model = get_snapshot_model()
|
201
|
202
|
self.realpath = model.get_filepath(digest)
|
... |
... |
@@ -209,13 +210,12 @@ class ArchiveFile(): |
209
|
210
|
'file is not redistributable and this was done on purpose. '
|
210
|
211
|
'If in doubt report this.')
|
211
|
212
|
self.orig_name = None
|
212
|
|
- if current_app.config['REVERSE_NAME_LOOKUP']:
|
213
|
|
- try:
|
214
|
|
- file_infos = model.packages_get_file_info([digest])[digest]
|
215
|
|
- if file_infos:
|
216
|
|
- self.orig_name = file_infos[0]['name']
|
217
|
|
- except KeyError:
|
218
|
|
- pass
|
|
213
|
+ try:
|
|
214
|
+ file_infos = model.packages_get_file_info([digest])[digest]
|
|
215
|
+ if file_infos:
|
|
216
|
+ self.orig_name = file_infos[0]['name']
|
|
217
|
+ except KeyError:
|
|
218
|
+ pass
|
219
|
219
|
|
220
|
220
|
def get_dir(self):
|
221
|
221
|
return dirname(self.realpath)
|
web/app/snapshot/lib/helpers.py
... |
... |
@@ -38,6 +38,7 @@ def set_download_name(name, params=None): |
38
|
38
|
params = {}
|
39
|
39
|
if not name:
|
40
|
40
|
return params
|
|
41
|
+ params['as_attachment'] = True
|
41
|
42
|
if flask_version < '2.2.0':
|
42
|
43
|
params['attachment_filename'] = name
|
43
|
44
|
else:
|
web/app/snapshot/settings/common.py
... |
... |
@@ -33,8 +33,6 @@ POOL_CONN_MAX = 10 |
33
|
33
|
|
34
|
34
|
# Redirect or serve files
|
35
|
35
|
REDIRECT_TO_FARM = False
|
36
|
|
-# Lookup file names when serving by hash
|
37
|
|
-REVERSE_NAME_LOOKUP = True
|
38
|
36
|
|
39
|
37
|
# Cache timeout
|
40
|
38
|
|
web/app/snapshot/settings/prod.py
... |
... |
@@ -36,7 +36,6 @@ MAIL_ADMINS = [ |
36
|
36
|
]
|
37
|
37
|
|
38
|
38
|
REDIRECT_TO_FARM = True
|
39
|
|
-REVERSE_NAME_LOOKUP = False
|
40
|
39
|
|
41
|
40
|
# 1 day
|
42
|
41
|
CACHE_TIMEOUT_ARCHIVE_REDIRECT = 86400 |
web/app/snapshot/views/file.py
... |
... |
@@ -32,7 +32,8 @@ router = Blueprint("file", __name__, url_prefix="/file") |
32
|
32
|
|
33
|
33
|
|
34
|
34
|
@router.route("/<digest>")
|
35
|
|
-def file_index(digest):
|
|
35
|
+@router.route("/<digest>/<filename>")
|
|
36
|
+def file_index(digest, filename=""):
|
36
|
37
|
try:
|
37
|
38
|
node = ArchiveController.get_node_by_hash(digest)
|
38
|
39
|
except ArchiveDeniedError as e:
|
... |
... |
@@ -40,7 +41,7 @@ def file_index(digest): |
40
|
41
|
except ArchiveError as e:
|
41
|
42
|
abort(404, str(e))
|
42
|
43
|
|
43
|
|
- send_args = set_download_name(node.orig_name)
|
|
44
|
+ send_args = set_download_name(node.get_orig_name())
|
44
|
45
|
send_file = send_from_directory(node.get_dir(), node.get_filename(),
|
45
|
46
|
**send_args)
|
46
|
47
|
|
web/app/tests/unit/views/test_file.py
... |
... |
@@ -40,15 +40,23 @@ def test_file_bad(client, path, error): |
40
|
40
|
unescape(response.data.decode())
|
41
|
41
|
|
42
|
42
|
|
43
|
|
-@mark.parametrize('content,access,code,message', (
|
44
|
|
- ('file content', True, 200, 'file content'),
|
45
|
|
- ('another content', False, 403, 'Ooops, cannot read file with digest')
|
|
43
|
+@mark.parametrize('content,access,code,message,filename', (
|
|
44
|
+ ('file content', True, 200, 'file content', ''),
|
|
45
|
+ ('file content', True, 200, 'file content', 'file.txt'),
|
|
46
|
+ ('another content', False, 403, 'Ooops, cannot read file with digest', '')
|
46
|
47
|
))
|
47
|
|
-def test_file(client, file_ctrl, content, access, code, message):
|
|
48
|
+def test_file(client, file_ctrl, content, access, code, message, filename):
|
48
|
49
|
digest = file_ctrl.create_file(content, access)
|
49
|
|
- response = client.get(f'/file/{digest}')
|
|
50
|
+ file_path = f'/{filename}' if filename else ''
|
|
51
|
+ response = client.get(f'/file/{digest}{file_path}')
|
50
|
52
|
|
51
|
53
|
assert response.status_code == code
|
52
|
54
|
assert message in response.data.decode()
|
|
55
|
+ if filename:
|
|
56
|
+ # the file controller does not register the file in the DB,
|
|
57
|
+ # hence the file is just served as-is
|
|
58
|
+ filename = digest
|
|
59
|
+ assert f'filename={filename}' in \
|
|
60
|
+ response.headers['Content-Disposition']
|
53
|
61
|
|
54
|
62
|
file_ctrl.cleanup_file(digest) |
|