Baptiste Beauplat pushed to branch master at snapshot / snapshot
Commits:
-
49ec1bea
by Felix Moessbauer at 2024-11-11T08:13:47+01:00
cache 302 redirects to farm much longer
Currently the redirects from the archive to the farm are cached for just
600 seconds, putting a lot of load on the Flask app. The artifacts the
redirects point to are cached much longer, though. As a result, we still see
a lot of load on the Flask app, which eventually has to be rate-limited.
By introducing a new parameter CACHE_TIMEOUT_ARCHIVE_REDIRECT to control
the max-age of the redirects, we can make these requests also cacheable.
Currently we use a max-age of 1 day, but this can be fine-tuned later
on.
Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
-
1aaf5d08
by Felix Moessbauer at 2024-11-13T21:13:02+01:00
web: preserve file names where possible
Depending on the configuration, the web frontend directly returns
artifacts or redirects to the farm and returns artifacts by name. In
both cases, the artifact name (from the DB) is not preserved, leading to
hashed download filenames. While this is not a problem for apt itself,
it is problematic when browsing the archive with a browser or wget.
Now, you can set the option REVERSE_NAME_LOOKUP which performs a DB
lookup of the hash to get the original file name.
Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
-
c02ad5e5
by Baptiste Beauplat at 2024-11-17T14:03:25+01:00
Merge branch 'snapshot-fm/cache-redirect'
Signed-off-by: Baptiste Beauplat <lyknode@debian.org>
-
1e3fdb21
by Baptiste Beauplat at 2024-11-17T14:05:39+01:00
Merge branch 'snapshot-fm/filenames'
Signed-off-by: Baptiste Beauplat <lyknode@debian.org>
6 changed files:
Changes:
web/app/snapshot/controllers/archive.py
... |
... |
@@ -197,7 +197,8 @@ class ArchiveFile(): |
197
|
197
|
visiblepath is not None:
|
198
|
198
|
raise ArchiveRedirect(f'/file/{digest}')
|
199
|
199
|
|
200
|
|
- self.realpath = get_snapshot_model().get_filepath(digest)
|
|
200
|
+ model = get_snapshot_model()
|
|
201
|
+ self.realpath = model.get_filepath(digest)
|
201
|
202
|
if not exists(self.realpath):
|
202
|
203
|
raise ArchiveError('Ooops, we do not have a file with '
|
203
|
204
|
f'digest {digest} even tho we should. '
|
... |
... |
@@ -207,9 +208,20 @@ class ArchiveFile(): |
207
|
208
|
f'Ooops, cannot read file with digest {digest}. Maybe this '
|
208
|
209
|
'file is not redistributable and this was done on purpose. '
|
209
|
210
|
'If in doubt report this.')
|
|
211
|
+ self.orig_name = None
|
|
212
|
+ if current_app.config['REVERSE_NAME_LOOKUP']:
|
|
213
|
+ try:
|
|
214
|
+ file_infos = model.packages_get_file_info([digest])[digest]
|
|
215
|
+ if file_infos:
|
|
216
|
+ self.orig_name = file_infos[0]['name']
|
|
217
|
+ except KeyError:
|
|
218
|
+ pass
|
210
|
219
|
|
211
|
220
|
def get_dir(self):
|
212
|
221
|
return dirname(self.realpath)
|
213
|
222
|
|
214
|
223
|
def get_filename(self):
|
215
|
224
|
return basename(self.realpath)
|
|
225
|
+
|
|
226
|
+ def get_orig_name(self):
|
|
227
|
+ return self.orig_name or self.get_filename() |
web/app/snapshot/lib/helpers.py
... |
... |
@@ -23,6 +23,7 @@ |
23
|
23
|
from re import sub
|
24
|
24
|
|
25
|
25
|
from markupsafe import Markup
|
|
26
|
+from flask import __version__ as flask_version
|
26
|
27
|
|
27
|
28
|
|
28
|
29
|
def debian_bugs_markup(text):
|
... |
... |
@@ -30,3 +31,15 @@ def debian_bugs_markup(text): |
30
|
31
|
html = sub(r'#([0-9]+)', r'<a href="https://bugs.debian.org/\1">#\1</a>',
|
31
|
32
|
safe)
|
32
|
33
|
return Markup(html)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+def set_download_name(name, params=None):
|
|
37
|
+ if not params:
|
|
38
|
+ params = {}
|
|
39
|
+ if not name:
|
|
40
|
+ return params
|
|
41
|
+ if flask_version < '2.2.0':
|
|
42
|
+ params['attachment_filename'] = name
|
|
43
|
+ else:
|
|
44
|
+ params['download_name'] = name
|
|
45
|
+ return params |
web/app/snapshot/settings/common.py
... |
... |
@@ -33,6 +33,8 @@ POOL_CONN_MAX = 10 |
33
|
33
|
|
34
|
34
|
# Redirect or serve files
|
35
|
35
|
REDIRECT_TO_FARM = False
|
|
36
|
+# Lookup file names when serving by hash
|
|
37
|
+REVERSE_NAME_LOOKUP = True
|
36
|
38
|
|
37
|
39
|
# Cache timeout
|
38
|
40
|
|
... |
... |
@@ -52,6 +54,7 @@ CACHE_TIMEOUT_PACKAGE_VERSION = CACHE_TIMEOUT_DEFAULT |
52
|
54
|
|
53
|
55
|
CACHE_TIMEOUT_ARCHIVE_INDEX = CACHE_TIMEOUT_DEFAULT
|
54
|
56
|
CACHE_TIMEOUT_ARCHIVE_DIR = CACHE_TIMEOUT_DEFAULT
|
|
57
|
+CACHE_TIMEOUT_ARCHIVE_REDIRECT = CACHE_TIMEOUT_DEFAULT
|
55
|
58
|
|
56
|
59
|
CACHE_TIMEOUT_ROOT = CACHE_TIMEOUT_DEFAULT
|
57
|
60
|
CACHE_TIMEOUT_ROOT_NEWS = CACHE_TIMEOUT_DEFAULT
|
web/app/snapshot/settings/prod.py
... |
... |
@@ -36,3 +36,7 @@ MAIL_ADMINS = [ |
36
|
36
|
]
|
37
|
37
|
|
38
|
38
|
REDIRECT_TO_FARM = True
|
|
39
|
+REVERSE_NAME_LOOKUP = False
|
|
40
|
+
|
|
41
|
+# 1 day
|
|
42
|
+CACHE_TIMEOUT_ARCHIVE_REDIRECT = 86400 |
web/app/snapshot/views/archive.py
... |
... |
@@ -30,6 +30,7 @@ from snapshot.models.snapshot import get_snapshot_model |
30
|
30
|
from snapshot.controllers.archive import ArchiveController, ArchiveError, \
|
31
|
31
|
ArchiveDir, ArchiveRedirect
|
32
|
32
|
from snapshot.lib.control_helpers import build_url_archive, get_domain
|
|
33
|
+from snapshot.lib.helpers import set_download_name
|
33
|
34
|
|
34
|
35
|
log = getLogger(__name__)
|
35
|
36
|
router = Blueprint("archive", __name__, url_prefix="/archive")
|
... |
... |
@@ -93,6 +94,9 @@ def archive_dir(archive, date, path='/'): |
93
|
94
|
except ArchiveError as e:
|
94
|
95
|
abort(404, str(e))
|
95
|
96
|
except ArchiveRedirect as e:
|
|
97
|
+ archive_dir.cache_timeout = current_app.config[
|
|
98
|
+ 'CACHE_TIMEOUT_ARCHIVE_REDIRECT'
|
|
99
|
+ ]
|
96
|
100
|
return redirect(str(e))
|
97
|
101
|
|
98
|
102
|
if isinstance(node, ArchiveDir):
|
... |
... |
@@ -109,7 +113,9 @@ def archive_dir(archive, date, path='/'): |
109
|
113
|
**node.info)
|
110
|
114
|
|
111
|
115
|
# node is an ArchiveFile
|
112
|
|
- send_file = send_from_directory(node.get_dir(), node.get_filename())
|
|
116
|
+ send_args = set_download_name(node.get_orig_name())
|
|
117
|
+ send_file = send_from_directory(node.get_dir(), node.get_filename(),
|
|
118
|
+ **send_args)
|
113
|
119
|
|
114
|
120
|
# Remove content type to mimic current snapshot behavior
|
115
|
121
|
response = make_response(send_file)
|
web/app/snapshot/views/file.py
... |
... |
@@ -25,6 +25,7 @@ from flask import Blueprint, abort, send_from_directory, make_response |
25
|
25
|
|
26
|
26
|
from snapshot.controllers.archive import ArchiveController, ArchiveError, \
|
27
|
27
|
ArchiveDeniedError
|
|
28
|
+from snapshot.lib.helpers import set_download_name
|
28
|
29
|
|
29
|
30
|
log = getLogger(__name__)
|
30
|
31
|
router = Blueprint("file", __name__, url_prefix="/file")
|
... |
... |
@@ -39,7 +40,9 @@ def file_index(digest): |
39
|
40
|
except ArchiveError as e:
|
40
|
41
|
abort(404, str(e))
|
41
|
42
|
|
42
|
|
- send_file = send_from_directory(node.get_dir(), node.get_filename())
|
|
43
|
+ send_args = set_download_name(node.orig_name)
|
|
44
|
+ send_file = send_from_directory(node.get_dir(), node.get_filename(),
|
|
45
|
+ **send_args)
|
43
|
46
|
|
44
|
47
|
# Remove content type to mimic current snapshot behavior
|
45
|
48
|
response = make_response(send_file)
|
|