Baptiste Beauplat pushed to branch master at snapshot / snapshot
Commits:
-
5aa020c0
by Philipp Kern at 2025-02-07T01:11:50+01:00
Create a "manually" maintained materialized view for node metadata
It's necessary to materialize node_with_ts to speed up queries
massively, but materialized views of this size (6 GB+) come with a hefty
penalty for the WAL - which hurts in both synchronization and backup
space. Instead add a manually maintained table that is updated whenever
a node is touched. This will still generate a lot of churn by design
(after all, it will be executed for every node that is still alive at
the current import time), but much less than regenerating the whole
view.
-
2620e7f1
by Baptiste Beauplat at 2025-02-11T18:54:36+01:00
Merge branch 'snapshot-db-fixes'
Signed-off-by: Baptiste Beauplat <lyknode@debian.org>
2 changed files:
Changes:
db/upgrade_26.py
|
1
|
+#!/usr/bin/python3
|
|
2
|
+#
|
|
3
|
+# Copyright (c) 2024 Philipp Kern
|
|
4
|
+#
|
|
5
|
+# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+# of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+# in the Software without restriction, including without limitation the rights
|
|
8
|
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+# copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+# furnished to do so, subject to the following conditions:
|
|
11
|
+#
|
|
12
|
+# The above copyright notice and this permission notice shall be included in
|
|
13
|
+# all copies or substantial portions of the Software.
|
|
14
|
+#
|
|
15
|
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+# SOFTWARE.
|
|
22
|
+
|
|
23
|
+def upgrade(db):
|
|
24
|
+ db.execute("""
|
|
25
|
+ CREATE TABLE node_with_ts_materialized AS
|
|
26
|
+ SELECT node.node_id,
|
|
27
|
+ node.parent,
|
|
28
|
+ node.first,
|
|
29
|
+ node.last,
|
|
30
|
+ mirrorrun_first.run AS first_run,
|
|
31
|
+ mirrorrun_first.archive_id,
|
|
32
|
+ mirrorrun_last.run AS last_run
|
|
33
|
+ FROM node
|
|
34
|
+ JOIN mirrorrun mirrorrun_first ON mirrorrun_first.mirrorrun_id = node.first
|
|
35
|
+ JOIN mirrorrun mirrorrun_last ON mirrorrun_last.mirrorrun_id = node.last;
|
|
36
|
+ CREATE UNIQUE INDEX node_with_ts_materialized_node_id
|
|
37
|
+ ON node_with_ts_materialized USING btree (node_id);
|
|
38
|
+ CREATE INDEX node_with_ts_materialized_parent
|
|
39
|
+ ON node_with_ts_materialized USING btree (parent);
|
|
40
|
+ """)
|
|
41
|
+
|
|
42
|
+ db.execute("UPDATE config SET value='26' WHERE name='db_revision' AND value='25'")
|
|
43
|
+
|
|
44
|
+# vim:set et:
|
|
45
|
+# vim:set ts=4:
|
|
46
|
+# vim:set shiftwidth=4: |
snapshot
... |
... |
@@ -117,6 +117,7 @@ class SnapshotDB |
117
|
117
|
def initialize(conf, logger)
|
118
|
118
|
@dbh = PG.connect(conf['connectstring'])
|
119
|
119
|
@logger = logger
|
|
120
|
+ @updated_node_ids = []
|
120
|
121
|
end
|
121
|
122
|
|
122
|
123
|
def get_primarykey_name(table);
|
... |
... |
@@ -129,6 +130,7 @@ class SnapshotDB |
129
|
130
|
@dbh.exec("BEGIN")
|
130
|
131
|
end
|
131
|
132
|
def commit()
|
|
133
|
+ _materialize_updated_nodes()
|
132
|
134
|
@dbh.exec("COMMIT")
|
133
|
135
|
end
|
134
|
136
|
|
... |
... |
@@ -180,10 +182,13 @@ class SnapshotDB |
180
|
182
|
results = insert(table, values, [pk_name])
|
181
|
183
|
values[pk_name] = results[pk_name]
|
182
|
184
|
end
|
|
185
|
+ if table == "node"
|
|
186
|
+ enqueue_node_id(values["node_id"])
|
|
187
|
+ end
|
183
|
188
|
values
|
184
|
189
|
end
|
185
|
190
|
|
186
|
|
- def update(table, set, where, returning=nil)
|
|
191
|
+ def _update(table, set, where, returning=nil)
|
187
|
192
|
setclause = set.each_key.collect { |k, index| "#{k}=$#{index + 1}" }.join(",")
|
188
|
193
|
whereclause = where.each_key.collect { |k, index| where[k].nil? ? "#{k} IS NULL" : "#{k}=$#{index + set.each_key.length + 1}" }.join(" AND ")
|
189
|
194
|
|
... |
... |
@@ -200,7 +205,11 @@ class SnapshotDB |
200
|
205
|
end
|
201
|
206
|
|
202
|
207
|
def update_one(table, set, where)
|
203
|
|
- r = update(table, set, where)
|
|
208
|
+ r = _update(table, set, where)
|
|
209
|
+
|
|
210
|
+ if table == "node"
|
|
211
|
+ enqueue_node_id(where["node_id"])
|
|
212
|
+ end
|
204
|
213
|
|
205
|
214
|
raise "Did not update exactly one row in update_one" unless r.cmd_tuples() ==1
|
206
|
215
|
return r
|
... |
... |
@@ -229,6 +238,39 @@ class SnapshotDB |
229
|
238
|
return row
|
230
|
239
|
end
|
231
|
240
|
end
|
|
241
|
+
|
|
242
|
+ def enqueue_node_id(node_id)
|
|
243
|
+ @updated_node_ids.append(node_id)
|
|
244
|
+ end
|
|
245
|
+
|
|
246
|
+ def _materialize_updated_nodes
|
|
247
|
+ if @updated_node_ids.length == 0
|
|
248
|
+ return
|
|
249
|
+ end
|
|
250
|
+ node_placeholders = (1..@updated_node_ids.length).map { |i| "$#{i}" }.join(', ')
|
|
251
|
+ execute("WITH upd AS (
|
|
252
|
+ SELECT node.node_id,
|
|
253
|
+ node.parent,
|
|
254
|
+ node.first,
|
|
255
|
+ node.last,
|
|
256
|
+ mirrorrun_first.run AS first_run,
|
|
257
|
+ mirrorrun_first.archive_id,
|
|
258
|
+ mirrorrun_last.run AS last_run
|
|
259
|
+ FROM node
|
|
260
|
+ JOIN mirrorrun mirrorrun_first ON mirrorrun_first.mirrorrun_id = node.first
|
|
261
|
+ JOIN mirrorrun mirrorrun_last ON mirrorrun_last.mirrorrun_id = node.last
|
|
262
|
+ WHERE node.node_id IN (#{node_placeholders})
|
|
263
|
+ )
|
|
264
|
+ INSERT INTO node_with_ts_materialized
|
|
265
|
+ SELECT * FROM upd
|
|
266
|
+ ON CONFLICT (node_id) DO UPDATE
|
|
267
|
+ SET parent = EXCLUDED.parent,
|
|
268
|
+ first = EXCLUDED.first,
|
|
269
|
+ last = EXCLUDED.last,
|
|
270
|
+ first_run = EXCLUDED.first_run,
|
|
271
|
+ archive_id = EXCLUDED.archive_id,
|
|
272
|
+ last_run = EXCLUDED.last_run", *@updated_node_ids)
|
|
273
|
+ end
|
232
|
274
|
end
|
233
|
275
|
|
234
|
276
|
class FSNode
|
|