[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[Git][snapshot-team/snapshot][master] 2 commits: Create a "manually" maintained materialized view for node metadata



Title: GitLab

Baptiste Beauplat pushed to branch master at snapshot / snapshot

Commits:

  • 5aa020c0
    by Philipp Kern at 2025-02-07T01:11:50+01:00
    Create a "manually" maintained materialized view for node metadata
    
    It's necessary to materialize node_with_ts to speed up queries
    massively, but materialized views of this size (6 GB+) come with a hefty
    penalty for the WAL - which hurts in both synchronization and backup
    space. Instead add a manually maintained table that is updated whenever
    a node is touched. This will still generate a lot of churn by design
    (after all, it will be executed for every node that is still alive at
    the current import time), but much less than regenerating the whole
    view.
    
  • 2620e7f1
    by Baptiste Beauplat at 2025-02-11T18:54:36+01:00
    Merge branch 'snapshot-db-fixes'
    
    Signed-off-by: Baptiste Beauplat <lyknode@debian.org>
    

2 changed files:

Changes:

  • db/upgrade_26.py
    1
    +#!/usr/bin/python3
    
    2
    +#
    
    3
    +# Copyright (c) 2024 Philipp Kern
    
    4
    +#
    
    5
    +# Permission is hereby granted, free of charge, to any person obtaining a copy
    
    6
    +# of this software and associated documentation files (the "Software"), to deal
    
    7
    +# in the Software without restriction, including without limitation the rights
    
    8
    +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    
    9
    +# copies of the Software, and to permit persons to whom the Software is
    
    10
    +# furnished to do so, subject to the following conditions:
    
    11
    +#
    
    12
    +# The above copyright notice and this permission notice shall be included in
    
    13
    +# all copies or substantial portions of the Software.
    
    14
    +#
    
    15
    +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    
    16
    +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    
    17
    +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    
    18
    +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    
    19
    +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    
    20
    +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    
    21
    +# SOFTWARE.
    
    22
    +
    
    23
    +def upgrade(db):
    
    24
    +    db.execute("""
    
    25
    +        CREATE TABLE node_with_ts_materialized AS
    
    26
    +          SELECT node.node_id,
    
    27
    +            node.parent,
    
    28
    +            node.first,
    
    29
    +            node.last,
    
    30
    +            mirrorrun_first.run AS first_run,
    
    31
    +            mirrorrun_first.archive_id,
    
    32
    +            mirrorrun_last.run AS last_run
    
    33
    +          FROM node
    
    34
    +            JOIN mirrorrun mirrorrun_first ON mirrorrun_first.mirrorrun_id = node.first
    
    35
    +            JOIN mirrorrun mirrorrun_last ON mirrorrun_last.mirrorrun_id = node.last;
    
    36
    +        CREATE UNIQUE INDEX node_with_ts_materialized_node_id
    
    37
    +          ON node_with_ts_materialized USING btree (node_id);
    
    38
    +        CREATE INDEX node_with_ts_materialized_parent
    
    39
    +          ON node_with_ts_materialized USING btree (parent);
    
    40
    +        """)
    
    41
    +
    
    42
    +    db.execute("UPDATE config SET value='26' WHERE name='db_revision' AND value='25'")
    
    43
    +
    
    44
    +# vim:set et:
    
    45
    +# vim:set ts=4:
    
    46
    +# vim:set shiftwidth=4:

  • snapshot
    ... ... @@ -117,6 +117,7 @@ class SnapshotDB
    117 117
     	def initialize(conf, logger)
    
    118 118
     		@dbh = PG.connect(conf['connectstring'])
    
    119 119
     		@logger = logger
    
    120
    +		@updated_node_ids = []
    
    120 121
     	end
    
    121 122
     
    
    122 123
     	def get_primarykey_name(table);
    
    ... ... @@ -129,6 +130,7 @@ class SnapshotDB
    129 130
     		@dbh.exec("BEGIN")
    
    130 131
     	end
    
    131 132
     	def commit()
    
    133
    +		_materialize_updated_nodes()
    
    132 134
     		@dbh.exec("COMMIT")
    
    133 135
     	end
    
    134 136
     
    
    ... ... @@ -180,10 +182,13 @@ class SnapshotDB
    180 182
     			results = insert(table, values, [pk_name])
    
    181 183
     			values[pk_name] = results[pk_name]
    
    182 184
     		end
    
    185
    +		if table == "node"
    
    186
    +			enqueue_node_id(values["node_id"])
    
    187
    +		end
    
    183 188
     		values
    
    184 189
     	end
    
    185 190
     
    
    186
    -	def update(table, set, where, returning=nil)
    
    191
    +	def _update(table, set, where, returning=nil)
    
    187 192
     		setclause = set.each_key.collect { |k, index| "#{k}=$#{index + 1}" }.join(",")
    
    188 193
     		whereclause = where.each_key.collect { |k, index| where[k].nil? ? "#{k} IS NULL" : "#{k}=$#{index + set.each_key.length + 1}" }.join(" AND ")
    
    189 194
     
    
    ... ... @@ -200,7 +205,11 @@ class SnapshotDB
    200 205
     	end
    
    201 206
     
    
    202 207
     	def update_one(table, set, where)
    
    203
    -		r = update(table, set, where)
    
    208
    +		r = _update(table, set, where)
    
    209
    +
    
    210
    +		if table == "node"
    
    211
    +			enqueue_node_id(where["node_id"])
    
    212
    +		end
    
    204 213
     
    
    205 214
     		raise "Did not update exactly one row in update_one" unless r.cmd_tuples() ==1
    
    206 215
     		return r
    
    ... ... @@ -229,6 +238,39 @@ class SnapshotDB
    229 238
     			return row
    
    230 239
     		end
    
    231 240
     	end
    
    241
    +
    
    242
    +	def enqueue_node_id(node_id)
    
    243
    +		@updated_node_ids.append(node_id)
    
    244
    +	end
    
    245
    +
    
    246
    +	def _materialize_updated_nodes
    
    247
    +		if @updated_node_ids.length == 0
    
    248
    +			return
    
    249
    +		end
    
    250
    +		node_placeholders = (1..@updated_node_ids.length).map { |i| "$#{i}" }.join(', ')
    
    251
    +		execute("WITH upd AS (
    
    252
    +			SELECT node.node_id,
    
    253
    +				node.parent,
    
    254
    +				node.first,
    
    255
    +				node.last,
    
    256
    +				mirrorrun_first.run AS first_run,
    
    257
    +				mirrorrun_first.archive_id,
    
    258
    +				mirrorrun_last.run AS last_run
    
    259
    +			FROM node
    
    260
    +				JOIN mirrorrun mirrorrun_first ON mirrorrun_first.mirrorrun_id = node.first
    
    261
    +				JOIN mirrorrun mirrorrun_last ON mirrorrun_last.mirrorrun_id = node.last
    
    262
    +			WHERE node.node_id IN (#{node_placeholders})
    
    263
    +		)
    
    264
    +		INSERT INTO node_with_ts_materialized
    
    265
    +			SELECT * FROM upd
    
    266
    +		ON CONFLICT (node_id) DO UPDATE
    
    267
    +			SET parent = EXCLUDED.parent,
    
    268
    +				first = EXCLUDED.first,
    
    269
    +				last = EXCLUDED.last,
    
    270
    +				first_run = EXCLUDED.first_run,
    
    271
    +				archive_id = EXCLUDED.archive_id,
    
    272
    +				last_run = EXCLUDED.last_run", *@updated_node_ids)
    
    273
    +	end
    
    232 274
     end
    
    233 275
     
    
    234 276
     class FSNode
    


  • Reply to: