[snapshot/master] Deal with corrupt package-file.map.bz2 files
---
snapshot | 89 +++++++++++++++++++++++++++++++++++--------------------------
1 files changed, 51 insertions(+), 38 deletions(-)
diff --git a/snapshot b/snapshot
index a4ceffd..9bd330b 100755
--- a/snapshot
+++ b/snapshot
@@ -891,6 +891,8 @@ class PackageIndexer
def index_mirrorrun_from_index()
index = open_file('/indices/package-file.map.bz2')
return unless index
+ @sourcepkgs = {}
+ @binarypkgs = {}
previously_seen = nil
if (@quick)
@@ -905,9 +907,14 @@ class PackageIndexer
prev_index = open_file('/indices/package-file.map.bz2', prev_run_id)
unless prev_index.nil?
previously_seen = {}
- prev_index = BZ2::Reader.new(prev_index)
- prev_index.each_line(sep_string='') do |block|
- previously_seen[Digest::SHA1.digest(block)] = 1
+ begin
+ prev_index = BZ2::Reader.new(prev_index)
+ prev_index.each_line(sep_string='') do |block|
+ previously_seen[Digest::SHA1.digest(block)] = 1
+ end
+ rescue BZ2::EOZError => e
+ $logger.warn("[indexrun ##{@mirrorrun_id}] previous (##{prev_run_id}) package-file.map is corrupt (BZ2::EOZError): #{e.message}")
+ return
end
else
$logger.warn("[indexrun ##{@mirrorrun_id}] quick mode selected but no previous (##{prev_run_id}) package-file.map")
@@ -915,46 +922,54 @@ class PackageIndexer
end
end
- lineno = 0
- index = BZ2::Reader.new(index)
- index.each_line(sep_string='') do |block|
- next if previously_seen and previously_seen.has_key? Digest::SHA1.digest(block)
-
- e = {}
- block.split("\n").each do |line|
- key,value = line.split(/: */, 2)
- e[key] = value
+ @db.dbdo('SAVEPOINT startofindexing')
+ begin
+ lineno = 0
+ index = BZ2::Reader.new(index)
+ index.each_line(sep_string='') do |block|
+ next if previously_seen and previously_seen.has_key? Digest::SHA1.digest(block)
+
+ e = {}
+ block.split("\n").each do |line|
+ key,value = line.split(/: */, 2)
+ e[key] = value
+ lineno += 1
+ end
lineno += 1
- end
- lineno += 1
- unless hash_has_all_keys(e, %w(Path))
- $logger.warn("[indexrun ##{@mirrorrun_id}] Block has no path element before line #{lineno}")
- next
- end
-
- e['Path'][0..0] = '' if e['Path'][0..0] = '.'
+ unless hash_has_all_keys(e, %w(Path))
+ $logger.warn("[indexrun ##{@mirrorrun_id}] Block has no path element before line #{lineno}")
+ next
+ end
- unless hash_has_all_keys(e, %w(Source Source-Version))
- $logger.warn("[indexrun ##{@mirrorrun_id}] Block has incomplete source information before line #{lineno}")
- next
- end
+ e['Path'][0..0] = '' if e['Path'][0..0] == '.' # compare (==), not assign: drop the leading '.' only when it is present
- srcpkg = add_srcpkg(e['Source'], e['Source-Version'])
- if not hash_has_any_key(e, %w(Binary-Version Binary Architecture))
- inserted = insert_src_file_from_path(srcpkg, e['Path'])
- $logger.debug("[indexrun ##{@mirrorrun_id}] " + (inserted ? "Inserting" : "Skipping already existing") + " #{e['Path']} for source #{e['Source']} #{e['Source-Version']}")
- else
- unless hash_has_all_keys(e, %w(Binary-Version Binary Architecture))
- $logger.warn("[indexrun ##{@mirrorrun_id}] Block has incomplete binary information before line #{lineno}")
+ unless hash_has_all_keys(e, %w(Source Source-Version))
+ $logger.warn("[indexrun ##{@mirrorrun_id}] Block has incomplete source information before line #{lineno}")
next
end
- binpkg = add_binpkg(e['Binary'], e['Binary-Version'], srcpkg)
- inserted = insert_bin_file_from_path(binpkg, e['Path'], e['Architecture'])
- $logger.debug("[indexrun ##{@mirrorrun_id}] " + (inserted ? "Inserting" : "Skipping already existing") + " #{e['Path']} for binary #{e['Binary']} #{e['Binary-Version']}")
+ srcpkg = add_srcpkg(e['Source'], e['Source-Version'])
+ if not hash_has_any_key(e, %w(Binary-Version Binary Architecture))
+ inserted = insert_src_file_from_path(srcpkg, e['Path'])
+ $logger.debug("[indexrun ##{@mirrorrun_id}] " + (inserted ? "Inserting" : "Skipping already existing") + " #{e['Path']} for source #{e['Source']} #{e['Source-Version']}")
+ else
+ unless hash_has_all_keys(e, %w(Binary-Version Binary Architecture))
+ $logger.warn("[indexrun ##{@mirrorrun_id}] Block has incomplete binary information before line #{lineno}")
+ next
+ end
+
+ binpkg = add_binpkg(e['Binary'], e['Binary-Version'], srcpkg)
+ inserted = insert_bin_file_from_path(binpkg, e['Path'], e['Architecture'])
+ $logger.debug("[indexrun ##{@mirrorrun_id}] " + (inserted ? "Inserting" : "Skipping already existing") + " #{e['Path']} for binary #{e['Binary']} #{e['Binary-Version']}")
+ end
end
+ rescue BZ2::EOZError => e
+ @db.dbdo('ROLLBACK TO startofindexing')
+ $logger.warn("[indexrun ##{@mirrorrun_id}] package-file.map is corrupt (BZ2::EOZError): #{e.message}")
+ return
end
+ @db.dbdo('RELEASE SAVEPOINT startofindexing')
source = "index"
source += '(Q)' if @quick
return source
@@ -1172,6 +1187,8 @@ class PackageIndexer
# If there is no /indices/package-file.map.bz2 we have to fall back to recursing over the tree
def index_mirrorrun_from_parsing()
+ @sourcepkgs = {}
+ @binarypkgs = {}
if not @quick
query = "SELECT path, name, hash FROM dirtree(?) WHERE filetype='-' AND name SIMILAR TO '%.(deb|udeb|dsc)' AND size != 0"
args = [@mirrorrun_id]
@@ -1218,8 +1235,6 @@ class PackageIndexer
barf("Mirrorrun ##{@only_this_mirrorrun} does not exist.")
end
$logger.info("Indexing mirrorrun ##{@only_this_mirrorrun} of #{row['archive']} from #{row['run']} as requested")
- @sourcepkgs = {}
- @binarypkgs = {}
source = index_mirrorrun(@only_this_mirrorrun)
@db.dbdo('DELETE FROM indexed_mirrorrun WHERE mirrorrun_id=?', @only_this_mirrorrun)
@db.insert('indexed_mirrorrun', {'mirrorrun_id' => @only_this_mirrorrun, 'source' => source })
@@ -1230,8 +1245,6 @@ class PackageIndexer
FROM mirrorrun WHERE NOT mirrorrun_id IN (SELECT mirrorrun_id FROM indexed_mirrorrun) ORDER BY run") do |row|
@db.begin
$logger.info("Indexing mirrorrun ##{row['mirrorrun_id']} of #{row['archive']} from #{row['run']}")
- @sourcepkgs = {}
- @binarypkgs = {}
source = index_mirrorrun(row['mirrorrun_id'])
@db.insert('indexed_mirrorrun', {'mirrorrun_id' => row['mirrorrun_id'], 'source' => source })
@db.commit
--
1.7.2.5
Reply to: