[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[snapshot/master] Deal with corrupt package-file.map.bz2 files



---
 snapshot |   89 +++++++++++++++++++++++++++++++++++--------------------------
 1 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/snapshot b/snapshot
index a4ceffd..9bd330b 100755
--- a/snapshot
+++ b/snapshot
@@ -891,6 +891,8 @@ class PackageIndexer
 	def index_mirrorrun_from_index()
 		index = open_file('/indices/package-file.map.bz2')
 		return unless index
+		@sourcepkgs = {}
+		@binarypkgs = {}
 
 		previously_seen = nil
 		if (@quick)
@@ -905,9 +907,14 @@ class PackageIndexer
 				prev_index = open_file('/indices/package-file.map.bz2', prev_run_id)
 				unless prev_index.nil?
 					previously_seen = {}
-					prev_index = BZ2::Reader.new(prev_index)
-					prev_index.each_line(sep_string='') do |block|
-						previously_seen[Digest::SHA1.digest(block)] = 1
+					begin
+						prev_index = BZ2::Reader.new(prev_index)
+						prev_index.each_line(sep_string='') do |block|
+							previously_seen[Digest::SHA1.digest(block)] = 1
+						end
+					rescue BZ2::EOZError => e
+						$logger.warn("[indexrun ##{@mirrorrun_id}] previous (##{prev_run_id}) package-file.map is corrupt (BZ2::EOZError): #{e.message}")
+						return
 					end
 				else
 					$logger.warn("[indexrun ##{@mirrorrun_id}] quick mode selected but no previous (##{prev_run_id}) package-file.map")
@@ -915,46 +922,54 @@ class PackageIndexer
 			end
 		end
 
-		lineno = 0
-		index = BZ2::Reader.new(index)
-		index.each_line(sep_string='') do |block|
-			next if previously_seen and previously_seen.has_key? Digest::SHA1.digest(block)
-
-			e = {}
-			block.split("\n").each do |line|
-				key,value = line.split(/: */, 2)
-				e[key] = value
+		@db.dbdo('SAVEPOINT startofindexing')
+		begin
+			lineno = 0
+			index = BZ2::Reader.new(index)
+			index.each_line(sep_string='') do |block|
+				next if previously_seen and previously_seen.has_key? Digest::SHA1.digest(block)
+
+				e = {}
+				block.split("\n").each do |line|
+					key,value = line.split(/: */, 2)
+					e[key] = value
+					lineno += 1
+				end
 				lineno += 1
-			end
-			lineno += 1
 
-			unless hash_has_all_keys(e, %w(Path))
-				$logger.warn("[indexrun ##{@mirrorrun_id}] Block has no path element before line #{lineno}")
-				next
-			end
-
-			e['Path'][0..0] = '' if e['Path'][0..0] = '.'
+				unless hash_has_all_keys(e, %w(Path))
+					$logger.warn("[indexrun ##{@mirrorrun_id}] Block has no path element before line #{lineno}")
+					next
+				end
 
-			unless hash_has_all_keys(e, %w(Source Source-Version))
-				$logger.warn("[indexrun ##{@mirrorrun_id}] Block has incomplete source information before line #{lineno}")
-				next
-			end
+				e['Path'][0..0] = '' if e['Path'][0..0] = '.'
 
-			srcpkg = add_srcpkg(e['Source'], e['Source-Version'])
-			if not hash_has_any_key(e, %w(Binary-Version Binary Architecture))
-				inserted = insert_src_file_from_path(srcpkg, e['Path'])
-				$logger.debug("[indexrun ##{@mirrorrun_id}] " + (inserted ? "Inserting" : "Skipping already existing") + " #{e['Path']} for source #{e['Source']} #{e['Source-Version']}")
-			else
-				unless hash_has_all_keys(e, %w(Binary-Version Binary Architecture))
-					$logger.warn("[indexrun ##{@mirrorrun_id}] Block has incomplete binary information before line #{lineno}")
+				unless hash_has_all_keys(e, %w(Source Source-Version))
+					$logger.warn("[indexrun ##{@mirrorrun_id}] Block has incomplete source information before line #{lineno}")
 					next
 				end
 
-				binpkg = add_binpkg(e['Binary'], e['Binary-Version'], srcpkg)
-				inserted = insert_bin_file_from_path(binpkg, e['Path'], e['Architecture'])
-				$logger.debug("[indexrun ##{@mirrorrun_id}] " + (inserted ? "Inserting" : "Skipping already existing") + " #{e['Path']} for binary #{e['Binary']} #{e['Binary-Version']}")
+				srcpkg = add_srcpkg(e['Source'], e['Source-Version'])
+				if not hash_has_any_key(e, %w(Binary-Version Binary Architecture))
+					inserted = insert_src_file_from_path(srcpkg, e['Path'])
+					$logger.debug("[indexrun ##{@mirrorrun_id}] " + (inserted ? "Inserting" : "Skipping already existing") + " #{e['Path']} for source #{e['Source']} #{e['Source-Version']}")
+				else
+					unless hash_has_all_keys(e, %w(Binary-Version Binary Architecture))
+						$logger.warn("[indexrun ##{@mirrorrun_id}] Block has incomplete binary information before line #{lineno}")
+						next
+					end
+
+					binpkg = add_binpkg(e['Binary'], e['Binary-Version'], srcpkg)
+					inserted = insert_bin_file_from_path(binpkg, e['Path'], e['Architecture'])
+					$logger.debug("[indexrun ##{@mirrorrun_id}] " + (inserted ? "Inserting" : "Skipping already existing") + " #{e['Path']} for binary #{e['Binary']} #{e['Binary-Version']}")
+				end
 			end
+		rescue BZ2::EOZError => e
+			@db.dbdo('ROLLBACK TO startofindexing')
+			$logger.warn("[indexrun ##{@mirrorrun_id}] package-file.map is corrupt (BZ2::EOZError): #{e.message}")
+			return
 		end
+		@db.dbdo('RELEASE SAVEPOINT startofindexing')
 		source = "index"
 		source += '(Q)' if @quick
 		return source
@@ -1172,6 +1187,8 @@ class PackageIndexer
 
 	# If there is no /indices/package-file.map.bz2 we have to fall back to recursing over the tree
 	def index_mirrorrun_from_parsing()
+		@sourcepkgs = {}
+		@binarypkgs = {}
 		if not @quick
 			query = "SELECT path, name, hash FROM dirtree(?) WHERE filetype='-' AND name SIMILAR TO '%.(deb|udeb|dsc)' AND size != 0"
 			args = [@mirrorrun_id]
@@ -1218,8 +1235,6 @@ class PackageIndexer
 					barf("Mirrorrun ##{@only_this_mirrorrun} does not exist.")
 				end
 				$logger.info("Indexing mirrorrun ##{@only_this_mirrorrun} of #{row['archive']} from #{row['run']} as requested")
-				@sourcepkgs = {}
-				@binarypkgs = {}
 				source = index_mirrorrun(@only_this_mirrorrun)
 				@db.dbdo('DELETE FROM indexed_mirrorrun WHERE mirrorrun_id=?', @only_this_mirrorrun)
 				@db.insert('indexed_mirrorrun', {'mirrorrun_id' => @only_this_mirrorrun, 'source' => source })
@@ -1230,8 +1245,6 @@ class PackageIndexer
 					   FROM mirrorrun WHERE NOT mirrorrun_id IN (SELECT mirrorrun_id FROM indexed_mirrorrun) ORDER BY run") do |row|
 				@db.begin
 				$logger.info("Indexing mirrorrun ##{row['mirrorrun_id']} of #{row['archive']} from #{row['run']}")
-				@sourcepkgs = {}
-				@binarypkgs = {}
 				source = index_mirrorrun(row['mirrorrun_id'])
 				@db.insert('indexed_mirrorrun', {'mirrorrun_id' => row['mirrorrun_id'], 'source' => source })
 				@db.commit
-- 
1.7.2.5


Reply to: