[snapshot/master] Enqueue directories in randomish order.
Previously we first checked 00/00/*, then 00/01/*, etc. This may not
be optimal if all the 0*/*/* files live on disk 1, the 1*/*/* files on
disk 2, etc., since the work would then hit one disk at a time.
So now we enqueue all the */00/* files in random order, then the */01/*
files, and so on until */ff/*. Hopefully this spreads operations out
over the disks somewhat better.
---
fsck/check-1/hash | 26 +++++++++++++++++++-------
1 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/fsck/check-1/hash b/fsck/check-1/hash
index 49c03d5..95ddd3a 100755
--- a/fsck/check-1/hash
+++ b/fsck/check-1/hash
@@ -26,6 +26,7 @@ import os
import re
import sys
import threading
+import random
import Queue
sys.path.append(os.path.abspath(os.path.dirname(sys.argv[0]))+'/lib/python2.6/site-packages')
import hasher
@@ -51,23 +52,33 @@ class HashingThread(threading.Thread):
actual_hash = hasher.hash_file(os.path.join(path, filename))
if actual_hash != filename:
errors.append((filename, 'Hash mismatch (%s)'%(actual_hash)))
- if verbose >= 2: print "%s: Hash mismatch (%s)"%(filename, actual_hash)
+ if verbose >= 2: print "%s: Hash mismatch (%s)"%(filename, actual_hash); sys.stdout.flush()
else:
if verbose >= 4: print "%s: OK"%(filename)
elif verbose >= 3: sys.stdout.write("."); sys.stdout.flush()
except Exception, e:
errors.append((filename, 'Exception: %s'%(e)))
- if verbose >= 2: print "%s: Exception (%s)"%(filename, e); sys.stdout.flush()
+ if verbose >= 2: print "%s: Exception (%s)"%(filename, e)
finally:
workqueue.task_done()
for x in xrange ( numthreads ):
HashingThread().start()
+def enqueue(q):
+ random.shuffle(q)
+ while len(q) > 0:
+ i = q.pop()
+ if verbose >= 6: print "queueing %s"%(i[1])
+ workqueue.put(i)
+
+inqueue = []
try:
- for byte1 in xrange(256):
- if verbose >= 1: print "queueing %02x:"%(byte1); sys.stdout.flush()
- for byte2 in xrange(256):
+ for byte2 in xrange(256):
+ if verbose >= 1: print "queueing ..%02x:"%(byte2); sys.stdout.flush()
+ b = range(256)
+ random.shuffle(b)
+ for byte1 in b:
if verbose >= 2: print "queueing %02x%02x:"%(byte1, byte2); sys.stdout.flush()
p = os.path.join(farmpath, '%02x'%byte1, '%02x'%byte2)
for filename in os.listdir(p):
@@ -75,8 +86,9 @@ try:
errors.append((filename, 'Invalid name'))
if verbose >= 2: print "Invalid name %s"%(filename); sys.stdout.flush()
continue
- if verbose >= 6: print "queueing %s"%(filename)
- workqueue.put((p, filename))
+ inqueue.append((p,filename))
+ if len(inqueue) > 2000: enqueue(inqueue)
+ enqueue(inqueue)
finally:
for x in xrange ( numthreads ):
workqueue.put(None)
--
1.7.2.5
Reply to: