[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[snapshot/master] Enqueue directories in randomish order.



Previously we first checked 00/00/*, then 00/01/*, etc.  This may not
be optimal if all the 0*/*/* files live on disk 1, the 1*/*/* files on
disk 2, and so on, because all the work then hits one disk at a time.

So now we enqueue all the */00/* files in random order, then the */01/*
files, and so on up to */ff/*.  Hopefully this spreads operations out
over the disks somewhat better.
---
 fsck/check-1/hash |   26 +++++++++++++++++++-------
 1 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/fsck/check-1/hash b/fsck/check-1/hash
index 49c03d5..95ddd3a 100755
--- a/fsck/check-1/hash
+++ b/fsck/check-1/hash
@@ -26,6 +26,7 @@ import os
 import re
 import sys
 import threading
+import random
 import Queue
 sys.path.append(os.path.abspath(os.path.dirname(sys.argv[0]))+'/lib/python2.6/site-packages')
 import hasher
@@ -51,23 +52,33 @@ class HashingThread(threading.Thread):
                 actual_hash = hasher.hash_file(os.path.join(path, filename))
                 if actual_hash != filename:
                     errors.append((filename, 'Hash mismatch (%s)'%(actual_hash)))
-                    if verbose >= 2: print "%s: Hash mismatch (%s)"%(filename, actual_hash)
+                    if verbose >= 2: print "%s: Hash mismatch (%s)"%(filename, actual_hash); sys.stdout.flush()
                 else:
                     if verbose >= 4: print "%s: OK"%(filename)
                     elif verbose >= 3: sys.stdout.write("."); sys.stdout.flush()
             except Exception, e:
                 errors.append((filename, 'Exception: %s'%(e)))
-                if verbose >= 2: print "%s: Exception (%s)"%(filename, e); sys.stdout.flush()
+                if verbose >= 2: print "%s: Exception (%s)"%(filename, e)
             finally:
                 workqueue.task_done()
 
 for x in xrange ( numthreads ):
    HashingThread().start()
 
+def enqueue(q):
+    random.shuffle(q)
+    while len(q) > 0:
+        i = q.pop()
+        if verbose >= 6: print "queueing %s"%(i[1])
+        workqueue.put(i)
+
+inqueue = []
 try:
-    for byte1 in xrange(256):
-        if verbose >= 1: print "queueing %02x:"%(byte1); sys.stdout.flush()
-        for byte2 in xrange(256):
+    for byte2 in xrange(256):
+        if verbose >= 1: print "queueing ..%02x:"%(byte2); sys.stdout.flush()
+        b = range(256)
+        random.shuffle(b)
+        for byte1 in b:
             if verbose >= 2: print "queueing %02x%02x:"%(byte1, byte2); sys.stdout.flush()
             p = os.path.join(farmpath, '%02x'%byte1, '%02x'%byte2)
             for filename in os.listdir(p):
@@ -75,8 +86,9 @@ try:
                     errors.append((filename, 'Invalid name'))
                     if verbose >= 2: print "Invalid name %s"%(filename); sys.stdout.flush()
                     continue
-                if verbose >= 6: print "queueing %s"%(filename)
-                workqueue.put((p, filename))
+                inqueue.append((p,filename))
+            if len(inqueue) > 2000: enqueue(inqueue)
+    enqueue(inqueue)
 finally:
     for x in xrange ( numthreads ):
        workqueue.put(None)
-- 
1.7.2.5



Reply to: