[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[PATCH 2/2] dep11: Validate files in parallel when validating a directory



This patch validates all files in parallel when we check a whole directory.
This dramatically speeds up validation on systems with multiple CPUs.

(Patch is also attached, in case GMail screws with the mail)

From 4cfcd0918206e0653f5ff5af0e8321c2efc38a9f Mon Sep 17 00:00:00 2001
From: Matthias Klumpp <matthias@tenstral.net>
Date: Tue, 20 Jun 2017 22:19:36 +0200
Subject: [PATCH 2/2] dep11: Validate files in parallel when validating a
 directory

---
 scripts/debian/dep11-basic-validate.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/scripts/debian/dep11-basic-validate.py b/scripts/debian/dep11-basic-validate.py
index 6f338e1d2..40935da0d 100755
--- a/scripts/debian/dep11-basic-validate.py
+++ b/scripts/debian/dep11-basic-validate.py
@@ -22,6 +22,7 @@ import gzip
 import lzma
 from voluptuous import Schema, Required, All, Any, Length, Range, Match, Url
 from optparse import OptionParser
+import multiprocessing as mp

 schema_header = Schema({
     Required('File'): All(str, 'DEP-11', msg="Must be \"DEP-11\""),
@@ -162,6 +163,9 @@ def validate_file(fname):

 def validate_dir(dirname):
     ret = True
+    asfiles = []
+
+    # find interesting files
     for root, subfolders, files in os.walk(dirname):
         for fname in files:
             fpath = os.path.join(root, fname)
@@ -169,8 +173,14 @@ def validate_dir(dirname):
                 add_issue("FATAL: Symlinks are not allowed")
                 return False
             if fname.endswith(".yml.gz") or fname.endswith(".yml.xz"):
-                if not validate_file(fpath):
-                    ret = False
+                asfiles.append(fpath)
+
+    # validate the files, use multiprocessing to speed up the validation
+    with mp.Pool() as pool:
+        results = [pool.apply_async(validate_file, (fname,)) for fname in asfiles]
+        for res in results:
+            if not res.get():
+                ret = False

     return ret

-- 
2.11.0
From 4cfcd0918206e0653f5ff5af0e8321c2efc38a9f Mon Sep 17 00:00:00 2001
From: Matthias Klumpp <matthias@tenstral.net>
Date: Tue, 20 Jun 2017 22:19:36 +0200
Subject: [PATCH 2/2] dep11: Validate files in parallel when validating a
 directory

---
 scripts/debian/dep11-basic-validate.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/scripts/debian/dep11-basic-validate.py b/scripts/debian/dep11-basic-validate.py
index 6f338e1d2..40935da0d 100755
--- a/scripts/debian/dep11-basic-validate.py
+++ b/scripts/debian/dep11-basic-validate.py
@@ -22,6 +22,7 @@ import gzip
 import lzma
 from voluptuous import Schema, Required, All, Any, Length, Range, Match, Url
 from optparse import OptionParser
+import multiprocessing as mp
 
 schema_header = Schema({
     Required('File'): All(str, 'DEP-11', msg="Must be \"DEP-11\""),
@@ -162,6 +163,9 @@ def validate_file(fname):
 
 def validate_dir(dirname):
     ret = True
+    asfiles = []
+
+    # find interesting files
     for root, subfolders, files in os.walk(dirname):
         for fname in files:
             fpath = os.path.join(root, fname)
@@ -169,8 +173,14 @@ def validate_dir(dirname):
                 add_issue("FATAL: Symlinks are not allowed")
                 return False
             if fname.endswith(".yml.gz") or fname.endswith(".yml.xz"):
-                if not validate_file(fpath):
-                    ret = False
+                asfiles.append(fpath)
+
+    # validate the files, use multiprocessing to speed up the validation
+    with mp.Pool() as pool:
+        results = [pool.apply_async(validate_file, (fname,)) for fname in asfiles]
+        for res in results:
+            if not res.get():
+                ret = False
 
     return ret
 
-- 
2.11.0


Reply to: