[PATCH 2/2] dep11: Validate files in parallel when validating a directory
This patch validates all files in parallel when we check a whole directory.
This dramatically speeds up validation on systems with multiple CPUs.
(The patch is also attached, in case Gmail mangles the inline copy.)
From 4cfcd0918206e0653f5ff5af0e8321c2efc38a9f Mon Sep 17 00:00:00 2001
From: Matthias Klumpp <matthias@tenstral.net>
Date: Tue, 20 Jun 2017 22:19:36 +0200
Subject: [PATCH 2/2] dep11: Validate files in parallel when validating a
directory
---
scripts/debian/dep11-basic-validate.py | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/scripts/debian/dep11-basic-validate.py b/scripts/debian/dep11-basic-validate.py
index 6f338e1d2..40935da0d 100755
--- a/scripts/debian/dep11-basic-validate.py
+++ b/scripts/debian/dep11-basic-validate.py
@@ -22,6 +22,7 @@ import gzip
import lzma
from voluptuous import Schema, Required, All, Any, Length, Range, Match, Url
from optparse import OptionParser
+import multiprocessing as mp
schema_header = Schema({
Required('File'): All(str, 'DEP-11', msg="Must be \"DEP-11\""),
@@ -162,6 +163,9 @@ def validate_file(fname):
def validate_dir(dirname):
ret = True
+ asfiles = []
+
+ # find interesting files
for root, subfolders, files in os.walk(dirname):
for fname in files:
fpath = os.path.join(root, fname)
@@ -169,8 +173,14 @@ def validate_dir(dirname):
add_issue("FATAL: Symlinks are not allowed")
return False
if fname.endswith(".yml.gz") or fname.endswith(".yml.xz"):
- if not validate_file(fpath):
- ret = False
+ asfiles.append(fpath)
+
+ # validate the files, use multiprocessing to speed up the validation
+ with mp.Pool() as pool:
+ results = [pool.apply_async(validate_file, (fname,)) for fname in asfiles]
+ for res in results:
+ if not res.get():
+ ret = False
return ret
--
2.11.0
From 4cfcd0918206e0653f5ff5af0e8321c2efc38a9f Mon Sep 17 00:00:00 2001
From: Matthias Klumpp <matthias@tenstral.net>
Date: Tue, 20 Jun 2017 22:19:36 +0200
Subject: [PATCH 2/2] dep11: Validate files in parallel when validating a
directory
---
scripts/debian/dep11-basic-validate.py | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/scripts/debian/dep11-basic-validate.py b/scripts/debian/dep11-basic-validate.py
index 6f338e1d2..40935da0d 100755
--- a/scripts/debian/dep11-basic-validate.py
+++ b/scripts/debian/dep11-basic-validate.py
@@ -22,6 +22,7 @@ import gzip
import lzma
from voluptuous import Schema, Required, All, Any, Length, Range, Match, Url
from optparse import OptionParser
+import multiprocessing as mp
schema_header = Schema({
Required('File'): All(str, 'DEP-11', msg="Must be \"DEP-11\""),
@@ -162,6 +163,9 @@ def validate_file(fname):
def validate_dir(dirname):
ret = True
+ asfiles = []
+
+ # find interesting files
for root, subfolders, files in os.walk(dirname):
for fname in files:
fpath = os.path.join(root, fname)
@@ -169,8 +173,14 @@ def validate_dir(dirname):
add_issue("FATAL: Symlinks are not allowed")
return False
if fname.endswith(".yml.gz") or fname.endswith(".yml.xz"):
- if not validate_file(fpath):
- ret = False
+ asfiles.append(fpath)
+
+ # validate the files, use multiprocessing to speed up the validation
+ with mp.Pool() as pool:
+ results = [pool.apply_async(validate_file, (fname,)) for fname in asfiles]
+ for res in results:
+ if not res.get():
+ ret = False
return ret
--
2.11.0
Reply to: