[dak/master] gps timeout/error handling
Return the result of the apt-ftparchive (a-f) call.
Additionally, fetch that result using a timeout.
When either a TimeoutError occurs or the result is non-zero, complain about it
and then exit non-zero, so that dinstall can stop.
Signed-off-by: Joerg Jaspert <joerg@debian.org>
---
dak/generate_packages_sources.py | 24 ++++++++++++++++++++++--
1 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/dak/generate_packages_sources.py b/dak/generate_packages_sources.py
index bdd3180..07a64b6 100755
--- a/dak/generate_packages_sources.py
+++ b/dak/generate_packages_sources.py
@@ -31,7 +31,7 @@ import sys
import apt_pkg
from tempfile import mkstemp, mkdtemp
import commands
-from multiprocessing import Pool
+from multiprocessing import Pool, TimeoutError
from daklib import daklog
from daklib.dbconn import *
@@ -341,6 +341,7 @@ tree "dists/oldstable-proposed-updates/main"
(result, output) = commands.getstatusoutput('apt-ftparchive -o APT::FTPArchive::Contents=off generate %s' % os.path.basename(ac_name))
sn="a-f %s,%s: " % (suite, arch)
print sn + output.replace('\n', '\n%s' % (sn))
+ return result
# Clean up any left behind files
finally:
@@ -401,6 +402,7 @@ def main ():
startdir = os.getcwd()
os.chdir(cnf["Dir::TempPath"])
+ broken=[]
# For each given suite, each architecture, run one apt-ftparchive
for s in suites:
# Setup a multiprocessing Pool. As many workers as we have CPU cores.
@@ -408,17 +410,35 @@ def main ():
arch_list=get_suite_architectures(s.suite_name, skipsrc=False, skipall=True, session=session)
Logger.log(['generating output for Suite %s, Architectures %s' % (s.suite_name, map(sname, arch_list))])
for a in arch_list:
- pool.apply_async(generate_packages_sources, (a.arch_string, s.suite_name, cnf["Dir::TempPath"]))
+ try:
+ result=pool.apply_async(generate_packages_sources, (a.arch_string, s.suite_name, cnf["Dir::TempPath"]))
+ # Get the result. Should it take too long (a-f hanging), break out.
+ r=result.get(timeout=3600)
+ except TimeoutError:
+ broken.append("Timeout: %s - %s" % (s.suite_name, a.arch_string))
+ # Now try the next architecture
+ continue
+
+ if r:
+ # As long as we get 0, we are fine. Otherwise we yell about it later.
+ broken.append("Breakage: %s - %s returned %s" % (s.suite_name, a.arch_string, r))
# No more work will be added to our pool, close it and then wait for all to finish
pool.close()
pool.join()
+ if len(broken) > 0:
+ Logger.log(['Trouble: %s' % (broken)])
+ print "Trouble: %s" % (broken)
+
os.chdir(startdir)
# this script doesn't change the database
session.close()
Logger.close()
+ if len(broken) > 0:
+ sys.exit(1)
+
#######################################################################################
if __name__ == '__main__':
--
1.7.2.5
Reply to: