[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Some patches for patch-tracker



Hi Sean.

I'm working on integrating your patch-tracker (which is a really useful
service, thanks for it!) with the PTS and with UDD (Ultimate Debian
Database). I've seen that you already prepared some JSON exporter for
putting data in the PTS, but it's probably not enough.

I'm attaching a few patches. The comments on them should be
descriptive enough, but I'm adding a few comments here:

 * 0004: I couldn't make reprepro work without this patch. Did you
manage to use it without?

 * 0007: this bug is triggered, for example, by the package libcvs-perl,
that causes a server error in patch-tracker.d.o.

Of course, if you have any doubts, you can just ask me.

BTW, I've already tested these patches on a test deployment on my
system, and there shouldn't be any regressions.

Once these patches are applied to the deployed instance, I just ask you
to execute the export_for_udd.py script, capture the stdout to some file
and expose that file some way, so the UDD and PTS can download it. I
already worked on a UDD gatherer for that data and a PTS patch to show it.

The execution of export_for_udd.py requires about an hour on my laptop,
but can be made considerably quicker by passing on the command line the
name of the file built at the previous execution, which is used as a cache
for packages that didn't change since the previous run.

Thanks, Giovanni.
-- 
Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Pisa, Italy

Web: http://poisson.phc.unipi.it/~mascellani
Jabber: g.mascellani@jabber.org / giovanni@elabor.homelinux.org
From fa1aabe95e7ae5f567cf8f76ac31d11daf10ded6 Mon Sep 17 00:00:00 2001
From: Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Date: Sun, 31 Jul 2011 11:33:35 +0200
Subject: [PATCH 1/9] Move makeDiffHandler in DB, so it can be used in other
 contexts.

---
 patchtracker/DB.py         |   17 +++++++++++++++++
 patchtracker/ReqHandler.py |   19 +++++--------------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/patchtracker/DB.py b/patchtracker/DB.py
index 0bb0966..0913b24 100644
--- a/patchtracker/DB.py
+++ b/patchtracker/DB.py
@@ -55,6 +55,9 @@ def srcpkg_collection_factory(cursor, row):
     info[field] = d[col]
   return (SourceArchive.SourcePackage(info), rest)
 
+class PackageWithoutDiffException(Exception):
+  def __init__(self, msg):
+    Exception.__init__(self, msg)
 
 class PatchTrackerDB:
   def __init__(self, dbname=Conf.database):
@@ -167,6 +170,20 @@ class PatchTrackerDB:
     except:
       return None
 
+  # XXX this is kinda ugly...
+  def makeDiffHandler(self, pkgname, vers):
+    dfile = self.findDiffGz(pkgname,vers)
+    if dfile:
+      from DiffGzHandler import DiffGzHandler
+      return DiffGzHandler(dfile)
+    else:
+      dfile = self.findDebTar(pkgname, vers)
+      if dfile:
+        from DebTarHandler import DebTarHandler
+        return DebTarHandler(dfile)
+      else:
+        raise PackageWithoutDiffException("can not find diff file for %s / %s"%(pkgname,vers))
+
   def prune(self):
     q = "DELETE FROM package_rel_map WHERE marked != 1"
     cursor = self.db.cursor()
diff --git a/patchtracker/ReqHandler.py b/patchtracker/ReqHandler.py
index 90fc75a..2ac1afb 100755
--- a/patchtracker/ReqHandler.py
+++ b/patchtracker/ReqHandler.py
@@ -10,7 +10,7 @@ from patchtracker.DiffGzHandler import DiffGzHandler, DiffGzException
 from patchtracker.DebTarHandler import DebTarHandler
 from patchtracker.CacheObject import CacheObject, CacheMissException
 import patchtracker.DB as DB
-from patchtracker.DB import PatchTrackerDB
+from patchtracker.DB import PatchTrackerDB, PackageWithoutDiffException
 import pygments
 from pygments.lexers import DiffLexer
 from pygments.formatters import HtmlFormatter
@@ -41,7 +41,10 @@ class PatchCmd(Cmd):
     self.db = PatchTrackerDB()
     self.patchtype,mode,pkgname,version = args[0:4]
     self.parsemode(mode)
-    dh = self.make_diffhandler(pkgname,version)
+    try:
+      dh = self.db.makeDiffHandler(pkgname,version)
+    except PackageWithoutDiffException, e:
+      raise ReqHandlerException(str(e))
     if self.patchtype == "series":
       self.patchname = os.sep.join(args[4:])
       self.content = dh.series().fetch(self.patchname)
@@ -67,18 +70,6 @@ class PatchCmd(Cmd):
     if mode == "dl":
       self.content_type = "text/x-diff"
 
-  # XXX this is kinda ugly...
-  def make_diffhandler(self, pkgname, vers):
-    dfile = self.db.findDiffGz(pkgname,vers)
-    if dfile:
-      return DiffGzHandler(dfile)
-    else:
-      dfile = self.db.findDebTar(pkgname, vers)
-      if dfile:
-        return DebTarHandler(dfile)
-      else:
-        raise ReqHandlerException("can not find diff file for %s / %s"%(pkgname,vers))
-
   def output(self):
     if self.mode == "dl":
       return str(self.content)
-- 
1.7.5.4

From 0062ce5bb5ec3dff3036a65ffc5de3f0ebc692cf Mon Sep 17 00:00:00 2001
From: Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Date: Sun, 31 Jul 2011 11:36:45 +0200
Subject: [PATCH 2/9] Added ghost patches support.

The ghost option disables the actual loading in memory of the patches.
When you just want to know the number of patches or their stats,
this saves quite a lot of memory and CPU time.
---
 patchtracker/DebTarHandler.py |    4 +-
 patchtracker/DiffGzHandler.py |   13 ++++---
 patchtracker/Patch.py         |   68 ++++++++++++++++++++++++++++++-----------
 3 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/patchtracker/DebTarHandler.py b/patchtracker/DebTarHandler.py
index 311e39b..3564af2 100644
--- a/patchtracker/DebTarHandler.py
+++ b/patchtracker/DebTarHandler.py
@@ -10,8 +10,8 @@ class DebTarHandler:
     self.tarfile = fname
     self.size = os.stat(fname)[stat.ST_SIZE]
 
-  def series(self):
-    return Patch.Quilt30PatchSeries(self.tarfile)
+  def series(self, ghost=False):
+    return Patch.Quilt30PatchSeries(self.tarfile, ghost=ghost)
   
 if __name__ == "__main__":
   print "DebTarHandler testing"
diff --git a/patchtracker/DiffGzHandler.py b/patchtracker/DiffGzHandler.py
index 515b3ed..769ce18 100644
--- a/patchtracker/DiffGzHandler.py
+++ b/patchtracker/DiffGzHandler.py
@@ -14,7 +14,7 @@ class DiffGzHandler:
     self.diff = fname
     self.size = os.stat(fname)[stat.ST_SIZE]
 
-  def filterdiff(self, include=None, exclude=None):
+  def filterdiff(self, include=None, exclude=None, ghost=False):
     cmd = ["filterdiff","-z","-p","1"]
     if include:
       cmd += [ "-i", include]
@@ -24,7 +24,8 @@ class DiffGzHandler:
       raise Exception("DiffGzHandler.filterdiff called w/o include/exclude")
     i,o,e=os.popen3(cmd+[self.diff])
     i.close()
-    p = Patch(o)
+    p = Patch(o, ghost=ghost)
+    o.close()
     err = e.read()
     if len(err):
       raise DiffGzException("filterdiff gave errors: "+err)
@@ -33,10 +34,10 @@ class DiffGzHandler:
   def debiandir(self):
     return self.filterdiff(include='debian/*')
 
-  def nondebiandir(self):
-    return self.filterdiff(exclude='debian/*')
+  def nondebiandir(self, ghost=False):
+    return self.filterdiff(exclude='debian/*', ghost=ghost)
 
-  def series(self):
+  def series(self, ghost=False):
     patches = None
     embedded = self.filterdiff(include='debian/patches*')
 
@@ -50,7 +51,7 @@ class DiffGzHandler:
       err = e.read()
       if len(err):
         raise Exception("unable to extract series patches:\n"+err)
-      patches = PatchSeries(td)
+      patches = PatchSeries(td, ghost=ghost)
       os.system("rm -rf %s"%(td))
 
     return patches
diff --git a/patchtracker/Patch.py b/patchtracker/Patch.py
index 6533910..80ad8dc 100644
--- a/patchtracker/Patch.py
+++ b/patchtracker/Patch.py
@@ -3,21 +3,43 @@ import os
 import errno
 from glob import glob
 import tarfile
+from subprocess import Popen, PIPE
+from shutil import copyfileobj
 
 class Diffstat:
-  def __init__(self, patch):
+  def __init__(self, patch, ghost=False, patch_fh=None):
     self.patch = patch
-    i,o = os.popen2("diffstat -p1")
-    i.write(str(patch))
-    i.close()
-    self.output = o.readlines()
+    self.ghost = ghost
+    if not ghost:
+      i,o = os.popen2("diffstat -p1")
+      i.write(str(patch))
+      i.close()
+      self.output = o.readlines()
+    else:
+      popen = Popen("diffstat -p1 -t", shell=True, close_fds=True,
+        stdin=PIPE, stdout=PIPE, stderr=PIPE)
+      popen.stderr.close()
+      copyfileobj(patch_fh, popen.stdin)
+      popen.stdin.close()
+      popen.stdout.readline()
+      self._stats = map(lambda x: map(lambda y: y.strip(), x.split(',')), popen.stdout.readlines())
+      popen.wait()
 
   def stats(self):
-    i,o = os.popen2("diffstat -p1 -t")
-    i.write(str(self.patch))
-    i.close()
-    o.readline()
-    return [map(lambda x: x.strip(), l.split(",")) for l in o.readlines()]
+    if not self.ghost:
+      i,o = os.popen2("diffstat -p1 -t")
+      i.write(str(self.patch))
+      i.close()
+      o.readline()
+      return [map(lambda x: x.strip(), l.split(",")) for l in o.readlines()]
+    else:
+      return self._stats
+
+  def lines(self):
+    added, removed, modified = 0, 0, 0
+    for [added2, removed2, modified2, name] in self.stats():
+      added, removed, modified = added + int(added2), removed + int(removed2), modified + int(modified2)
+    return (added, removed, modified)
 
   def summary(self):
     return self.output[-1]
@@ -26,8 +48,13 @@ class Diffstat:
     return "".join(self.output)
 
 class Patch:
-  def __init__(self, fh, level=1):
-    self.p = fh.readlines()
+  def __init__(self, fh, level=1, ghost=False):
+    self.ghost = ghost
+    if not ghost:
+      self.p = fh.readlines()
+    else:
+      self.p = []
+      self._diffstat = Diffstat(self, ghost=True, patch_fh=fh)
     self.lvl = level
 
   def __str__(self):
@@ -37,7 +64,10 @@ class Patch:
     return len(self.p)
 
   def diffstat(self):
-    return Diffstat(self)
+    if not self.ghost:
+      return Diffstat(self)
+    else:
+      return self._diffstat
 
 class GenericPatchSeries (list):
   def blank(self):
@@ -68,7 +98,8 @@ class GenericPatchSeries (list):
 
 # XXX this entire __init__ stuff is way to ugly
 class PatchSeries (GenericPatchSeries):
-  def __init__(self, dir):
+  def __init__(self, dir, ghost=False):
+    self.ghost = ghost
     fd = None
     self.blank()
     self.style = "simple"
@@ -118,11 +149,11 @@ class PatchSeries (GenericPatchSeries):
     removelater=[]
     for p in self.names:
       try:
-        self.patches[p] = Patch(file(os.sep.join([dir, p])))
+        self.patches[p] = Patch(file(os.sep.join([dir, p])), ghost=ghost)
       except IOError, e:
         if e.errno == errno.ENOENT and self.style == "dpatch":
           try:
-            self.patches[p] = Patch(file(os.sep.join([dir, p+".dpatch"])))
+            self.patches[p] = Patch(file(os.sep.join([dir, p+".dpatch"])), ghost=ghost)
           except:
             #print "ERROR: could not find patch",p
             self.blank()
@@ -138,8 +169,9 @@ class PatchSeries (GenericPatchSeries):
       self.names.remove(p)
 
 class Quilt30PatchSeries (GenericPatchSeries):
-  def __init__(self, tarBall):
+  def __init__(self, tarBall, ghost=False):
     self.blank()
+    self.ghost = ghost
     self.style = "quilt (3.0)"
     self.tarfh = tarfile.open(tarBall, 'r:*')
     try:
@@ -162,7 +194,7 @@ class Quilt30PatchSeries (GenericPatchSeries):
 
     # XXX to lazy eval this might be better
     for name in self.names:
-      self.patches[name] = Patch(self.tarfh.extractfile("debian/patches/"+name))
+      self.patches[name] = Patch(self.tarfh.extractfile("debian/patches/"+name), ghost=ghost)
 
 if __name__ == "__main__":
   print "Patch.py testing"
-- 
1.7.5.4

From 38803ef902d1a3882817f046779923d185c79993 Mon Sep 17 00:00:00 2001
From: Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Date: Sun, 31 Jul 2011 11:55:58 +0200
Subject: [PATCH 3/9] Save memory by not loading the filtered patch.

---
 patchtracker/DiffGzHandler.py |   21 +++++++++------------
 1 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/patchtracker/DiffGzHandler.py b/patchtracker/DiffGzHandler.py
index 769ce18..75b6037 100644
--- a/patchtracker/DiffGzHandler.py
+++ b/patchtracker/DiffGzHandler.py
@@ -39,20 +39,17 @@ class DiffGzHandler:
 
   def series(self, ghost=False):
     patches = None
-    embedded = self.filterdiff(include='debian/patches*')
 
     # XXX *cough* cache *cough*
-    if embedded.lines():
-      td = tempfile.mkdtemp()
-      i,o,e=os.popen3("patch -d %s -p3"%(td))
-      o.close()
-      i.write(str(embedded))
-      i.close()
-      err = e.read()
-      if len(err):
-        raise Exception("unable to extract series patches:\n"+err)
-      patches = PatchSeries(td, ghost=ghost)
-      os.system("rm -rf %s"%(td))
+    td = tempfile.mkdtemp()
+    i,o,e=os.popen3("filterdiff -z -p 1 -i 'debian/patches*' %s | patch -d %s -p3" % (self.diff, td))
+    o.close()
+    i.close()
+    err = e.read()
+    if len(err):
+      raise Exception("unable to extract series patches:\n"+err)
+    patches = PatchSeries(td, ghost=ghost)
+    os.system("rm -rf %s"%(td))
 
     return patches
   
-- 
1.7.5.4

From 66f8ded58a5b624c39e4672775c34c449c4aa7fd Mon Sep 17 00:00:00 2001
From: Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Date: Sun, 31 Jul 2011 11:38:39 +0200
Subject: [PATCH 4/9] Added UTF-8 support to the reprepro filter.

---
 reprepro/conf/diffsonly.py |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/reprepro/conf/diffsonly.py b/reprepro/conf/diffsonly.py
index d2f02ea..f8edbe0 100755
--- a/reprepro/conf/diffsonly.py
+++ b/reprepro/conf/diffsonly.py
@@ -4,6 +4,7 @@ from debian_bundle import deb822
 from gzip import GzipFile
 from fnmatch import fnmatch
 import sys
+import codecs
 
 import patchtracker.Conf as Conf
 
@@ -28,7 +29,7 @@ if __name__ == '__main__':
   else:
     slist = deb822.Sources.iter_paragraphs(fh)
 
-  outf = file(sys.argv[2], "w")
+  outf = codecs.open(sys.argv[2], mode="w", encoding='utf-8')
   print "filtering %s for .diff.gz/.dsc files..."%(inf)
   for ent in slist:
     for k in ['Files','Checksums-Sha1','Checksums-Sha256']:
@@ -39,5 +40,5 @@ if __name__ == '__main__':
             if fnmatch(f['name'], wanted_glob):
               newfiles.append(f)
         ent[k] = newfiles
-    outf.write(str(ent))
+    outf.write(unicode(ent))
     outf.write("\n")
-- 
1.7.5.4

From 296668570d8a6e60d58f2f4ae33d71d5b57ea2b5 Mon Sep 17 00:00:00 2001
From: Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Date: Sun, 31 Jul 2011 11:38:59 +0200
Subject: [PATCH 5/9] Small FIXME.

---
 patchtracker/Patch.py |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/patchtracker/Patch.py b/patchtracker/Patch.py
index 80ad8dc..0f90e2f 100644
--- a/patchtracker/Patch.py
+++ b/patchtracker/Patch.py
@@ -177,6 +177,8 @@ class Quilt30PatchSeries (GenericPatchSeries):
     try:
       try:
         series_fh = self.tarfh.extractfile("debian/patches/debian.series")
+        # FIXME - From dpkg-source manpage it appears that when both files
+        # exist, patch listed in both must be applied
       except KeyError:
         series_fh = self.tarfh.extractfile("debian/patches/series")
     except KeyError:
-- 
1.7.5.4

From 6d0b6be449abf7366d8efd6d51f9664c1b3c9995 Mon Sep 17 00:00:00 2001
From: Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Date: Sun, 31 Jul 2011 12:01:47 +0200
Subject: [PATCH 6/9] Added export_for_udd.py script.

---
 export_for_udd.py    |   71 ++++++++++++++++++++++++++++++++++++++++++++++++++
 patchtracker/Util.py |    9 ++++++
 2 files changed, 80 insertions(+), 0 deletions(-)
 create mode 100755 export_for_udd.py
 create mode 100644 patchtracker/Util.py

diff --git a/export_for_udd.py b/export_for_udd.py
new file mode 100755
index 0000000..4220e2f
--- /dev/null
+++ b/export_for_udd.py
@@ -0,0 +1,71 @@
+#!/usr/bin/python
+
+from patchtracker.DB import PatchTrackerDB, PackageWithoutDiffException
+from patchtracker.Util import print_used_memory
+import sys
+import json
+
+def enumerate_packages(db):
+	for packages_per_letter in db.findCollection().pkgs.itervalues():
+		for package_name, package_data in packages_per_letter.iteritems():
+			for suite, package in package_data.iteritems():
+				yield (package_name, suite, package.version)
+
+def main():
+	db = PatchTrackerDB()
+	packages = []
+	try:
+		i = 0
+		for package, suite, version in enumerate_packages(db):
+
+			# Just for testing and debugging...
+			#if package not in ['geogebra', 'wotsap', 'netrw', 'haskell-devscripts',
+			#	'haskell-filestore', 'openide-utils']:
+			#	continue
+
+			print >> sys.stderr, "%d %s %s %s" % (i, package, suite, version)
+			i += 1
+
+			try:
+				dh = db.makeDiffHandler(package, version)
+				series = dh.series(ghost=True)
+				if series:
+					series_type = series.style
+				else:
+					series_type = "no_series"
+				try:
+					nondebian_diff = dh.nondebiandir(ghost=True)
+					nondebian = nondebian_diff.diffstat().lines()
+				except AttributeError:
+					# This means that the handler is a DebTarHandler
+					nondebian = [0, 0, 0]
+			except PackageWithoutDiffException:
+				# The package is native
+				series_type = "native"
+				nondebian = [0, 0, 0]
+
+			patches = [(patch, patch_data.diffstat().lines()) for (patch, patch_data) in series]
+
+			packages.append({'package': package, 'suite': suite, 'version': version,
+				'series_type': series_type, 'nondebian': nondebian,
+				'patches': patches})
+
+	# This is mainly intended for debugging: if you want to interrupt the
+	# process, you still can obtain the JSON with the packages processed so far.
+	# Consistency is assured by the fact that append() is atomic in Python
+	except KeyboardInterrupt:
+		pass
+
+    # TODO We don't need to keep all the packages dictionary in memory; there are
+    # libraries to access JSON in a stream (DOM-like) fashion, but Python
+    # bindings don't appear to be available in Debian (libyajl)
+
+	# Pretty printing:
+	#json.dump(packages, sys.stdout, sort_keys=True, indent=4)
+
+	# Awful printing:
+	json.dump(packages, sys.stdout)
+
+if __name__ == '__main__':
+	main()
+
diff --git a/patchtracker/Util.py b/patchtracker/Util.py
new file mode 100644
index 0000000..31a200c
--- /dev/null
+++ b/patchtracker/Util.py
@@ -0,0 +1,9 @@
+
+import resource
+import sys
+
+def print_used_memory(msg):
+	"""Small utility function to find where RAM is used at most."""
+	r = resource.getrusage(resource.RUSAGE_SELF)
+	print >> sys.stderr,  "%s; used memory = %d" % (msg, r.ru_maxrss)
+
-- 
1.7.5.4

From 9f85e7cfaf087c5a9a337f2fead8f3f1e9a67e02 Mon Sep 17 00:00:00 2001
From: Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Date: Mon, 1 Aug 2011 13:02:18 +0200
Subject: [PATCH 7/9] Fix small bug with filenames that contain commas.

When tokenizing the "diffstat -t" output, commas inside the filename
shouldn't be considered. We achieve it by splitting only on the
first three commas.
---
 patchtracker/Patch.py |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/patchtracker/Patch.py b/patchtracker/Patch.py
index 0f90e2f..ff18bc7 100644
--- a/patchtracker/Patch.py
+++ b/patchtracker/Patch.py
@@ -22,7 +22,7 @@ class Diffstat:
       copyfileobj(patch_fh, popen.stdin)
       popen.stdin.close()
       popen.stdout.readline()
-      self._stats = map(lambda x: map(lambda y: y.strip(), x.split(',')), popen.stdout.readlines())
+      self._stats = map(lambda x: map(lambda y: y.strip(), x.split(',', 3)), popen.stdout.readlines())
       popen.wait()
 
   def stats(self):
@@ -31,7 +31,7 @@ class Diffstat:
       i.write(str(self.patch))
       i.close()
       o.readline()
-      return [map(lambda x: x.strip(), l.split(",")) for l in o.readlines()]
+      return [map(lambda x: x.strip(), l.split(",", 3)) for l in o.readlines()]
     else:
       return self._stats
 
-- 
1.7.5.4

From 9307c70d39ee342aa89cce327c788bbbf97c24c7 Mon Sep 17 00:00:00 2001
From: Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Date: Tue, 2 Aug 2011 14:43:09 +0200
Subject: [PATCH 8/9] enumerate_packages() moved in class PatchTrackerDB.

---
 export_for_udd.py  |    8 +-------
 patchtracker/DB.py |    7 +++++++
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/export_for_udd.py b/export_for_udd.py
index 4220e2f..8f331ca 100755
--- a/export_for_udd.py
+++ b/export_for_udd.py
@@ -5,18 +5,12 @@ from patchtracker.Util import print_used_memory
 import sys
 import json
 
-def enumerate_packages(db):
-	for packages_per_letter in db.findCollection().pkgs.itervalues():
-		for package_name, package_data in packages_per_letter.iteritems():
-			for suite, package in package_data.iteritems():
-				yield (package_name, suite, package.version)
-
 def main():
 	db = PatchTrackerDB()
 	packages = []
 	try:
 		i = 0
-		for package, suite, version in enumerate_packages(db):
+		for package, suite, version in db.enumerate_packages():
 
 			# Just for testing and debugging...
 			#if package not in ['geogebra', 'wotsap', 'netrw', 'haskell-devscripts',
diff --git a/patchtracker/DB.py b/patchtracker/DB.py
index 0913b24..d146613 100644
--- a/patchtracker/DB.py
+++ b/patchtracker/DB.py
@@ -122,6 +122,13 @@ class PatchTrackerDB:
     self.db.row_factory = oldfactory
     return toc
 
+  # TODO Probably this can made more efficient
+  def enumerate_packages(self):
+    for packages_per_letter in self.findCollection().pkgs.itervalues():
+      for package_name, package_data in packages_per_letter.iteritems():
+        for suite, package in package_data.iteritems():
+          yield (package_name, suite, package.version)
+
   def findLetterToc(self, letter):
     return self.findCollection(package=letter+"%").getletter(letter)
 
-- 
1.7.5.4

From faf60de972605b795b6cffbc2b39f14e895e17e6 Mon Sep 17 00:00:00 2001
From: Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
Date: Tue, 2 Aug 2011 15:08:34 +0200
Subject: [PATCH 9/9] Add cache for export_for_udd.py.

export_for_udd.py reads the files listed on the command line,
assuming that they are JSON files generated by previous runs of
export_for_udd.py. The patches for packages listed in such files
are not computed again.
---
 export_for_udd.py |   54 ++++++++++++++++++++++++++++++++++------------------
 1 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/export_for_udd.py b/export_for_udd.py
index 8f331ca..86171a6 100755
--- a/export_for_udd.py
+++ b/export_for_udd.py
@@ -8,6 +8,18 @@ import json
 def main():
 	db = PatchTrackerDB()
 	packages = []
+	previous = {}
+
+	# Load data from previous calculations
+	for fn in sys.argv[1:]:
+		with open(fn) as f:
+			data = json.load(f)
+			for line in data:
+				previous[(line['package'], line['version'])] = \
+					{'series_type': line['series_type'],
+					'nondebian': line['nondebian'],
+					'patches': line['patches']}
+
 	try:
 		i = 0
 		for package, suite, version in db.enumerate_packages():
@@ -20,29 +32,33 @@ def main():
 			print >> sys.stderr, "%d %s %s %s" % (i, package, suite, version)
 			i += 1
 
-			try:
-				dh = db.makeDiffHandler(package, version)
-				series = dh.series(ghost=True)
-				if series:
-					series_type = series.style
-				else:
-					series_type = "no_series"
+			if (package, version) not in previous:
+				prev_package = previous[(package, version)] = {}
 				try:
-					nondebian_diff = dh.nondebiandir(ghost=True)
-					nondebian = nondebian_diff.diffstat().lines()
-				except AttributeError:
-					# This means that the handler is a DebTarHandler
-					nondebian = [0, 0, 0]
-			except PackageWithoutDiffException:
-				# The package is native
-				series_type = "native"
-				nondebian = [0, 0, 0]
+					dh = db.makeDiffHandler(package, version)
+					series = dh.series(ghost=True)
+					if series:
+						prev_package['series_type'] = series.style
+					else:
+						prev_package['series_type'] = "no_series"
+					try:
+						nondebian_diff = dh.nondebiandir(ghost=True)
+						prev_package['nondebian'] = nondebian_diff.diffstat().lines()
+					except AttributeError:
+						# This means that the handler is a DebTarHandler
+						prev_package['nondebian'] = [0, 0, 0]
+				except PackageWithoutDiffException:
+					# The package is native
+					prev_package['series_type'] = "native"
+					prev_package['nondebian'] = [0, 0, 0]
 
-			patches = [(patch, patch_data.diffstat().lines()) for (patch, patch_data) in series]
+				prev_package['patches'] = [(patch, patch_data.diffstat().lines()) for (patch, patch_data) in series]
 
+			prev_package = previous[(package, version)]
 			packages.append({'package': package, 'suite': suite, 'version': version,
-				'series_type': series_type, 'nondebian': nondebian,
-				'patches': patches})
+				'series_type': prev_package['series_type'],
+				'nondebian': prev_package['nondebian'],
+				'patches': prev_package['patches']})
 
 	# This is mainly intended for debugging: if you want to interrupt the
 	# process, you still can obtain the JSON with the packages processed so far.
-- 
1.7.5.4

Attachment: signature.asc
Description: OpenPGP digital signature


Reply to: