[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[dak/master 09/29] Add support for multiple orig tarballs



The structure of Upload.pkg is adapted to handle multiple upstream tarballs.
This also means that the .dak format is modified.

Since the .dak format has been modified, improve the .dak parser
function to be able to read both the new and the old format. When
reading the old format, it auto-converts the data to the new structure.
It uses Python's type() introspection to detect which format is used.

This allows in-place upgrade even when old .dak files are still stored
in various queues.

Several other adaptations are also required in other methods of the Upload
object, in process-accepted, and in autobuild_queue().

Signed-off-by: Raphaël Hertzog <hertzog@debian.org>
---
 dak/process_accepted.py |   71 +++++++++++++++++++++++++---------------------
 daklib/changes.py       |   43 +++++++++++++++++++++++-----
 daklib/dbconn.py        |   25 +++++++++-------
 daklib/queue.py         |   48 +++++++++++++++++++------------
 4 files changed, 117 insertions(+), 70 deletions(-)

diff --git a/dak/process_accepted.py b/dak/process_accepted.py
index d7db117..51c6a5d 100755
--- a/dak/process_accepted.py
+++ b/dak/process_accepted.py
@@ -210,7 +210,7 @@ def add_dsc_to_db(u, filename, session):
         df = DSCFile()
         df.source_id = source.source_id
 
-        # If the .orig.tar.gz is already in the pool, it's
+        # If the .orig tarball is already in the pool, it's
         # files id is stored in dsc_files by check_dsc().
         files_id = dentry.get("files id", None)
 
@@ -353,32 +353,37 @@ def install(u, session, log_urgency=True):
             add_deb_to_db(u, newfile, session)
 
     # If this is a sourceful diff only upload that is moving
-    # cross-component we need to copy the .orig.tar.gz into the new
+    # cross-component we need to copy the .orig files into the new
     # component too for the same reasons as above.
-    #
-    if u.pkg.changes["architecture"].has_key("source") and u.pkg.orig_tar_id and \
-       u.pkg.orig_tar_location != dsc_location_id:
-
-        oldf = get_poolfile_by_id(u.pkg.orig_tar_id, session)
-        old_filename = os.path.join(oldf.location.path, oldf.filename)
-        old_dat = {'size': oldf.filesize,   'md5sum': oldf.md5sum,
-                   'sha1sum': oldf.sha1sum, 'sha256sum': oldf.sha256sum}
-
-        new_filename = os.path.join(utils.poolify(u.pkg.changes["source"], dsc_component), os.path.basename(old_filename))
-
-        # TODO: Care about size/md5sum collisions etc
-        (found, newf) = check_poolfile(new_filename, file_size, file_md5sum, dsc_location_id, session)
-
-        if newf is None:
-            utils.copy(old_filename, os.path.join(cnf["Dir::Pool"], new_filename))
-            newf = add_poolfile(new_filename, old_dat, dsc_location_id, session)
-
-            # TODO: Check that there's only 1 here
-            source = get_sources_from_name(u.pkg.changes["source"], u.pkg.changes["version"])[0]
-            dscf = get_dscfiles(source_id = source.source_id, poolfile_id=u.pkg.orig_tar_id, session=session)[0]
-            dscf.poolfile_id = newf.file_id
-            session.add(dscf)
-            session.flush()
+    if u.pkg.changes["architecture"].has_key("source"):
+        for orig_file in u.pkg.orig_files.keys():
+            if not u.pkg.orig_files[orig_file].has_key("id"):
+                continue # Skip if it's not in the pool
+            orig_file_id = u.pkg.orig_files[orig_file]["id"]
+            if u.pkg.orig_files[orig_file]["location"] == dsc_location_id:
+                continue # Skip if the location didn't change
+
+            # Do the move
+            oldf = get_poolfile_by_id(orig_file_id, session)
+            old_filename = os.path.join(oldf.location.path, oldf.filename)
+            old_dat = {'size': oldf.filesize,   'md5sum': oldf.md5sum,
+                       'sha1sum': oldf.sha1sum, 'sha256sum': oldf.sha256sum}
+
+            new_filename = os.path.join(utils.poolify(u.pkg.changes["source"], dsc_component), os.path.basename(old_filename))
+
+            # TODO: Care about size/md5sum collisions etc
+            (found, newf) = check_poolfile(new_filename, file_size, file_md5sum, dsc_location_id, session)
+
+            if newf is None:
+                utils.copy(old_filename, os.path.join(cnf["Dir::Pool"], new_filename))
+                newf = add_poolfile(new_filename, old_dat, dsc_location_id, session)
+
+                # TODO: Check that there's only 1 here
+                source = get_sources_from_name(u.pkg.changes["source"], u.pkg.changes["version"])[0]
+                dscf = get_dscfiles(source_id=source.source_id, poolfile_id=orig_file_id, session=session)[0]
+                dscf.poolfile_id = newf.file_id
+                session.add(dscf)
+                session.flush()
 
     # Install the files into the pool
     for newfile, entry in u.pkg.files.items():
@@ -452,15 +457,17 @@ def install(u, session, log_urgency=True):
                     os.unlink(dest)
                 os.symlink(src, dest)
 
-        # Update last_used on any non-upload .orig.tar.gz symlink
-        if u.pkg.orig_tar_id:
+        # Update last_used on any non-uploaded .orig symlink
+        for orig_file in u.pkg.orig_files.keys():
             # Determine the .orig.tar.gz file name
-            for dsc_file in u.pkg.dsc_files.keys():
-                if dsc_file.endswith(".orig.tar.gz"):
-                    u.pkg.orig_tar_gz = os.path.join(dest_dir, dsc_file)
+            if not u.pkg.orig_files[orig_file].has_key("id"):
+                continue # Skip files not in the pool
+            # XXX: do we really want to update the orig_files dict here
+            # instead of using a temporary variable?
+            u.pkg.orig_files[orig_file]["path"] = os.path.join(dest_dir, orig_file)
 
             # Remove it from the list of packages for later processing by apt-ftparchive
-            qb = get_queue_build(u.pkg.orig_tar_gz, suite.suite_id, session)
+            qb = get_queue_build(u.pkg.orig_files[orig_file]["path"], suite.suite_id, session)
             if qb:
                 qb.in_queue = False
                 qb.last_used = now_date
diff --git a/daklib/changes.py b/daklib/changes.py
index 1bb9075..59c7da1 100755
--- a/daklib/changes.py
+++ b/daklib/changes.py
@@ -76,6 +76,10 @@ CHANGESFIELDS_DSCFILES_OPTIONAL = [ "files id" ]
 
 __all__.append('CHANGESFIELDS_DSCFILES_OPTIONAL')
 
+CHANGESFIELDS_ORIGFILES = [ "id", "location" ]
+
+__all__.append('CHANGESFIELDS_ORIGFILES')
+
 ###############################################################################
 
 class Changes(object):
@@ -91,10 +95,7 @@ class Changes(object):
         self.dsc = {}
         self.files = {}
         self.dsc_files = {}
-
-        self.orig_tar_id = None
-        self.orig_tar_location = ""
-        self.orig_tar_gz = None
+        self.orig_files = {}
 
     def file_summary(self):
         # changes["distribution"] may not exist in corner cases
@@ -189,8 +190,24 @@ class Changes(object):
         self.files.update(p.load())
         self.dsc_files.update(p.load())
 
-        self.orig_tar_id = p.load()
-        self.orig_tar_location = p.load()
+        next_obj = p.load()
+        if type(next_obj) is DictType:
+            self.pkg.orig_files.update(next_obj)
+        else:
+            # Auto-convert old dak files to new format supporting
+            # multiple tarballs
+            orig_tar_gz = None
+            for dsc_file in self.dsc_files.keys():
+                if dsc_file.endswith(".orig.tar.gz"):
+                    orig_tar_gz = dsc_file
+            self.orig_files[orig_tar_gz] = {}
+            if next_obj != None:
+                self.orig_files[orig_tar_gz]["id"] = next_obj
+            next_obj = p.load()
+            if next_obj != None and next_obj != "":
+                self.orig_files[orig_tar_gz]["location"] = next_obj
+            if len(self.orig_files[orig_tar_gz]) == 0:
+                del self.orig_files[orig_tar_gz]
 
         dump_file.close()
 
@@ -240,6 +257,17 @@ class Changes(object):
 
         return ret
 
+    def sanitised_orig_files(self):
+        ret = {}
+        for name, entry in self.orig_files.items():
+            ret[name] = {}
+            # Optional orig_files fields
+            for i in CHANGESFIELDS_ORIGFILES:
+                if entry.has_key(i):
+                    ret[name][i] = entry[i]
+
+        return ret
+
     def write_dot_dak(self, dest_dir):
         """
         Dump ourself into a cPickle file.
@@ -281,8 +309,7 @@ class Changes(object):
         p.dump(self.sanitised_dsc())
         p.dump(self.sanitised_files())
         p.dump(self.sanitised_dsc_files())
-        p.dump(self.orig_tar_id)
-        p.dump(self.orig_tar_location)
+        p.dump(self.sanitised_orig_files())
 
         dump_file.close()
 
diff --git a/daklib/dbconn.py b/daklib/dbconn.py
index ff00135..c0facc4 100755
--- a/daklib/dbconn.py
+++ b/daklib/dbconn.py
@@ -1609,23 +1609,26 @@ class Queue(object):
 
                 session.add(qb)
 
-            # If the .orig.tar.gz is in the pool, create a symlink to
-            # it (if one doesn't already exist)
-            if changes.orig_tar_id:
-                # Determine the .orig.tar.gz file name
-                for dsc_file in changes.dsc_files.keys():
-                    if dsc_file.endswith(".orig.tar.gz"):
-                        filename = dsc_file
-
-                dest = os.path.join(dest_dir, filename)
+            # If the .orig tarballs are in the pool, create a symlink to
+            # them (if one doesn't already exist)
+            for dsc_file in changes.dsc_files.keys():
+                # Skip all files except orig tarballs
+                if not re_is_orig_source.match(dsc_file):
+                    continue
+                # Skip orig files not identified in the pool
+                if not (changes.orig_files.has_key(dsc_file) and
+                        changes.orig_files[dsc_file].has_key("id")):
+                    continue
+                orig_file_id = changes.orig_files[dsc_file]["id"]
+                dest = os.path.join(dest_dir, dsc_file)
 
                 # If it doesn't exist, create a symlink
                 if not os.path.exists(dest):
                     q = session.execute("SELECT l.path, f.filename FROM location l, files f WHERE f.id = :id and f.location = l.id",
-                                        {'id': changes.orig_tar_id})
+                                        {'id': orig_file_id})
                     res = q.fetchone()
                     if not res:
-                        return "[INTERNAL ERROR] Couldn't find id %s in files table." % (changes.orig_tar_id)
+                        return "[INTERNAL ERROR] Couldn't find id %s in files table." % (orig_file_id)
 
                     src = os.path.join(res[0], res[1])
                     os.symlink(src, dest)
diff --git a/daklib/queue.py b/daklib/queue.py
index 96bf37d..03bc7e0 100755
--- a/daklib/queue.py
+++ b/daklib/queue.py
@@ -1055,16 +1055,19 @@ class Upload(object):
                 if not os.path.exists(src):
                     return
                 ftype = m.group(3)
-                if ftype == "orig.tar.gz" and self.pkg.orig_tar_gz:
+                if re_is_orig_source.match(f) and pkg.orig_files.has_key(f) and \
+                   pkg.orig_files[f].has_key("path"):
                     continue
                 dest = os.path.join(os.getcwd(), f)
                 os.symlink(src, dest)
 
-        # If the orig.tar.gz is not a part of the upload, create a symlink to the
-        # existing copy.
-        if self.pkg.orig_tar_gz:
-            dest = os.path.join(os.getcwd(), os.path.basename(self.pkg.orig_tar_gz))
-            os.symlink(self.pkg.orig_tar_gz, dest)
+        # If the orig files are not a part of the upload, create symlinks to the
+        # existing copies.
+        for orig_file in self.pkg.orig_files.keys():
+            if not self.pkg.orig_files[orig_file].has_key("path"):
+                continue
+            dest = os.path.join(os.getcwd(), os.path.basename(orig_file))
+            os.symlink(self.pkg.orig_files[orig_file]["path"], dest)
 
         # Extract the source
         cmd = "dpkg-source -sn -x %s" % (dsc_filename)
@@ -1107,10 +1110,11 @@ class Upload(object):
         #      We should probably scrap or rethink the whole reprocess thing
         # Bail out if:
         #    a) there's no source
-        # or b) reprocess is 2 - we will do this check next time when orig.tar.gz is in 'files'
-        # or c) the orig.tar.gz is MIA
+        # or b) reprocess is 2 - we will do this check next time when orig
+        #       tarball is in 'files'
+        # or c) the orig files are MIA
         if not self.pkg.changes["architecture"].has_key("source") or self.reprocess == 2 \
-           or self.pkg.orig_tar_gz == -1:
+           or len(self.pkg.orig_files) == 0:
             return
 
         tmpdir = utils.temp_dirname()
@@ -2047,7 +2051,7 @@ distribution."""
         """
 
         @warning: NB: this function can remove entries from the 'files' index [if
-         the .orig.tar.gz is a duplicate of the one in the archive]; if
+         the orig tarball is a duplicate of the one in the archive]; if
          you're iterating over 'files' and call this function as part of
          the loop, be sure to add a check to the top of the loop to
          ensure you haven't just tried to dereference the deleted entry.
@@ -2055,7 +2059,8 @@ distribution."""
         """
 
         Cnf = Config()
-        self.pkg.orig_tar_gz = None
+        self.pkg.orig_files = {} # XXX: do we need to clear it?
+        orig_files = self.pkg.orig_files
 
         # Try and find all files mentioned in the .dsc.  This has
         # to work harder to cope with the multiple possible
@@ -2089,7 +2094,7 @@ distribution."""
                 if len(ql) > 0:
                     # Ignore exact matches for .orig.tar.gz
                     match = 0
-                    if dsc_name.endswith(".orig.tar.gz"):
+                    if re_is_orig_source.match(dsc_name):
                         for i in ql:
                             if self.pkg.files.has_key(dsc_name) and \
                                int(self.pkg.files[dsc_name]["size"]) == int(i.filesize) and \
@@ -2099,13 +2104,15 @@ distribution."""
                                 # This would fix the stupidity of changing something we often iterate over
                                 # whilst we're doing it
                                 del self.pkg.files[dsc_name]
-                                self.pkg.orig_tar_gz = os.path.join(i.location.path, i.filename)
+                                if not orig_files.has_key(dsc_name):
+                                    orig_files[dsc_name] = {}
+                                orig_files[dsc_name]["path"] = os.path.join(i.location.path, i.filename)
                                 match = 1
 
                     if not match:
                         self.rejects.append("can not overwrite existing copy of '%s' already in the archive." % (dsc_name))
 
-            elif dsc_name.endswith(".orig.tar.gz"):
+            elif re_is_orig_source.match(dsc_name):
                 # Check in the pool
                 ql = get_poolfile_like_name(dsc_name, session)
 
@@ -2143,9 +2150,11 @@ distribution."""
                     # need this for updating dsc_files in install()
                     dsc_entry["files id"] = x.file_id
                     # See install() in process-accepted...
-                    self.pkg.orig_tar_id = x.file_id
-                    self.pkg.orig_tar_gz = old_file
-                    self.pkg.orig_tar_location = x.location.location_id
+                    if not orig_files.has_key(dsc_name):
+                        orig_files[dsc_name] = {}
+                    orig_files[dsc_name]["id"] = x.file_id
+                    orig_files[dsc_name]["path"] = old_file
+                    orig_files[dsc_name]["location"] = x.location.location_id
                 else:
                     # TODO: Record the queues and info in the DB so we don't hardcode all this crap
                     # Not there? Check the queue directories...
@@ -2159,11 +2168,12 @@ distribution."""
                             in_otherdir_fh.close()
                             actual_size = os.stat(in_otherdir)[stat.ST_SIZE]
                             found = in_otherdir
-                            self.pkg.orig_tar_gz = in_otherdir
+                            if not orig_files.has_key(dsc_name):
+                                orig_files[dsc_name] = {}
+                            orig_files[dsc_name]["path"] = in_otherdir
 
                     if not found:
                         self.rejects.append("%s refers to %s, but I can't find it in the queue or in the pool." % (file, dsc_name))
-                        self.pkg.orig_tar_gz = -1
                         continue
             else:
                 self.rejects.append("%s refers to %s, but I can't find it in the queue." % (file, dsc_name))
-- 
1.6.3.3



Reply to: