[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Fwd: [PATCH] Modify blends_prospective_gatherer.py : Avoid duplicate entries in table blends_prospectivepackages. Insert in UDD the package which has the latest chlog_date



Hi,

Is there any update on this patch?

---------- Forwarded message ----------
From: <zenith158@gmail.com>
Date: Sat, Aug 29, 2015 at 12:39 AM
Subject: [PATCH] Modify blends_prospective_gatherer.py : Avoid duplicate entries in table blends_prospectivepackages. Insert in UDD the package which has the latest chlog_date
To: debian-blends@lists.debian.org
Cc: zenith158@gmail.com, Akshita Jha <akshita-guest@users.alioth.debian.org>


From: Akshita Jha <akshita-guest@users.alioth.debian.org>

---
 udd/blends_prospective_gatherer.py | 60 +++++++++++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 8 deletions(-)

diff --git a/udd/blends_prospective_gatherer.py b/udd/blends_prospective_gatherer.py
index 69652e8..ddf81d9 100644
--- a/udd/blends_prospective_gatherer.py
+++ b/udd/blends_prospective_gatherer.py
@@ -19,6 +19,7 @@ from debian import deb822
 import email.Utils

 from bibref_gatherer import upstream_reader
+from datetime import datetime

 debug=0

@@ -414,14 +415,57 @@ class blends_prospective_gatherer(gatherer):
        %(vcs_type)s, %(vcs_url)s, %(vcs_browser)s,
        %(wnpp)s, %(wnpp_type)s, %(wnpp_desc)s,
        %(license)s, %(chlog_date)s, %(chlog_version)s)"""
-    try:
-      cur.executemany(pkgquery, pkgs)
-    except ProgrammingError:
-      print "Error while inserting packages"
-      raise
-    except KeyError, err:
-      print "Error while inserting packages", err
-      raise
+
+
+    dup_pkgs = []  # List containing packages which violate Primary Key Condition
+    for p in pkgs:
+        try:
+            cur.execute(pkgquery, p)
+            #cur.executemany(pkgquery, pkgs)
+        except ProgrammingError:
+            print "Error while inserting packages"
+            raise
+        except KeyError, err:
+            print "Error while inserting packages", err
+            raise
+        except IntegrityError as err:
+            dup_pkgs.append(p)     # If IntegrityError => package already exists in UDD
+                                   # Append this duplicate record to dup_pkgs
+            print "Duplicate Key Error while inserting packages", err
+            self.connection.rollback()
+        else:
+            self.connection.commit()
+
+    # Once all the prospective packages are inserted into UDD, make sure that
+    # these packages are the ones that have the latest chlog_date.
+    # If the package inserted in UDD has a chlog_date earlier than its duplicate,
+    # delete the record of this package from UDD and insert its duplicate in UDD.
+    for d in dup_pkgs:
+
+        dup_query = "SELECT package, chlog_date FROM %s WHERE package='%s'" %(my_config['table'], d['package'])
+
+        cur.execute(dup_query)
+        c = cur.fetchone()
+
+        # chlog_date of package inserted in UDD
+        udd_date = " ".join(c[1].split()[:-1])
+        udd_date = datetime.strptime(udd_date, '%a, %d %b %Y %H:%M:%S')
+
+        # chlog_date for duplicated package not in UDD
+        dup_date = " ".join(d['chlog_date'].split()[:-1])
+        dup_date = datetime.strptime(dup_date, '%a, %d %b %Y %H:%M:%S')
+
+        # compare the chlog_date of the UDD package and its duplicate
+        # if udd_date >= dup_date -> do nothing
+        # else: delete the udd package and insert its duplicate
+        if udd_date < dup_date:
+            del_query = "DELETE FROM ONLY %s WHERE package='%s'" %(my_config['table'], d['package'])
+            cur.execute(del_query)
+            cur.execute(pkgquery, d)
+
+        # test_query = "SELECT package, chlog_date FROM %s WHERE package='%s'" %(my_config['table'], d['package'])
+        # cur.execute(test_query)
+        # c = cur.fetchone()

     cur.execute("DEALLOCATE package_insert")

--
1.9.1




--
Akshita Jha

Reply to: