[dak/master] Added content import, merged master, added update2 script, added new quotes file, and added commands to dak.py
Signed-off-by: Michael Casadevall <sonicmctails@gmail.com>
---
dak/.generate_contents.py.swp | Bin 12288 -> 0 bytes
dak/dak.py | 2 +
dak/dakdb/update2.py | 80 +++++++++++++++++++
dak/generate_contents.py | 2 +-
dak/import_contents.py | 171 +++++++++++++++++++++++++++++++++++++++++
dak/update_db.py | 2 +-
daklib/database.py | 23 +++++-
docs/README.quotes | 6 ++
8 files changed, 281 insertions(+), 5 deletions(-)
delete mode 100644 dak/.generate_contents.py.swp
create mode 100644 dak/dakdb/update2.py
create mode 100755 dak/import_contents.py
diff --git a/dak/.generate_contents.py.swp b/dak/.generate_contents.py.swp
deleted file mode 100644
index d4e83290431f9beed6b625eb22bb28b4ce8ee265..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc-jL100001
literal 12288
zc-rk+TW=dh6rSEGP(ZXVyuhhL6&tM`Cjr_zO@$Mh#8Phcg({6AGxm<{A?w}M?yOzE
z6oD!MuSkVTT>b$dBm@t<AR$2T&Tjzv0uu0w_-4J%r73MyB@e)U(r0^S&YU^t%sFRT
zwYBU^g#|jD8D+p@jHR5*g)h2?S?vU4CSPaw0?4IJ(Gk{N<gP++L<={i(%&OZVGEBd
zQLQ_+61MU)ZjZ%?5o6a8s7CtCvC8<^XjTu05+~{S<CQorF=E7s5hF&77%^h}kD%Or
z>{Wd4kH#y}_r6=cW5kFNBSwrEF=E7s5hF&782?^yld&TQ7<=O&3;+Fp@b~|lk23ZH
z_&xYK_!^i4e}9CrPr$dpH^CY35cth8#@+$1gBQW$;CBx*_AMBI&x41+pB`fD1~?1O
zfPX&7*dO56;Ah}_U>EFwr@*JdC%~iNUq>1H4g3}S5c~kV23`iwgOlK4a6fqS0mgm-
zZ-DQDSHU@O7TgE^d<3?DUxJ^5pMo!gRj>#?3qA!t2_69VgI^tH>|^jcco94S9tVFs
z1Y5xO!E2xbeefhm;1TdJ_%UMq5%~7ODBdyt^PpSN=F(24m~2qBYV)?JR%vpQhO1S*
zP^}K<7#q41c0<R8qP;@<zBcWKSNDbfHSPG{NXZoSS6t6|Mby<4O_JNpVA;kfIc+yq
z6E9BXa*HrFv1ZW5#GK<RJt6HaQp=8q$y91HCjy^01w7Wj;q{WMq+|Q5i8*1piM3o`
zFq*~D(3B|!wX!EfGBFz&o1Tu0&E+D=L|>sFNG0?(?y9Q0-pnj>&ll59yUlHLUfM!i
zX5_tQ(AL`xzvpMT*X*RW=WJ;HKOdIMo$?cOijp$5hO-ihjm>b#c2sC|i~H4e(M#T&
zo>VT>(QrJHh#;*}qNM{uYq=fOv=+{6H~|Y*lf$iHqrbzr<AGt6Q%2uin2F7Sl9r!o
z3Z+w^k&Hs&1PW_9KE5(B=qHQi)VQVh85`+gXi;X0MwrT!OzlC&hKFx{4AHDmIM$2q
z+aTO7sg@w_)mxaUtH2XJi-b2PVSA1H(vKD{39^O2PKUaVw;pOi970Wla_H-JJ!CaW
z6^6yJiIXe5f&Qg+W7`(oAMUZub;wETVYqB^#c{d_V|AOd+l#pMivpJ@u1J;CA2`qy
zDVB(mHr+BPoz{u7qciy}rwha3K)9^KpcV9W4swhJIYtd>2|sEaI#@WNqDwso$JaOf
zO%MDHqY(B2+O%sjyEjqDyQ-h-i;gW=KefZ3TY6kL-XgUJW;8sft)pSdS_WTLaiuPz
zMkm`YIwd``Rc0_57AT7bQDYknaknTy&~;p4k0B97+tpWLkgvGHShn%oZSI9XyRLDF
z&dqYXGmXZ1avCV80Nooo$Q23{ITa?N?vXZ9v)dA$pzdZ34{}UbTbpvJZR%RQBkd-&
z9qiHdnc245m8ylJwncZKMb|D&@Pm4*pW53usPJ&qhOTUB3a$<CEvIYs$P=!`>s$Ik
zzcltB{xstRwiz00P{FQe40`S?m5~CLpk-}@K_%THX?#3u&=g*1ExC{<%8reWj;2S)
zv!{tFr95<T%Rzv`NQ707+Gu*bJrO->26a8H34UsyXYe3X$!VxA_XKvYIDzgMfo1AY
z3k0DqWCs_+5eK)YW2v{zE>@!6VbO9>vt*s-WnI|5F1S0ahPNJGo+%X94;I81E)=Kd
z@{8sCRAIhQULlCg6v~SiN+p_EDpH=7^Tl#ux-y?H(sHG^yi~f7Au5S5L$JSwMsTT6
zo?EJvDZjWvm-59T)U8Z}QP6EdbVR>-queYBZ{V%RZKeHzg7TI)>0~gauRIC0kc_&1
z3w7DgV7wk(Y2gJD=zF3OScY~viItvy%ABwhq|j%3WFd<^>Il!rl=gH-#JAl11PQ6r
zaUf4)gJ4IoLxb?31LtE4<n$8sxBa_c3UT<SXZ#jis`YN;b3>cuM`lfEU#QM)-2Y}U
z^wD@eM#3vJWuvDZpGH=Ave{CUoKDeLHhad<pJ!=7)>~ZQPZ`|jrs!~l>|DFPF?hig
oHOcLa<2BERdOV?PUW;vegrlN6ITHBZNKM)!!tRjU!@b(<Z?Q(J`2YX_
diff --git a/dak/dak.py b/dak/dak.py
index 92753ec..d04eebc 100755
--- a/dak/dak.py
+++ b/dak/dak.py
@@ -138,6 +138,8 @@ def init():
"Check for users with no packages in the archive"),
("import-archive",
"Populate SQL database based from an archive tree"),
+ ("import-contents",
+ "Populate SQL database with Contents files"),
("import-keyring",
"Populate fingerprint/uid table based on a new/updated keyring"),
("import-ldap-fingerprints",
diff --git a/dak/dakdb/update2.py b/dak/dakdb/update2.py
new file mode 100644
index 0000000..ec9650b
--- /dev/null
+++ b/dak/dakdb/update2.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+# Debian Archive Kit Database Update Script 2
+# Copyright (C) 2009 Michael Casadevall <mcasadevall@debian.org>
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+# <tomv_w> really, if we want to screw ourselves, let's find a better way.
+# <Ganneff> rm -rf /srv/ftp.debian.org
+
+################################################################################
+
+import psycopg2, time
+
+################################################################################
+
+def do_update(self):
+ print "Adding content fields to database"
+
+ try:
+ c = self.db.cursor()
+ c.execute("""CREATE TABLE content_file_paths (
+ id serial primary key not null,
+ path text unique not null
+ )""")
+
+ c.execute("""CREATE TABLE content_file_names (
+ id serial primary key not null,
+ file text unique not null
+ )""")
+
+ c.execute("""CREATE TABLE content_associations (
+ id serial not null,
+ binary_pkg int4 not null references binaries(id) on delete cascade,
+ filepath int4 not null references content_file_paths(id) on delete cascade,
+ filename int4 not null references content_file_names(id) on delete cascade
+ );""")
+
+ c.execute("""CREATE FUNCTION comma_concat(text, text) RETURNS text
+ AS $_$select case
+ WHEN $2 is null or $2 = '' THEN $1
+ WHEN $1 is null or $1 = '' THEN $2
+ ELSE $1 || ',' || $2
+ END$_$
+ LANGUAGE sql""")
+
+ c.execute("""CREATE AGGREGATE comma_separated_list (
+ BASETYPE = text,
+ SFUNC = comma_concat,
+ STYPE = text,
+ INITCOND = ''
+ );""")
+
+ c.execute("UPDATE config SET value = '2' WHERE name = 'db_revision'")
+ self.db.commit()
+
+ print "REMINDER: Remember to fully regenerate the Contents files before running import-contents"
+ print ""
+ print "Pausing for five seconds ..."
+ time.sleep (5)
+
+ except psycopg2.ProgrammingError, msg:
+ self.db.rollback()
+ print "FATAL: Unable to apply content table update 2!"
+ print "Error Message: " + str(msg)
+ print "Database changes have been rolled back."
diff --git a/dak/generate_contents.py b/dak/generate_contents.py
index 54b70bd..6d84d16 100755
--- a/dak/generate_contents.py
+++ b/dak/generate_contents.py
@@ -66,7 +66,7 @@ def generate_contents(suites):
h.close()
# Get our suites, and the architectures
- for s in suites:
+ for s in [i.lower() for i in suites]:
suite_id = database.get_suite_id(s)
q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id)
diff --git a/dak/import_contents.py b/dak/import_contents.py
new file mode 100755
index 0000000..945b9ea
--- /dev/null
+++ b/dak/import_contents.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+# Import contents files
+
+# Copyright (C) 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+################################################################################
+
+################################################################################
+
+import sys, os, popen2, tempfile, stat, time, pg
+import re, gzip, apt_pkg
+from daklib import database, utils
+from daklib.dak_exceptions import *
+
+################################################################################
+
+Cnf = None
+projectB = None
+out = None
+AptCnf = None
+
+################################################################################
+
+def usage (exit_code=0):
+ print """Usage: dak import-contents
+Import Contents files
+
+ -h, --help show this help and exit
+ -s, --suite=SUITE only write file lists for this suite
+"""
+ sys.exit(exit_code)
+
+################################################################################
+
+def import_contents(suites):
+ global projectB, Cnf
+
+ # Start transaction
+ projectB.query("BEGIN WORK")
+
+ # Needed to make sure PostgreSQL doesn't freak out on some of the data
+ projectB.query("SET CLIENT_ENCODING TO 'LATIN1'")
+
+ # Get our suites, and the architectures
+ for s in suites:
+ suite_id = database.get_suite_id(s)
+
+ q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id)
+
+ arch_list = [ ]
+ for r in q.getresult():
+ if r[1] != "source" and r[1] != "all":
+ arch_list.append((r[0], r[1]))
+
+ arch_all_id = database.get_architecture_id("all")
+
+ for arch in arch_list:
+ print "Processing %s/%s" % (s, arch[1])
+ arch_id = database.get_architecture_id(arch[1])
+ f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-%s.gz" % (s, arch[1]), "r")
+
+ # Get line count
+ lines = f.readlines()
+ num_of_lines = len(lines)
+
+ # Ok, the file cursor is at the first entry, now comes the fun 'lets parse' bit
+ lines_processed = 0
+ found_header = False
+
+ for line in lines:
+ if found_header == False:
+ if not line:
+ print "Unable to find end of Contents-%s.gz header!" % ( arch[1])
+ sys.exit(255)
+
+ lines_processed += 1
+ p = re.compile('^FILE')
+ if p.match(line):
+ found_header = True
+ continue
+
+ # The format is simple enough, *filename*, *section/package1,section/package2,etc*
+ # Each file appears once per Contents file, so first, use some regex match
+ # to split the two bits
+
+ # Print out progress bar
+ print "\rProcessed %d lines of %d (%%%.2f)" % (lines_processed, num_of_lines, (float(lines_processed)/num_of_lines)),
+
+ # regex lifted from packages.d.o code
+ p = re.compile('^(.+?)\s+(\S+)$')
+ matchs = p.findall(line)
+ filename = matchs[0][0]
+ packages = matchs[0][1].split(',')
+
+ # Iterate through each file's packages
+ for package in packages:
+ p = re.compile('(\S+)/(\S+)$')
+ matchs = p.findall(package)
+
+ # Needed since the DB is unicode, and these files
+ # are ASCII
+ section_name = matchs[0][0]
+ package_name = matchs[0][1]
+
+ section_id = database.get_section_id(section_name)
+ package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_id)
+
+ if package_id == None:
+ # Likely got an arch all package
+ package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_all_id)
+
+ database.insert_content_path(package_id, filename)
+
+ lines_processed += 1
+ f.close()
+
+ # Commit work
+ print "Committing to database ..."
+ projectB.query("COMMIT")
+
+################################################################################
+
+def main ():
+ global Cnf, projectB, out
+ out = sys.stdout
+
+ Cnf = utils.get_conf()
+
+ Arguments = [('h',"help","Import-Contents::Options::Help"),
+ ('s',"suite","Import-Contents::Options::Suite","HasArg"),
+ ]
+
+ for i in [ "help", "suite" ]:
+ if not Cnf.has_key("Import-Contents::Options::%s" % (i)):
+ Cnf["Import-Contents::Options::%s" % (i)] = ""
+
+ suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
+ Options = Cnf.SubTree("Import-Contents::Options")
+
+ if Options["Help"]:
+ usage()
+
+ if Options["Suite"]:
+ suites = utils.split_args(Options["Suite"])
+ else:
+ suites = Cnf.SubTree("Suite").List()
+
+ projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
+ database.init(Cnf, projectB)
+
+ import_contents(suites)
+
+#######################################################################################
+
+if __name__ == '__main__':
+ main()
diff --git a/dak/update_db.py b/dak/update_db.py
index e59a558..7d89e6b 100755
--- a/dak/update_db.py
+++ b/dak/update_db.py
@@ -36,7 +36,7 @@ from daklib import utils
Cnf = None
projectB = None
-required_database_schema = 1
+required_database_schema = 2
################################################################################
diff --git a/daklib/database.py b/daklib/database.py
index 1f65960..c39c83b 100755
--- a/daklib/database.py
+++ b/daklib/database.py
@@ -45,6 +45,7 @@ suite_version_cache = {}
suite_bin_version_cache = {}
content_path_id_cache = {}
content_file_id_cache = {}
+insert_contents_file_cache = {}
################################################################################
@@ -250,14 +251,14 @@ def get_suite_version(source, suite, arch):
return version
-def get_latest_binary_version_id(binary, suite, arch):
+def get_latest_binary_version_id(binary, section, suite, arch):
global suite_bin_version_cache
- cache_key = "%s_%s" % (binary, suite)
+ cache_key = "%s_%s_%s_%s" % (binary, section, suite, arch)
if suite_bin_version_cache.has_key(cache_key):
return suite_bin_version_cache[cache_key]
- q = projectB.query("SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d'" % (binary, int(arch), int(suite)))
+ q = projectB.query("SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section)))
highest_bid, highest_version = None, None
@@ -266,6 +267,7 @@ def get_latest_binary_version_id(binary, suite, arch):
highest_bid = bi[0]
highest_version = bi[1]
+ suite_bin_version_cache[cache_key] = highest_bid
return highest_bid
################################################################################
@@ -459,6 +461,14 @@ def get_or_set_contents_path_id(path):
################################################################################
def insert_content_path(bin_id, fullpath):
+ global insert_contents_file_cache
+ cache_key = "%s_%s" % (bin_id, fullpath)
+
+ # have we seen these contents before?
+ # probably only relevant during package import
+ if insert_contents_file_cache.has_key(cache_key):
+ return
+
# split the path into basename, and pathname
(path, file) = os.path.split(fullpath)
@@ -466,6 +476,13 @@ def insert_content_path(bin_id, fullpath):
file_id = get_or_set_contents_file_id(file)
path_id = get_or_set_contents_path_id(path)
+ # Determine if we're inserting a duplicate row
+ q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id))
+ if q.getresult():
+ # Yes we are, return without doing the insert
+ print "Inserting dup row"
+ return
+
# Put them into content_assiocations
projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id))
return
diff --git a/docs/README.quotes b/docs/README.quotes
index 3568ae7..c696fbe 100644
--- a/docs/README.quotes
+++ b/docs/README.quotes
@@ -344,3 +344,9 @@ Canadians: This is a lighthouse. Your call.
<helix> elmo: I can't believe people pay you to fix computers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+* Ganneff ponders how to best write the text to -devel. (need to tell em in
+ case they find more bugs). "We fixed the fucking idiotic broken implementation
+ to be less so" is probably not the nicest, even if perfect valid, way to say so
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--
1.5.6.5
Reply to: