[PATCH v3] Add by-hash support
Add a per-suite boolean to enable by-hash; record the paths of the
by-hash files in the db, along with the time they stopped being
referenced, so that clean-suites can delete them after the archive's
stayofexecution time.
In generate-releases, where we have checksums for all the index files,
move each file to the by-hash dir for the suite's strongest hash
function, and add symlinks in the original location and in the other
by-hash dirs.
Signed-off-by: Julien Cristau <jcristau@debian.org>
---
This is also at http://anonscm.debian.org/cgit/users/jcristau/dak.git/commit/?h=by-hash-v3
Review comments welcome :)
changes in v2:
- use archive.stayofexecution as delay before removing files from by-hash
- don't assume any particular ordering for suite.checksums
changes in v3:
- rebase on latest master; update115 is now update116
- handle missing files in clean_byhash
dak/clean_suites.py | 28 ++++++++++++++++++++++++
dak/dakdb/update116.py | 39 ++++++++++++++++++++++++++++++++++
dak/generate_releases.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 121 insertions(+), 1 deletion(-)
create mode 100644 dak/dakdb/update116.py
diff --git a/dak/clean_suites.py b/dak/clean_suites.py
index d5b0fc4..51800a8 100755
--- a/dak/clean_suites.py
+++ b/dak/clean_suites.py
@@ -384,6 +384,33 @@ SELECT f.id, f.fingerprint FROM fingerprint f
################################################################################
+def clean_byhash(now_date, session):
+ Logger.log(["Cleaning out unused by-hash files..."])
+
+ q = session.execute("""
+ DELETE FROM hashfile h
+ USING suite s, archive a
+ WHERE s.id = h.suite_id
+ AND a.id = s.archive_id
+ AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
+ RETURNING a.path, s.suite_name, h.path""")
+ count = q.rowcount
+
+ if not Options["No-Action"]:
+ for base, suite, path in q.fetchall():
+ filename = os.path.join(base, 'dists', suite, path)
+ if os.path.isfile(filename):
+ Logger.log(['delete hashfile', suite, path])
+ os.unlink(filename)
+ else:
+ Logger.log(['database referred to non-existing file', filename])
+ session.commit()
+
+ if count > 0:
+ Logger.log(["total", count])
+
+################################################################################
+
def clean_empty_directories(session):
"""
Removes empty directories from pool directories.
@@ -486,6 +513,7 @@ def main():
clean(now_date, archives, max_delete, session)
clean_maintainers(now_date, session)
clean_fingerprints(now_date, session)
+ clean_byhash(now_date, session)
clean_empty_directories(session)
session.rollback()
diff --git a/dak/dakdb/update116.py b/dak/dakdb/update116.py
new file mode 100644
index 0000000..d5fa3cb
--- /dev/null
+++ b/dak/dakdb/update116.py
@@ -0,0 +1,39 @@
+"""
+Add column to store whether to generate by-hash things per suite
+Add table to store when by-hash files stopped being referenced
+
+@contact: Debian FTP Master <ftpmaster@debian.org>
+@copyright: 2016, Julien Cristau <jcristau@debian.org>
+@license: GNU General Public License version 2 or later
+"""
+
+import psycopg2
+from daklib.dak_exceptions import DBUpdateError
+from daklib.config import Config
+
+def do_update(self):
+ """Add column to store whether to generate by-hash things per suite,
+ add table to store when by-hash files stopped being referenced
+ """
+ print __doc__
+ try:
+ c = self.db.cursor()
+
+ c.execute("ALTER TABLE suite ADD COLUMN byhash BOOLEAN DEFAULT false")
+
+ c.execute("""
+ CREATE TABLE hashfile (
+ suite_id INTEGER NOT NULL REFERENCES suite(id) ON DELETE CASCADE,
+ path TEXT NOT NULL,
+ unreferenced TIMESTAMP,
+ PRIMARY KEY (suite_id, path)
+ )
+ """)
+
+ c.execute("UPDATE config SET value = '116' WHERE name = 'db_revision'")
+
+ self.db.commit()
+
+ except psycopg2.ProgrammingError as msg:
+ self.db.rollback()
+ raise DBUpdateError('Unable to apply sick update 116, rollback issued. Error message : %s' % (str(msg)))
diff --git a/dak/generate_releases.py b/dak/generate_releases.py
index c359177..b557093 100755
--- a/dak/generate_releases.py
+++ b/dak/generate_releases.py
@@ -37,6 +37,7 @@ import stat
import time
import gzip
import bz2
+import errno
import apt_pkg
import subprocess
from tempfile import mkstemp, mkdtemp
@@ -151,7 +152,9 @@ class ReleaseWriter(object):
# Boolean stuff. If we find it true in database, write out "yes" into the release file
boolattrs = ( ('NotAutomatic', 'notautomatic'),
- ('ButAutomaticUpgrades', 'butautomaticupgrades') )
+ ('ButAutomaticUpgrades', 'butautomaticupgrades'),
+ ('Acquire-By-Hash', 'byhash'),
+ )
cnf = Config()
@@ -284,6 +287,56 @@ class ReleaseWriter(object):
out.close()
os.rename(outfile + '.new', outfile)
+ if suite.byhash:
+ query = """
+ UPDATE hashfile SET unreferenced = CURRENT_TIMESTAMP
+ WHERE suite_id = :id AND unreferenced IS NULL"""
+ session.execute(query, {'id': suite.suite_id})
+
+ hashes = filter(lambda h: h in hashfuncs, ('MD5Sum', 'SHA1', 'SHA256'))
+ strong_hash = hashes[-1]
+ for filename in fileinfo:
+ if not os.path.exists(filename):
+ # probably an uncompressed index we didn't generate
+ continue
+
+ for h in hashes:
+ hashfile = os.path.join(os.path.dirname(filename), 'by-hash', h, fileinfo[filename][h])
+ query = "SELECT 1 FROM hashfile WHERE path = :p AND suite_id = :id"
+ q = session.execute(
+ query,
+ {'p': hashfile, 'id': suite.suite_id})
+ if q.rowcount:
+ session.execute('''
+ UPDATE hashfile SET unreferenced = NULL
+ WHERE path = :p and suite_id = :id''',
+ {'p': hashfile, 'id': suite.suite_id})
+ else:
+ session.execute('''
+ INSERT INTO hashfile (path, suite_id, unreferenced)
+ VALUES (:p, :id, NULL)''',
+ {'p': hashfile, 'id': suite.suite_id})
+
+ try:
+ os.makedirs(os.path.dirname(hashfile))
+ except OSError as exc:
+ if exc.errno != errno.EEXIST:
+ raise
+ if h == strong_hash:
+ with open(filename, 'rb') as src:
+ contents = src.read()
+ with open(hashfile + '.new', 'wb') as dst:
+ dst.write(contents)
+ os.rename(hashfile + '.new', hashfile)
+ os.unlink(filename)
+ os.symlink(os.path.join('by-hash', h, fileinfo[filename][h]),
+ filename)
+ elif not os.path.exists(hashfile):
+ os.symlink(os.path.join('..', strong_hash, fileinfo[filename][strong_hash]),
+ hashfile)
+
+ session.commit()
+
sign_release_dir(suite, os.path.dirname(outfile))
os.chdir(oldcwd)
--
2.8.1
Reply to: