[dak/master] Add by-hash support
Add a per-suite boolean to enable by-hash; store the by-hash files in
the db, and record when they stopped being referenced, so that
clean-suites can delete them after the archive's stayofexecution time.
In generate-release, where we have checksums for all the things,
hardlink files to the by-hash dir for each of the suite's configured
hash methods.
Signed-off-by: Julien Cristau <jcristau@debian.org>
---
changes in v2:
- use archive.stayofexecution as delay before removing files from
by-hash
- don't assume any particular ordering for suite.checksums
changes in v3:
- rebase on latest master, update115 is now update116
- handle missing files in clean_byhash
changes in v4:
- use hardlinks instead of symlinks
- don't initialize `unreferenced` to its default value
- in clean_byhash, remove useless fetchall, and handle ENOENT from
unlink instead of checking for existence beforehand
---
dak/clean_suites.py | 32 ++++++++++++++++++++++++++++++++
dak/dakdb/update116.py | 38 ++++++++++++++++++++++++++++++++++++++
dak/generate_releases.py | 46 +++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 115 insertions(+), 1 deletion(-)
create mode 100644 dak/dakdb/update116.py
diff --git a/dak/clean_suites.py b/dak/clean_suites.py
index d5b0fc4..ac35437 100755
--- a/dak/clean_suites.py
+++ b/dak/clean_suites.py
@@ -34,6 +34,7 @@
################################################################################
+import errno
import os
import stat
import sys
@@ -384,6 +385,36 @@ SELECT f.id, f.fingerprint FROM fingerprint f
################################################################################
+def clean_byhash(now_date, session):
+ Logger.log(["Cleaning out unused by-hash files..."])
+
+ q = session.execute("""
+ DELETE FROM hashfile h
+ USING suite s, archive a
+ WHERE s.id = h.suite_id
+ AND a.id = s.archive_id
+ AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
+ RETURNING a.path, s.suite_name, h.path""")
+ count = q.rowcount
+
+ if not Options["No-Action"]:
+ for base, suite, path in q:
+ filename = os.path.join(base, 'dists', suite, path)
+ try:
+ os.unlink(filename)
+ except OSError as exc:
+ if exc.errno != errno.ENOENT:
+ raise
+ Logger.log(['database referred to non-existing file', filename])
+ else:
+ Logger.log(['delete hashfile', suite, path])
+ session.commit()
+
+ if count > 0:
+ Logger.log(["total", count])
+
+################################################################################
+
def clean_empty_directories(session):
"""
Removes empty directories from pool directories.
@@ -486,6 +517,7 @@ def main():
clean(now_date, archives, max_delete, session)
clean_maintainers(now_date, session)
clean_fingerprints(now_date, session)
+ clean_byhash(now_date, session)
clean_empty_directories(session)
session.rollback()
diff --git a/dak/dakdb/update116.py b/dak/dakdb/update116.py
new file mode 100644
index 0000000..7baf052
--- /dev/null
+++ b/dak/dakdb/update116.py
@@ -0,0 +1,38 @@
+"""
+Add support for by-hash with a new table and per-suite boolean
+
+@contact: Debian FTP Master <ftpmaster@debian.org>
+@copyright: 2016, Julien Cristau <jcristau@debian.org>
+@license: GNU General Public License version 2 or later
+"""
+
+import psycopg2
+from daklib.dak_exceptions import DBUpdateError
+from daklib.config import Config
+
+def do_update(self):
+ """Add column to store whether to generate by-hash things per suite,
+ add table to store when by-hash files stopped being referenced
+ """
+ print __doc__
+ try:
+ c = self.db.cursor()
+
+ c.execute("ALTER TABLE suite ADD COLUMN byhash BOOLEAN DEFAULT false")
+
+ c.execute("""
+ CREATE TABLE hashfile (
+ suite_id INTEGER NOT NULL REFERENCES suite(id) ON DELETE CASCADE,
+ path TEXT NOT NULL,
+ unreferenced TIMESTAMP,
+ PRIMARY KEY (suite_id, path)
+ )
+ """)
+
+ c.execute("UPDATE config SET value = '116' WHERE name = 'db_revision'")
+
+ self.db.commit()
+
+ except psycopg2.ProgrammingError as msg:
+ self.db.rollback()
+ raise DBUpdateError('Unable to apply sick update 116, rollback issued. Error message : %s' % (str(msg)))
diff --git a/dak/generate_releases.py b/dak/generate_releases.py
index c359177..82ff639 100755
--- a/dak/generate_releases.py
+++ b/dak/generate_releases.py
@@ -37,6 +37,7 @@ import stat
import time
import gzip
import bz2
+import errno
import apt_pkg
import subprocess
from tempfile import mkstemp, mkdtemp
@@ -151,7 +152,9 @@ class ReleaseWriter(object):
# Boolean stuff. If we find it true in database, write out "yes" into the release file
boolattrs = ( ('NotAutomatic', 'notautomatic'),
- ('ButAutomaticUpgrades', 'butautomaticupgrades') )
+ ('ButAutomaticUpgrades', 'butautomaticupgrades'),
+ ('Acquire-By-Hash', 'byhash'),
+ )
cnf = Config()
@@ -284,6 +287,47 @@ class ReleaseWriter(object):
out.close()
os.rename(outfile + '.new', outfile)
+ if suite.byhash:
+ query = """
+ UPDATE hashfile SET unreferenced = CURRENT_TIMESTAMP
+ WHERE suite_id = :id AND unreferenced IS NULL"""
+ session.execute(query, {'id': suite.suite_id})
+
+ for filename in fileinfo:
+ if not os.path.exists(filename):
+ # probably an uncompressed index we didn't generate
+ continue
+
+ for h in hashfuncs:
+ hashfile = os.path.join(os.path.dirname(filename), 'by-hash', h, fileinfo[filename][h])
+ query = "SELECT 1 FROM hashfile WHERE path = :p AND suite_id = :id"
+ q = session.execute(
+ query,
+ {'p': hashfile, 'id': suite.suite_id})
+ if q.rowcount:
+ session.execute('''
+ UPDATE hashfile SET unreferenced = NULL
+ WHERE path = :p and suite_id = :id''',
+ {'p': hashfile, 'id': suite.suite_id})
+ else:
+ session.execute('''
+ INSERT INTO hashfile (path, suite_id)
+ VALUES (:p, :id)''',
+ {'p': hashfile, 'id': suite.suite_id})
+
+ try:
+ os.makedirs(os.path.dirname(hashfile))
+ except OSError as exc:
+ if exc.errno != errno.EEXIST:
+ raise
+ try:
+ os.link(filename, hashfile)
+ except OSError as exc:
+ if exc.errno != errno.EEXIST:
+ raise
+
+ session.commit()
+
sign_release_dir(suite, os.path.dirname(outfile))
os.chdir(oldcwd)
--
2.1.4
Reply to: