From 391f5ec09a119131dc846b796ca791f4cecc69e4 Mon Sep 17 00:00:00 2001 From: Julien Cristau Date: Wed, 27 Apr 2016 10:06:08 +0200 Subject: [PATCH] Add by-hash support Add a per-suite boolean to enable by-hash; store the by-hash files in the db, and record when they stopped being referenced, so that clean-suites can delete them after the archive's stayofexecution time. In generate-release, where we have checksums for all the things, hardlink files to the by-hash dir for each of the suite's configured hash methods. Signed-off-by: Julien Cristau --- changes in v2: - use archive.stayofexecution as delay before removing files from by-hash - don't assume any particular ordering for suite.checksums changes in v3: - rebase on latest master, update115 is now update116 - handle missing files in clean_byhash changes in v4: - use hardlinks instead of symlinks - don't initialize `unreferenced` to its default value - in clean_byhash, remove useless fetchall, and handle ENOENT from unlink instead of checking for existence beforehand --- dak/clean_suites.py | 32 ++++++++++++++++++++++++++++ dak/dakdb/update116.py | 38 +++++++++++++++++++++++++++++++++ dak/generate_releases.py | 46 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 dak/dakdb/update116.py diff --git a/dak/clean_suites.py b/dak/clean_suites.py index d5b0fc4e..ac354371 100755 --- a/dak/clean_suites.py +++ b/dak/clean_suites.py @@ -34,6 +34,7 @@ ################################################################################ +import errno import os import stat import sys @@ -384,6 +385,36 @@ SELECT f.id, f.fingerprint FROM fingerprint f ################################################################################ +def clean_byhash(now_date, session): + Logger.log(["Cleaning out unused by-hash files..."]) + + q = session.execute(""" + DELETE FROM hashfile h + USING suite s, archive a + WHERE s.id = h.suite_id + AND a.id = s.archive_id + AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP + RETURNING a.path, s.suite_name, h.path""") + count = q.rowcount + + if not Options["No-Action"]: + for base, suite, path in q: + filename = os.path.join(base, 'dists', suite, path) + try: + os.unlink(filename) + except OSError as exc: + if exc.errno != errno.ENOENT: + raise + Logger.log(['database referred to non-existing file', filename]) + else: + Logger.log(['delete hashfile', suite, path]) + session.commit() + + if count > 0: + Logger.log(["total", count]) + +################################################################################ + def clean_empty_directories(session): """ Removes empty directories from pool directories. @@ -486,6 +517,7 @@ def main(): clean(now_date, archives, max_delete, session) clean_maintainers(now_date, session) clean_fingerprints(now_date, session) + clean_byhash(now_date, session) clean_empty_directories(session) session.rollback() diff --git a/dak/dakdb/update116.py b/dak/dakdb/update116.py new file mode 100644 index 00000000..7baf0526 --- /dev/null +++ b/dak/dakdb/update116.py @@ -0,0 +1,38 @@ +""" +Add support for by-hash with a new table and per-suite boolean + +@contact: Debian FTP Master +@copyright: 2016, Julien Cristau +@license: GNU General Public License version 2 or later +""" + +import psycopg2 +from daklib.dak_exceptions import DBUpdateError +from daklib.config import Config + +def do_update(self): + """Add column to store whether to generate by-hash things per suite, + add table to store when by-hash files stopped being referenced + """ + print __doc__ + try: + c = self.db.cursor() + + c.execute("ALTER TABLE suite ADD COLUMN byhash BOOLEAN DEFAULT false") + + c.execute(""" + CREATE TABLE hashfile ( + suite_id INTEGER NOT NULL REFERENCES suite(id) ON DELETE CASCADE, + path TEXT NOT NULL, + unreferenced TIMESTAMP, + PRIMARY KEY (suite_id, path) + ) + """) + + c.execute("UPDATE config SET value = '116' WHERE name = 'db_revision'") + + self.db.commit() + + except psycopg2.ProgrammingError as msg: + self.db.rollback() + raise DBUpdateError('Unable to apply sick update 116, rollback issued. Error message : %s' % (str(msg))) diff --git a/dak/generate_releases.py b/dak/generate_releases.py index c3591772..82ff6394 100755 --- a/dak/generate_releases.py +++ b/dak/generate_releases.py @@ -37,6 +37,7 @@ import stat import time import gzip import bz2 +import errno import apt_pkg import subprocess from tempfile import mkstemp, mkdtemp @@ -151,7 +152,9 @@ class ReleaseWriter(object): # Boolean stuff. If we find it true in database, write out "yes" into the release file boolattrs = ( ('NotAutomatic', 'notautomatic'), - ('ButAutomaticUpgrades', 'butautomaticupgrades') ) + ('ButAutomaticUpgrades', 'butautomaticupgrades'), + ('Acquire-By-Hash', 'byhash'), + ) cnf = Config() @@ -284,6 +287,47 @@ class ReleaseWriter(object): out.close() os.rename(outfile + '.new', outfile) + if suite.byhash: + query = """ + UPDATE hashfile SET unreferenced = CURRENT_TIMESTAMP + WHERE suite_id = :id AND unreferenced IS NULL""" + session.execute(query, {'id': suite.suite_id}) + + for filename in fileinfo: + if not os.path.exists(filename): + # probably an uncompressed index we didn't generate + continue + + for h in hashfuncs: + hashfile = os.path.join(os.path.dirname(filename), 'by-hash', h, fileinfo[filename][h]) + query = "SELECT 1 FROM hashfile WHERE path = :p AND suite_id = :id" + q = session.execute( + query, + {'p': hashfile, 'id': suite.suite_id}) + if q.rowcount: + session.execute(''' + UPDATE hashfile SET unreferenced = NULL + WHERE path = :p and suite_id = :id''', + {'p': hashfile, 'id': suite.suite_id}) + else: + session.execute(''' + INSERT INTO hashfile (path, suite_id) + VALUES (:p, :id)''', + {'p': hashfile, 'id': suite.suite_id}) + + try: + os.makedirs(os.path.dirname(hashfile)) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + try: + os.link(filename, hashfile) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + + session.commit() + sign_release_dir(suite, os.path.dirname(outfile)) os.chdir(oldcwd) -- 2.39.5