git.decadent.org.uk Git - dak.git/commitdiff
Add by-hash support
author Julien Cristau <jcristau@debian.org>
Wed, 27 Apr 2016 08:06:08 +0000 (10:06 +0200)
committer Julien Cristau <jcristau@debian.org>
Mon, 16 May 2016 15:35:52 +0000 (17:35 +0200)
Add a per-suite boolean to enable by-hash; store the by-hash files in
the db, and record when they stopped being referenced, so that
clean-suites can delete them after the archive's stayofexecution time.

In generate-release, where we have checksums for all the things,
hardlink files to the by-hash dir for each of the suite's configured
hash methods.

Signed-off-by: Julien Cristau <jcristau@debian.org>
---
changes in v2:
- use archive.stayofexecution as delay before removing files from
  by-hash
- don't assume any particular ordering for suite.checksums

changes in v3:
- rebase on latest master, update115 is now update116
- handle missing files in clean_byhash

changes in v4:
- use hardlinks instead of symlinks
- don't initialize `unreferenced` to its default value
- in clean_byhash, remove useless fetchall, and handle ENOENT from
  unlink instead of checking for existence beforehand

dak/clean_suites.py
dak/dakdb/update116.py [new file with mode: 0644]
dak/generate_releases.py

diff --git a/dak/clean_suites.py b/dak/clean_suites.py
index d5b0fc4e4eada96a973a7c5c418307cae0e73780..ac354371fee0a8fdd3b43ae437a96405dcae7b52 100755 (executable)
@@ -34,6 +34,7 @@
 
 ################################################################################
 
+import errno
 import os
 import stat
 import sys
@@ -384,6 +385,36 @@ SELECT f.id, f.fingerprint FROM fingerprint f
 
 ################################################################################
 
+def clean_byhash(now_date, session):  # drop expired by-hash rows and unlink their files; now_date is not used in this body
+    Logger.log(["Cleaning out unused by-hash files..."])
+
+    q = session.execute("""
+        DELETE FROM hashfile h
+        USING suite s, archive a
+        WHERE s.id = h.suite_id
+          AND a.id = s.archive_id
+          AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
+        RETURNING a.path, s.suite_name, h.path""")  # deletes rows whose unreferenced time plus the archive's stayofexecution has passed, returning what is needed to locate each file
+    count = q.rowcount  # number of hashfile rows the DELETE affected
+
+    if not Options["No-Action"]:  # with No-Action set, nothing is unlinked and the DELETE is not committed here
+        for base, suite, path in q:
+            filename = os.path.join(base, 'dists', suite, path)  # <archive path>/dists/<suite name>/<hashfile path>
+            try:
+                os.unlink(filename)
+            except OSError as exc:
+                if exc.errno != errno.ENOENT:  # only an already-missing file is tolerated; any other error propagates
+                    raise
+                Logger.log(['database referred to non-existing file', filename])
+            else:
+                Logger.log(['delete hashfile', suite, path])  # logged only when the unlink succeeded
+        session.commit()  # the row deletion is committed after the files have been unlinked
+
+    if count > 0:  # outside the No-Action check, so the total is logged in both modes
+        Logger.log(["total", count])
+
+################################################################################
+
 def clean_empty_directories(session):
     """
     Removes empty directories from pool directories.
@@ -486,6 +517,7 @@ def main():
     clean(now_date, archives, max_delete, session)
     clean_maintainers(now_date, session)
     clean_fingerprints(now_date, session)
+    clean_byhash(now_date, session)
     clean_empty_directories(session)
 
     session.rollback()
diff --git a/dak/dakdb/update116.py b/dak/dakdb/update116.py
new file mode 100644 (file)
index 0000000..7baf052
--- /dev/null
@@ -0,0 +1,38 @@
+"""
+Add support for by-hash with a new table and per-suite boolean
+
+@contact: Debian FTP Master <ftpmaster@debian.org>
+@copyright: 2016, Julien Cristau <jcristau@debian.org>
+@license: GNU General Public License version 2 or later
+"""
+
+import psycopg2
+from daklib.dak_exceptions import DBUpdateError
+from daklib.config import Config
+
+def do_update(self):
+    """Add column to store whether to generate by-hash things per suite,
+    add table to store when by-hash files stopped being referenced
+    """
+    print __doc__  # Python 2 print statement; emits the module docstring, not this function's
+    try:
+        c = self.db.cursor()
+
+        c.execute("ALTER TABLE suite ADD COLUMN byhash BOOLEAN DEFAULT false")  # by-hash is off by default for every suite
+
+        c.execute("""
+            CREATE TABLE hashfile (
+                suite_id INTEGER NOT NULL REFERENCES suite(id) ON DELETE CASCADE,
+                path TEXT NOT NULL,
+                unreferenced TIMESTAMP,
+                PRIMARY KEY (suite_id, path)
+            )
+             """)  # one row per (suite, path); unreferenced has no default, so it is NULL unless set explicitly
+
+        c.execute("UPDATE config SET value = '116' WHERE name = 'db_revision'")  # record the new schema revision
+
+        self.db.commit()  # the column, the table and the revision bump are committed together
+
+    except psycopg2.ProgrammingError as msg:  # only ProgrammingError is handled; other exceptions propagate without this rollback
+        self.db.rollback()
+        raise DBUpdateError('Unable to apply sick update 116, rollback issued. Error message : %s' % (str(msg)))
diff --git a/dak/generate_releases.py b/dak/generate_releases.py
index c359177235810258910b788ea09a85857ebc9cfe..82ff6394bffa3b92765d6f403a121c3b939c10cc 100755 (executable)
@@ -37,6 +37,7 @@ import stat
 import time
 import gzip
 import bz2
+import errno
 import apt_pkg
 import subprocess
 from tempfile import mkstemp, mkdtemp
@@ -151,7 +152,9 @@ class ReleaseWriter(object):
 
         # Boolean stuff. If we find it true in database, write out "yes" into the release file
         boolattrs = ( ('NotAutomatic',         'notautomatic'),
-                      ('ButAutomaticUpgrades', 'butautomaticupgrades') )
+                      ('ButAutomaticUpgrades', 'butautomaticupgrades'),
+                      ('Acquire-By-Hash',      'byhash'),
+                    )
 
         cnf = Config()
 
@@ -284,6 +287,47 @@ class ReleaseWriter(object):
         out.close()
         os.rename(outfile + '.new', outfile)
 
+        if suite.byhash:
+            query = """
+                UPDATE hashfile SET unreferenced = CURRENT_TIMESTAMP
+                WHERE suite_id = :id AND unreferenced IS NULL"""
+            session.execute(query, {'id': suite.suite_id})
+
+            for filename in fileinfo:
+                if not os.path.exists(filename):
+                    # probably an uncompressed index we didn't generate
+                    continue
+
+                for h in hashfuncs:
+                    hashfile = os.path.join(os.path.dirname(filename), 'by-hash', h, fileinfo[filename][h])
+                    query = "SELECT 1 FROM hashfile WHERE path = :p AND suite_id = :id"
+                    q = session.execute(
+                            query,
+                            {'p': hashfile, 'id': suite.suite_id})
+                    if q.rowcount:
+                        session.execute('''
+                            UPDATE hashfile SET unreferenced = NULL
+                            WHERE path = :p and suite_id = :id''',
+                            {'p': hashfile, 'id': suite.suite_id})
+                    else:
+                        session.execute('''
+                            INSERT INTO hashfile (path, suite_id)
+                            VALUES (:p, :id)''',
+                            {'p': hashfile, 'id': suite.suite_id})
+
+                    try:
+                        os.makedirs(os.path.dirname(hashfile))
+                    except OSError as exc:
+                        if exc.errno != errno.EEXIST:
+                            raise
+                    try:
+                        os.link(filename, hashfile)
+                    except OSError as exc:
+                        if exc.errno != errno.EEXIST:
+                            raise
+
+                session.commit()
+
         sign_release_dir(suite, os.path.dirname(outfile))
 
         os.chdir(oldcwd)