Use correct db_name for MD5 hash
[dak.git] / dak / generate_releases.py
index 8a371e8a8d3dd5a3ff925d4009950614afbea03d..45f172d20df287f8d776d44f2107c5bffce6b4c3 100755 (executable)
@@ -37,10 +37,11 @@ import stat
 import time
 import gzip
 import bz2
+import errno
 import apt_pkg
+import subprocess
 from tempfile import mkstemp, mkdtemp
 import commands
-from multiprocessing import Pool, TimeoutError
 from sqlalchemy.orm import object_session
 
 from daklib import utils, daklog
@@ -48,10 +49,11 @@ from daklib.regexes import re_gensubrelease, re_includeinrelease
 from daklib.dak_exceptions import *
 from daklib.dbconn import *
 from daklib.config import Config
+from daklib.dakmultiprocessing import DakProcessPool, PROC_STATUS_SUCCESS
+import daklib.daksubprocess
 
 ################################################################################
 Logger = None                  #: Our logging object
-results = []                   #: Results of the subprocesses
 
 ################################################################################
 
@@ -61,24 +63,21 @@ def usage (exit_code=0):
     print """Usage: dak generate-releases [OPTIONS]
 Generate the Release files
 
+  -a, --archive=ARCHIVE      process suites in ARCHIVE
   -s, --suite=SUITE(s)       process this suite
                              Default: All suites not marked 'untouchable'
   -f, --force                Allow processing of untouchable suites
                              CAREFUL: Only to be used at (point) release time!
   -h, --help                 show this help and exit
+  -q, --quiet                Don't output progress
 
-SUITE can be a space seperated list, e.g.
+SUITE can be a space separated list, e.g.
    --suite=unstable testing
   """
     sys.exit(exit_code)
 
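A typical invocation with the new --archive filter might look like this (the archive name is assumed for illustration):

    $ dak generate-releases --archive=ftp-master --suite=unstable
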
 ########################################################################
 
-def get_result(arg):
-    global results
-    if arg:
-        results.append(arg)
-
 def sign_release_dir(suite, dirname):
     cnf = Config()
 
@@ -87,7 +86,7 @@ def sign_release_dir(suite, dirname):
         if cnf.has_key("Dinstall::SigningPubKeyring"):
             keyring += " --keyring \"%s\"" % cnf["Dinstall::SigningPubKeyring"]
 
-        arguments = "--no-options --batch --no-tty --armour"
+        arguments = "--no-options --batch --no-tty --armour --personal-digest-preferences=SHA256"
 
         relname = os.path.join(dirname, 'Release')
 
@@ -99,25 +98,165 @@ def sign_release_dir(suite, dirname):
         if os.path.exists(inlinedest):
             os.unlink(inlinedest)
 
-        # We can only use one key for inline signing so use the first one in
-        # the array for consistency
-        firstkey = False
+        # Pass every configured signing key; gpg produces one signature per
+        # --local-user, so all keys end up in both Release.gpg and InRelease.
+        defkeyid = ""
+        for keyid in suite.signingkeys or []:
+            defkeyid += "--local-user %s " % keyid
+
+        os.system("gpg %s %s %s --detach-sign <%s >>%s" %
+                  (keyring, defkeyid, arguments, relname, dest))
+        os.system("gpg %s %s %s --clearsign <%s >>%s" %
+                  (keyring, defkeyid, arguments, relname, inlinedest))
+
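The two os.system calls interpolate only trusted configuration, but a shell-free equivalent of the detached-signature call could look like the sketch below (detach_sign is a hypothetical helper; keyring arguments omitted):

    import subprocess

    def detach_sign(relname, dest, keyids):
        # One signature per --local-user; gpg reads the Release file from
        # stdin and writes the ASCII-armoured signature to stdout.
        cmd = ['gpg', '--no-options', '--batch', '--no-tty', '--armour',
               '--personal-digest-preferences=SHA256', '--detach-sign']
        for keyid in keyids:
            cmd += ['--local-user', keyid]
        with open(relname, 'rb') as stdin, open(dest, 'ab') as stdout:
            subprocess.check_call(cmd, stdin=stdin, stdout=stdout)
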
+class XzFile(object):
+    """Minimal file-like wrapper: decompresses an .xz file by piping it
+    through the xz binary, exposing only the read() method that the
+    GzipFile/BZ2File call sites below rely on."""
+    def __init__(self, filename, mode='r'):
+        self.filename = filename
+
+    def read(self):
+        cmd = ("xz", "-d")
+        with open(self.filename, 'rb') as stdin:
+            process = daklib.daksubprocess.Popen(cmd, stdin=stdin, stdout=subprocess.PIPE)
+            (stdout, _) = process.communicate()
+            return stdout
+
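Only read() is needed, mirroring how the GzipFile and BZ2File objects are consumed later in this diff; for example (filename assumed):

    contents = XzFile('Sources.xz').read()
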
 
-        for keyid in suite.signingkeys:
-            defkeyid = "--default-key %s" % keyid
+class HashFunc(object):
+    def __init__(self, release_field, func, db_name):
+        self.release_field = release_field
+        self.func = func
+        self.db_name = db_name
 
-            os.system("gpg %s %s %s --detach-sign <%s >>%s" %
-                    (keyring, defkeyid, arguments, relname, dest))
+RELEASE_HASHES = [
+    HashFunc('MD5Sum', apt_pkg.md5sum, 'md5sum'),
+    HashFunc('SHA1', apt_pkg.sha1sum, 'sha1'),
+    HashFunc('SHA256', apt_pkg.sha256sum, 'sha256'),
+]
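The db_name values must match the checksum names stored on the suite; later in this diff they act as a filter, which is presumably why this commit corrects MD5's entry to 'md5sum':

    # Only emit hash fields the suite has enabled (matched via db_name):
    hashes = [x for x in RELEASE_HASHES if x.db_name in suite.checksums]
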
 
-            if firstkey:
-                os.system("gpg %s %s %s --clearsign <%s >>%s" %
-                        (keyring, defkeyid, arguments, relname, inlinedest))
-                firstkey = False
 
 class ReleaseWriter(object):
     def __init__(self, suite):
         self.suite = suite
 
+    def suite_path(self):
+        """
+        Absolute path to the suite-specific files.
+        """
+        cnf = Config()
+        suite_suffix = cnf.find("Dinstall::SuiteSuffix", "")
+
+        return os.path.join(self.suite.archive.path, 'dists',
+                            self.suite.suite_name, suite_suffix)
+
+    def suite_release_path(self):
+        """
+        Absolute path where Release files are physically stored.
+        This should be a path that sorts after the dists/ directory.
+        """
+        # TODO: Eventually always create Release in `zzz-dists` to avoid
+        # special cases. However we don't want to move existing Release files
+        # for released suites.
+        # See `create_release_symlinks` below.
+        if not self.suite.byhash:
+            return self.suite_path()
+
+        cnf = Config()
+        suite_suffix = cnf.find("Dinstall::SuiteSuffix", "")
+
+        return os.path.join(self.suite.archive.path, 'zzz-dists',
+                            self.suite.suite_name, suite_suffix)
+
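As an illustration, for a by-hash suite named "testing" in an archive rooted at /srv/archive (path assumed) with an empty SuiteSuffix, the two helpers resolve to:

    # suite_path()          -> /srv/archive/dists/testing
    # suite_release_path()  -> /srv/archive/zzz-dists/testing
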
+    def create_release_symlinks(self):
+        """
+        Create symlinks for Release files.
+        This creates the symlinks for Release files in the `suite_path`
+        to the actual files in `suite_release_path`.
+        """
+        # TODO: Eventually always create the links.
+        # See `suite_release_path` above.
+        if not self.suite.byhash:
+            return
+
+        relpath = os.path.relpath(self.suite_release_path(), self.suite_path())
+        for f in ("Release", "Release.gpg", "InRelease"):
+            source = os.path.join(relpath, f)
+            dest = os.path.join(self.suite_path(), f)
+            if not os.path.islink(dest):
+                # dest may be a stale plain file, or missing entirely on
+                # the first run; only unlink if something is actually there
+                if os.path.exists(dest):
+                    os.unlink(dest)
+            elif os.readlink(dest) == source:
+                continue
+            else:
+                os.unlink(dest)
+            os.symlink(source, dest)
+
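The links are relative, so the archive tree can be moved or bind-mounted without breaking them. With the layout sketched above (paths assumed):

    >>> os.path.relpath('/srv/archive/zzz-dists/testing', '/srv/archive/dists/testing')
    '../../zzz-dists/testing'
    # so dists/testing/Release -> ../../zzz-dists/testing/Release
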
+    def create_output_directories(self):
+        for path in (self.suite_path(), self.suite_release_path()):
+            try:
+                os.makedirs(path)
+            except OSError as e:
+                if e.errno != errno.EEXIST:
+                    raise
+
+    def _update_hashfile_table(self, session, fileinfo, hashes):
+        # Mark all by-hash files as obsolete.  We will undo that for the ones
+        # we still reference later.
+        query = """
+            UPDATE hashfile SET unreferenced = CURRENT_TIMESTAMP
+            WHERE suite_id = :id AND unreferenced IS NULL"""
+        session.execute(query, {'id': self.suite.suite_id})
+
+        if self.suite.byhash:
+            query = "SELECT path FROM hashfile WHERE suite_id = :id"
+            q = session.execute(query, {'id': self.suite.suite_id})
+            known_hashfiles = set(row[0] for row in q)
+            updated = []
+            new = []
+
+            # Update the hashfile table with new or updated files
+            for filename in fileinfo:
+                if not os.path.exists(filename):
+                    # probably an uncompressed index we didn't generate
+                    continue
+                byhashdir = os.path.join(os.path.dirname(filename), 'by-hash')
+                for h in hashes:
+                    field = h.release_field
+                    hashfile = os.path.join(byhashdir, field, fileinfo[filename][field])
+                    if hashfile in known_hashfiles:
+                        updated.append(hashfile)
+                    else:
+                        new.append(hashfile)
+
+            if updated:
+                session.execute("""
+                    UPDATE hashfile SET unreferenced = NULL
+                    WHERE path = ANY(:p) AND suite_id = :id""",
+                    {'p': updated, 'id': self.suite.suite_id})
+            if new:
+                session.execute("""
+                    INSERT INTO hashfile (path, suite_id)
+                    VALUES (:p, :id)""",
+                    [{'p': hashfile, 'id': self.suite.suite_id} for hashfile in new])
+
+        session.commit()
+
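Marking every row unreferenced and then clearing the flag for the paths still in use is a two-phase sweep: whatever stays timestamped is garbage. A hypothetical cleanup pass might look like the following (the grace period and the job that runs it are not part of this diff):

    # Hypothetical: prune by-hash files once they have been unreferenced
    # longer than a grace period, so clients mid-update can still fetch
    # the old objects.
    session.execute("""
        DELETE FROM hashfile
        WHERE unreferenced < CURRENT_TIMESTAMP - INTERVAL '1 day'""")
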
+    def _make_byhash_links(self, fileinfo, hashes):
+        # Create hardlinks in by-hash directories
+        for filename in fileinfo:
+            if not os.path.exists(filename):
+                # probably an uncompressed index we didn't generate
+                continue
+
+            for h in hashes:
+                field = h.release_field
+                hashfile = os.path.join(os.path.dirname(filename), 'by-hash', field, fileinfo[filename][field])
+                try:
+                    os.makedirs(os.path.dirname(hashfile))
+                except OSError as exc:
+                    if exc.errno != errno.EEXIST:
+                        raise
+                try:
+                    os.link(filename, hashfile)
+                except OSError as exc:
+                    if exc.errno != errno.EEXIST:
+                        raise
+
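The hardlinks give APT a stable, content-addressed name for each index, per the Acquire-By-Hash scheme: one link per enabled hash, in a by-hash directory next to the index (digests elided):

    main/binary-amd64/Packages.gz
    main/binary-amd64/by-hash/MD5Sum/<md5 of Packages.gz>
    main/binary-amd64/by-hash/SHA256/<sha256 of Packages.gz>
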
     def generate_release_files(self):
         """
         Generate Release files for the given suite
@@ -135,9 +274,11 @@ class ReleaseWriter(object):
         # fill them in
         attribs = ( ('Origin',      'origin'),
                     ('Label',       'label'),
-                    ('Suite',       'suite_name'),
+                    ('Suite',       'release_suite_output'),
                     ('Version',     'version'),
-                    ('Codename',    'codename') )
+                    ('Codename',    'codename'),
+                    ('Changelogs',  'changelog_url'),
+                  )
 
         # A "Sub" Release file has slightly different fields
         subattribs = ( ('Archive',  'suite_name'),
@@ -147,18 +288,27 @@ class ReleaseWriter(object):
 
         # Boolean stuff. If we find it true in database, write out "yes" into the release file
         boolattrs = ( ('NotAutomatic',         'notautomatic'),
-                      ('ButAutomaticUpgrades', 'butautomaticupgrades') )
+                      ('ButAutomaticUpgrades', 'butautomaticupgrades'),
+                      ('Acquire-By-Hash',      'byhash'),
+                    )
 
         cnf = Config()
 
-        suite_suffix = "%s" % (cnf.Find("Dinstall::SuiteSuffix"))
+        suite_suffix = cnf.find("Dinstall::SuiteSuffix", "")
+
+        self.create_output_directories()
+        self.create_release_symlinks()
 
-        outfile = os.path.join(cnf["Dir::Root"], 'dists', "%s/%s" % (suite.suite_name, suite_suffix), "Release")
-        out = open(outfile, "w")
+        outfile = os.path.join(self.suite_release_path(), "Release")
+        out = open(outfile + ".new", "w")
 
         for key, dbfield in attribs:
-            if getattr(suite, dbfield) is not None:
-                out.write("%s: %s\n" % (key, getattr(suite, dbfield)))
+            # Hack: skip Version and Changelogs when they are NULL, as the
+            # old code did.  We should probably skip any field that is None.
+            if key in ("Version", "Changelogs") and getattr(suite, dbfield) is None:
+                continue
+
+            out.write("%s: %s\n" % (key, getattr(suite, dbfield)))
 
         out.write("Date: %s\n" % (time.strftime("%a, %d %b %Y %H:%M:%S UTC", time.gmtime(time.time()))))
 
@@ -172,11 +322,9 @@ class ReleaseWriter(object):
 
         out.write("Architectures: %s\n" % (" ".join([a.arch_string for a in architectures])))
 
-        ## FIXME: Components need to be adjusted to whatever will be in the db
-        ## Needs putting in the DB
-        components = ['main', 'contrib', 'non-free']
+        components = [ c.component_name for c in suite.components ]
 
-        out.write("Components: %s\n" % ( " ".join(map(lambda x: "%s%s" % (suite_suffix, x), components ))))
+        out.write("Components: %s\n" % (" ".join(components)))
 
         # For exact compatibility with old g-r, write out Description here instead
         # of with the rest of the DB fields above
@@ -184,7 +332,7 @@ class ReleaseWriter(object):
             out.write("Description: %s\n" % suite.description)
 
         for comp in components:
-            for dirpath, dirnames, filenames in os.walk("%sdists/%s/%s%s" % (cnf["Dir::Root"], suite.suite_name, suite_suffix, comp), topdown=True):
+            for dirpath, dirnames, filenames in os.walk(os.path.join(self.suite_path(), comp), topdown=True):
                 if not re_gensubrelease.match(dirpath):
                     continue
 
@@ -216,11 +364,9 @@ class ReleaseWriter(object):
         # their checksums to the main Release file
         oldcwd = os.getcwd()
 
-        os.chdir("%sdists/%s/%s" % (cnf["Dir::Root"], suite.suite_name, suite_suffix))
+        os.chdir(self.suite_path())
 
-        hashfuncs = { 'MD5Sum' : apt_pkg.md5sum,
-                      'SHA1' : apt_pkg.sha1sum,
-                      'SHA256' : apt_pkg.sha256sum }
+        hashes = [x for x in RELEASE_HASHES if x.db_name in suite.checksums]
 
         fileinfo = {}
 
@@ -242,25 +388,22 @@ class ReleaseWriter(object):
                 # If we find a file for which we have a compressed version and
                 # haven't yet seen the uncompressed one, store the possibility
                 # for future use
-                if entry.endswith(".gz") and entry[:-3] not in uncompnotseen.keys():
+                if entry.endswith(".gz") and filename[:-3] not in uncompnotseen:
                     uncompnotseen[filename[:-3]] = (gzip.GzipFile, filename)
-                elif entry.endswith(".bz2") and entry[:-4] not in uncompnotseen.keys():
+                elif entry.endswith(".bz2") and filename[:-4] not in uncompnotseen:
                     uncompnotseen[filename[:-4]] = (bz2.BZ2File, filename)
+                elif entry.endswith(".xz") and filename[:-3] not in uncompnotseen:
+                    uncompnotseen[filename[:-3]] = (XzFile, filename)
 
                 fileinfo[filename]['len'] = len(contents)
 
-                for hf, func in hashfuncs.items():
-                    fileinfo[filename][hf] = func(contents)
+                for hf in hashes:
+                    fileinfo[filename][hf.release_field] = hf.func(contents)
 
         for filename, comp in uncompnotseen.items():
             # If we've already seen the uncompressed file, we don't
             # need to do anything again
-            if filename in fileinfo.keys():
-                continue
-
-            # Skip uncompressed Contents files as they're huge, take ages to
-            # checksum and we checksum the compressed ones anyways
-            if os.path.basename(filename).startswith("Contents"):
+            if filename in fileinfo:
                 continue
 
             fileinfo[filename] = {}
@@ -270,16 +413,21 @@ class ReleaseWriter(object):
 
             fileinfo[filename]['len'] = len(contents)
 
-            for hf, func in hashfuncs.items():
-                fileinfo[filename][hf] = func(contents)
+            for hf in hashes:
+                fileinfo[filename][hf.release_field] = hf.func(contents)
 
 
-        for h in sorted(hashfuncs.keys()):
-            out.write('%s:\n' % h)
+        for field in sorted(h.release_field for h in hashes):
+            out.write('%s:\n' % field)
             for filename in sorted(fileinfo.keys()):
-                out.write(" %s %8d %s\n" % (fileinfo[filename][h], fileinfo[filename]['len'], filename))
+                out.write(" %s %8d %s\n" % (fileinfo[filename][field], fileinfo[filename]['len'], filename))
 
         out.close()
+        os.rename(outfile + '.new', outfile)
+
+        self._update_hashfile_table(session, fileinfo, hashes)
+        if suite.byhash:
+            self._make_byhash_links(fileinfo, hashes)
 
         sign_release_dir(suite, os.path.dirname(outfile))
 
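Writing to a scratch name and renaming is what keeps a half-written Release from ever being visible; the pattern in isolation (a sketch, release_contents is an assumed name):

    # Write to a temporary name, then publish atomically; POSIX rename(2)
    # is atomic within a single filesystem.
    with open(outfile + '.new', 'w') as out:
        out.write(release_contents)
    os.rename(outfile + '.new', outfile)
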
@@ -289,25 +437,29 @@ class ReleaseWriter(object):
 
 
 def main ():
-    global Logger, results
+    global Logger
 
     cnf = Config()
 
-    for i in ["Help", "Suite", "Force"]:
+    for i in ["Help", "Suite", "Force", "Quiet"]:
         if not cnf.has_key("Generate-Releases::Options::%s" % (i)):
             cnf["Generate-Releases::Options::%s" % (i)] = ""
 
     Arguments = [('h',"help","Generate-Releases::Options::Help"),
+                 ('a','archive','Generate-Releases::Options::Archive','HasArg'),
                  ('s',"suite","Generate-Releases::Options::Suite"),
-                 ('f',"force","Generate-Releases::Options::Force")]
+                 ('f',"force","Generate-Releases::Options::Force"),
+                 ('q',"quiet","Generate-Releases::Options::Quiet"),
+                 ('o','option','','ArbItem')]
 
-    suite_names = apt_pkg.ParseCommandLine(cnf.Cnf, Arguments, sys.argv)
-    Options = cnf.SubTree("Generate-Releases::Options")
+    suite_names = apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)
+    Options = cnf.subtree("Generate-Releases::Options")
 
     if Options["Help"]:
         usage()
 
-    Logger = daklog.Logger(cnf, 'generate-releases')
+    Logger = daklog.Logger('generate-releases')
+    pool = DakProcessPool()
 
     session = DBConn().session()
 
@@ -321,13 +473,12 @@ def main ():
                 print "cannot find suite %s" % s
                 Logger.log(['cannot find suite %s' % s])
     else:
-        suites = session.query(Suite).filter(Suite.untouchable == False).all()
+        query = session.query(Suite).filter(Suite.untouchable == False)
+        if 'Archive' in Options:
+            query = query.join(Suite.archive).filter(Archive.archive_name==Options['Archive'])
+        suites = query.all()
 
     broken=[]
-    # For each given suite, run one process
-    results = []
-
-    pool = Pool()
 
     for s in suites:
         # The DakProcessPool created above runs each suite in its own worker.
@@ -335,20 +486,20 @@ def main ():
             print "Skipping %s (untouchable)" % s.suite_name
             continue
 
-        print "Processing %s" % s.suite_name
+        if not Options["Quiet"]:
+            print "Processing %s" % s.suite_name
         Logger.log(['Processing release file for Suite: %s' % (s.suite_name)])
-        pool.apply_async(generate_helper, (s.suite_id, ), callback=get_result)
+        pool.apply_async(generate_helper, (s.suite_id, ))
 
     # No more work will be added to our pool, close it and then wait for all to finish
     pool.close()
     pool.join()
 
-    retcode = 0
+    retcode = pool.overall_status()
 
-    if len(results) > 0:
-        Logger.log(['Release file generation broken: %s' % (results)])
-        print "Release file generation broken:\n", '\n'.join(results)
-        retcode = 1
+    if retcode > 0:
+        # TODO: CENTRAL FUNCTION FOR THIS / IMPROVE LOGGING
+        Logger.log(['Release file generation broken: %s' % (','.join([str(x[1]) for x in pool.results]))])
 
     Logger.close()
 
@@ -360,13 +511,12 @@ def generate_helper(suite_id):
     '''
     session = DBConn().session()
     suite = Suite.get(suite_id, session)
-    try:
-        rw = ReleaseWriter(suite)
-        rw.generate_release_files()
-    except Exception, e:
-        return str(e)
 
-    return
+    # We allow the process handler to catch and deal with any exceptions
+    rw = ReleaseWriter(suite)
+    rw.generate_release_files()
+
+    return (PROC_STATUS_SUCCESS, 'Release file written for %s' % suite.suite_name)
 
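A condensed driver sketch, pieced together from this diff (semantics of daklib.dakmultiprocessing assumed): each helper returns a (status, message) tuple, failures are caught by the pool's process handler and collected in pool.results, and overall_status() folds everything into one exit code.

    pool = DakProcessPool()
    for s in suites:
        pool.apply_async(generate_helper, (s.suite_id, ))
    pool.close()
    pool.join()
    retcode = pool.overall_status()    # non-zero if any helper failed
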
 #######################################################################################