git.decadent.org.uk Git - dak.git/blobdiff - dak/generate_index_diffs.py
Read files in chunks
[dak.git] / dak / generate_index_diffs.py
index 21c631b9a39ba25eef58d3f879bf7bdcc9a406c0..485d3770ff6c60bb3e70cf89ba10f9110e241126 100755 (executable)
 import sys
 import os
 import tempfile
-import subprocess
 import time
 import apt_pkg
+import glob
 
 from daklib import utils
-from daklib.dbconn import get_suite, get_suite_architectures
+from daklib.dbconn import Archive, Component, DBConn, Suite, get_suite, get_suite_architectures
+#from daklib.regexes import re_includeinpdiff
+import re
+re_includeinpdiff = re.compile(r"(Translation-[a-zA-Z_]+\.(?:bz2|xz))")
 
 ################################################################################
 
@@ -54,13 +57,15 @@ def usage (exit_code=0):
 Write out ed-style diffs to Packages/Source lists
 
   -h, --help            show this help and exit
+  -a <archive>          generate diffs for suites in <archive>
   -c                    give the canonical path of the file
   -p                    name for the patch (defaults to current time)
+  -d                    name for the hardlink farm for status
+  -m                    how many diffs to generate
   -n                    take no action
     """
     sys.exit(exit_code)
 
-
 def tryunlink(file):
     try:
         os.unlink(file)
@@ -68,7 +73,7 @@ def tryunlink(file):
         print "warning: removing of %s denied" % (file)
 
 def smartstat(file):
-    for ext in ["", ".gz", ".bz2"]:
+    for ext in ["", ".gz", ".bz2", ".xz"]:
         if os.path.isfile(file + ext):
             return (ext, os.stat(file + ext))
     return (None, None)
@@ -80,9 +85,11 @@ def smartlink(f, t):
         os.system("gzip -d < %s.gz > %s" % (f, t))
     elif os.path.isfile("%s.bz2" % (f)):
         os.system("bzip2 -d < %s.bz2 > %s" % (f, t))
+    elif os.path.isfile("%s.xz" % (f)):
+        os.system("xz -d < %s.xz > %s" % (f, t))
     else:
         print "missing: %s" % (f)
-        raise IOError, f
+        raise IOError(f)
 
 def smartopen(file):
     if os.path.isfile(file):
@@ -91,6 +98,8 @@ def smartopen(file):
         f = create_temp_file(os.popen("zcat %s.gz" % file, "r"))
     elif os.path.isfile("%s.bz2" % file):
         f = create_temp_file(os.popen("bzcat %s.bz2" % file, "r"))
+    elif os.path.isfile("%s.xz" % file):
+        f = create_temp_file(os.popen("xzcat %s.xz" % file, "r"))
     else:
         f = None
     return f
@@ -98,34 +107,42 @@ def smartopen(file):
 def pipe_file(f, t):
     f.seek(0)
     while 1:
-        l = f.read()
+        l = f.read(65536)
         if not l: break
         t.write(l)
     t.close()
 
 class Updates:
-    def __init__(self, readpath = None, max = 14):
+    def __init__(self, readpath = None, max = 56):
         self.can_path = None
         self.history = {}
         self.history_order = []
         self.max = max
         self.readpath = readpath
-        self.filesizesha1 = None
+        self.filesizehashes = None
 
         if readpath:
             try:
                 f = open(readpath + "/Index")
                 x = f.readline()
 
-                def read_hashs(ind, f, self, x=x):
+                def read_hashs(ind, hashind, f, self, x=x):
                     while 1:
                         x = f.readline()
                         if not x or x[0] != " ": break
                         l = x.split()
-                        if not self.history.has_key(l[2]):
-                            self.history[l[2]] = [None,None]
-                            self.history_order.append(l[2])
-                        self.history[l[2]][ind] = (l[0], int(l[1]))
+                        fname = l[2]
+                        if fname.endswith('.gz'):
+                            fname = fname[:-3]
+                        if not self.history.has_key(fname):
+                            self.history[fname] = [None,None,None]
+                            self.history_order.append(fname)
+                        if not self.history[fname][ind]:
+                            self.history[fname][ind] = (int(l[1]), None, None)
+                        if hashind == 1:
+                            self.history[fname][ind] = (int(self.history[fname][ind][0]), l[0], self.history[fname][ind][2])
+                        else:
+                            self.history[fname][ind] = (int(self.history[fname][ind][0]), self.history[fname][ind][1], l[0])
                     return x
 
                 while x:
@@ -136,18 +153,41 @@ class Updates:
                         continue
 
                     if l[0] == "SHA1-History:":
-                        x = read_hashs(0,f,self)
+                        x = read_hashs(0,1,f,self)
+                        continue
+
+                    if l[0] == "SHA256-History:":
+                        x = read_hashs(0,2,f,self)
                         continue
 
                     if l[0] == "SHA1-Patches:":
-                        x = read_hashs(1,f,self)
+                        x = read_hashs(1,1,f,self)
+                        continue
+
+                    if l[0] == "SHA256-Patches:":
+                        x = read_hashs(1,2,f,self)
+                        continue
+
+                    if l[0] == "SHA1-Download:":
+                        x = read_hashs(2,1,f,self)
+                        continue
+
+                    if l[0] == "SHA256-Download:":
+                        x = read_hashs(2,2,f,self)
                         continue
 
                     if l[0] == "Canonical-Name:" or l[0]=="Canonical-Path:":
                         self.can_path = l[1]
 
                     if l[0] == "SHA1-Current:" and len(l) == 3:
-                        self.filesizesha1 = (l[1], int(l[2]))
+                        if not self.filesizehashes:
+                            self.filesizehashes = (int(l[2]), None, None)
+                        self.filesizehashes = (int(self.filesizehashes[0]), l[1], self.filesizehashes[2])
+
+                    if l[0] == "SHA256-Current:" and len(l) == 3:
+                        if not self.filesizehashes:
+                            self.filesizehashes = (int(l[2]), None, None)
+                        self.filesizehashes = (int(self.filesizehashes[0]), self.filesizehashes[2], l[1])
 
                     x = f.readline()
 
@@ -158,8 +198,11 @@ class Updates:
         if self.can_path:
             out.write("Canonical-Path: %s\n" % (self.can_path))
 
-        if self.filesizesha1:
-            out.write("SHA1-Current: %s %7d\n" % (self.filesizesha1))
+        if self.filesizehashes:
+            if self.filesizehashes[1]:
+                out.write("SHA1-Current: %s %7d\n" % (self.filesizehashes[1], self.filesizehashes[0]))
+            if self.filesizehashes[2]:
+                out.write("SHA256-Current: %s %7d\n" % (self.filesizehashes[2], self.filesizehashes[0]))
 
         hs = self.history
         l = self.history_order[:]
@@ -174,15 +217,33 @@ class Updates:
 
         out.write("SHA1-History:\n")
         for h in l:
-            out.write(" %s %7d %s\n" % (hs[h][0][0], hs[h][0][1], h))
+            if hs[h][0] and hs[h][0][1]:
+                out.write(" %s %7d %s\n" % (hs[h][0][1], hs[h][0][0], h))
+        out.write("SHA256-History:\n")
+        for h in l:
+            if hs[h][0] and hs[h][0][2]:
+                out.write(" %s %7d %s\n" % (hs[h][0][2], hs[h][0][0], h))
         out.write("SHA1-Patches:\n")
         for h in l:
-            out.write(" %s %7d %s\n" % (hs[h][1][0], hs[h][1][1], h))
+            if hs[h][1] and hs[h][1][1]:
+                out.write(" %s %7d %s\n" % (hs[h][1][1], hs[h][1][0], h))
+        out.write("SHA256-Patches:\n")
+        for h in l:
+            if hs[h][1] and hs[h][1][2]:
+                out.write(" %s %7d %s\n" % (hs[h][1][2], hs[h][1][0], h))
+        out.write("SHA1-Download:\n")
+        for h in l:
+            if hs[h][2] and hs[h][2][1]:
+                out.write(" %s %7d %s.gz\n" % (hs[h][2][1], hs[h][2][0], h))
+        out.write("SHA256-Download:\n")
+        for h in l:
+            if hs[h][2] and hs[h][2][2]:
+                out.write(" %s %7d %s.gz\n" % (hs[h][2][2], hs[h][2][0], h))
 
 def create_temp_file(r):
     f = tempfile.TemporaryFile()
     while 1:
-        x = r.readline()
+        x = r.read(65536)
         if not x: break
         f.write(x)
     r.close()
@@ -191,14 +252,17 @@ def create_temp_file(r):
     f.seek(0)
     return f
 
-def sizesha1(f):
+def sizehashes(f):
     size = os.fstat(f.fileno())[6]
     f.seek(0)
     sha1sum = apt_pkg.sha1sum(f)
-    return (sha1sum, size)
+    f.seek(0)
+    sha256sum = apt_pkg.sha256sum(f)
+    return (size, sha1sum, sha256sum)
 
-def genchanges(Options, outdir, oldfile, origfile, maxdiffs = 14):
+def genchanges(Options, outdir, oldfile, origfile, maxdiffs = 56):
     if Options.has_key("NoAct"):
+        print "Not acting on: od: %s, oldf: %s, origf: %s, md: %s" % (outdir, oldfile, origfile, maxdiffs)
         return
 
     patchname = Options["PatchName"]
@@ -226,77 +290,81 @@ def genchanges(Options, outdir, oldfile, origfile, maxdiffs = 14):
         return
 
     if oldstat[1:3] == origstat[1:3]:
-        print "%s: hardlink unbroken, assuming unchanged" % (origfile)
+        #print "%s: hardlink unbroken, assuming unchanged" % (origfile)
         return
 
     oldf = smartopen(oldfile)
-    oldsizesha1 = sizesha1(oldf)
+    oldsizehashes = sizehashes(oldf)
 
     # should probably early exit if either of these checks fail
     # alternatively (optionally?) could just trim the patch history
 
-    if upd.filesizesha1:
-        if upd.filesizesha1 != oldsizesha1:
-            print "info: old file " + oldfile + " changed! %s %s => %s %s" % (upd.filesizesha1 + oldsizesha1)
+    #if upd.filesizesha1:
+    #    if upd.filesizesha1 != oldsizesha1:
+    #        print "info: old file " + oldfile + " changed! %s %s => %s %s" % (upd.filesizesha1 + oldsizesha1)
 
     if Options.has_key("CanonicalPath"): upd.can_path=Options["CanonicalPath"]
 
     if os.path.exists(newfile): os.unlink(newfile)
     smartlink(origfile, newfile)
     newf = open(newfile, "r")
-    newsizesha1 = sizesha1(newf)
+    newsizehashes = sizehashes(newf)
     newf.close()
 
-    if newsizesha1 == oldsizesha1:
+    if newsizehashes == oldsizehashes:
         os.unlink(newfile)
         oldf.close()
-        print "%s: unchanged" % (origfile)
+        #print "%s: unchanged" % (origfile)
     else:
         if not os.path.isdir(outdir):
             os.mkdir(outdir)
 
-        w = os.popen("diff --ed - %s | gzip -c -9 > %s.gz" %
+        w = os.popen("diff --ed - %s | gzip --rsyncable -c -9 > %s.gz" %
                      (newfile, difffile), "w")
         pipe_file(oldf, w)
         oldf.close()
 
         difff = smartopen(difffile)
-        difsizesha1 = sizesha1(difff)
+        difsizehashes = sizehashes(difff)
         difff.close()
 
-        upd.history[patchname] = (oldsizesha1, difsizesha1)
+        difffgz = open(difffile + ".gz", "r")
+        difgzsizehashes = sizehashes(difffgz)
+        difffgz.close()
+
+        upd.history[patchname] = (oldsizehashes, difsizehashes, difgzsizehashes)
         upd.history_order.append(patchname)
 
-        upd.filesizesha1 = newsizesha1
+        upd.filesizehashes = newsizehashes
 
         os.unlink(oldfile + oldext)
         os.link(origfile + origext, oldfile + origext)
         os.unlink(newfile)
 
-        f = open(outdir + "/Index", "w")
-        upd.dump(f)
-        f.close()
+        with open(outdir + "/Index.new", "w") as f:
+            upd.dump(f)
+        os.rename(outdir + "/Index.new", outdir + "/Index")
 
 
 def main():
     global Cnf, Options, Logger
 
-    os.umask(0002)
+    os.umask(0o002)
 
     Cnf = utils.get_conf()
     Arguments = [ ('h', "help", "Generate-Index-Diffs::Options::Help"),
+                  ('a', 'archive', 'Generate-Index-Diffs::Options::Archive', 'hasArg'),
                   ('c', None, "Generate-Index-Diffs::Options::CanonicalPath", "hasArg"),
                   ('p', "patchname", "Generate-Index-Diffs::Options::PatchName", "hasArg"),
-                  ('r', "rootdir", "Generate-Index-Diffs::Options::RootDir", "hasArg"),
                   ('d', "tmpdir", "Generate-Index-Diffs::Options::TempDir", "hasArg"),
                   ('m', "maxdiffs", "Generate-Index-Diffs::Options::MaxDiffs", "hasArg"),
                   ('n', "n-act", "Generate-Index-Diffs::Options::NoAct"),
                 ]
-    suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
-    Options = Cnf.SubTree("Generate-Index-Diffs::Options")
+    suites = apt_pkg.parse_commandline(Cnf,Arguments,sys.argv)
+    Options = Cnf.subtree("Generate-Index-Diffs::Options")
     if Options.has_key("Help"): usage()
 
-    maxdiffs = Options.get("MaxDiffs::Default", "14")
+    maxdiffs = Options.get("MaxDiffs::Default", "56")
     maxpackages = Options.get("MaxDiffs::Packages", maxdiffs)
     maxcontents = Options.get("MaxDiffs::Contents", maxdiffs)
     maxsources = Options.get("MaxDiffs::Sources", maxdiffs)
@@ -305,19 +373,18 @@ def main():
         format = "%Y-%m-%d-%H%M.%S"
         Options["PatchName"] = time.strftime( format )
 
-    AptCnf = apt_pkg.newConfiguration()
-    apt_pkg.ReadConfigFileISC(AptCnf,utils.which_apt_conf_file())
-
-    if Options.has_key("RootDir"): Cnf["Dir::Root"] = Options["RootDir"]
+    session = DBConn().session()
 
     if not suites:
-        suites = Cnf.SubTree("Suite").List()
+        query = session.query(Suite.suite_name)
+        if Options.get('Archive'):
+            query = query.join(Suite.archive).filter(Archive.archive_name == Options['Archive'])
+        suites = [ s.suite_name for s in query ]
 
     for suitename in suites:
         print "Processing: " + suitename
-        SuiteBlock = Cnf.SubTree("Suite::" + suitename)
 
-        suiteobj = get_suite(suitename.lower())
+        suiteobj = get_suite(suitename.lower(), session=session)
 
         # Use the canonical version of the suite name
         suite = suiteobj.suite_name
@@ -326,60 +393,58 @@ def main():
             print "Skipping: " + suite + " (untouchable)"
             continue
 
-        architectures = get_suite_architectures(suite, skipall=True)
+        architectures = get_suite_architectures(suite, skipall=True, session=session)
+        components = [ c.component_name for c in session.query(Component.component_name) ]
 
-        if SuiteBlock.has_key("Components"):
-            components = SuiteBlock.ValueList("Components")
-        else:
-            components = []
-
-        suite_suffix = Cnf.Find("Dinstall::SuiteSuffix")
+        suite_suffix = Cnf.find("Dinstall::SuiteSuffix")
         if components and suite_suffix:
             longsuite = suite + "/" + suite_suffix
         else:
             longsuite = suite
 
-        tree = SuiteBlock.get("Tree", "dists/%s" % (longsuite))
-
-        if AptCnf.has_key("tree::%s" % (tree)):
-            sections = AptCnf["tree::%s::Sections" % (tree)].split()
-        elif AptCnf.has_key("bindirectory::%s" % (tree)):
-            sections = AptCnf["bindirectory::%s::Sections" % (tree)].split()
-        else:
-            aptcnf_filename = os.path.basename(utils.which_apt_conf_file())
-            print "ALERT: suite %s not in %s, nor untouchable!" % (suite, aptcnf_filename)
-            continue
+        tree = os.path.join(suiteobj.archive.path, 'dists', longsuite)
+
+        # See if there are Translations which might need a new pdiff
+        cwd = os.getcwd()
+        for component in components:
+            #print "DEBUG: Working on %s" % (component)
+            workpath=os.path.join(tree, component, "i18n")
+            if os.path.isdir(workpath):
+                os.chdir(workpath)
+                for dirpath, dirnames, filenames in os.walk(".", followlinks=True, topdown=True):
+                    for entry in filenames:
+                        if not re_includeinpdiff.match(entry):
+                            #print "EXCLUDING %s" % (entry)
+                            continue
+                        (fname, fext) = os.path.splitext(entry)
+                        processfile=os.path.join(workpath, fname)
+                        #print "Working: %s" % (processfile)
+                        storename="%s/%s_%s_%s" % (Options["TempDir"], suite, component, fname)
+                        #print "Storefile: %s" % (storename)
+                        genchanges(Options, processfile + ".diff", storename, processfile, maxdiffs)
+        os.chdir(cwd)
 
         for archobj in architectures:
             architecture = archobj.arch_string
 
-            if architecture != "source":
-                # Process Contents
-                file = "%s/Contents-%s" % (Cnf["Dir::Root"] + tree,
-                        architecture)
-                storename = "%s/%s_contents_%s" % (Options["TempDir"], suite, architecture)
-                genchanges(Options, file + ".diff", storename, file, \
-                  Cnf.get("Suite::%s::Generate-Index-Diffs::MaxDiffs::Contents" % (suite), maxcontents))
-
-            # use sections instead of components since dak.conf
-            # treats "foo/bar main" as suite "foo", suitesuffix "bar" and
-            # component "bar/main". suck.
-
-            for component in sections:
+            for component in components:
                 if architecture == "source":
                     longarch = architecture
                     packages = "Sources"
                     maxsuite = maxsources
                 else:
-                    longarch = "binary-%s"% (architecture)
+                    longarch = "binary-%s" % (architecture)
                     packages = "Packages"
                     maxsuite = maxpackages
 
-                file = "%s/%s/%s/%s" % (Cnf["Dir::Root"] + tree,
-                           component, longarch, packages)
+                # Process Contents
+                file = "%s/%s/Contents-%s" % (tree, component, architecture)
+                storename = "%s/%s_%s_contents_%s" % (Options["TempDir"], suite, component, architecture)
+                genchanges(Options, file + ".diff", storename, file, maxcontents)
+
+                file = "%s/%s/%s/%s" % (tree, component, longarch, packages)
                 storename = "%s/%s_%s_%s" % (Options["TempDir"], suite, component, architecture)
-                genchanges(Options, file + ".diff", storename, file, \
-                  Cnf.get("Suite::%s::Generate-Index-Diffs::MaxDiffs::%s" % (suite, packages), maxsuite))
+                genchanges(Options, file + ".diff", storename, file, maxsuite)
 
 ################################################################################