]> git.decadent.org.uk Git - dak.git/blobdiff - dak/generate_index_diffs.py
Merge commit 'stew/popen2-must-die' into merge
[dak.git] / dak / generate_index_diffs.py
index 3981f6d3d982b47a689508113c095cc4c559a94b..acf6b5f36f153e69372e1fcca9bdc4b75e608df9 100755 (executable)
@@ -32,7 +32,8 @@
 
 import sys, os, tempfile
 import apt_pkg
-import daklib.utils
+import bz2, gzip, time
+from daklib import utils
 
 ################################################################################
 
@@ -78,16 +79,29 @@ def smartlink(f, t):
         print "missing: %s" % (f)
         raise IOError, f
 
-def smartopen(file):
-    if os.path.isfile(file):
-        f = open(file, "r")
-    elif os.path.isfile("%s.gz" % file):
-        f = create_temp_file(os.popen("zcat %s.gz" % file, "r"))
-    elif os.path.isfile("%s.bz2" % file):
-        f = create_temp_file(os.popen("bzcat %s.bz2" % file, "r"))
+def smartread(filename):
+    """
+    Read filename, or its .gz/.bz2 sibling (decompressed), into a string.
+    Returns (path actually read, contents), or (None, None) if none exists.
+    """
+    actual_filename = None
+    contents = None
+    if os.path.isfile(filename):
+        actual_filename = filename
+        f = open(actual_filename, "r")
+    elif os.path.isfile("%s.gz" % filename):
+        actual_filename = "%s.gz" % filename
+        f = decompressors['zcat'](actual_filename)
+    elif os.path.isfile("%s.bz2" % filename):
+        actual_filename = "%s.bz2" % filename
+        f = decompressors['bzip2'](actual_filename)  # key as spelled in the decompressors table
     else:
         f = None
-    return f
+
+    if f:
+        contents = f.read()
+        f.close()    # only the contents are returned, so release the handle now
+    return (actual_filename, contents)
 
 def pipe_file(f, t):
     f.seek(0)
@@ -107,51 +121,51 @@ class Updates:
         self.filesizesha1 = None
 
         if readpath:
-          try:
-            f = open(readpath + "/Index")
-            x = f.readline()
+            try:
+                f = open(readpath + "/Index")
+                x = f.readline()
 
-            def read_hashs(ind, f, self, x=x):
-                while 1:
-                    x = f.readline()
-                    if not x or x[0] != " ": break
+                def read_hashs(ind, f, self, x=x):
+                    while 1:
+                        x = f.readline()
+                        if not x or x[0] != " ": break
+                        l = x.split()
+                        if not self.history.has_key(l[2]):
+                            self.history[l[2]] = [None,None]
+                            self.history_order.append(l[2])
+                        self.history[l[2]][ind] = (l[0], int(l[1]))
+                    return x
+
+                while x:
                     l = x.split()
-                    if not self.history.has_key(l[2]):
-                        self.history[l[2]] = [None,None]
-                       self.history_order.append(l[2])
-                    self.history[l[2]][ind] = (l[0], int(l[1]))
-                return x
-
-            while x:
-                l = x.split()
 
-                if len(l) == 0:
-                    x = f.readline()
-                    continue
+                    if len(l) == 0:
+                        x = f.readline()
+                        continue
 
-                if l[0] == "SHA1-History:":
-                    x = read_hashs(0,f,self)
-                    continue
+                    if l[0] == "SHA1-History:":
+                        x = read_hashs(0,f,self)
+                        continue
 
-                if l[0] == "SHA1-Patches:":
-                    x = read_hashs(1,f,self)
-                    continue
+                    if l[0] == "SHA1-Patches:":
+                        x = read_hashs(1,f,self)
+                        continue
 
-                if l[0] == "Canonical-Name:" or l[0]=="Canonical-Path:":
-                    self.can_path = l[1]
+                    if l[0] == "Canonical-Name:" or l[0]=="Canonical-Path:":
+                        self.can_path = l[1]
 
-                if l[0] == "SHA1-Current:" and len(l) == 3:
-                    self.filesizesha1 = (l[1], int(l[2]))
+                    if l[0] == "SHA1-Current:" and len(l) == 3:
+                        self.filesizesha1 = (l[1], int(l[2]))
 
-                x = f.readline()
+                    x = f.readline()
 
-          except IOError:
-            0
+            except IOError:
+                0
 
     def dump(self, out=sys.stdout):
         if self.can_path:
             out.write("Canonical-Path: %s\n" % (self.can_path))
-        
+
         if self.filesizesha1:
             out.write("SHA1-Current: %s %7d\n" % (self.filesizesha1))
 
@@ -164,7 +178,7 @@ class Updates:
                 tryunlink("%s/%s.gz" % (self.readpath, h))
                 del hs[h]
             l = l[cnt-self.max:]
-           self.history_order = l[:]
+            self.history_order = l[:]
 
         out.write("SHA1-History:\n")
         for h in l:
@@ -173,17 +187,14 @@ class Updates:
         for h in l:
             out.write(" %s %7d %s\n" % (hs[h][1][0], hs[h][1][1], h))
 
-def create_temp_file(r):
-    f = tempfile.TemporaryFile()
-    while 1:
-        x = r.readline()
-        if not x: break
-        f.write(x)
-    r.close()
-    del x,r
-    f.flush()
-    f.seek(0)
-    return f
+decompressors = { 'zcat' : gzip.GzipFile, 'bzcat' : bz2.BZ2File,
+                  'bzip2' : bz2.BZ2File }  # keyed by command name; 'bzip2' kept as an alias of 'bzcat'
+
+def sizesha1_str(s):
+    """Return the (sha1sum, length) tuple describing the string s."""
+    checksum = apt_pkg.sha1sum(s)
+    length = len(s)
+    return (checksum, length)
 
 def sizesha1(f):
     size = os.fstat(f.fileno())[6]
@@ -192,8 +203,7 @@ def sizesha1(f):
     return (sha1sum, size)
 
 def genchanges(Options, outdir, oldfile, origfile, maxdiffs = 14):
-    if Options.has_key("NoAct"): 
-        print "not doing anything"
+    if Options.has_key("NoAct"):
         return
 
     patchname = Options["PatchName"]
@@ -213,28 +223,29 @@ def genchanges(Options, outdir, oldfile, origfile, maxdiffs = 14):
     (oldext, oldstat) = smartstat(oldfile)
     (origext, origstat) = smartstat(origfile)
     if not origstat:
-        print "%s doesn't exist" % (origfile)
+        print "%s: doesn't exist" % (origfile)
         return
     if not oldstat:
-        print "initial run"
+        print "%s: initial run" % (origfile)
         os.link(origfile + origext, oldfile + origext)
         return
 
     if oldstat[1:3] == origstat[1:3]:
-        print "hardlink unbroken, assuming unchanged"
+        print "%s: hardlink unbroken, assuming unchanged" % (origfile)
         return
 
-    oldf = smartopen(oldfile)
-    oldsizesha1 = sizesha1(oldf)
+    (oldf,contents) = smartread(oldfile)
+    oldsizesha1 = sizesha1_str(contents)
 
     # should probably early exit if either of these checks fail
     # alternatively (optionally?) could just trim the patch history
 
     if upd.filesizesha1:
         if upd.filesizesha1 != oldsizesha1:
-            print "old file seems to have changed! %s %s => %s %s" % (upd.filesizesha1 + oldsizesha1)
+            print "warning: old file seems to have changed! %s %s => %s %s" % (upd.filesizesha1 + oldsizesha1)
 
     # XXX this should be usable now
+    # stew: the disabled check below still calls smartopen(), which this change removes
     #
     #for d in upd.history.keys():
     #    df = smartopen("%s/%s" % (outdir,d))
@@ -255,19 +266,17 @@ def genchanges(Options, outdir, oldfile, origfile, maxdiffs = 14):
 
     if newsizesha1 == oldsizesha1:
         os.unlink(newfile)
-        oldf.close()
-        print "file unchanged, not generating diff"
+#        oldf.close()
+        print "%s: unchanged" % (origfile)
     else:
         if not os.path.isdir(outdir): os.mkdir(outdir)
-        print "generating diff"
-        w = os.popen("diff --ed - %s | gzip -c -9 > %s.gz" % 
-                         (newfile, difffile), "w")
-        pipe_file(oldf, w)
-        oldf.close()
+        # diff reads the old contents on stdin (assumes `contents` is unchanged since smartread(oldfile) - confirm); close() waits for the pipeline
+        w = os.popen("diff --ed - %s | gzip -c -9 > %s.gz" % (newfile, difffile), "w")
+        w.write(contents)
+        w.close()
 
-        difff = smartopen(difffile)
-        difsizesha1 = sizesha1(difff)
-        difff.close()
+        (oldf,contents) = smartread(difffile)
+        difsizesha1 = sizesha1_str(contents)
 
         upd.history[patchname] = (oldsizesha1, difsizesha1)
         upd.history_order.append(patchname)
@@ -288,14 +297,14 @@ def main():
 
     os.umask(0002)
 
-    Cnf = daklib.utils.get_conf()
+    Cnf = utils.get_conf()
     Arguments = [ ('h', "help", "Generate-Index-Diffs::Options::Help"),
                   ('c', None, "Generate-Index-Diffs::Options::CanonicalPath", "hasArg"),
                   ('p', "patchname", "Generate-Index-Diffs::Options::PatchName", "hasArg"),
                   ('r', "rootdir", "Generate-Index-Diffs::Options::RootDir", "hasArg"),
                   ('d', "tmpdir", "Generate-Index-Diffs::Options::TempDir", "hasArg"),
                   ('m', "maxdiffs", "Generate-Index-Diffs::Options::MaxDiffs", "hasArg"),
-                 ('n', "n-act", "Generate-Index-Diffs::Options::NoAct"),
+                  ('n', "n-act", "Generate-Index-Diffs::Options::NoAct"),
                 ]
     suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
     Options = Cnf.SubTree("Generate-Index-Diffs::Options")
@@ -308,13 +317,10 @@ def main():
 
     if not Options.has_key("PatchName"):
         format = "%Y-%m-%d-%H%M.%S"
-        i,o = os.popen2("date +%s" % (format))
-        i.close()
-        Options["PatchName"] = o.readline()[:-1]
-        o.close()
+        Options["PatchName"] = time.strftime( format )
 
     AptCnf = apt_pkg.newConfiguration()
-    apt_pkg.ReadConfigFileISC(AptCnf,daklib.utils.which_apt_conf_file())
+    apt_pkg.ReadConfigFileISC(AptCnf,utils.which_apt_conf_file())
 
     if Options.has_key("RootDir"): Cnf["Dir::Root"] = Options["RootDir"]
 
@@ -322,8 +328,6 @@ def main():
         suites = Cnf.SubTree("Suite").List()
 
     for suite in suites:
-        if suite == "Experimental": continue
-
         print "Processing: " + suite
         SuiteBlock = Cnf.SubTree("Suite::" + suite)
 
@@ -353,7 +357,7 @@ def main():
         elif AptCnf.has_key("bindirectory::%s" % (tree)):
             sections = AptCnf["bindirectory::%s::Sections" % (tree)].split()
         else:
-            aptcnf_filename = os.path.basename(daklib.utils.which_apt_conf_file())
+            aptcnf_filename = os.path.basename(utils.which_apt_conf_file())
             print "ALERT: suite %s not in %s, nor untouchable!" % (suite, aptcnf_filename)
             continue
 
@@ -366,7 +370,6 @@ def main():
                 file = "%s/Contents-%s" % (Cnf["Dir::Root"] + tree,
                         architecture)
                 storename = "%s/%s_contents_%s" % (Options["TempDir"], suite, architecture)
-                print "running contents for %s %s : " % (suite, architecture),
                 genchanges(Options, file + ".diff", storename, file, \
                   Cnf.get("Suite::%s::Generate-Index-Diffs::MaxDiffs::Contents" % (suite), maxcontents))
 
@@ -387,7 +390,6 @@ def main():
                 file = "%s/%s/%s/%s" % (Cnf["Dir::Root"] + tree,
                            component, longarch, packages)
                 storename = "%s/%s_%s_%s" % (Options["TempDir"], suite, component, architecture)
-                print "running for %s %s %s : " % (suite, component, architecture),
                 genchanges(Options, file + ".diff", storename, file, \
                   Cnf.get("Suite::%s::Generate-Index-Diffs::MaxDiffs::%s" % (suite, packages), maxsuite))