]> git.decadent.org.uk Git - dak.git/blob - dak/generate_index_diffs.py
Hopefully enable g-i-d to deal with .xz files
[dak.git] / dak / generate_index_diffs.py
1 #!/usr/bin/env python
2
3 """ generates partial package updates list"""
4
5 ###########################################################
6
7 # idea and basic implementation by Anthony, some changes by Andreas
8 # parts are stolen from 'dak generate-releases'
9 #
10 # Copyright (C) 2004, 2005, 2006  Anthony Towns <aj@azure.humbug.org.au>
11 # Copyright (C) 2004, 2005  Andreas Barth <aba@not.so.argh.org>
12
13 # This program is free software; you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation; either version 2 of the License, or
16 # (at your option) any later version.
17
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
27
28 # < elmo> bah, don't bother me with annoying facts
29 # < elmo> I was on a roll
30
31
32 ################################################################################
33
34 import sys
35 import os
36 import tempfile
37 import time
38 import apt_pkg
39 import glob
40
41 from daklib import utils
42 from daklib.dbconn import Archive, Component, DBConn, Suite, get_suite, get_suite_architectures
43 #from daklib.regexes import re_includeinpdiff
44 import re
# File names that get pdiffs generated in the i18n directories: "Translation-"
# followed by letters/underscores and a ".bz2" or ".xz" suffix. main() applies
# it with .match(), so the pattern is anchored at the start of the name only.
re_includeinpdiff = re.compile(r"(Translation-[a-zA-Z_]+\.(?:bz2|xz))")
46
47 ################################################################################
48
# Module-level state. main() declares all three as global and assigns Cnf
# (from utils.get_conf()) and Options (the "Generate-Index-Diffs::Options"
# subtree of Cnf).
Cnf = None
# Logger is declared global in main() but never assigned in the code shown here.
Logger = None
Options = None
52
53 ################################################################################
54
def usage (exit_code=0):
    """Print the command-line help text and terminate the process.

    exit_code is passed unchanged to sys.exit(); it defaults to 0.
    """
    help_text = """Usage: dak generate-index-diffs [OPTIONS] [suites]
Write out ed-style diffs to Packages/Source lists

  -h, --help            show this help and exit
  -a <archive>          generate diffs for suites in <archive>
  -c                    give the canonical path of the file
  -p                    name for the patch (defaults to current time)
  -d                    name for the hardlink farm for status
  -m                    how many diffs to generate
  -n                    take no action
    """
    print(help_text)
    sys.exit(exit_code)
68
def tryunlink(file):
    """Remove *file* on a best-effort basis.

    Any OSError raised by os.unlink() is swallowed and reported as a warning
    on stdout, so callers never see a failure from this function.
    """
    try:
        os.unlink(file)
    except OSError:
        warning = "warning: removing of %s denied" % (file)
        print(warning)
74
def smartstat(file):
    """Find *file* or a compressed variant of it and stat what was found.

    The bare name is tried first, then the ".gz", ".bz2" and ".xz" variants,
    in that order. Returns (extension, os.stat result) for the first candidate
    that is a regular file, where extension is "" for the bare name, or
    (None, None) when none of the candidates exists.
    """
    candidates = ("", ".gz", ".bz2", ".xz")
    found = next((suffix for suffix in candidates
                  if os.path.isfile(file + suffix)), None)
    if found is None:
        return (None, None)
    return (found, os.stat(file + found))
80
def smartlink(f, t):
    """Make an uncompressed copy of index file *f* available at path *t*.

    If *f* itself is a regular file it is hard-linked to *t*. Otherwise the
    first existing variant among "f.gz", "f.bz2" and "f.xz" is decompressed
    into *t* with gzip, bzip2 or xz respectively.

    The decompressor is started directly (no shell) with the compressed file
    on stdin and *t* on stdout, so paths containing spaces or shell
    metacharacters are handled correctly.

    Raises:
        IOError: neither *f* nor any compressed variant exists (after
            printing "missing: <f>").
        subprocess.CalledProcessError: the decompressor exited non-zero;
            previously such a failure went unnoticed and left a bogus *t*.
        OSError: hard-linking failed or the decompressor could not be run.
    """
    # Imported locally so the function does not rely on a module-level import.
    import subprocess

    if os.path.isfile(f):
        os.link(f, t)
        return

    decompressors = (
        (".gz", ["gzip", "-d"]),
        (".bz2", ["bzip2", "-d"]),
        (".xz", ["xz", "-d"]),
    )
    for suffix, command in decompressors:
        packed = "%s%s" % (f, suffix)
        if os.path.isfile(packed):
            with open(packed, "rb") as infd:
                with open(t, "wb") as outfd:
                    subprocess.check_call(command, stdin=infd, stdout=outfd)
            return

    print("missing: %s" % (f))
    raise IOError(f)
93
def smartopen(file):
    """Open *file*, or a compressed variant of it, for reading.

    A regular file named *file* is opened directly in "r" mode. Otherwise the
    first existing variant among "file.gz", "file.bz2" and "file.xz" is piped
    through zcat, bzcat or xzcat and the output is spooled through
    create_temp_file(). Returns the resulting file object, or None when no
    candidate exists.
    """
    if os.path.isfile(file):
        return open(file, "r")

    # (name to look for, shell command whose output is the plain content)
    # NOTE(review): the name is interpolated into the shell command unquoted.
    variants = (
        ("%s.gz", "zcat %s.gz"),
        ("%s.bz2", "bzcat %s.bz2"),
        ("%s.xz", "xzcat %s.xz"),
    )
    for name_template, command_template in variants:
        if os.path.isfile(name_template % file):
            return create_temp_file(os.popen(command_template % file, "r"))
    return None
106
def pipe_file(f, t):
    """Copy everything in *f* into *t*, then close *t*.

    *f* is rewound to offset 0 first and read until read() returns an empty
    result. *f* itself is left open; only the destination *t* is closed.
    """
    f.seek(0)
    chunk = f.read()
    while chunk:
        t.write(chunk)
        chunk = f.read()
    t.close()
114
class Updates:
    """In-memory representation of a pdiff "Index" file.

    An instance is optionally loaded from "<readpath>/Index" and can be
    written back out with dump().
    """

    # The six per-patch sections of an Index file, in the order dump() writes
    # them: (header, slot inside a history entry, position of the hash inside
    # the (size, sha1, sha256) tuple, suffix appended to the name on output).
    _SECTIONS = (
        ("SHA1-History:", 0, 1, ""),
        ("SHA256-History:", 0, 2, ""),
        ("SHA1-Patches:", 1, 1, ""),
        ("SHA256-Patches:", 1, 2, ""),
        ("SHA1-Download:", 2, 1, ".gz"),
        ("SHA256-Download:", 2, 2, ".gz"),
    )

    def __init__(self, readpath = None, max = 56):
        """Create an empty state and, if possible, load "<readpath>/Index".

        readpath -- directory holding the Index file and the patches; when
                    false nothing is read.
        max      -- number of history entries dump() keeps.

        An IOError while opening or reading the Index is swallowed; whatever
        was parsed up to that point (possibly nothing) is kept.
        """
        # Value of the Canonical-Path (or Canonical-Name) field, if any.
        self.can_path = None
        # patch name -> [history, patch, download]; each slot is None or a
        # (size, sha1, sha256) tuple.
        self.history = {}
        # Patch names in the order they were first seen / appended.
        self.history_order = []
        self.max = max
        self.readpath = readpath
        # (size, sha1, sha256) from the *-Current fields, or None.
        self.filesizehashes = None

        if readpath:
            try:
                # The context manager closes the handle, which used to leak.
                with open(readpath + "/Index") as f:
                    self._parse(f)
            except IOError:
                # Presumably the usual case is that no Index exists yet;
                # start from an empty state rather than failing.
                pass

    def _parse(self, f):
        """Read the fields of an open Index file *f* into this object."""
        sections = dict((header, (slot, hashpos))
                        for header, slot, hashpos, _ in self._SECTIONS)

        x = f.readline()
        while x:
            l = x.split()

            if not l:
                x = f.readline()
                continue

            if l[0] in sections:
                slot, hashpos = sections[l[0]]
                # _read_hashes() hands back the first line that is not part
                # of the section; loop again so it is examined as a header.
                x = self._read_hashes(f, slot, hashpos)
                continue

            if l[0] == "Canonical-Name:" or l[0] == "Canonical-Path:":
                self.can_path = l[1]
            elif l[0] == "SHA1-Current:" and len(l) == 3:
                self._set_current(1, l[1], int(l[2]))
            elif l[0] == "SHA256-Current:" and len(l) == 3:
                # The SHA256 goes into slot 2 and the SHA1 slot is left
                # alone; previously slot 2 was copied over the SHA1 slot,
                # discarding a SHA1 that had been read before.
                self._set_current(2, l[1], int(l[2]))

            x = f.readline()

    def _set_current(self, hashpos, digest, size):
        """Store *digest* at *hashpos* of filesizehashes, seeding the size once."""
        if not self.filesizehashes:
            self.filesizehashes = (size, None, None)
        current = [int(self.filesizehashes[0]),
                   self.filesizehashes[1],
                   self.filesizehashes[2]]
        current[hashpos] = digest
        self.filesizehashes = tuple(current)

    def _read_hashes(self, f, ind, hashind):
        """Consume the indented " <hash> <size> <name>" lines of one section.

        ind is the slot of the history entry to fill, hashind the position of
        the hash inside that slot's tuple. A trailing ".gz" is stripped from
        the name. Returns the first line that does not start with a space
        (an empty string at end of file).
        """
        while True:
            x = f.readline()
            if not x or x[0] != " ":
                return x
            l = x.split()
            fname = l[2]
            if fname.endswith('.gz'):
                fname = fname[:-3]
            if fname not in self.history:
                self.history[fname] = [None, None, None]
                self.history_order.append(fname)
            # The size recorded first for a slot wins; later sections only
            # add their hash to it.
            entry = self.history[fname][ind] or (int(l[1]), None, None)
            updated = [int(entry[0]), entry[1], entry[2]]
            updated[hashind] = l[0]
            self.history[fname][ind] = tuple(updated)

    def dump(self, out=sys.stdout):
        """Write the Index representation of this object to *out*.

        When more than self.max entries are recorded, the oldest ones are
        dropped first: "<readpath>/<name>.gz" is removed via tryunlink() and
        the entry is deleted from self.history / self.history_order. Hashes
        that are unset are simply not written.
        """
        if self.can_path:
            out.write("Canonical-Path: %s\n" % (self.can_path))

        if self.filesizehashes:
            if self.filesizehashes[1]:
                out.write("SHA1-Current: %s %7d\n" % (self.filesizehashes[1], self.filesizehashes[0]))
            if self.filesizehashes[2]:
                out.write("SHA256-Current: %s %7d\n" % (self.filesizehashes[2], self.filesizehashes[0]))

        hs = self.history
        l = self.history_order[:]

        cnt = len(l)
        if cnt > self.max:
            for h in l[:cnt-self.max]:
                tryunlink("%s/%s.gz" % (self.readpath, h))
                del hs[h]
            l = l[cnt-self.max:]
            self.history_order = l[:]

        for header, slot, hashpos, suffix in self._SECTIONS:
            out.write("%s\n" % header)
            for h in l:
                entry = hs[h][slot]
                if entry and entry[hashpos]:
                    out.write(" %s %7d %s%s\n" % (entry[hashpos], entry[0], h, suffix))
242
def create_temp_file(r):
    """Spool the contents of the readable object *r* into a temporary file.

    *r* is read line by line until readline() returns an empty result and is
    then closed. Returns the tempfile.TemporaryFile() holding the data,
    flushed and rewound to offset 0.
    """
    spool = tempfile.TemporaryFile()
    line = r.readline()
    while line:
        spool.write(line)
        line = r.readline()
    r.close()
    spool.flush()
    spool.seek(0)
    return spool
254
def sizehashes(f):
    """Return (size, sha1, sha256) for the open file *f*.

    The size comes from os.fstat() on the file descriptor; each digest is
    computed by the corresponding apt_pkg function after rewinding *f* to
    offset 0. *f* is left open.
    """
    size = os.fstat(f.fileno()).st_size
    digests = []
    for hasher in (apt_pkg.sha1sum, apt_pkg.sha256sum):
        f.seek(0)
        digests.append(hasher(f))
    return (size, digests[0], digests[1])
262
def genchanges(Options, outdir, oldfile, origfile, maxdiffs = 56):
    """Generate one new pdiff for an index file, if it changed.

    Options   -- option mapping; "PatchName" is required, "NoAct" and
                 "CanonicalPath" are honoured when present.
    outdir    -- directory receiving the patch ("<outdir>/<PatchName>.gz")
                 and the "Index" file; created if missing.
    oldfile   -- path (without compression suffix) of the stored copy from
                 the previous run.
    origfile  -- path (without compression suffix) of the current index file.
    maxdiffs  -- number of patches kept in the Index (passed to Updates).

    With "NoAct" set only a message is printed. If there is no stored copy
    yet, the current file is hard-linked into place and nothing else happens.
    Otherwise an ed-style diff from the stored copy to the current file is
    written gzip-compressed, the Index is rewritten via "Index.new" plus
    rename, and the stored copy is replaced by a hard link to the current
    file.
    """
    if Options.has_key("NoAct"):
        print("Not acting on: od: %s, oldf: %s, origf: %s, md: %s" % (outdir, oldfile, origfile, maxdiffs))
        return

    patchname = Options["PatchName"]

    # origfile = /path/to/Packages
    # oldfile  = ./Packages
    # newfile  = ./Packages.tmp
    # difffile = outdir/patchname
    # index   => outdir/Index

    newfile = oldfile + ".new"
    difffile = "%s/%s" % (outdir, patchname)

    upd = Updates(outdir, int(maxdiffs))
    (oldext, oldstat) = smartstat(oldfile)
    (origext, origstat) = smartstat(origfile)

    if not origstat:
        print("%s: doesn't exist" % (origfile))
        return

    if not oldstat:
        print("%s: initial run" % (origfile))
        os.link(origfile + origext, oldfile + origext)
        return

    # Elements 1 and 2 of a stat result are inode and device: if they match,
    # the stored copy is still the same hard link, so nothing has changed.
    if oldstat[1:3] == origstat[1:3]:
        return

    oldf = smartopen(oldfile)
    oldsizehashes = sizehashes(oldf)

    # NOTE: the hashes recorded in the Index (upd.filesizehashes) are not
    # compared against the stored copy here; a mismatch goes unreported.

    if Options.has_key("CanonicalPath"):
        upd.can_path = Options["CanonicalPath"]

    if os.path.exists(newfile):
        os.unlink(newfile)
    smartlink(origfile, newfile)
    with open(newfile, "r") as newf:
        newsizehashes = sizehashes(newf)

    if newsizehashes == oldsizehashes:
        # Different file on disk but identical content: no patch needed.
        os.unlink(newfile)
        oldf.close()
        return

    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    # The stored copy is fed to diff on stdin; pipe_file() closes the pipe.
    diff_command = "diff --ed - %s | gzip --rsyncable -c -9 > %s.gz" % (newfile, difffile)
    pipe_file(oldf, os.popen(diff_command, "w"))
    oldf.close()

    # Hashes of the uncompressed patch (smartopen finds the .gz) ...
    difff = smartopen(difffile)
    difsizehashes = sizehashes(difff)
    difff.close()

    # ... and of the compressed patch as it is stored.
    with open(difffile + ".gz", "r") as difffgz:
        difgzsizehashes = sizehashes(difffgz)

    upd.history[patchname] = (oldsizehashes, difsizehashes, difgzsizehashes)
    upd.history_order.append(patchname)
    upd.filesizehashes = newsizehashes

    # Replace the stored copy with a fresh hard link to the current file.
    os.unlink(oldfile + oldext)
    os.link(origfile + origext, oldfile + origext)
    os.unlink(newfile)

    index_tmp = outdir + "/Index.new"
    with open(index_tmp, "w") as f:
        upd.dump(f)
    os.rename(index_tmp, outdir + "/Index")
347
348
def main():
    """Command-line entry point: generate pdiffs for the selected suites.

    Parses the options, determines the suites (from the command line, or from
    the database when none are given, optionally restricted to one archive)
    and calls genchanges() for every Translation, Contents and
    Packages/Sources index of every suite/component/architecture.
    """
    global Cnf, Options, Logger

    os.umask(0o002)

    Cnf = utils.get_conf()
    Arguments = [ ('h', "help", "Generate-Index-Diffs::Options::Help"),
                  ('a', 'archive', 'Generate-Index-Diffs::Options::Archive', 'hasArg'),
                  ('c', None, "Generate-Index-Diffs::Options::CanonicalPath", "hasArg"),
                  ('p', "patchname", "Generate-Index-Diffs::Options::PatchName", "hasArg"),
                  ('d', "tmpdir", "Generate-Index-Diffs::Options::TempDir", "hasArg"),
                  ('m', "maxdiffs", "Generate-Index-Diffs::Options::MaxDiffs", "hasArg"),
                  ('n', "n-act", "Generate-Index-Diffs::Options::NoAct"),
                ]
    # The value returned by parse_commandline is used below as the list of
    # suite names to process.
    suites = apt_pkg.parse_commandline(Cnf,Arguments,sys.argv)
    Options = Cnf.subtree("Generate-Index-Diffs::Options")
    if Options.has_key("Help"): usage()

    # Per-type history limits; each falls back to MaxDiffs::Default ("56").
    maxdiffs = Options.get("MaxDiffs::Default", "56")
    maxpackages = Options.get("MaxDiffs::Packages", maxdiffs)
    maxcontents = Options.get("MaxDiffs::Contents", maxdiffs)
    maxsources = Options.get("MaxDiffs::Sources", maxdiffs)

    # Without -p the patch is named after the current local time.
    if not Options.has_key("PatchName"):
        format = "%Y-%m-%d-%H%M.%S"
        Options["PatchName"] = time.strftime( format )

    session = DBConn().session()

    # No suites on the command line: take them from the database, limited to
    # the archive given with -a if there is one.
    if not suites:
        query = session.query(Suite.suite_name)
        if Options.get('Archive'):
            query = query.join(Suite.archive).filter(Archive.archive_name == Options['Archive'])
        suites = [ s.suite_name for s in query ]

    for suitename in suites:
        print "Processing: " + suitename

        # NOTE(review): the result is used without a None check; confirm what
        # get_suite returns for an unknown suite name.
        suiteobj = get_suite(suitename.lower(), session=session)

        # Use the canonical version of the suite name
        suite = suiteobj.suite_name

        if suiteobj.untouchable:
            print "Skipping: " + suite + " (untouchable)"
            continue

        architectures = get_suite_architectures(suite, skipall=True, session=session)
        # This query is not filtered by suite, so it yields every component
        # name the query returns.
        components = [ c.component_name for c in session.query(Component.component_name) ]

        suite_suffix = Cnf.find("Dinstall::SuiteSuffix")
        if components and suite_suffix:
            longsuite = suite + "/" + suite_suffix
        else:
            longsuite = suite

        tree = os.path.join(suiteobj.archive.path, 'dists', longsuite)

        # See if there are Translations which might need a new pdiff
        cwd = os.getcwd()
        for component in components:
            #print "DEBUG: Working on %s" % (component)
            workpath=os.path.join(tree, component, "i18n")
            if os.path.isdir(workpath):
                os.chdir(workpath)
                for dirpath, dirnames, filenames in os.walk(".", followlinks=True, topdown=True):
                    for entry in filenames:
                        if not re_includeinpdiff.match(entry):
                            #print "EXCLUDING %s" % (entry)
                            continue
                        # Drop the compression suffix; genchanges() looks the
                        # compressed variants up again by itself.
                        (fname, fext) = os.path.splitext(entry)
                        # NOTE(review): the path is built from workpath, not
                        # dirpath, so a match found in a subdirectory would be
                        # looked up directly under workpath - confirm intent.
                        processfile=os.path.join(workpath, fname)
                        #print "Working: %s" % (processfile)
                        # Options["TempDir"] (-d) has no default in this code.
                        storename="%s/%s_%s_%s" % (Options["TempDir"], suite, component, fname)
                        #print "Storefile: %s" % (storename)
                        genchanges(Options, processfile + ".diff", storename, processfile, maxdiffs)
        # Undo the chdir calls made while scanning the i18n directories.
        os.chdir(cwd)

        for archobj in architectures:
            architecture = archobj.arch_string

            for component in components:
                if architecture == "source":
                    longarch = architecture
                    packages = "Sources"
                    maxsuite = maxsources
                else:
                    longarch = "binary-%s" % (architecture)
                    packages = "Packages"
                    maxsuite = maxpackages

                # Process Contents
                file = "%s/%s/Contents-%s" % (tree, component, architecture)
                storename = "%s/%s_%s_contents_%s" % (Options["TempDir"], suite, component, architecture)
                genchanges(Options, file + ".diff", storename, file, maxcontents)

                # Process the Packages (or, for "source", Sources) index
                file = "%s/%s/%s/%s" % (tree, component, longarch, packages)
                storename = "%s/%s_%s_%s" % (Options["TempDir"], suite, component, architecture)
                genchanges(Options, file + ".diff", storename, file, maxsuite)
448
449 ################################################################################
450
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()