#!/usr/bin/env python

"""Generate partial package update lists (pdiffs)."""

###########################################################

# idea and basic implementation by Anthony, some changes by Andreas
# parts are stolen from 'dak generate-releases'
#
# Copyright (C) 2004, 2005, 2006  Anthony Towns <aj@azure.humbug.org.au>
# Copyright (C) 2004, 2005  Andreas Barth <aba@not.so.argh.org>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


# < elmo> bah, don't bother me with annoying facts
# < elmo> I was on a roll


################################################################################
import sys
import os
import tempfile
import time
import apt_pkg
import glob

from daklib import utils
from daklib.dbconn import Archive, Component, DBConn, Suite, get_suite, get_suite_architectures
#from daklib.regexes import re_includeinpdiff
import re
re_includeinpdiff = re.compile(r"(Translation-[a-zA-Z_]+\.(?:bz2|xz))")

################################################################################

Cnf = None
Logger = None
Options = None

################################################################################

def usage(exit_code=0):
    print """Usage: dak generate-index-diffs [OPTIONS] [suites]
Write out ed-style diffs to Packages/Sources lists

  -h, --help            show this help and exit
  -a <archive>          generate diffs for suites in <archive>
  -c                    give the canonical path of the file
  -p                    name for the patch (defaults to current time)
  -d                    name for the hardlink farm for status
  -m                    how many diffs to generate
  -n                    take no action
    """
    sys.exit(exit_code)

def tryunlink(file):
    try:
        os.unlink(file)
    except OSError:
        print "warning: could not remove %s" % (file)

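# Return (extension, stat result) for whichever of file, file.gz or
# file.bz2 exists, or (None, None) if none of them do.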
def smartstat(file):
    for ext in ["", ".gz", ".bz2"]:
        if os.path.isfile(file + ext):
            return (ext, os.stat(file + ext))
    return (None, None)

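# Make an uncompressed copy of f at t: hardlink the plain file if it
# exists, otherwise decompress the .gz or .bz2 variant into place.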
def smartlink(f, t):
    if os.path.isfile(f):
        os.link(f,t)
    elif os.path.isfile("%s.gz" % (f)):
        os.system("gzip -d < %s.gz > %s" % (f, t))
    elif os.path.isfile("%s.bz2" % (f)):
        os.system("bzip2 -d < %s.bz2 > %s" % (f, t))
    else:
        print "missing: %s" % (f)
        raise IOError(f)

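# Open file for reading, transparently decompressing file.gz or file.bz2
# into a temporary file if only a compressed variant exists.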
def smartopen(file):
    if os.path.isfile(file):
        f = open(file, "r")
    elif os.path.isfile("%s.gz" % file):
        f = create_temp_file(os.popen("zcat %s.gz" % file, "r"))
    elif os.path.isfile("%s.bz2" % file):
        f = create_temp_file(os.popen("bzcat %s.bz2" % file, "r"))
    else:
        f = None
    return f

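# Copy the whole contents of f into t, then close t.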
def pipe_file(f, t):
    f.seek(0)
    while 1:
        l = f.read()
        if not l: break
        t.write(l)
    t.close()

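# State for one pdiff Index file.  An Updates object is loaded from, and
# dumped back to, an Index stanza which (as written by dump() below) looks
# roughly like:
#
#   Canonical-Path: ...
#   SHA1-Current: <hash> <size>
#   SHA256-Current: <hash> <size>
#   SHA1-History:
#    <hash> <size> <patchname>
#   SHA1-Patches:
#    <hash> <size> <patchname>
#   ...
#
# history maps each patch name to a triple of (size, sha1, sha256) entries
# for the old file, the uncompressed patch and the gzipped patch;
# filesizehashes holds the same triple for the current full file.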
class Updates:
    def __init__(self, readpath = None, max = 56):
        self.can_path = None
        self.history = {}
        self.history_order = []
        self.max = max
        self.readpath = readpath
        self.filesizehashes = None

        if readpath:
            try:
                f = open(readpath + "/Index")
                x = f.readline()

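                # Read the " <hash> <size> <name>" lines of one Index section.
                # ind selects the slot in self.history (0 = *-History,
                # 1 = *-Patches, 2 = *-Download); hashind selects SHA1 (1)
                # or SHA256 (2).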
                def read_hashs(ind, hashind, f, self, x=x):
                    while 1:
                        x = f.readline()
                        if not x or x[0] != " ": break
                        l = x.split()
                        fname = l[2]
                        if fname.endswith('.gz'):
                            fname = fname[:-3]
                        if not self.history.has_key(fname):
                            self.history[fname] = [None,None,None]
                            self.history_order.append(fname)
                        if not self.history[fname][ind]:
                            self.history[fname][ind] = (int(l[1]), None, None)
                        if hashind == 1:
                            self.history[fname][ind] = (int(self.history[fname][ind][0]), l[0], self.history[fname][ind][2])
                        else:
                            self.history[fname][ind] = (int(self.history[fname][ind][0]), self.history[fname][ind][1], l[0])
                    return x

                while x:
                    l = x.split()

                    if len(l) == 0:
                        x = f.readline()
                        continue

                    if l[0] == "SHA1-History:":
                        x = read_hashs(0,1,f,self)
                        continue

                    if l[0] == "SHA256-History:":
                        x = read_hashs(0,2,f,self)
                        continue

                    if l[0] == "SHA1-Patches:":
                        x = read_hashs(1,1,f,self)
                        continue

                    if l[0] == "SHA256-Patches:":
                        x = read_hashs(1,2,f,self)
                        continue

                    if l[0] == "SHA1-Download:":
                        x = read_hashs(2,1,f,self)
                        continue

                    if l[0] == "SHA256-Download:":
                        x = read_hashs(2,2,f,self)
                        continue

                    if l[0] == "Canonical-Name:" or l[0]=="Canonical-Path:":
                        self.can_path = l[1]

                    if l[0] == "SHA1-Current:" and len(l) == 3:
                        if not self.filesizehashes:
                            self.filesizehashes = (int(l[2]), None, None)
                        self.filesizehashes = (int(self.filesizehashes[0]), l[1], self.filesizehashes[2])

                    if l[0] == "SHA256-Current:" and len(l) == 3:
                        if not self.filesizehashes:
                            self.filesizehashes = (int(l[2]), None, None)
                        self.filesizehashes = (int(self.filesizehashes[0]), self.filesizehashes[1], l[1])

                    x = f.readline()

            except IOError:
                pass

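    # Write the Index stanza to `out`, first trimming the history to the
    # newest self.max entries and unlinking the pdiffs that fall off the end.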
    def dump(self, out=sys.stdout):
        if self.can_path:
            out.write("Canonical-Path: %s\n" % (self.can_path))

        if self.filesizehashes:
            if self.filesizehashes[1]:
                out.write("SHA1-Current: %s %7d\n" % (self.filesizehashes[1], self.filesizehashes[0]))
            if self.filesizehashes[2]:
                out.write("SHA256-Current: %s %7d\n" % (self.filesizehashes[2], self.filesizehashes[0]))

        hs = self.history
        l = self.history_order[:]

        cnt = len(l)
        if cnt > self.max:
            for h in l[:cnt-self.max]:
                tryunlink("%s/%s.gz" % (self.readpath, h))
                del hs[h]
            l = l[cnt-self.max:]
            self.history_order = l[:]

        out.write("SHA1-History:\n")
        for h in l:
            if hs[h][0] and hs[h][0][1]:
                out.write(" %s %7d %s\n" % (hs[h][0][1], hs[h][0][0], h))
        out.write("SHA256-History:\n")
        for h in l:
            if hs[h][0] and hs[h][0][2]:
                out.write(" %s %7d %s\n" % (hs[h][0][2], hs[h][0][0], h))
        out.write("SHA1-Patches:\n")
        for h in l:
            if hs[h][1] and hs[h][1][1]:
                out.write(" %s %7d %s\n" % (hs[h][1][1], hs[h][1][0], h))
        out.write("SHA256-Patches:\n")
        for h in l:
            if hs[h][1] and hs[h][1][2]:
                out.write(" %s %7d %s\n" % (hs[h][1][2], hs[h][1][0], h))
        out.write("SHA1-Download:\n")
        for h in l:
            if hs[h][2] and hs[h][2][1]:
                out.write(" %s %7d %s.gz\n" % (hs[h][2][1], hs[h][2][0], h))
        out.write("SHA256-Download:\n")
        for h in l:
            if hs[h][2] and hs[h][2][2]:
                out.write(" %s %7d %s.gz\n" % (hs[h][2][2], hs[h][2][0], h))

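# Spool the readable object r into an anonymous temporary file and return
# it, rewound to the start.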
def create_temp_file(r):
    f = tempfile.TemporaryFile()
    while 1:
        x = r.readline()
        if not x: break
        f.write(x)
    r.close()
    del x,r
    f.flush()
    f.seek(0)
    return f

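# Return (size, sha1, sha256) for the open file f.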
def sizehashes(f):
    size = os.fstat(f.fileno())[6]
    f.seek(0)
    sha1sum = apt_pkg.sha1sum(f)
    f.seek(0)
    sha256sum = apt_pkg.sha256sum(f)
    return (size, sha1sum, sha256sum)

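# Generate one pdiff: compare the stored copy of the index (oldfile, kept
# in the hardlink farm under the TempDir) against the current index
# (origfile).  If nothing changed, do nothing; otherwise write an ed-style
# diff gzipped to outdir/<patchname>.gz, update outdir/Index, and re-link
# the stored copy to the new index.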
def genchanges(Options, outdir, oldfile, origfile, maxdiffs = 56):
    if Options.has_key("NoAct"):
        print "Not acting on: od: %s, oldf: %s, origf: %s, md: %s" % (outdir, oldfile, origfile, maxdiffs)
        return

    patchname = Options["PatchName"]

    # origfile = /path/to/Packages
    # oldfile  = ./Packages
    # newfile  = ./Packages.new
    # difffile = outdir/patchname
    # index   => outdir/Index

    # (outdir, oldfile, origfile) = argv

    newfile = oldfile + ".new"
    difffile = "%s/%s" % (outdir, patchname)

    upd = Updates(outdir, int(maxdiffs))
    (oldext, oldstat) = smartstat(oldfile)
    (origext, origstat) = smartstat(origfile)
    if not origstat:
        print "%s: doesn't exist" % (origfile)
        return
    if not oldstat:
        print "%s: initial run" % (origfile)
        os.link(origfile + origext, oldfile + origext)
        return

    if oldstat[1:3] == origstat[1:3]:
        #print "%s: hardlink unbroken, assuming unchanged" % (origfile)
        return

    oldf = smartopen(oldfile)
    oldsizehashes = sizehashes(oldf)

    # should probably early exit if either of these checks fail
    # alternatively (optionally?) could just trim the patch history

    #if upd.filesizesha1:
    #    if upd.filesizesha1 != oldsizesha1:
    #        print "info: old file " + oldfile + " changed! %s %s => %s %s" % (upd.filesizesha1 + oldsizesha1)

    if Options.has_key("CanonicalPath"): upd.can_path=Options["CanonicalPath"]

    if os.path.exists(newfile): os.unlink(newfile)
    smartlink(origfile, newfile)
    newf = open(newfile, "r")
    newsizehashes = sizehashes(newf)
    newf.close()

    if newsizehashes == oldsizehashes:
        os.unlink(newfile)
        oldf.close()
        #print "%s: unchanged" % (origfile)
    else:
        if not os.path.isdir(outdir):
            os.mkdir(outdir)

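        # Feed the old index into diff to get an ed-style patch against the
        # new file, gzipping it (--rsyncable, to keep mirror syncs cheap)
        # straight into outdir/<patchname>.gz.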
        w = os.popen("diff --ed - %s | gzip --rsyncable -c -9 > %s.gz" %
                     (newfile, difffile), "w")
        pipe_file(oldf, w)
        oldf.close()

        difff = smartopen(difffile)
        difsizehashes = sizehashes(difff)
        difff.close()

        difffgz = open(difffile + ".gz", "r")
        difgzsizehashes = sizehashes(difffgz)
        difffgz.close()

        upd.history[patchname] = (oldsizehashes, difsizehashes, difgzsizehashes)
        upd.history_order.append(patchname)

        upd.filesizehashes = newsizehashes

        os.unlink(oldfile + oldext)
        os.link(origfile + origext, oldfile + origext)
        os.unlink(newfile)

        with open(outdir + "/Index.new", "w") as f:
            upd.dump(f)
        os.rename(outdir + "/Index.new", outdir + "/Index")


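# Iterate over the (non-untouchable) suites and generate pdiffs for their
# Translation-*, Contents-* and Packages/Sources indices.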
def main():
    global Cnf, Options, Logger

    os.umask(0o002)

    Cnf = utils.get_conf()
    Arguments = [ ('h', "help", "Generate-Index-Diffs::Options::Help"),
                  ('a', 'archive', 'Generate-Index-Diffs::Options::Archive', 'hasArg'),
                  ('c', None, "Generate-Index-Diffs::Options::CanonicalPath", "hasArg"),
                  ('p', "patchname", "Generate-Index-Diffs::Options::PatchName", "hasArg"),
                  ('d', "tmpdir", "Generate-Index-Diffs::Options::TempDir", "hasArg"),
                  ('m', "maxdiffs", "Generate-Index-Diffs::Options::MaxDiffs", "hasArg"),
                  ('n', "n-act", "Generate-Index-Diffs::Options::NoAct"),
                ]
    suites = apt_pkg.parse_commandline(Cnf,Arguments,sys.argv)
    Options = Cnf.subtree("Generate-Index-Diffs::Options")
    if Options.has_key("Help"): usage()

    maxdiffs = Options.get("MaxDiffs::Default", "56")
    maxpackages = Options.get("MaxDiffs::Packages", maxdiffs)
    maxcontents = Options.get("MaxDiffs::Contents", maxdiffs)
    maxsources = Options.get("MaxDiffs::Sources", maxdiffs)

    if not Options.has_key("PatchName"):
        format = "%Y-%m-%d-%H%M.%S"
        Options["PatchName"] = time.strftime( format )

    session = DBConn().session()

    if not suites:
        query = session.query(Suite.suite_name)
        if Options.get('Archive'):
            query = query.join(Suite.archive).filter(Archive.archive_name == Options['Archive'])
        suites = [ s.suite_name for s in query ]

    for suitename in suites:
        print "Processing: " + suitename

        suiteobj = get_suite(suitename.lower(), session=session)

        # Use the canonical version of the suite name
        suite = suiteobj.suite_name

        if suiteobj.untouchable:
            print "Skipping: " + suite + " (untouchable)"
            continue

        architectures = get_suite_architectures(suite, skipall=True, session=session)
        components = [ c.component_name for c in session.query(Component.component_name) ]

        suite_suffix = Cnf.find("Dinstall::SuiteSuffix")
        if components and suite_suffix:
            longsuite = suite + "/" + suite_suffix
        else:
            longsuite = suite

        tree = os.path.join(suiteobj.archive.path, 'dists', longsuite)

        # See if there are Translations which might need a new pdiff
        cwd = os.getcwd()
        for component in components:
            #print "DEBUG: Working on %s" % (component)
            workpath=os.path.join(tree, component, "i18n")
            if os.path.isdir(workpath):
                os.chdir(workpath)
                for dirpath, dirnames, filenames in os.walk(".", followlinks=True, topdown=True):
                    for entry in filenames:
                        if not re_includeinpdiff.match(entry):
                            #print "EXCLUDING %s" % (entry)
                            continue
                        (fname, fext) = os.path.splitext(entry)
                        processfile=os.path.join(workpath, fname)
                        #print "Working: %s" % (processfile)
                        storename="%s/%s_%s_%s" % (Options["TempDir"], suite, component, fname)
                        #print "Storefile: %s" % (storename)
                        genchanges(Options, processfile + ".diff", storename, processfile, maxdiffs)
        os.chdir(cwd)

        for archobj in architectures:
            architecture = archobj.arch_string

            for component in components:
                if architecture == "source":
                    longarch = architecture
                    packages = "Sources"
                    maxsuite = maxsources
                else:
                    longarch = "binary-%s" % (architecture)
                    packages = "Packages"
                    maxsuite = maxpackages

                # Process Contents
                file = "%s/%s/Contents-%s" % (tree, component, architecture)
                storename = "%s/%s_%s_contents_%s" % (Options["TempDir"], suite, component, architecture)
                genchanges(Options, file + ".diff", storename, file, maxcontents)

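                # Process Packages/Sources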
                file = "%s/%s/%s/%s" % (tree, component, longarch, packages)
                storename = "%s/%s_%s_%s" % (Options["TempDir"], suite, component, architecture)
                genchanges(Options, file + ".diff", storename, file, maxsuite)

################################################################################

if __name__ == '__main__':
    main()