]> git.decadent.org.uk Git - dak.git/blob - dak/generate_index_diffs.py
845137138fd3fa544cbacdd490927fc7ada4a0ae
[dak.git] / dak / generate_index_diffs.py
1 #!/usr/bin/env python
2
3 """ generates partial package updates list"""
4
5 ###########################################################
6
7 # idea and basic implementation by Anthony, some changes by Andreas
8 # parts are stolen from 'dak generate-releases'
9 #
10 # Copyright (C) 2004, 2005, 2006  Anthony Towns <aj@azure.humbug.org.au>
11 # Copyright (C) 2004, 2005  Andreas Barth <aba@not.so.argh.org>
12
13 # This program is free software; you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation; either version 2 of the License, or
16 # (at your option) any later version.
17
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
27
28 # < elmo> bah, don't bother me with annoying facts
29 # < elmo> I was on a roll
30
31
32 ################################################################################
33
34 import sys
35 import os
36 import tempfile
37 import time
38 import apt_pkg
39 import glob
40
41 from daklib import utils
42 from daklib.dbconn import Archive, Component, DBConn, Suite, get_suite, get_suite_architectures
#from daklib.regexes import re_includeinpdiff
import re
# Selects the i18n files main() generates pdiffs for: names that start with
# "Translation-<letters/underscores>." followed by "bz2" or "xz".  It is used
# with .match(), so it is anchored at the start of the name only.
re_includeinpdiff = re.compile(r"(Translation-[a-zA-Z_]+\.(?:bz2|xz))")
46
47 ################################################################################
48
# Module-level state declared global by main().
# Cnf:     set by main() to the result of utils.get_conf().
# Logger:  declared global in main() but never assigned in this file.
# Options: set by main() to the "Generate-Index-Diffs::Options" subtree of Cnf.
Cnf = None
Logger = None
Options = None
52
53 ################################################################################
54
def usage (exit_code=0):
    """Print the command-line help text and terminate the process.

    exit_code is handed to sys.exit(), so this function never returns.
    """
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 print function.
    print("""Usage: dak generate-index-diffs [OPTIONS] [suites]
Write out ed-style diffs to Packages/Source lists

  -h, --help            show this help and exit
  -a <archive>          generate diffs for suites in <archive>
  -c                    give the canonical path of the file
  -p                    name for the patch (defaults to current time)
  -d                    name for the hardlink farm for status
  -m                    how many diffs to generate
  -n                    take no action
    """)
    sys.exit(exit_code)
68
def tryunlink(file):
    """Remove `file`, printing a warning instead of raising on failure.

    Any OSError from os.unlink() (not only a permission problem, despite the
    wording of the message) is reported on stdout and swallowed, so callers
    can treat removal as best-effort.
    """
    try:
        os.unlink(file)
    except OSError:
        print("warning: removing of %s denied" % (file))
74
def smartstat(file):
    """Locate `file` in plain or compressed form and stat it.

    Probes `file`, `file`.gz and `file`.bz2 in that order and returns
    (extension, os.stat result) for the first one that is a regular file;
    the extension is "" for the uncompressed variant.  Returns (None, None)
    when none of them exists.

    NOTE(review): ".xz" is not probed here although re_includeinpdiff also
    selects ".xz" translation files; smartlink()/smartopen() do not handle
    ".xz" either, so the three need to be extended together.
    """
    for ext in ["", ".gz", ".bz2"]:
        candidate = file + ext
        if os.path.isfile(candidate):
            return (ext, os.stat(candidate))
    return (None, None)
80
def smartlink(f, t):
    """Make `t` an uncompressed copy of (or hardlink to) the index `f`.

    If `f` itself exists it is hardlinked to `t`.  Otherwise `f`.gz or
    `f`.bz2 (checked in that order) is decompressed into `t`.

    Raises IOError(f) when no variant exists, OSError from os.link(), and
    subprocess.CalledProcessError when the decompressor exits non-zero.
    """
    # Function-scope import: the module header does not import subprocess.
    import subprocess

    if os.path.isfile(f):
        os.link(f, t)
        return

    for ext, tool in ((".gz", "gzip"), (".bz2", "bzip2")):
        packed = f + ext
        if not os.path.isfile(packed):
            continue
        # Run the decompressor with an argument list and explicit file
        # handles instead of a shell string: paths containing spaces or
        # shell metacharacters are safe, and a failing decompressor raises
        # instead of silently leaving a truncated `t` behind.
        with open(packed, "rb") as src:
            with open(t, "wb") as dst:
                subprocess.check_call([tool, "-d"], stdin=src, stdout=dst)
        return

    print("missing: %s" % (f))
    raise IOError(f)
91
def smartopen(file):
    """Open the index `file` for reading, decompressing it if necessary.

    Returns an open file object for `file` if it exists.  Otherwise, if
    `file`.gz or `file`.bz2 exists (checked in that order), its contents are
    decompressed with zcat/bzcat into a temporary file (via
    create_temp_file) which is returned positioned at offset 0.  Returns
    None when no variant exists.

    Raises IOError when the decompressor exits with a non-zero status.
    """
    # Function-scope import: the module header does not import subprocess.
    import subprocess

    if os.path.isfile(file):
        return open(file, "r")

    for ext, tool in ((".gz", "zcat"), (".bz2", "bzcat")):
        packed = file + ext
        if not os.path.isfile(packed):
            continue
        # An argument list (no shell) keeps unusual path names intact.
        proc = subprocess.Popen([tool, packed], stdout=subprocess.PIPE)
        f = create_temp_file(proc.stdout)
        # create_temp_file() has drained and closed the pipe; reap the child
        # and refuse to hand back a possibly truncated copy.
        if proc.wait() != 0:
            f.close()
            raise IOError("%s failed on %s" % (tool, packed))
        return f

    return None
102
def pipe_file(f, t):
    """Copy all of `f` (from its beginning) into `t`, then close `t`.

    `f` is rewound with seek(0) first.  The data is moved in fixed-size
    chunks so that a large index is never held in memory in one piece.
    `f` is left open; closing it is the caller's job.
    """
    f.seek(0)
    while True:
        chunk = f.read(65536)
        if not chunk:
            break
        t.write(chunk)
    t.close()
110
class Updates:
    """In-memory form of a pdiff directory's "Index" file.

    Attributes:
      can_path       value of the Canonical-Path (or Canonical-Name) field
      filesizesha1   (sha1, size) pair from the SHA1-Current field
      history        patch name -> [history, patch, download] entries, each
                     a (sha1, size) pair or None
      history_order  patch names in the order they appear in the Index
      max            number of patches dump() keeps
      readpath       directory the Index was read from
    """

    # Index section header -> slot of that section in a history entry.
    _SECTIONS = {
        "SHA1-History:": 0,
        "SHA1-Patches:": 1,
        "SHA1-Download:": 2,
    }

    def __init__(self, readpath = None, max = 56):
        """Load `readpath`/Index if `readpath` is given and readable.

        An IOError while opening or reading the Index is ignored, leaving
        whatever was parsed up to that point (nothing, if the file does not
        exist).
        """
        self.can_path = None
        self.history = {}
        self.history_order = []
        self.max = max
        self.readpath = readpath
        self.filesizesha1 = None

        if readpath:
            try:
                # "with" guarantees the handle is closed again.
                with open(readpath + "/Index") as f:
                    self._parse_index(f)
            except IOError:
                pass

    def _read_hashes(self, ind, f):
        """Read one block of " <sha1> <size> <name>" lines into slot `ind`.

        Stops at EOF or at the first line not starting with a space and
        returns that line ("" at EOF) so the caller can continue with it.
        A trailing ".gz" is stripped from the name.
        """
        while True:
            line = f.readline()
            if not line or line[0] != " ":
                return line
            fields = line.split()
            fname = fields[2]
            if fname.endswith('.gz'):
                fname = fname[:-3]
            if fname not in self.history:
                self.history[fname] = [None,None,None]
                self.history_order.append(fname)
            self.history[fname][ind] = (fields[0], int(fields[1]))

    def _parse_index(self, f):
        """Populate this object from the open Index file `f`."""
        line = f.readline()
        while line:
            fields = line.split()

            if not fields:
                line = f.readline()
                continue

            key = fields[0]
            if key in self._SECTIONS:
                # The section reader hands back the first line after the
                # section, which still has to be interpreted here.
                line = self._read_hashes(self._SECTIONS[key], f)
                continue

            if key == "Canonical-Name:" or key == "Canonical-Path:":
                self.can_path = fields[1]

            if key == "SHA1-Current:" and len(fields) == 3:
                self.filesizesha1 = (fields[1], int(fields[2]))

            line = f.readline()

    def dump(self, out=sys.stdout):
        """Write the Index to `out`, first pruning the history to self.max.

        When more than self.max patches are recorded, the oldest ones are
        dropped from the history and their "<readpath>/<name>.gz" files are
        removed with tryunlink().  SHA1-Download lines are only written for
        entries that have download information.
        """
        if self.can_path:
            out.write("Canonical-Path: %s\n" % (self.can_path))

        if self.filesizesha1:
            out.write("SHA1-Current: %s %7d\n" % (self.filesizesha1))

        hs = self.history
        order = self.history_order[:]

        excess = len(order) - self.max
        if excess > 0:
            for h in order[:excess]:
                tryunlink("%s/%s.gz" % (self.readpath, h))
                del hs[h]
            order = order[excess:]
            self.history_order = order[:]

        out.write("SHA1-History:\n")
        for h in order:
            out.write(" %s %7d %s\n" % (hs[h][0][0], hs[h][0][1], h))
        out.write("SHA1-Patches:\n")
        for h in order:
            out.write(" %s %7d %s\n" % (hs[h][1][0], hs[h][1][1], h))
        out.write("SHA1-Download:\n")
        for h in order:
            if hs[h][2]:
                out.write(" %s %7d %s.gz\n" % (hs[h][2][0], hs[h][2][1], h))
197
def create_temp_file(r):
    """Spool everything readable from `r` into an anonymous temporary file.

    `r` is read to EOF and always closed, even if copying fails.  The
    returned tempfile.TemporaryFile is flushed and positioned at offset 0.
    """
    f = tempfile.TemporaryFile()
    try:
        # Fixed-size chunks instead of readline(): the data is only being
        # copied, so line boundaries are irrelevant.
        while True:
            chunk = r.read(65536)
            if not chunk:
                break
            f.write(chunk)
    finally:
        r.close()
    f.flush()
    f.seek(0)
    return f
209
def sizesha1(f):
    """Return (sha1sum, size) for the open file `f`.

    The size is taken from fstat() on the file descriptor; the file is then
    rewound and passed to apt_pkg.sha1sum().  Note the order of the result:
    checksum first, size second.
    """
    size = os.fstat(f.fileno()).st_size
    f.seek(0)
    # presumably apt_pkg.sha1sum() reads the file to EOF and returns the hex
    # digest - confirm against the python-apt documentation
    sha1sum = apt_pkg.sha1sum(f)
    return (sha1sum, size)
215
def genchanges(Options, outdir, oldfile, origfile, maxdiffs = 56):
    """Create a new pdiff for one index file if it changed since last run.

    Options   mapping with "PatchName" and optionally "NoAct" and
              "CanonicalPath"
    outdir    directory holding the patches and their Index
    oldfile   base name of the stored copy of the previous index
    origfile  base name of the current index in the archive
    maxdiffs  how many patches to keep in the Index

    Both base names may exist uncompressed or with a .gz/.bz2 suffix.  On
    the first run the current index is just hardlinked to `oldfile`.  If the
    content changed, an ed-style diff from old to new is written gzipped to
    `outdir`/<PatchName>.gz, the stored copy is replaced by a hardlink to
    the current index and `outdir`/Index is rewritten.
    """
    if "NoAct" in Options:
        print("Not acting on: od: %s, oldf: %s, origf: %s, md: %s" % (outdir, oldfile, origfile, maxdiffs))
        return

    patchname = Options["PatchName"]

    # origfile = /path/to/Packages
    # oldfile  = ./Packages
    # newfile  = ./Packages.new
    # difffile = outdir/patchname
    # index   => outdir/Index

    newfile = oldfile + ".new"
    difffile = "%s/%s" % (outdir, patchname)

    upd = Updates(outdir, int(maxdiffs))
    (oldext, oldstat) = smartstat(oldfile)
    (origext, origstat) = smartstat(origfile)
    if not origstat:
        print("%s: doesn't exist" % (origfile))
        return
    if not oldstat:
        print("%s: initial run" % (origfile))
        os.link(origfile + origext, oldfile + origext)
        return

    # Fields 1 and 2 of a stat result are inode and device: if they match,
    # the stored copy is still a hardlink to the current index.
    if oldstat[1:3] == origstat[1:3]:
        return

    oldf = smartopen(oldfile)
    try:
        oldsizesha1 = sizesha1(oldf)

        # TODO: should probably exit early (or trim the patch history) when
        # upd.filesizesha1 is set and differs from oldsizesha1, i.e. the
        # stored copy is not the file the Index describes.

        if "CanonicalPath" in Options:
            upd.can_path = Options["CanonicalPath"]

        if os.path.exists(newfile):
            os.unlink(newfile)
        smartlink(origfile, newfile)
        with open(newfile, "r") as newf:
            newsizesha1 = sizesha1(newf)

        if newsizesha1 == oldsizesha1:
            # Different inode, same content: nothing to record.
            os.unlink(newfile)
            return

        if not os.path.isdir(outdir):
            os.mkdir(outdir)

        # The old index is fed to diff on stdin.  NOTE: the paths are
        # interpolated into a shell command line unquoted.
        w = os.popen("diff --ed - %s | gzip --rsyncable -c -9 > %s.gz" %
                     (newfile, difffile), "w")
        pipe_file(oldf, w)
    finally:
        # Closed on every path, including errors; close() is idempotent.
        oldf.close()

    # Only the .gz exists, so this yields the uncompressed patch.
    difff = smartopen(difffile)
    try:
        difsizesha1 = sizesha1(difff)
    finally:
        difff.close()

    with open(difffile + ".gz", "r") as difffgz:
        difgzsizesha1 = sizesha1(difffgz)

    upd.history[patchname] = (oldsizesha1, difsizesha1, difgzsizesha1)
    upd.history_order.append(patchname)

    upd.filesizesha1 = newsizesha1

    # Re-establish the hardlink so the next run can detect changes.
    os.unlink(oldfile + oldext)
    os.link(origfile + origext, oldfile + origext)
    os.unlink(newfile)

    with open(outdir + "/Index", "w") as f:
        upd.dump(f)
300
301
def main():
    """Command-line entry point: generate pdiffs for the requested suites.

    Parses the command line into the "Generate-Index-Diffs::Options"
    configuration subtree, determines the suites to process (the positional
    arguments, or every suite in the database, optionally restricted to one
    archive) and calls genchanges() for each Translation, Contents, Packages
    and Sources index of every suite that is not marked untouchable.
    """
    global Cnf, Options, Logger

    os.umask(0o002)

    Cnf = utils.get_conf()
    Arguments = [ ('h', "help", "Generate-Index-Diffs::Options::Help"),
                  ('a', 'archive', 'Generate-Index-Diffs::Options::Archive', 'hasArg'),
                  ('c', None, "Generate-Index-Diffs::Options::CanonicalPath", "hasArg"),
                  ('p', "patchname", "Generate-Index-Diffs::Options::PatchName", "hasArg"),
                  ('d', "tmpdir", "Generate-Index-Diffs::Options::TempDir", "hasArg"),
                  ('m', "maxdiffs", "Generate-Index-Diffs::Options::MaxDiffs", "hasArg"),
                  ('n', "n-act", "Generate-Index-Diffs::Options::NoAct"),
                ]
    suites = apt_pkg.parse_commandline(Cnf,Arguments,sys.argv)
    Options = Cnf.subtree("Generate-Index-Diffs::Options")
    if "Help" in Options:
        usage()

    # Per-index-type limits fall back to the general default.
    maxdiffs = Options.get("MaxDiffs::Default", "56")
    maxpackages = Options.get("MaxDiffs::Packages", maxdiffs)
    maxcontents = Options.get("MaxDiffs::Contents", maxdiffs)
    maxsources = Options.get("MaxDiffs::Sources", maxdiffs)

    if "PatchName" not in Options:
        stamp_format = "%Y-%m-%d-%H%M.%S"
        Options["PatchName"] = time.strftime( stamp_format )

    session = DBConn().session()

    if not suites:
        query = session.query(Suite.suite_name)
        if Options.get('Archive'):
            query = query.join(Suite.archive).filter(Archive.archive_name == Options['Archive'])
        suites = [ s.suite_name for s in query ]

    for suitename in suites:
        print("Processing: " + suitename)

        # NOTE(review): if get_suite() can return None for an unknown name,
        # the attribute access below fails - confirm against daklib.dbconn.
        suiteobj = get_suite(suitename.lower(), session=session)

        # Use the canonical version of the suite name
        suite = suiteobj.suite_name

        if suiteobj.untouchable:
            print("Skipping: " + suite + " (untouchable)")
            continue

        architectures = get_suite_architectures(suite, skipall=True, session=session)
        components = [ c.component_name for c in session.query(Component.component_name) ]

        suite_suffix = Cnf.find("Dinstall::SuiteSuffix")
        if components and suite_suffix:
            longsuite = suite + "/" + suite_suffix
        else:
            longsuite = suite

        tree = os.path.join(suiteobj.archive.path, 'dists', longsuite)

        # See if there are Translations which might need a new pdiff
        cwd = os.getcwd()
        for component in components:
            workpath = os.path.join(tree, component, "i18n")
            if not os.path.isdir(workpath):
                continue
            # The walk (and the genchanges() calls inside it) run with the
            # i18n directory as working directory; it is restored after all
            # components have been handled.
            os.chdir(workpath)
            for dirpath, dirnames, filenames in os.walk(".", followlinks=True, topdown=True):
                for entry in filenames:
                    if not re_includeinpdiff.match(entry):
                        continue
                    # Strip the compression suffix; genchanges() works on
                    # base names.
                    (fname, fext) = os.path.splitext(entry)
                    processfile = os.path.join(workpath, fname)
                    storename = "%s/%s_%s_%s" % (Options["TempDir"], suite, component, fname)
                    genchanges(Options, processfile + ".diff", storename, processfile, maxdiffs)
        os.chdir(cwd)

        for archobj in architectures:
            architecture = archobj.arch_string

            for component in components:
                if architecture == "source":
                    longarch = architecture
                    packages = "Sources"
                    maxsuite = maxsources
                else:
                    longarch = "binary-%s"% (architecture)
                    packages = "Packages"
                    maxsuite = maxpackages
                    # Process Contents
                    indexfile = "%s/%s/Contents-%s" % (tree, component, architecture)
                    storename = "%s/%s_%s_contents_%s" % (Options["TempDir"], suite, component, architecture)
                    genchanges(Options, indexfile + ".diff", storename, indexfile, maxcontents)

                indexfile = "%s/%s/%s/%s" % (tree, component, longarch, packages)
                storename = "%s/%s_%s_%s" % (Options["TempDir"], suite, component, architecture)
                genchanges(Options, indexfile + ".diff", storename, indexfile, maxsuite)
400
401 ################################################################################
402
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()