]> git.decadent.org.uk Git - dak.git/blob - dak/generate_releases.py
generate_releases: reduce number of sql queries for by-hash files
[dak.git] / dak / generate_releases.py
1 #!/usr/bin/env python
2
3 """
4 Create all the Release files
5
6 @contact: Debian FTPMaster <ftpmaster@debian.org>
7 @copyright: 2011  Joerg Jaspert <joerg@debian.org>
8 @copyright: 2011  Mark Hymers <mhy@debian.org>
9 @license: GNU General Public License version 2 or later
10
11 """
12
13 # This program is free software; you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation; either version 2 of the License, or
16 # (at your option) any later version.
17
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
27 ################################################################################
28
29 # <mhy> I wish they wouldnt leave biscuits out, thats just tempting. Damnit.
30
31 ################################################################################
32
33 import sys
34 import os
35 import os.path
36 import stat
37 import time
38 import gzip
39 import bz2
40 import errno
41 import apt_pkg
42 import subprocess
43 from tempfile import mkstemp, mkdtemp
44 import commands
45 from sqlalchemy.orm import object_session
46
47 from daklib import utils, daklog
48 from daklib.regexes import re_gensubrelease, re_includeinrelease
49 from daklib.dak_exceptions import *
50 from daklib.dbconn import *
51 from daklib.config import Config
52 from daklib.dakmultiprocessing import DakProcessPool, PROC_STATUS_SUCCESS
53 import daklib.daksubprocess
54
55 ################################################################################
Logger = None                  #: Module-wide daklog.Logger; stays None until main() creates it
57
58 ################################################################################
59
def usage(exit_code=0):
    """
    Print the command line help to stdout and terminate the program.

    @type exit_code: int
    @param exit_code: status passed to C{sys.exit}

    @raise SystemExit: always
    """

    # A parenthesised single argument is valid both as a Python 2 print
    # statement and as a Python 3 print() call, and prints the same text.
    print("""Usage: dak generate-releases [OPTIONS]
Generate the Release files

  -a, --archive=ARCHIVE      process suites in ARCHIVE
  -s, --suite=SUITE(s)       process this suite
                             Default: All suites not marked 'untouchable'
  -f, --force                Allow processing of untouchable suites
                             CAREFUL: Only to be used at (point) release time!
  -h, --help                 show this help and exit
  -q, --quiet                Don't output progress

SUITE can be a space separated list, e.g.
   --suite=unstable testing
  """)
    sys.exit(exit_code)
78
79 ########################################################################
80
def sign_release_dir(suite, dirname):
    """
    Sign the C{Release} file found in C{dirname} with gpg.

    Writes a detached signature to C{Release.gpg} and a clearsigned copy to
    C{InRelease} in the same directory, replacing existing files of those
    names.  Nothing is done unless C{Dinstall::SigningKeyring} is configured.

    @param suite: suite object; its C{signingkeys} (may be empty or None)
                  are passed to gpg as C{--local-user}

    @type dirname: string
    @param dirname: directory containing the C{Release} file

    @raise subprocess.CalledProcessError: if gpg exits with a non-zero status
    """
    cnf = Config()

    if not cnf.has_key("Dinstall::SigningKeyring"):
        return

    # Build an argument vector instead of a shell command line so that
    # paths and key ids are passed to gpg verbatim, whatever characters
    # they contain.
    command = ["gpg", "--secret-keyring", cnf["Dinstall::SigningKeyring"]]
    if cnf.has_key("Dinstall::SigningPubKeyring"):
        command.extend(["--keyring", cnf["Dinstall::SigningPubKeyring"]])
    for keyid in suite.signingkeys or []:
        command.extend(["--local-user", keyid])
    command.extend(["--no-options", "--batch", "--no-tty", "--armour",
                    "--personal-digest-preferences=SHA256"])

    relname = os.path.join(dirname, 'Release')
    outputs = (
        ("--detach-sign", os.path.join(dirname, 'Release.gpg')),
        ("--clearsign", os.path.join(dirname, 'InRelease')),
    )

    # Remove both old signatures before creating any new one
    for _, dest in outputs:
        if os.path.exists(dest):
            os.unlink(dest)

    for mode, dest in outputs:
        with open(relname, 'rb') as release:
            with open(dest, 'wb') as signed:
                # check_call: a failed signature must not go unnoticed
                subprocess.check_call(command + [mode], stdin=release, stdout=signed)
109
class XzFile(object):
    """
    Minimal reader for xz-compressed files that pipes the file through the
    external C{xz -d} command.

    Only a constructor taking C{(filename, mode)} and a C{read} method are
    provided, which is what the checksum code in this module uses.
    """

    def __init__(self, filename, mode='r'):
        """
        @type filename: string
        @param filename: path of the compressed file

        @type mode: string
        @param mode: accepted so the class can be called like
                     C{gzip.GzipFile} and C{bz2.BZ2File}; it is ignored
        """
        self.filename = filename

    def read(self):
        """
        Return the complete decompressed contents of the file.

        @raise subprocess.CalledProcessError: if C{xz} exits with a non-zero
               status, e.g. because the input is corrupt
        """
        cmd = ("xz", "-d")
        with open(self.filename, 'rb') as stdin:
            process = daklib.daksubprocess.Popen(cmd, stdin=stdin, stdout=subprocess.PIPE)
            stdout = process.communicate()[0]
        # Never hand back partial output from a failed decompression
        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, cmd)
        return stdout
119
120
class HashFunc(object):
    """
    Describes one checksum algorithm: the field name used for it in a
    Release file, the callable that computes it and its database name.
    """

    def __init__(self, release_field, func, db_name):
        (self.release_field, self.func, self.db_name) = (release_field, func, db_name)
126
# Checksum algorithms this module knows how to write.  Each entry pairs the
# Release file field name with the apt_pkg function computing the digest and
# the name that is looked up in a suite's `checksums` to enable it.
RELEASE_HASHES = [
    HashFunc('MD5Sum', apt_pkg.md5sum, 'md5'),
    HashFunc('SHA1', apt_pkg.sha1sum, 'sha1'),
    HashFunc('SHA256', apt_pkg.sha256sum, 'sha256'),
]
132
133
class ReleaseWriter(object):
    """
    Writes the Release files of a single suite and keeps the by-hash
    bookkeeping (database table and hardlinks) in sync with them.

    The suite object must belong to a database session, since
    `generate_release_files` obtains its session through `object_session`.
    """

    # Attribs contains a tuple of field names and the database names to use to
    # fill them in
    _ATTRIBS = (
        ('Origin', 'origin'),
        ('Label', 'label'),
        ('Suite', 'release_suite_output'),
        ('Version', 'version'),
        ('Codename', 'codename'),
        ('Changelogs', 'changelog_url'),
    )

    # A "Sub" Release file has slightly different fields
    _SUBATTRIBS = (
        ('Archive', 'suite_name'),
        ('Origin', 'origin'),
        ('Label', 'label'),
        ('Version', 'version'),
    )

    # Boolean stuff. If we find it true in database, write out "yes" into the release file
    _BOOLATTRS = (
        ('NotAutomatic', 'notautomatic'),
        ('ButAutomaticUpgrades', 'butautomaticupgrades'),
        ('Acquire-By-Hash', 'byhash'),
    )

    # Files of the suite's top-level directory that are never listed in Release
    _RELEASE_FILES = ("Release", "Release.gpg", "InRelease")

    _DATE_FORMAT = "%a, %d %b %Y %H:%M:%S UTC"

    def __init__(self, suite):
        self.suite = suite

    def _suite_dir(self, toplevel):
        """Path of this suite below the archive directory `toplevel`."""
        suite_suffix = Config().find("Dinstall::SuiteSuffix", "")
        return os.path.join(self.suite.archive.path, toplevel,
                            self.suite.suite_name, suite_suffix)

    def suite_path(self):
        """
        Absolute path to the suite-specific files.
        """
        return self._suite_dir('dists')

    def suite_release_path(self):
        """
        Absolute path where Release files are physically stored.
        This should be a path that sorts after the dists/ directory.
        """
        # TODO: Eventually always create Release in `zzz-dists` to avoid
        # special cases. However we don't want to move existing Release files
        # for released suites.
        # See `create_release_symlinks` below.
        if not self.suite.byhash:
            return self.suite_path()
        return self._suite_dir('zzz-dists')

    @staticmethod
    def _ensure_symlink(source, dest):
        """Make `dest` a symlink to `source`, replacing whatever is there."""
        # lexists() is also true for dangling symlinks and false only when
        # there is nothing to remove, which is the case on the first run.
        if os.path.lexists(dest):
            if os.path.islink(dest) and os.readlink(dest) == source:
                return
            os.unlink(dest)
        os.symlink(source, dest)

    @staticmethod
    def _makedirs(path):
        """Create `path` including parents; an existing path is not an error."""
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    @staticmethod
    def _index_name(dirpath, entry):
        """Name of `entry` relative to the suite directory, as listed in Release."""
        # normpath() drops the leading "./" that os.walk(".") produces
        # without touching directory names that themselves start with a dot.
        return os.path.normpath(os.path.join(dirpath, entry))

    @staticmethod
    def _read_all(opener, path):
        """Return everything `opener(path, 'r')` reads, closing it if possible."""
        reader = opener(path, 'r')
        try:
            return reader.read()
        finally:
            # Not every reader used here is guaranteed to offer close()
            close = getattr(reader, 'close', None)
            if close is not None:
                close()

    @staticmethod
    def _describe(contents, hashes):
        """Build the size/checksum record for `contents`."""
        info = {'len': len(contents)}
        for hf in hashes:
            info[hf.release_field] = hf.func(contents)
        return info

    def create_release_symlinks(self):
        """
        Create symlinks for Release files.
        This creates the symlinks for Release files in the `suite_path`
        to the actual files in `suite_release_path`.
        """
        # TODO: Eventually always create the links.
        # See `suite_release_path` above.
        if not self.suite.byhash:
            return

        suite_path = self.suite_path()
        relpath = os.path.relpath(self.suite_release_path(), suite_path)
        for f in self._RELEASE_FILES:
            self._ensure_symlink(os.path.join(relpath, f),
                                 os.path.join(suite_path, f))

    def create_output_directories(self):
        """
        Make sure `suite_path` and `suite_release_path` exist.
        """
        for path in (self.suite_path(), self.suite_release_path()):
            self._makedirs(path)

    def _write_header(self, out, architectures, components):
        """Write the fields preceding the checksum sections of the main Release file."""
        suite = self.suite

        for key, dbfield in self._ATTRIBS:
            # Hack to skip NULL Version fields as we used to do this
            # We should probably just always ignore anything which is None
            if key in ("Version", "Changelogs") and getattr(suite, dbfield) is None:
                continue

            out.write("%s: %s\n" % (key, getattr(suite, dbfield)))

        # Sample the clock once so Valid-Until is an exact offset from Date
        now = time.time()
        out.write("Date: %s\n" % time.strftime(self._DATE_FORMAT, time.gmtime(now)))

        if suite.validtime:
            expires = now + float(suite.validtime)
            out.write("Valid-Until: %s\n" % time.strftime(self._DATE_FORMAT, time.gmtime(expires)))

        for key, dbfield in self._BOOLATTRS:
            if getattr(suite, dbfield, False):
                out.write("%s: yes\n" % (key))

        out.write("Architectures: %s\n" % (" ".join([a.arch_string for a in architectures])))
        out.write("Components: %s\n" % (" ".join(components)))

        # For exact compatibility with old g-r, write out Description here instead
        # of with the rest of the DB fields above
        if suite.description is not None:
            out.write("Description: %s\n" % suite.description)

    def _write_sub_releases(self, components, suite_suffix):
        """Write the per-directory "Sub" Release files below each component."""
        suite = self.suite

        for comp in components:
            for dirpath, dirnames, filenames in os.walk(os.path.join(self.suite_path(), comp), topdown=True):
                if not re_gensubrelease.match(dirpath):
                    continue

                subfile = os.path.join(dirpath, "Release")
                with open(subfile + '.new', "w") as subrel:
                    for key, dbfield in self._SUBATTRIBS:
                        if getattr(suite, dbfield) is not None:
                            subrel.write("%s: %s\n" % (key, getattr(suite, dbfield)))

                    for key, dbfield in self._BOOLATTRS:
                        if getattr(suite, dbfield, False):
                            subrel.write("%s: yes\n" % (key))

                    subrel.write("Component: %s%s\n" % (suite_suffix, comp))

                    # Urgh, but until we have all the suite/component/arch stuff in the DB,
                    # this'll have to do
                    arch = os.path.basename(dirpath)
                    if arch.startswith('binary-'):
                        arch = arch[7:]

                    subrel.write("Architecture: %s\n" % (arch))

                os.rename(subfile + '.new', subfile)

    def _collect_fileinfo(self, hashes):
        """
        Compute size and checksums of every index file below the current
        working directory, which must be `suite_path`.

        Returns a dict mapping the relative filename to a dict with the key
        'len' and one key per Release field of `hashes`.  For compressed files
        whose uncompressed version is not on disk, an entry for the
        uncompressed name is added as well.
        """
        decompressors = (
            (".gz", gzip.GzipFile),
            (".bz2", bz2.BZ2File),
            (".xz", XzFile),
        )

        fileinfo = {}
        uncompnotseen = {}

        for dirpath, dirnames, filenames in os.walk(".", followlinks=True, topdown=True):
            for entry in filenames:
                # Skip things we don't want to include
                if not re_includeinrelease.match(entry):
                    continue

                if dirpath == '.' and entry in self._RELEASE_FILES:
                    continue

                filename = self._index_name(dirpath, entry)
                with open(filename, 'rb') as fh:
                    contents = fh.read()

                # If we find a file for which we have a compressed version and
                # haven't yet seen the uncompressed one, store the possibility
                # for future use
                for suffix, opener in decompressors:
                    if entry.endswith(suffix):
                        uncompnotseen.setdefault(filename[:-len(suffix)], (opener, filename))
                        break

                fileinfo[filename] = self._describe(contents, hashes)

        for filename, (opener, compressed) in uncompnotseen.items():
            # If we've already seen the uncompressed file, we don't
            # need to do anything again
            if filename in fileinfo:
                continue

            fileinfo[filename] = self._describe(self._read_all(opener, compressed), hashes)

        return fileinfo

    @staticmethod
    def _write_checksums(out, hashes, fileinfo):
        """Write one checksum section per hash, each listing all files."""
        filenames = sorted(fileinfo.keys())
        for field in sorted(h.release_field for h in hashes):
            out.write('%s:\n' % field)
            for filename in filenames:
                out.write(" %s %8d %s\n" % (fileinfo[filename][field], fileinfo[filename]['len'], filename))

    @staticmethod
    def _byhash_links(hashes, fileinfo):
        """
        Yield (filename, hashfile) for every by-hash link that should exist.
        Paths are relative, so the working directory must be `suite_path`.
        """
        for filename in fileinfo:
            if not os.path.exists(filename):
                # probably an uncompressed index we didn't generate
                continue
            byhashdir = os.path.join(os.path.dirname(filename), 'by-hash')
            for h in hashes:
                field = h.release_field
                yield filename, os.path.join(byhashdir, field, fileinfo[filename][field])

    def _update_hashfile_table(self, session, hashfiles):
        """Bring the hashfile table in line with `hashfiles` and commit."""
        suite = self.suite

        # Mark all by-hash files as obsolete.  We will undo that for the ones
        # we still reference later.
        query = """
            UPDATE hashfile SET unreferenced = CURRENT_TIMESTAMP
            WHERE suite_id = :id AND unreferenced IS NULL"""
        session.execute(query, {'id': suite.suite_id})

        if suite.byhash:
            query = "SELECT path FROM hashfile WHERE suite_id = :id"
            q = session.execute(query, {'id': suite.suite_id})
            known_hashfiles = set(row[0] for row in q)

            # Update the hashfile table with new or updated files
            updated = [path for path in hashfiles if path in known_hashfiles]
            new = [path for path in hashfiles if path not in known_hashfiles]

            if updated:
                session.execute("""
                    UPDATE hashfile SET unreferenced = NULL
                    WHERE path = ANY(:p) AND suite_id = :id""",
                    {'p': updated, 'id': suite.suite_id})
            if new:
                session.execute("""
                    INSERT INTO hashfile (path, suite_id)
                    VALUES (:p, :id)""",
                    [{'p': hashfile, 'id': suite.suite_id} for hashfile in new])

        session.commit()

    def generate_release_files(self):
        """
        Generate the Release files for the suite given to the constructor.

        Writes the "Sub" Release files of the component directories and the
        main Release file, updates the by-hash database entries and hardlinks
        (for suites with by-hash enabled) and finally signs the Release file.
        The working directory is changed to `suite_path` while the index
        files are processed and restored afterwards, also on errors.
        """

        suite = self.suite
        session = object_session(suite)

        architectures = get_suite_architectures(suite.suite_name, skipall=True, skipsrc=True, session=session)
        components = [c.component_name for c in suite.components]
        hashes = [x for x in RELEASE_HASHES if x.db_name in suite.checksums]

        suite_suffix = Config().find("Dinstall::SuiteSuffix", "")

        self.create_output_directories()
        self.create_release_symlinks()

        outfile = os.path.join(self.suite_release_path(), "Release")
        oldcwd = os.getcwd()

        try:
            with open(outfile + ".new", "w") as out:
                self._write_header(out, architectures, components)
                self._write_sub_releases(components, suite_suffix)

                # Now that we have done the groundwork, we want to get off and add the files with
                # their checksums to the main Release file
                os.chdir(self.suite_path())
                fileinfo = self._collect_fileinfo(hashes)
                self._write_checksums(out, hashes, fileinfo)

            os.rename(outfile + '.new', outfile)

            links = []
            if suite.byhash:
                links = list(self._byhash_links(hashes, fileinfo))

            self._update_hashfile_table(session, [hashfile for _, hashfile in links])

            # Create hardlinks in by-hash directories
            for filename, hashfile in links:
                self._makedirs(os.path.dirname(hashfile))
                try:
                    os.link(filename, hashfile)
                except OSError as exc:
                    if exc.errno != errno.EEXIST:
                        raise

            sign_release_dir(suite, os.path.dirname(outfile))
        finally:
            os.chdir(oldcwd)
433
434
def main():
    """
    Command line entry point of C{dak generate-releases}.

    Parses the options, selects the suites to process, schedules one
    `generate_helper` task per suite on a process pool, waits for all of
    them and exits with the pool's overall status.

    @raise SystemExit: always
    """
    global Logger

    cnf = Config()

    for i in ["Help", "Suite", "Force", "Quiet"]:
        key = "Generate-Releases::Options::%s" % (i)
        if not cnf.has_key(key):
            cnf[key] = ""

    Arguments = [('h', "help", "Generate-Releases::Options::Help"),
                 ('a', 'archive', 'Generate-Releases::Options::Archive', 'HasArg'),
                 ('s', "suite", "Generate-Releases::Options::Suite"),
                 ('f', "force", "Generate-Releases::Options::Force"),
                 ('q', "quiet", "Generate-Releases::Options::Quiet"),
                 ('o', 'option', '', 'ArbItem')]

    suite_names = apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)
    Options = cnf.subtree("Generate-Releases::Options")

    if Options["Help"]:
        usage()

    Logger = daklog.Logger('generate-releases')

    # Process pool on which the individual suites are generated
    pool = DakProcessPool()

    session = DBConn().session()

    if Options["Suite"]:
        # --suite is a plain flag; the suite names are the remaining arguments
        suites = []
        for s in suite_names:
            suite = get_suite(s.lower(), session)
            if suite:
                suites.append(suite)
            else:
                print("cannot find suite %s" % s)
                Logger.log(['cannot find suite %s' % s])
    else:
        # "== False" is intentional: it builds the SQL comparison
        query = session.query(Suite).filter(Suite.untouchable == False)  # noqa: E712
        if 'Archive' in Options:
            query = query.join(Suite.archive).filter(Archive.archive_name == Options['Archive'])
        suites = query.all()

    for s in suites:
        if s.untouchable and not Options["Force"]:
            print("Skipping %s (untouchable)" % s.suite_name)
            continue

        if not Options["Quiet"]:
            print("Processing %s" % s.suite_name)
        Logger.log(['Processing release file for Suite: %s' % (s.suite_name)])
        pool.apply_async(generate_helper, (s.suite_id, ))

    # No more work will be added to our pool, close it and then wait for all to finish
    pool.close()
    pool.join()

    retcode = pool.overall_status()

    if retcode > 0:
        # TODO: CENTRAL FUNCTION FOR THIS / IMPROVE LOGGING
        Logger.log(['Release file generation broken: %s' % (','.join([str(x[1]) for x in pool.results]))])

    Logger.close()

    sys.exit(retcode)
503
def generate_helper(suite_id):
    """
    Worker run in a separate process: write the Release files of one suite.

    @param suite_id: database id of the suite to process

    @rtype: tuple
    @return: C{(PROC_STATUS_SUCCESS, message)} once the files are written
    """
    db_session = DBConn().session()
    target = Suite.get(suite_id, db_session)

    # Exceptions are deliberately not caught here; the process handler
    # catches and deals with them.
    ReleaseWriter(target).generate_release_files()

    message = 'Release file written for %s' % target.suite_name
    return (PROC_STATUS_SUCCESS, message)
516
517 #######################################################################################
518
# Run the command line entry point when this file is executed as a script
if __name__ == '__main__':
    main()