]> git.decadent.org.uk Git - dak.git/blob - dak/contents.py
Merge branch 'merge'
[dak.git] / dak / contents.py
1 #!/usr/bin/env python
2 """
3 Create all the contents files
4
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
7 @copyright: 2009 Mike O'Connor <stew@debian.org>
8 @license: GNU General Public License version 2 or later
9 """
10
11 ################################################################################
12
13 # This program is free software; you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation; either version 2 of the License, or
16 # (at your option) any later version.
17
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
27 ################################################################################
28
29 # <Ganneff> there is the idea to slowly replace contents files
30 # <Ganneff> with a new generation of such files.
31 # <Ganneff> having more info.
32
33 # <Ganneff> of course that wont help for now where we need to generate them :)
34
35 ################################################################################
36
37 import sys
38 import os
39 import logging
40 import math
41 import gzip
42 import threading
43 import Queue
44 import apt_pkg
45 from daklib import utils
46 from daklib.binary import Binary
47 from daklib.config import Config
48 from daklib.dbconn import DBConn
49 ################################################################################
50
def usage(exit_code=0):
    """
    Print the command line help for C{dak contents} and terminate.

    @type exit_code: int
    @param exit_code: status handed to C{sys.exit}; defaults to 0

    @raise SystemExit: always, via C{sys.exit(exit_code)}
    """
    # Parenthesised single-argument form: prints the same text whether
    # print is the Python 2 statement or the Python 3 function.
    print("""Usage: dak contents [options] command [arguments]

COMMANDS
    generate
        generate Contents-$arch.gz files

    bootstrap
        scan the debs in the existing pool and load contents into the database

    cruft
        remove files/paths which are no longer referenced by a binary

OPTIONS
     -h, --help
        show this help and exit

     -v, --verbose
        show verbose information messages

     -q, --quiet
        suppress all output but errors

     -s, --suite={stable,testing,unstable,...}
        only operate on a single suite
""")
    sys.exit(exit_code)
78
79 ################################################################################
80
# All of this command's settings live below this key in dak.conf.
options_prefix = "%s::Options" % "Contents"

# Root logger, shared by everything in this module.
log = logging.getLogger()
87
88 ################################################################################
89
# List the architectures attached to a suite.
#   $1: suite id
# Each row is (architecture id, architecture name).
# this should probably exist somewhere common
arches_q = """PREPARE arches_q(int) as
              SELECT s.architecture, a.arch_string
              FROM suite_architectures s
              JOIN architecture a ON (s.architecture=a.id)
                  WHERE suite = $1"""

# List the binaries of one architecture in one suite.
#   $1: suite id, $2: architecture id
# Each row is (binary id, file name of the package).
# (bin_assoc_by_arch is presumably a view over the binary/suite
# associations -- it is not defined in this file.)
debs_q = """PREPARE debs_q(int, int) as
              SELECT b.id, f.filename FROM bin_assoc_by_arch baa
              JOIN binaries b ON baa.bin=b.id
              JOIN files f ON b.file=f.id
              WHERE suite = $1
                  AND arch = $2"""

# ask if we already have contents associated with this binary
#   $1: binary id
# Yields a single row when at least one association exists, none otherwise.
olddeb_q = """PREPARE olddeb_q(int) as
              SELECT 1 FROM content_associations
              WHERE binary_pkg = $1
              LIMIT 1"""

# All stored contents of 'deb' binaries, ordered by full path.
#   $1: suite id of the override, $2: override type id
# Each row is (path/file, section, package, architecture id).
# NOTE(review): the suite is only matched against the override table; the
# query does not check that the binary itself is part of the suite.
contents_q = """PREPARE contents_q(int,int) as
                SELECT (p.path||'/'||n.file) AS fn,
                        s.section,
                        b.package,
                        b.architecture
               FROM content_associations c join content_file_paths p ON (c.filepath=p.id)
               JOIN content_file_names n ON (c.filename=n.id)
               JOIN binaries b ON (b.id=c.binary_pkg)
               JOIN override o ON (o.package=b.package)
               JOIN section s ON (s.id=o.section)
               WHERE o.suite = $1 AND o.type = $2
               AND b.type='deb'
               ORDER BY fn"""

# All stored contents of 'udeb' binaries in one section, ordered by full path.
#   $1: suite id of the override, $2: override type id, $3: section id
# Each row is (path/file, section, package, architecture id).
udeb_contents_q = """PREPARE udeb_contents_q(int,int,int) as
              SELECT (p.path||'/'||n.file) AS fn,
                        s.section,
                        b.package,
                        b.architecture
               FROM content_associations c join content_file_paths p ON (c.filepath=p.id)
               JOIN content_file_names n ON (c.filename=n.id)
               JOIN binaries b ON (b.id=c.binary_pkg)
               JOIN override o ON (o.package=b.package)
               JOIN section s ON (s.id=o.section)
               WHERE o.suite = $1 AND o.type = $2
               AND s.id = $3
               AND b.type='udeb'
               ORDER BY fn"""

# Unused alternative tail for the udeb query, kept for reference only.
# It restricts the binaries to those associated with the suite, but refers
# to a $4 parameter that udeb_contents_q does not declare.
#               FROM content_file_paths p join content_associations c ON (c.filepath=p.id)
#               JOIN content_file_names n ON (c.filename=n.id)
#               JOIN binaries b ON (b.id=c.binary_pkg)
#               JOIN override o ON (o.package=b.package)
#               JOIN section s ON (s.id=o.section)
#               WHERE o.suite = $1 AND o.type = $2
#               AND s.id = $3
#               AND b.id in (SELECT ba.bin from bin_associations ba join binaries b on b.id=ba.bin where (b.architecture=$3 or b.architecture=$4)and ba.suite=$1 and b.type='udeb')
#               GROUP BY fn
#               ORDER BY fn;"""
153
154
155
# Clear out all of the temporarily stored content associations.
# This should be run only after p-a has run: after a p-a run every
# package should have been either accepted or rejected, so nothing
# should be left in the queue.
remove_pending_contents_cruft_q = """DELETE FROM pending_content_associations"""

# Delete every stored file name that no content association refers to
# (the LEFT JOIN leaves ca.id NULL for exactly those rows).
remove_filename_cruft_q = """DELETE FROM content_file_names
                             WHERE id IN (SELECT cfn.id FROM content_file_names cfn
                                          LEFT JOIN content_associations ca
                                            ON ca.filename=cfn.id
                                          WHERE ca.id IS NULL)"""

# Delete every stored path that no content association refers to.
# (The alias is still called "cfn" here although it names content_file_paths.)
remove_filepath_cruft_q = """DELETE FROM content_file_paths
                             WHERE id IN (SELECT cfn.id FROM content_file_paths cfn
                                          LEFT JOIN content_associations ca
                                             ON ca.filepath=cfn.id
                                          WHERE ca.id IS NULL)"""
175
class EndOfContents(object):
    """
    Sentinel type: an instance of this class marks the end of the
    filename stream.
    """
181
class GzippedContentWriter(object):
    """
    An object which will write contents out to a Contents-$arch.gz
    file on a separate thread.

    Producers call L{write} once per (filename, section, package) entry
    and L{finish} when there is nothing more to write.  The writer thread
    drains the queue and emits one line per filename of the form::

        <filename>\\t<section>/<package>[,<section>/<package>...]

    Entries are only merged onto one line when entries for the same
    filename are enqueued consecutively.
    """

    # header section of the contents file, shared by all writers and
    # loaded on first use by _getHeader()
    header = None

    def __init__(self, filename):
        """
        Open the output file and start the writer thread.

        @type filename: string
        @param filename: the name of the file to write to, appended to
                         the C{Contents::Root} configuration value
        """
        self.queue = Queue.Queue()
        self.current_file = None
        self.first_package = True
        self.output = self.open_file(filename)
        self.thread = threading.Thread(target=self.write_thread,
                                       name='Contents writer')
        self.thread.start()

    def open_file(self, filename):
        """
        Open a gzip stream to the contents file, creating the containing
        directory first when it does not exist yet.

        @type filename: string
        @param filename: file name below C{Contents::Root}

        @return: the writable gzip file object
        """
        filepath = Config()["Contents::Root"] + filename
        filedir = os.path.dirname(filepath)
        if not os.path.isdir(filedir):
            os.makedirs(filedir)
        return gzip.open(filepath, "w")

    def write(self, filename, section, package):
        """
        Enqueue one entry to be written to the file on the writer thread.
        """
        self.queue.put((filename, section, package))

    def write_thread(self):
        """
        The target of the writer thread: consume queue entries until an
        L{EndOfContents} instance arrives, then terminate.

        The output file is closed when the loop ends, whether it ended
        normally or because writing raised.
        """
        try:
            while True:
                item = self.queue.get()
                if isinstance(item, EndOfContents):
                    self.output.write('\n')
                    break

                (filename, section, package) = item

                # Compare the filename (not the whole queue entry) so that
                # consecutive packages shipping the same file share a line.
                if filename != self.current_file:
                    if self.current_file is None:
                        # very first file: the header, if any, goes first
                        header = self._getHeader()
                        if header:
                            self.output.write(header)

                    self.output.write('\n%s\t' % filename)
                    self.first_package = True
                    self.current_file = filename

                if self.first_package:
                    self.first_package = False
                else:
                    self.output.write(',')
                self.output.write('%s/%s' % (section, package))
        finally:
            self.output.close()

    def finish(self):
        """
        Enqueue the sentinel object so that the writer thread knows to
        terminate.  Does not wait for the thread.
        """
        self.queue.put(EndOfContents())

    @classmethod
    def _getHeader(cls):
        """
        Internal method to return the header for Contents.gz files

        This is boilerplate which explains the contents of the file and how
        it can be used.  It is read from the file named by
        C{Contents::Header} inside C{Dir::Templates} and cached on the class.

        @return: the header text, or None when no header is configured or
                 the header file cannot be read
        """
        if not GzippedContentWriter.header:
            if Config().has_key("Contents::Header"):
                header_path = os.path.join(Config()["Dir::Templates"],
                                           Config()["Contents::Header"])
                try:
                    h = open(header_path, "r")
                    try:
                        GzippedContentWriter.header = h.read()
                    finally:
                        h.close()
                except (IOError, OSError):
                    # log.exception appends the traceback itself
                    log.exception("error opening header file: %s",
                                  Config()["Contents::Header"])
                    GzippedContentWriter.header = None
            else:
                GzippedContentWriter.header = None

        return GzippedContentWriter.header
276
277
class Contents(object):
    """
    Class capable of generating Contents-$arch.gz files

    Usage: Contents().generate()
    """

    # goal column for section column
    _goal_column = 54

    def __init__(self):
        self.header = None

    def reject(self, message):
        """
        Error callback: log C{message} as an error.
        """
        log.error("E: %s" % message)

    def cruft(self):
        """
        remove files/paths from the DB which are no longer referenced
        by binaries and clean the temporary table
        """
        cursor = DBConn().cursor()
        cursor.execute("BEGIN WORK")
        cursor.execute(remove_pending_contents_cruft_q)
        cursor.execute(remove_filename_cruft_q)
        cursor.execute(remove_filepath_cruft_q)
        cursor.execute("COMMIT")

    def bootstrap(self):
        """
        scan the existing debs in the pool to populate the contents database tables

        Binaries which already have content associations are skipped;
        binaries whose file is missing from the pool are logged as errors.
        """
        pooldir = Config()['Dir::Pool']

        cursor = DBConn().cursor()
        # Separate cursor for the per-package lookup, so the result set
        # still being iterated on `cursor` is not disturbed.  Created once
        # instead of once per package.
        lookup_cursor = DBConn().cursor()

        DBConn().prepare("debs_q", debs_q)
        DBConn().prepare("olddeb_q", olddeb_q)
        DBConn().prepare("arches_q", arches_q)

        for suite in [i.lower() for i in self._suites()]:
            suite_id = DBConn().get_suite_id(suite)

            for arch_id, _arch_name in self._arches(cursor, suite_id):
                cursor.execute("EXECUTE debs_q(%d, %d)" % (suite_id, arch_id))

                while True:
                    deb = cursor.fetchone()
                    if not deb:
                        break

                    lookup_cursor.execute("EXECUTE olddeb_q(%d)" % (deb[0]))
                    if lookup_cursor.fetchone():
                        log.debug("already imported: %s" % (deb[1]))
                        continue

                    log.debug("scanning: %s" % (deb[1]))
                    debfile = os.path.join(pooldir, deb[1])
                    if os.path.exists(debfile):
                        Binary(debfile, self.reject).scan_package(deb[0], True)
                    else:
                        log.error("missing .deb: %s" % deb[1])

    def generate(self):
        """
        Generate Contents-$arch.gz files for every available arch in each given suite.

        After the regular files, Contents-udeb-$arch.gz and
        Contents-udeb-nf-$arch.gz are generated for the debian-installer
        sections which exist in the database.
        """
        cursor = DBConn().cursor()

        DBConn().prepare("arches_q", arches_q)
        DBConn().prepare("contents_q", contents_q)
        DBConn().prepare("udeb_contents_q", udeb_contents_q)

        debtype_id = DBConn().get_override_type_id("deb")
        udebtype_id = DBConn().get_override_type_id("udeb")

        arch_all_id = DBConn().get_architecture_id("all")
        suites = [i.lower() for i in self._suites()]

        # Get our suites, and the architectures
        for suite in suites:
            suite_id = DBConn().get_suite_id(suite)
            query = "EXECUTE contents_q(%d,%d);" % (suite_id, debtype_id)
            self._generate_files(cursor, suite, suite_id,
                                 "dists/%s/Contents-%s.gz", query, arch_all_id)

        # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free)
        # This is HORRIBLY debian specific :-/
        for section, fn_pattern in [("debian-installer", "dists/%s/Contents-udeb-%s.gz"),
                                    ("non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]:

            section_id = DBConn().get_section_id(section)  # all udebs should be here
            if section_id == -1:
                continue

            for suite in suites:
                suite_id = DBConn().get_suite_id(suite)
                query = "EXECUTE udeb_contents_q(%d,%d,%d)" % (suite_id, udebtype_id, section_id)
                self._generate_files(cursor, suite, suite_id,
                                     fn_pattern, query, arch_all_id)

    def _generate_files(self, cursor, suite, suite_id, fn_pattern, query, arch_all_id):
        """
        Write one set of per-architecture contents files for a suite.

        @param cursor: database cursor used for all queries
        @param suite: lower-cased suite name, used in the file names
        @param suite_id: database id of the suite
        @param fn_pattern: file name pattern taking (suite, arch name)
        @param query: EXECUTE statement yielding
                      (filename, section, package, arch id) rows
        @param arch_all_id: database id of the "all" architecture
        """
        file_writers = {}

        try:
            for arch_id, arch_name in self._arches(cursor, suite_id):
                file_writers[arch_id] = GzippedContentWriter(fn_pattern % (suite, arch_name))

            cursor.execute(query)
            self._dispatch_rows(cursor, file_writers, arch_all_id)
        finally:
            # tell every writer that was started to terminate, even on error
            for writer in file_writers.values():
                writer.finish()

    def _dispatch_rows(self, cursor, file_writers, arch_all_id):
        """
        Hand every row pending on C{cursor} to the matching writer(s).

        @param cursor: cursor with an executed contents query
        @param file_writers: dict mapping arch id to a writer object
        @param arch_all_id: database id of the "all" architecture
        """
        while True:
            row = cursor.fetchone()
            if not row:
                break

            filename, section, package, arch = row

            # The "all" test has to come first: _arches() never returns the
            # "all" architecture, so it never has a writer of its own.
            if arch == arch_all_id:
                ## its arch all, so all contents files get it
                for writer in file_writers.values():
                    writer.write(filename, section, package)
            elif arch in file_writers:
                file_writers[arch].write(filename, section, package)

################################################################################

    def _suites(self):
        """
        return a list of suites to operate on

        The Suite option restricts the list when set; otherwise every
        suite found in the configuration is returned.
        """
        suite_key = "%s::%s" % (options_prefix, "Suite")
        if Config().has_key(suite_key):
            return utils.split_args(Config()[suite_key])

        return Config().SubTree("Suite").List()

    def _arches(self, cursor, suite):
        """
        return a list of archs to operate on

        @param cursor: database cursor; arches_q must already be prepared
        @param suite: database id of the suite

        @return: list of (arch id, arch name) tuples, excluding the
                 "source" and "all" pseudo architectures
        """
        arch_list = []
        cursor.execute("EXECUTE arches_q(%d)" % (suite))
        while True:
            r = cursor.fetchone()
            if not r:
                break

            if r[1] not in ("source", "all"):
                arch_list.append((r[0], r[1]))

        return arch_list
473
474 ################################################################################
475
476
def main():
    """
    Command line entry point for C{dak contents}.

    Parses the options, configures logging and runs the requested
    command (generate, bootstrap or cruft).  With --help the usage text
    is printed and the exit status is 0; with a missing or unknown
    command the usage text is printed and the exit status is 1.
    """
    cnf = Config()

    def option(name):
        # full configuration key of one of our command line options
        return "%s::%s" % (options_prefix, name)

    arguments = [('h', "help", option("Help")),
                 ('s', "suite", option("Suite"), "HasArg"),
                 ('q', "quiet", option("Quiet")),
                 ('v', "verbose", option("Verbose")),
                ]

    commands = {'generate': Contents.generate,
                'bootstrap': Contents.bootstrap,
                'cruft': Contents.cruft,
                }

    args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments, sys.argv)

    # An explicit request for help is a success ...
    if cnf.has_key(option("Help")):
        usage()

    # ... a missing or unknown command is an error.
    if len(args) < 1 or args[0] not in commands:
        usage(1)

    level = logging.INFO
    if cnf.has_key(option("Quiet")):
        level = logging.ERROR
    elif cnf.has_key(option("Verbose")):
        level = logging.DEBUG

    logging.basicConfig(level=level,
                        format='%(asctime)s %(levelname)s %(message)s',
                        stream=sys.stderr)

    # the table holds plain (unbound) methods, so pass the instance explicitly
    commands[args[0]](Contents())

if __name__ == '__main__':
    main()