]> git.decadent.org.uk Git - dak.git/blob - dak/contents.py
convert contents.py to the new API
[dak.git] / dak / contents.py
1 #!/usr/bin/env python
2 """
3 Create all the contents files
4
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
7 @copyright: 2009 Mike O'Connor <stew@debian.org>
8 @license: GNU General Public License version 2 or later
9 """
10
11 ################################################################################
12
13 # This program is free software; you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation; either version 2 of the License, or
16 # (at your option) any later version.
17
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
27 ################################################################################
28
29 # <Ganneff> there is the idea to slowly replace contents files
30 # <Ganneff> with a new generation of such files.
31 # <Ganneff> having more info.
32
33 # <Ganneff> of course that wont help for now where we need to generate them :)
34
35 ################################################################################
36
37 import sys
38 import os
39 import logging
40 import math
41 import gzip
42 import threading
43 import Queue
44 import apt_pkg
45 from daklib import utils
46 from daklib.binary import Binary
47 from daklib.config import Config
48 from daklib.dbconn import *
49
50 ################################################################################
51
def usage(exit_code=0):
    """
    Print the command line help and terminate the process.

    @type exit_code: int
    @param exit_code: status handed to sys.exit() after the text is printed
    """
    # The parenthesised form prints the same text under Python 2 and still
    # parses under Python 3.
    print("""Usage: dak contents [options] command [arguments]

COMMANDS
    generate
        generate Contents-$arch.gz files

    bootstrap
        scan the debs in the existing pool and load contents in the database

    cruft
        remove files/paths which are no longer referenced by a binary

OPTIONS
     -h, --help
        show this help and exit

     -v, --verbose
        show verbose information messages

     -q, --quiet
        suppress all output but errors

     -s, --suite={stable,testing,unstable,...}
        only operate on a single suite
""")
    sys.exit(exit_code)
79
80 ################################################################################
81
# Root of this command's option tree in dak.conf; each switch is looked up
# as "<options_prefix>::<Name>" (for example "...::Help").
options_prefix = "Contents::Options"

# Root logger; main() sets its level and output stream.
log = logging.getLogger()
88
89 ################################################################################
90
# Prepared statement contents_q(suite, override type): one row per file
# recorded for a binary of type 'deb' whose package has an override entry in
# suite $1 with override type $2.  Rows are (fn, section, package,
# architecture), where fn is path || '/' || file, and come back ordered by fn.
contents_q = """PREPARE contents_q(int,int) as
                SELECT (p.path||'/'||n.file) AS fn,
                        s.section,
                        b.package,
                        b.architecture
               FROM content_associations c join content_file_paths p ON (c.filepath=p.id)
               JOIN content_file_names n ON (c.filename=n.id)
               JOIN binaries b ON (b.id=c.binary_pkg)
               JOIN override o ON (o.package=b.package)
               JOIN section s ON (s.id=o.section)
               WHERE o.suite = $1 AND o.type = $2
               AND b.type='deb'
               ORDER BY fn"""

# Prepared statement udeb_contents_q(suite, override type, section): same row
# shape and ordering as the 'deb' query, but restricted to binaries of type
# 'udeb' and to the single section whose id is $3.
udeb_contents_q = """PREPARE udeb_contents_q(int,int,int) as
              SELECT (p.path||'/'||n.file) AS fn,
                        s.section,
                        b.package,
                        b.architecture
               FROM content_associations c join content_file_paths p ON (c.filepath=p.id)
               JOIN content_file_names n ON (c.filename=n.id)
               JOIN binaries b ON (b.id=c.binary_pkg)
               JOIN override o ON (o.package=b.package)
               JOIN section s ON (s.id=o.section)
               WHERE o.suite = $1 AND o.type = $2
               AND s.id = $3
               AND b.type='udeb'
               ORDER BY fn"""
121
122 #               FROM content_file_paths p join content_associations c ON (c.filepath=p.id)
123 #               JOIN content_file_names n ON (c.filename=n.id)
124 #               JOIN binaries b ON (b.id=c.binary_pkg)
125 #               JOIN override o ON (o.package=b.package)
126 #               JOIN section s ON (s.id=o.section)
127 #               WHERE o.suite = $1 AND o.type = $2
128 #               AND s.id = $3
129 #               AND b.id in (SELECT ba.bin from bin_associations ba join binaries b on b.id=ba.bin where (b.architecture=$3 or b.architecture=$4)and ba.suite=$1 and b.type='udeb')
130 #               GROUP BY fn
131 #               ORDER BY fn;"""
132
class EndOfContents(object):
    """
    Sentinel type: an instance placed on a writer's queue marks the end of
    the filename stream.
    """
138
class GzippedContentWriter(object):
    """
    An object which will write contents out to a Contents-$arch.gz
    file on a separate thread

    Records are handed over with write() and the stream is terminated with
    finish(); the thread started by the constructor drains the queue and
    appends the formatted lines to the gzip stream.
    """

    # Cached header text shared by every writer; stays None until a header
    # has been read successfully.
    header = None

    def __init__(self, filename):
        """
        Open the output file and start the writer thread.

        @type filename: string
        @param filename: the name of the file to write to
        """
        self.queue = Queue.Queue()
        self.current_file = None
        self.first_package = True
        self.output = self.open_file(filename)
        self.thread = threading.Thread(target=self.write_thread,
                                       name='Contents writer')
        self.thread.start()

    def open_file(self, filename):
        """
        opens a gzip stream to the contents file

        @type filename: string
        @param filename: appended as-is to the configured "Contents::Root";
                         missing parent directories are created

        @return: the gzip file object, opened for writing
        """
        filepath = Config()["Contents::Root"] + filename
        filedir = os.path.dirname(filepath)
        if not os.path.isdir(filedir):
            os.makedirs(filedir)
        return gzip.open(filepath, "w")

    def write(self, filename, section, package):
        """
        enqueue content to be written to the file on a separate thread
        """
        self.queue.put((filename, section, package))

    def write_thread(self):
        """
        the target of a Thread which will do the actual writing

        Consumes the queue until an EndOfContents instance arrives, then
        writes a final newline.  The output is closed on the way out, also
        when writing fails.
        """
        try:
            while True:
                item = self.queue.get()
                if isinstance(item, EndOfContents):
                    self.output.write('\n')
                    break

                (filename, section, package) = item
                self._write_record(filename, section, package)
        finally:
            self.output.close()

    def _write_record(self, filename, section, package):
        """
        Append one record to the output.

        A record whose filename differs from the previous one starts a new
        "<filename>TAB" line; records repeating the previous filename are
        appended to that line as a comma separated "section/package" list.
        """
        # Compare the filename, not the whole queue item: a tuple never
        # equals the stored string, which would start a new line per record.
        if filename != self.current_file:
            # this is the first file, so write the header first
            if self.current_file is None:
                header = self._getHeader()
                # No header available: skip it rather than write None.
                if header:
                    self.output.write(header)

            self.output.write('\n%s\t' % filename)
            self.first_package = True
            self.current_file = filename

        if self.first_package:
            self.first_package = False
        else:
            self.output.write(',')
        self.output.write('%s/%s' % (section, package))

    def finish(self):
        """
        enqueue the sentinel object so that the writer thread will know to
        terminate
        """
        self.queue.put(EndOfContents())

    @classmethod
    def _getHeader(cls):
        """
        Internal method to return the header for Contents.gz files

        This is boilerplate which explains the contents of the file and how
        it can be used.

        The text is read from the file named by "Contents::Header" inside
        "Dir::Templates" and cached on the class.

        @return: the header text, or None when no header is configured or
                 it could not be read
        """
        if not GzippedContentWriter.header:
            cnf = Config()
            if cnf.has_key("Contents::Header"):
                try:
                    h = open(os.path.join(cnf["Dir::Templates"],
                                          cnf["Contents::Header"]), "r")
                    try:
                        GzippedContentWriter.header = h.read()
                    finally:
                        h.close()
                except Exception:
                    # Best effort: log (with traceback) and carry on
                    # without a header.
                    log.exception("error opening header file: %s"
                                  % cnf["Contents::Header"])
                    GzippedContentWriter.header = None
            else:
                GzippedContentWriter.header = None

        return GzippedContentWriter.header
233
234
class Contents(object):
    """
    Class capable of generating Contents-$arch.gz files

    Usage: Contents().generate()
    """

    def __init__(self):
        self.header = None

    def reject(self, message):
        """
        Log a rejection message at error level; bootstrap() passes this
        method to Binary.

        @type message: string
        @param message: text to log, prefixed with "E: "
        """
        log.error("E: %s" % message)

    def cruft(self):
        """
        remove files/paths from the DB which are no longer referenced
        by binaries and clean the temporary table
        """
        s = DBConn().session()

        # clear out all of the temporarily stored content associations
        # this should be run only after p-a has run.  after a p-a
        # run we should have either accepted or rejected every package
        # so there should no longer be anything in the queue
        s.query(PendingContentsAssociation).delete()

        # delete any filenames we are storing which have no binary associated
        # with them
        cafq = s.query(ContentAssociation.filename_id).distinct()
        cfq = s.query(ContentFilename)
        cfq = cfq.filter(~ContentFilename.cafilename_id.in_(cafq))
        cfq.delete()

        # delete any paths we are storing which have no binary associated with
        # them
        capq = s.query(ContentAssociation.filepath_id).distinct()
        cpq = s.query(ContentFilepath)
        cpq = cpq.filter(~ContentFilepath.cafilepath_id.in_(capq))
        cpq.delete()

        s.commit()

    def bootstrap(self):
        """
        scan the existing debs in the pool to populate the contents database tables

        Binaries that already have a content association are skipped; pool
        files that are missing on disk are logged as errors.
        """
        pooldir = Config()['Dir::Pool']

        s = DBConn().session()

        for suite in s.query(Suite).all():
            for arch in get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=s):
                # Join Suite only once, and pass the architecture as a
                # keyword: filter_by() accepts keyword arguments only.
                # NOTE(review): assumes Architecture exposes "arch_string"
                # as a mapped attribute - confirm against daklib.dbconn.
                q = s.query(BinAssociation)
                q = q.join(Suite).filter_by(suite_name=suite.suite_name)
                q = q.join(DBBinary).join(Architecture).filter_by(arch_string=arch.arch_string)
                for ba in q:
                    filename = ba.binary.poolfile.filename
                    # Check for existing contents.  Uses ContentAssociation,
                    # the name cruft() uses for the same mapped class.
                    # NOTE(review): "binary_pkg" is the table column name;
                    # confirm it is also the mapped attribute name.
                    existingq = s.query(ContentAssociation).filter_by(binary_pkg=ba.binary_id).limit(1)
                    if existingq.count() > 0:
                        log.debug( "already imported: %s" % (filename))
                    else:
                        # We don't have existing contents so import them
                        log.debug( "scanning: %s" % (filename) )
                        debfile = os.path.join(pooldir, filename)
                        if os.path.exists(debfile):
                            Binary(debfile, self.reject).scan_package(ba.binary_id, True)
                        else:
                            log.error("missing .deb: %s" % filename)

    def generate(self):
        """
        Generate Contents-$arch.gz files for every available arch in each given suite.

        The regular 'deb' files are written first, followed by the udeb
        variants (Contents-udeb-$arch.gz and Contents-udeb-nf-$arch.gz) for
        each installer section that exists.

        NOTE(review): self._suites() and self._arches() are called here but
        are not defined in this class as shown - confirm where they come
        from before relying on this method.
        """
        cursor = DBConn().cursor()

        DBConn().prepare("contents_q", contents_q)
        DBConn().prepare("udeb_contents_q", udeb_contents_q)

        debtype_id = DBConn().get_override_type_id("deb")
        udebtype_id = DBConn().get_override_type_id("udeb")

        arch_all_id = DBConn().get_architecture_id("all")
        suites = [i.lower() for i in self._suites()]

        # Get our suites, and the architectures
        for suite in suites:
            suite_id = DBConn().get_suite_id(suite)
            self._write_contents_files(
                cursor, suite, suite_id, "dists/%s/Contents-%s.gz",
                "EXECUTE contents_q(%d,%d);" % (suite_id, debtype_id),
                arch_all_id)

        # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free)
        # This is HORRIBLY debian specific :-/
        for section, fn_pattern in [("debian-installer", "dists/%s/Contents-udeb-%s.gz"),
                                    ("non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]:

            section_id = DBConn().get_section_id(section)  # all udebs should be here
            if section_id == -1:
                continue

            for suite in suites:
                suite_id = DBConn().get_suite_id(suite)
                self._write_contents_files(
                    cursor, suite, suite_id, fn_pattern,
                    "EXECUTE udeb_contents_q(%d,%d,%d)" % (suite_id, udebtype_id, section_id),
                    arch_all_id)

    def _write_contents_files(self, cursor, suite, suite_id, fn_pattern, statement, arch_all_id):
        """
        Write one contents file per architecture of a suite.

        @param cursor: database cursor used for all queries
        @param suite: lower-cased suite name, used in the file name
        @param suite_id: id of the suite, handed to self._arches()
        @param fn_pattern: "%s"-pattern taking (suite, architecture name)
        @param statement: SQL that yields the rows to distribute
        @param arch_all_id: architecture id of arch "all"
        """
        arch_list = self._arches(cursor, suite_id)

        file_writers = {}

        try:
            for arch in arch_list:
                file_writers[arch[0]] = GzippedContentWriter(fn_pattern % (suite, arch[1]))

            cursor.execute(statement)
            self._distribute_rows(cursor, file_writers, arch_all_id)
        finally:
            # terminate every writer that was started, even after an error
            for writer in file_writers.values():
                writer.finish()

    def _distribute_rows(self, cursor, file_writers, arch_all_id):
        """
        Hand each (filename, section, package, arch) row to its writer(s).

        @param cursor: object whose fetchone() yields rows until exhausted
        @param file_writers: dict mapping architecture id to writer
        @param arch_all_id: architecture id whose rows go to every writer
        """
        while True:
            row = cursor.fetchone()
            if not row:
                break

            filename, section, package, arch = row

            # Test for arch "all" before looking for a dedicated writer,
            # otherwise these rows are dropped whenever "all" has no
            # contents file of its own.
            if arch == arch_all_id:
                ## its arch all, so all contents files get it
                for writer in file_writers.values():
                    writer.write(filename, section, package)
            elif arch in file_writers:
                file_writers[arch].write(filename, section, package)
399                         # close all the files
400                         for writer in file_writers.values():
401                             writer.finish()
402
403
404
405 ################################################################################
406
def main():
    """
    Command line entry point: parse the options, configure logging and run
    the requested command on a fresh Contents instance.

    Exits through usage() with status 0 when help is requested and with
    status 1 when the command is missing or unknown.
    """
    cnf = Config()

    arguments = [('h',"help", "%s::%s" % (options_prefix,"Help")),
                 ('s',"suite", "%s::%s" % (options_prefix,"Suite"),"HasArg"),
                 ('q',"quiet", "%s::%s" % (options_prefix,"Quiet")),
                 ('v',"verbose", "%s::%s" % (options_prefix,"Verbose")),
                ]

    commands = {'generate' : Contents.generate,
                'bootstrap' : Contents.bootstrap,
                'cruft' : Contents.cruft,
                }

    args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments, sys.argv)

    # An explicit request for help is not an error.
    if cnf.has_key("%s::%s" % (options_prefix, "Help")):
        usage()

    # A missing or unknown command is: report it with a failing status.
    if len(args) < 1 or args[0] not in commands:
        usage(1)

    # Quiet wins over verbose when both are given.
    level = logging.INFO
    if cnf.has_key("%s::%s" % (options_prefix, "Quiet")):
        level = logging.ERROR
    elif cnf.has_key("%s::%s" % (options_prefix, "Verbose")):
        level = logging.DEBUG

    logging.basicConfig(level=level,
                        format='%(asctime)s %(levelname)s %(message)s',
                        stream=sys.stderr)

    commands[args[0]](Contents())

if __name__ == '__main__':
    main()