]> git.decadent.org.uk Git - dak.git/blob - dak/contents.py
do all binaries, not just the first
[dak.git] / dak / contents.py
1 #!/usr/bin/env python
2 """
3 Create all the contents files
4
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
7 @copyright: 2009 Mike O'Connor <stew@debian.org>
8 @license: GNU General Public License version 2 or later
9 """
10
11 ################################################################################
12
13 # This program is free software; you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation; either version 2 of the License, or
16 # (at your option) any later version.
17
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
27 ################################################################################
28
29 # <Ganneff> there is the idea to slowly replace contents files
30 # <Ganneff> with a new generation of such files.
31 # <Ganneff> having more info.
32
33 # <Ganneff> of course that wont help for now where we need to generate them :)
34
35 ################################################################################
36
37 import sys
38 import os
39 import logging
40 import gzip
41 import threading
42 import Queue
43 import apt_pkg
44 from daklib import utils
45 from daklib.binary import Binary
46 from daklib.config import Config
47 from daklib.dbconn import *
48
49 ################################################################################
50
def usage (exit_code=0):
    """
    Print the command line help for 'dak contents' and terminate.

    @type exit_code: int
    @param exit_code: status handed to sys.exit() after the help text has
    been printed; defaults to 0

    This function does not return: it always raises SystemExit.
    """
    # a single parenthesised string prints identically whether print is a
    # statement or a function
    print("""Usage: dak contents [options] command [arguments]

COMMANDS
    generate
        generate Contents-$arch.gz files

    bootstrap_bin
        scan the debs in the existing pool and load contents into the bin_contents table

    cruft
        remove files/paths which are no longer referenced by a binary

OPTIONS
     -h, --help
        show this help and exit

     -v, --verbose
        show verbose information messages

     -q, --quiet
        suppress all output but errors

     -s, --suite={stable,testing,unstable,...}
        only operate on a single suite
""")
    sys.exit(exit_code)
78
79 ################################################################################
80
# every option parsed by this command is stored below this key in dak.conf

options_prefix = "Contents::Options"

# the root logger; main() configures its level and output stream
log = logging.getLogger()
87
88 ################################################################################
89
class EndOfContents(object):
    """
    Sentinel type: an instance marks the end of the filename stream
    """
95
class GzippedContentWriter(object):
    """
    An object which will write contents out to a Contents-$arch.gz
    file on a separate thread

    Entries are handed over with write() and consumed by write_thread();
    finish() enqueues the end marker which makes the thread close the file.
    """

    header = None # class-level cache of the header section of contents files

    def __init__(self, filename):
        """
        Open the output file and start the writer thread.

        @type filename: string
        @param filename: the name of the file to write to; it is appended
        to the Contents::Root configuration value
        """
        self.queue = Queue.Queue()
        # name of the file whose line is currently being written; None
        # until the first entry has been seen
        self.current_file = None
        # True while no package has been written on the current line
        self.first_package = True
        self.output = self.open_file(filename)
        self.thread = threading.Thread(target=self.write_thread,
                                       name='Contents writer')
        self.thread.start()

    def open_file(self, filename):
        """
        opens a gzip stream to the contents file, creating the
        containing directory when it does not exist yet

        @type filename: string
        @param filename: path which is appended to Contents::Root

        @return: the gzip file object opened for writing
        """
        filepath = Config()["Contents::Root"] + filename
        filedir = os.path.dirname(filepath)
        if not os.path.isdir(filedir):
            os.makedirs(filedir)
        return gzip.open(filepath, "w")

    def write(self, filename, section, package):
        """
        enqueue content to be written to the file on a separate thread

        @param filename: the path that goes in the first column
        @param section: section of the package shipping the path
        @param package: name of the package shipping the path
        """
        self.queue.put((filename,section,package))

    def write_thread(self):
        """
        the target of a Thread which will do the actual writing

        Consecutive entries with the same filename share one output line
        of the form "filename<TAB>section/package,section/package".  The
        loop ends, and the output is closed, once an EndOfContents
        instance is taken from the queue.
        """
        while True:
            item = self.queue.get()
            if isinstance(item, EndOfContents):
                self.output.write('\n')
                self.output.close()
                break

            (filename, section, package) = item

            # compare the filename only: comparing the whole tuple is
            # never equal, which would start a new line for every entry
            if filename != self.current_file:
                if self.current_file is None:
                    # this is the first file, so write the header first;
                    # there may be no header configured at all
                    header = self._getHeader()
                    if header:
                        self.output.write(header)

                self.output.write('\n%s\t' % filename)
                self.first_package = True
                self.current_file = filename

            if self.first_package:
                self.first_package = False
            else:
                self.output.write(',')
            self.output.write('%s/%s' % (section, package))

    def finish(self):
        """
        enqueue the end marker so that the writer thread will terminate

        NOTE(review): this does not wait for the thread; entries may still
        be unwritten when it returns.
        """
        self.queue.put(EndOfContents())

    @classmethod
    def _getHeader(cls):
        """
        Internal method to return the header for Contents.gz files

        The header is read from the file named by Contents::Header inside
        Dir::Templates and cached on the class.

        @return: the header text, or None when Contents::Header is not
        configured or the file could not be read
        """
        if not GzippedContentWriter.header:
            if Config().has_key("Contents::Header"):
                # imported locally: traceback is not among the top-level
                # imports of this module
                import traceback
                try:
                    h = open(os.path.join( Config()["Dir::Templates"],
                                           Config()["Contents::Header"] ), "r")
                    try:
                        GzippedContentWriter.header = h.read()
                    finally:
                        h.close()
                except Exception:
                    # best effort: a broken header must not stop the
                    # generation, so log it and carry on without one
                    log.error( "error opening header file: %s\n%s" % (Config()["Contents::Header"],
                                                                      traceback.format_exc() ))
                    GzippedContentWriter.header = None
            else:
                GzippedContentWriter.header = None

        return GzippedContentWriter.header
190
191
class Contents(object):
    """
    Class capable of generating Contents-$arch.gz files
    """

    def __init__(self):
        self.header = None

    def reject(self, message):
        """
        Log a rejection message as an error; handed to Binary as its
        reject callback.

        @type message: string
        @param message: the text to log
        """
        log.error("E: %s" % message)

    def cruft(self):
        """
        remove files/paths from the DB which are no longer referenced
        by binaries and clean the temporary table
        """
        s = DBConn().session()

        # clear out all of the temporarily stored content associations
        # this should be run only after p-a has run.  after a p-a
        # run we should have either accepted or rejected every package
        # so there should no longer be anything in the queue
        s.query(PendingContentAssociation).delete()

        # delete any filenames we are storing which have no binary associated
        # with them
        cafq = s.query(ContentAssociation.filename_id).distinct()
        cfq = s.query(ContentFilename)
        cfq = cfq.filter(~ContentFilename.cafilename_id.in_(cafq))
        cfq.delete()

        # delete any paths we are storing which have no binary associated with
        # them
        capq = s.query(ContentAssociation.filepath_id).distinct()
        cpq = s.query(ContentFilepath)
        cpq = cpq.filter(~ContentFilepath.cafilepath_id.in_(capq))
        cpq.delete()

        s.commit()

    def _scan_deb(self, pooldir, filename, binary_id):
        """
        Scan one .deb from the pool, or log an error when it is missing.

        @type pooldir: string
        @param pooldir: the pool directory (Dir::Pool)

        @type filename: string
        @param filename: path of the .deb relative to the pool directory

        @param binary_id: id of the binary, passed on to scan_package
        """
        log.debug( "scanning: %s" % (filename) )

        debfile = os.path.join(pooldir, filename)
        if os.path.exists(debfile):
            Binary(debfile, self.reject).scan_package(binary_id, True)
        else:
            log.error("missing .deb: %s" % filename)

    def bootstrap_bin(self):
        """
        scan the existing debs in the pool to populate the bin_contents table
        """
        pooldir = Config()[ 'Dir::Pool' ]

        s = DBConn().session()

        for binary in s.query(DBBinary).all():
            filename = binary.poolfile.filename
            # Check for existing contents
            existingq = s.execute( "select 1 from bin_contents where binary_id=:id", {'id':binary.binary_id} )
            if existingq.fetchone():
                log.debug( "already imported: %s" % (filename))
            else:
                # We don't have existing contents so import them
                self._scan_deb(pooldir, filename, binary.binary_id)

    def bootstrap(self):
        """
        scan the existing debs in the pool to populate the contents database tables
        """
        pooldir = Config()[ 'Dir::Pool' ]

        s = DBConn().session()

        for suite in s.query(Suite).all():
            for arch in get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=s):
                # binaries of this suite ...
                q = s.query(BinAssociation).join(Suite)
                q = q.filter_by(suite_name=suite.suite_name)
                # ... restricted to this architecture; filter_by only
                # accepts keyword arguments
                q = q.join(DBBinary).join(Architecture).filter_by(arch_string=arch.arch_string)
                for ba in q:
                    filename = ba.binary.poolfile.filename
                    # Check for existing contents
                    # NOTE(review): binary_pkg is assumed to be a mapped
                    # attribute of ContentAssociation -- confirm in dbconn
                    existingq = s.query(ContentAssociation).filter_by(binary_pkg=ba.binary_id).limit(1)
                    if existingq.count() > 0:
                        log.debug( "already imported: %s" % (filename))
                    else:
                        # We don't have existing contents so import them
                        self._scan_deb(pooldir, filename, ba.binary_id)

    def generate(self):
        """
        Generate Contents-$arch.gz files for every available arch in each given suite.
        """
        session = DBConn().session()

        arch_all_id = get_architecture("all", session).arch_id

        # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free)
        # This is HORRIBLY debian specific :-/
        for dtype, section_name, fn_pattern in \
              [('deb',  None,                        "dists/%s/Contents-%s.gz"),
               ('udeb', "debian-installer",          "dists/%s/Contents-udeb-%s.gz"),
               ('udeb', "non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]:

            overridetype = get_override_type(dtype, session)

            # For udebs, we only look in certain sections (see the for loop above)
            section_filter = None
            if section_name is not None:
                section_filter = get_section(section_name, session)

            # Get our suites
            for suite in which_suites(session):
                # Which architectures do we need to work on
                arch_list = get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=session)

                # Set up our file writer dictionary
                file_writers = {}
                try:
                    # One file writer per arch
                    for arch in arch_list:
                        file_writers[arch.arch_id] = GzippedContentWriter(fn_pattern % (suite.suite_name, arch.arch_string))

                    for r in get_suite_contents(suite, overridetype, section_filter, session=session).fetchall():
                        # the section of the row gets its own name so that
                        # it cannot clobber the filter used for later suites
                        filename, pkg_section, package, arch_id = r

                        if arch_id == arch_all_id:
                            # It's arch all, so all contents files get it
                            for writer in file_writers.values():
                                writer.write(filename, pkg_section, package)
                        else:
                            if arch_id in file_writers:
                                file_writers[arch_id].write(filename, pkg_section, package)

                finally:
                    # tell every writer that no more entries will follow
                    for writer in file_writers.values():
                        writer.finish()
336
337 ################################################################################
338
def main():
    """
    Entry point of 'dak contents': parse the command line, configure
    logging and run the requested command on a fresh Contents object.

    Prints the usage text and exits with status 0 when help is requested,
    and with status 1 when the command is missing or unknown.
    """
    cnf = Config()

    arguments = [('h',"help", "%s::%s" % (options_prefix,"Help")),
                 ('s',"suite", "%s::%s" % (options_prefix,"Suite"),"HasArg"),
                 ('q',"quiet", "%s::%s" % (options_prefix,"Quiet")),
                 ('v',"verbose", "%s::%s" % (options_prefix,"Verbose")),
                ]

    commands = {'generate' : Contents.generate,
                'bootstrap_bin' : Contents.bootstrap_bin,
                'cruft' : Contents.cruft,
                }

    args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments,sys.argv)

    # an explicit request for help is not an error
    if cnf.has_key("%s::%s" % (options_prefix,"Help")):
        usage()

    # a missing or unknown command is a usage error, so report failure
    if (len(args) < 1) or args[0] not in commands:
        usage(1)

    # quiet wins over verbose when both are given
    level=logging.INFO
    if cnf.has_key("%s::%s" % (options_prefix,"Quiet")):
        level=logging.ERROR

    elif cnf.has_key("%s::%s" % (options_prefix,"Verbose")):
        level=logging.DEBUG


    logging.basicConfig( level=level,
                         format='%(asctime)s %(levelname)s %(message)s',
                         stream = sys.stderr )

    commands[args[0]](Contents())
374
def which_suites(session):
    """
    Build the list of suites to operate on.

    The names come from the Suite option when it is set, otherwise from
    the entries of the "Suite" configuration subtree.

    @param session: passed through to get_suite for every name

    @return: a list holding the result of get_suite for each lower-cased
    suite name
    """
    suite_key = "%s::%s" %(options_prefix,"Suite")

    if not Config().has_key(suite_key):
        names = Config().SubTree("Suite").List()
    else:
        names = utils.split_args(Config()[suite_key])

    selected = []
    for name in names:
        selected.append(get_suite(name.lower(), session))
    return selected
385
386
# run the command line interface only when executed as a script
if "__main__" == __name__:
    main()