]> git.decadent.org.uk Git - dak.git/blob - dak/contents.py
bootstrap_bin working
[dak.git] / dak / contents.py
1 #!/usr/bin/env python
2 """
3 Create all the contents files
4
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
7 @copyright: 2009 Mike O'Connor <stew@debian.org>
8 @license: GNU General Public License version 2 or later
9 """
10
11 ################################################################################
12
13 # This program is free software; you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation; either version 2 of the License, or
16 # (at your option) any later version.
17
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
27 ################################################################################
28
29 # <Ganneff> there is the idea to slowly replace contents files
30 # <Ganneff> with a new generation of such files.
31 # <Ganneff> having more info.
32
33 # <Ganneff> of course that wont help for now where we need to generate them :)
34
35 ################################################################################
36
37 import sys
38 import os
39 import logging
40 import gzip
41 import threading
42 import Queue
43 import apt_pkg
44 from daklib import utils
45 from daklib.binary import Binary
46 from daklib.config import Config
47 from daklib.dbconn import *
48
49 ################################################################################
50
def usage(exit_code=0):
    """
    Print the command-line help for C{dak contents} and terminate.

    @type exit_code: int
    @param exit_code: status handed to C{sys.exit}; defaults to 0

    @raise SystemExit: always, carrying C{exit_code}
    """
    help_text = """Usage: dak contents [options] command [arguments]

COMMANDS
    generate
        generate Contents-$arch.gz files

    bootstrap_bin
        scan the debs in the existing pool and load contents into the bin_contents table

    cruft
        remove files/paths which are no longer referenced by a binary

OPTIONS
     -h, --help
        show this help and exit

     -v, --verbose
        show verbose information messages

     -q, --quiet
        suppress all output but errors

     -s, --suite={stable,testing,unstable,...}
        only operate on a single suite
"""
    # Same output as a print statement: the text followed by one newline.
    sys.stdout.write(help_text + "\n")
    sys.exit(exit_code)
78
79 ################################################################################
80
# Configuration tree in dak.conf under which this command's options are kept.
options_prefix = "%s::Options" % "Contents"

# Root logger used throughout this module; main() sets its level and format.
log = logging.getLogger()
87
88 ################################################################################
89
class EndOfContents(object):
    """
    Marker type: an instance placed on a writer's queue signals that the
    stream of filenames has ended.
    """
95
class GzippedContentWriter(object):
    """
    An object which will write contents out to a Contents-$arch.gz
    file on a separate thread.

    Entries are queued with L{write} as (filename, section, package)
    tuples and consumed by L{write_thread}.  Consecutive entries that share
    a filename are merged onto one output line as a comma separated list of
    C{section/package} items.  L{finish} queues the end marker which makes
    the thread close the file.
    """

    # Header text cached on the class and shared by all writers.  None until
    # it has been loaded, and also when no header is configured or loading
    # failed.
    header = None

    def __init__(self, filename):
        """
        Open the output file and start the writer thread.

        @type filename: string
        @param filename: the name of the file to write to
        """
        self.queue = Queue.Queue()
        self.current_file = None
        self.first_package = True
        self.output = self.open_file(filename)
        self.thread = threading.Thread(target=self.write_thread,
                                       name='Contents writer')
        self.thread.start()

    def open_file(self, filename):
        """
        Open a gzip stream to the contents file, creating the directory
        that holds it when it does not exist yet.

        @type filename: string
        @param filename: path appended to the C{Contents::Root} setting

        @return: the gzip file object opened for writing
        """
        filepath = Config()["Contents::Root"] + filename
        filedir = os.path.dirname(filepath)
        if not os.path.isdir(filedir):
            os.makedirs(filedir)
        return gzip.open(filepath, "w")

    def write(self, filename, section, package):
        """
        Enqueue content to be written to the file on a separate thread.

        @param filename: the path that is shipped by the package
        @param section: the section of the package
        @param package: the name of the package
        """
        self.queue.put((filename, section, package))

    def write_thread(self):
        """
        The target of a Thread which will do the actual writing.

        Runs until an L{EndOfContents} instance is taken from the queue, at
        which point a final newline is written and the output is closed.
        """
        while True:
            item = self.queue.get()
            if isinstance(item, EndOfContents):
                self.output.write('\n')
                self.output.close()
                break

            (filename, section, package) = item

            # Compare the filename only: comparing the whole tuple with the
            # stored filename never matches, so every package would start a
            # new line instead of being appended to its file's line.
            if filename != self.current_file:
                # this is the first file, so write the header first
                if not self.current_file:
                    header = self._getHeader()
                    # no header configured (or it failed to load): skip it
                    # rather than handing None to write()
                    if header:
                        self.output.write(header)

                self.output.write('\n%s\t' % filename)
                self.first_package = True

            self.current_file = filename

            if not self.first_package:
                self.output.write(',')
            else:
                self.first_package = False
            self.output.write('%s/%s' % (section, package))

    def finish(self):
        """
        Enqueue the sentry object so that the writer thread will know to
        terminate.  Does not wait for the thread to finish.
        """
        self.queue.put(EndOfContents())

    @classmethod
    def _getHeader(cls):
        """
        Internal method to return the header for Contents.gz files.

        This is boilerplate which explains the contents of the file and how
        it can be used.  The text is read from the file named by
        C{Contents::Header} inside C{Dir::Templates} and cached on the class.

        @return: the header text, or None when no header is configured or
                 the header file could not be read
        """
        if not GzippedContentWriter.header:
            cnf = Config()
            if cnf.has_key("Contents::Header"):
                try:
                    h = open(os.path.join(cnf["Dir::Templates"],
                                          cnf["Contents::Header"]), "r")
                    try:
                        GzippedContentWriter.header = h.read()
                    finally:
                        h.close()
                except Exception:
                    # best effort: a broken header must not stop generation;
                    # log.exception appends the traceback to the message
                    log.exception("error opening header file: %s",
                                  cnf["Contents::Header"])
                    GzippedContentWriter.header = None
            else:
                GzippedContentWriter.header = None

        return GzippedContentWriter.header
190
191
class Contents(object):
    """
    Class capable of generating Contents-$arch.gz files and of maintaining
    the database tables those files are generated from.
    """

    def __init__(self):
        self.header = None

    def reject(self, message):
        """
        Error callback handed to L{Binary}; logs the message as an error.

        @param message: the text to log
        """
        log.error("E: %s" % message)

    def cruft(self):
        """
        remove files/paths from the DB which are no longer referenced
        by binaries and clean the temporary table
        """
        s = DBConn().session()

        # clear out all of the temporarily stored content associations
        # this should be run only after p-a has run.  after a p-a
        # run we should have either accepted or rejected every package
        # so there should no longer be anything in the queue
        s.query(PendingContentAssociation).delete()

        # delete any filenames we are storing which have no binary associated
        # with them
        cafq = s.query(ContentAssociation.filename_id).distinct()
        cfq = s.query(ContentFilename)
        cfq = cfq.filter(~ContentFilename.cafilename_id.in_(cafq))
        cfq.delete()

        # delete any paths we are storing which have no binary associated with
        # them
        capq = s.query(ContentAssociation.filepath_id).distinct()
        cpq = s.query(ContentFilepath)
        cpq = cpq.filter(~ContentFilepath.cafilepath_id.in_(capq))
        cpq.delete()

        s.commit()

    def _scan_deb(self, pooldir, filename, binary_id):
        """
        Import the contents of one .deb from the pool, or log an error when
        the file does not exist on disk.

        @param pooldir: the pool directory (C{Dir::Pool})
        @param filename: path of the .deb relative to the pool directory
        @param binary_id: id of the binary the contents belong to
        """
        log.debug("scanning: %s" % (filename))

        debfile = os.path.join(pooldir, filename)
        if os.path.exists(debfile):
            Binary(debfile, self.reject).scan_package(binary_id, True)
        else:
            log.error("missing .deb: %s" % filename)

    def bootstrap_bin(self):
        """
        scan the existing debs in the pool to populate the bin_contents table
        """
        pooldir = Config()['Dir::Pool']

        s = DBConn().session()

        # NOTE(review): only the first DBBinary row is handled here; the
        # original carried a commented-out loop over every binary, so this
        # looks like work in progress - confirm before relying on it.
        binary = s.query(DBBinary).first()
        if binary:
            filename = binary.poolfile.filename
            # Check for existing contents
            existingq = s.execute("select 1 from bin_contents where binary_id=:id",
                                  {'id': binary.binary_id})
            if existingq.fetchone():
                log.debug("already imported: %s" % (filename))
            else:
                # We don't have existing contents so import them
                self._scan_deb(pooldir, filename, binary.binary_id)

    def bootstrap(self):
        """
        scan the existing debs in the pool to populate the contents database tables
        """
        pooldir = Config()['Dir::Pool']

        s = DBConn().session()

        for suite in s.query(Suite).all():
            for arch in get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=s):
                # Suite is joined once only; a second join of the same table
                # is redundant.
                q = s.query(BinAssociation).join(Suite)
                q = q.filter_by(suite_name=suite.suite_name)
                # filter_by() takes keyword criteria, not a positional value
                q = q.join(DBBinary).join(Architecture).filter_by(arch_string=arch.arch_string)
                for ba in q:
                    filename = ba.binary.poolfile.filename
                    # Check for existing contents
                    existingq = s.query(ContentAssociation).filter_by(binary_pkg=ba.binary_id).limit(1)
                    if existingq.count() > 0:
                        log.debug("already imported: %s" % (filename))
                    else:
                        # We don't have existing contents so import them
                        self._scan_deb(pooldir, filename, ba.binary_id)

    def generate(self):
        """
        Generate Contents-$arch.gz files for every available arch in each given suite.
        """
        session = DBConn().session()

        arch_all_id = get_architecture("all", session).arch_id

        # the list of suites does not depend on the package type, so it is
        # looked up once
        suites = which_suites(session)

        # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free)
        # This is HORRIBLY debian specific :-/
        for dtype, section_name, fn_pattern in \
              [('deb',  None,                        "dists/%s/Contents-%s.gz"),
               ('udeb', "debian-installer",          "dists/%s/Contents-udeb-%s.gz"),
               ('udeb', "non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]:

            overridetype = get_override_type(dtype, session)

            # For udebs, we only look in certain sections (see the for loop above)
            section = None
            if section_name is not None:
                section = get_section(section_name, session)

            # Get our suites
            for suite in suites:
                # Which architectures do we need to work on
                arch_list = get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=session)

                # Set up our file writer dictionary
                file_writers = {}
                try:
                    # One file writer per arch
                    for arch in arch_list:
                        file_writers[arch.arch_id] = GzippedContentWriter(
                            fn_pattern % (suite.suite_name, arch.arch_string))

                    for row in get_suite_contents(suite, overridetype, section, session=session).fetchall():
                        # the row's section gets its own name so that it does
                        # not overwrite the section filter used for the query
                        # of the next suite
                        filename, pkg_section, package, arch_id = row

                        if arch_id == arch_all_id:
                            # It's arch all, so all contents files get it
                            for writer in file_writers.values():
                                writer.write(filename, pkg_section, package)
                        elif arch_id in file_writers:
                            file_writers[arch_id].write(filename, pkg_section, package)

                finally:
                    # close all the files
                    for writer in file_writers.values():
                        writer.finish()
335                     # close all the files
336                     for writer in file_writers.values():
337                         writer.finish()
338
339 ################################################################################
340
def main():
    """
    Entry point of C{dak contents}: parse the command line, configure
    logging and run the requested command on a fresh L{Contents} object.

    Asking for help prints the usage text and exits with status 0; a missing
    or unknown command prints the usage text and exits with status 1.
    """
    cnf = Config()

    help_key = "%s::%s" % (options_prefix, "Help")
    suite_key = "%s::%s" % (options_prefix, "Suite")
    quiet_key = "%s::%s" % (options_prefix, "Quiet")
    verbose_key = "%s::%s" % (options_prefix, "Verbose")

    arguments = [('h', "help", help_key),
                 ('s', "suite", suite_key, "HasArg"),
                 ('q', "quiet", quiet_key),
                 ('v', "verbose", verbose_key),
                ]

    commands = {'generate': Contents.generate,
                'bootstrap_bin': Contents.bootstrap_bin,
                'cruft': Contents.cruft,
                }

    args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments, sys.argv)

    # an explicit request for help is not an error
    if cnf.has_key(help_key):
        usage()

    # a missing or unknown command is a usage error: report it with a
    # non-zero exit status so that calling scripts can notice
    if (len(args) < 1) or args[0] not in commands:
        usage(1)

    # quiet takes precedence over verbose when both are given
    level = logging.INFO
    if cnf.has_key(quiet_key):
        level = logging.ERROR
    elif cnf.has_key(verbose_key):
        level = logging.DEBUG

    logging.basicConfig(level=level,
                        format='%(asctime)s %(levelname)s %(message)s',
                        stream=sys.stderr)

    commands[args[0]](Contents())
376
def which_suites(session):
    """
    Work out which suites this run should operate on.

    The names come from the Suite option below C{options_prefix} when it is
    set, otherwise from the entries of the C{Suite} configuration tree.

    @param session: database session passed on to C{get_suite}

    @return: list with the result of C{get_suite} for each lower-cased name
    """
    cnf = Config()
    suite_option = "%s::%s" % (options_prefix, "Suite")

    if cnf.has_key(suite_option):
        names = utils.split_args(cnf[suite_option])
    else:
        names = cnf.SubTree("Suite").List()

    resolved = []
    for name in names:
        resolved.append(get_suite(name.lower(), session))
    return resolved
387
388
# Run the command dispatcher only when executed as a script.
if __name__ == "__main__":
    main()