#!/usr/bin/env python
"""
Create all the contents files

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
@copyright: 2009 Mike O'Connor <stew@debian.org>
@license: GNU General Public License version 2 or later
"""

################################################################################

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

################################################################################

# <Ganneff> there is the idea to slowly replace contents files
# <Ganneff> with a new generation of such files.
# <Ganneff> having more info.

# <Ganneff> of course that wont help for now where we need to generate them :)

################################################################################

import sys
import os
import logging
import traceback
import gzip
import threading
import Queue
import apt_pkg
from daklib import utils
from daklib.binary import Binary
from daklib.config import Config
from daklib.dbconn import *

################################################################################

def usage (exit_code=0):
    print """Usage: dak contents [options] command [arguments]

COMMANDS
    generate
        generate Contents-$arch.gz files

    bootstrap
        scan the debs in the existing pool and load their contents into the database

    cruft
        remove files/paths which are no longer referenced by a binary

OPTIONS
     -h, --help
        show this help and exit

     -v, --verbose
        show verbose information messages

     -q, --quiet
        suppress all output but errors

     -s, --suite={stable,testing,unstable,...}
        only operate on a single suite
"""
    sys.exit(exit_code)

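# A typical sequence of invocations might look like this (the suite name is
# illustrative; any suite known to dak.conf works with -s/--suite):
#
#   dak contents bootstrap              # import contents of existing pool debs
#   dak contents -s unstable generate   # write dists/unstable/Contents-$arch.gz
#   dak contents cruft                  # drop filenames/paths nothing references
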
################################################################################

# where in dak.conf all of our configuration will be stowed

options_prefix = "Contents"
options_prefix = "%s::Options" % options_prefix

log = logging.getLogger()

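# For reference, this command consults (at least) these dak.conf settings,
# shown here as an illustrative apt_pkg-style stanza (the paths are made up):
#
#   Dir
#   {
#     Templates "/srv/dak/templates/";
#   };
#
#   Contents
#   {
#     Root   "/srv/ftp-master/ftp/";   // prefix for the generated .gz files
#     Header "contents";               // template used as the Contents header
#   };
#
#   Suite { Stable {}; Testing {}; Unstable {}; };  // fallback suite list
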
################################################################################

class EndOfContents(object):
    """
    A sentinel object marking the end of the filename stream
    """
    pass

class GzippedContentWriter(object):
    """
    An object which will write contents out to a Contents-$arch.gz
    file on a separate thread
    """

    header = None # class attribute caching the header section of the Contents file

    def __init__(self, filename):
        """
        @type filename: string
        @param filename: the name of the file to write to
        """
        self.queue = Queue.Queue()
        self.current_file = None
        self.first_package = True
        self.output = self.open_file(filename)
        self.thread = threading.Thread(target=self.write_thread,
                                       name='Contents writer')
        self.thread.start()

    def open_file(self, filename):
        """
        opens a gzip stream to the contents file
        """
        filepath = Config()["Contents::Root"] + filename
        filedir = os.path.dirname(filepath)
        if not os.path.isdir(filedir):
            os.makedirs(filedir)
        return gzip.open(filepath, "w")

    def write(self, filename, section, package):
        """
        enqueue content to be written to the file on a separate thread
        """
        self.queue.put((filename,section,package))

    def write_thread(self):
        """
        the target of a Thread which will do the actual writing
        """
        while True:
            item = self.queue.get()
            if isinstance(item, EndOfContents):
                self.output.write('\n')
                self.output.close()
                break

            (filename,section,package) = item
            if filename != self.current_file:
                # this is the first file, so write the header first
                if not self.current_file:
                    header = self._getHeader()
                    if header:
                        self.output.write(header)

                self.output.write('\n%s\t' % filename)
                self.first_package = True

            self.current_file = filename

            if not self.first_package:
                self.output.write(',')
            else:
                self.first_package = False
            self.output.write('%s/%s' % (section,package))

    def finish(self):
        """
        enqueue the sentinel object so that writers will know to terminate
        """
        self.queue.put(EndOfContents())

    @classmethod
    def _getHeader(cls):
        """
        Internal method to return the header for Contents.gz files

        This is boilerplate which explains the contents of the file and how
        it can be used.
        """
        if not cls.header:
            if Config().has_key("Contents::Header"):
                try:
                    h = open(os.path.join( Config()["Dir::Templates"],
                                           Config()["Contents::Header"] ), "r")
                    cls.header = h.read()
                    h.close()
                except:
                    log.error( "error opening header file: %s\n%s" % (Config()["Contents::Header"],
                                                                      traceback.format_exc() ))
                    cls.header = None
            else:
                cls.header = None

        return cls.header


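# A minimal sketch of how a GzippedContentWriter is driven (the filename is
# illustrative; the real callers live in Contents.generate() below):
#
#   writer = GzippedContentWriter("dists/unstable/Contents-i386.gz")
#   writer.write("usr/bin/foo", "utils", "foo")  # queued; written by the worker thread
#   writer.finish()                              # enqueues the sentinel, flushes and closes
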
class Contents(object):
    """
    Class capable of generating Contents-$arch.gz files
    """

    def __init__(self):
        self.header = None

    def reject(self, message):
        log.error("E: %s" % message)

    def cruft(self):
        """
        remove files/paths from the DB which are no longer referenced
        by binaries and clean the temporary table
        """
        s = DBConn().session()

        # clear out all of the temporarily stored content associations.
        # this should only be run after process-accepted (p-a) has run; after
        # a p-a run we should have either accepted or rejected every package,
        # so there should no longer be anything in the queue
        s.query(PendingContentAssociation).delete()

        # delete any filenames we are storing which have no binary associated
        # with them
        cafq = s.query(ContentAssociation.filename_id).distinct()
        cfq = s.query(ContentFilename)
        cfq = cfq.filter(~ContentFilename.cafilename_id.in_(cafq))
        cfq.delete()

        # delete any paths we are storing which have no binary associated with
        # them
        capq = s.query(ContentAssociation.filepath_id).distinct()
        cpq = s.query(ContentFilepath)
        cpq = cpq.filter(~ContentFilepath.cafilepath_id.in_(capq))
        cpq.delete()

        s.commit()


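    # The two orphan cleanups above are roughly equivalent to SQL of this shape
    # (a sketch only; the actual table and column names are whatever the
    # daklib.dbconn mappings point at):
    #
    #   DELETE FROM content_file_names
    #    WHERE id NOT IN (SELECT DISTINCT filename FROM content_associations);
    #
    #   DELETE FROM content_file_paths
    #    WHERE id NOT IN (SELECT DISTINCT filepath FROM content_associations);
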
    def bootstrap(self):
        """
        scan the existing debs in the pool to populate the contents database tables
        """
        pooldir = Config()[ 'Dir::Pool' ]

        s = DBConn().session()

        for suite in s.query(Suite).all():
            for arch in get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=s):
                q = s.query(BinAssociation).join(Suite).filter_by(suite_name=suite.suite_name)
                q = q.join(DBBinary).join(Architecture).filter_by(arch_string=arch.arch_string)
                for ba in q:
                    filename = ba.binary.poolfile.filename
                    # Check for existing contents
                    existingq = s.query(ContentAssociation).filter_by(binary_pkg=ba.binary_id).limit(1)
                    if existingq.count() > 0:
                        log.debug( "already imported: %s" % (filename))
                    else:
                        # We don't have existing contents so import them
                        log.debug( "scanning: %s" % (filename) )
                        debfile = os.path.join(pooldir, filename)
                        if os.path.exists(debfile):
                            Binary(debfile, self.reject).scan_package(ba.binary_id, True)
                        else:
                            log.error("missing .deb: %s" % filename)


    def generate(self):
        """
        Generate Contents-$arch.gz files for every available arch in each given suite.
        """
        session = DBConn().session()

        arch_all_id = get_architecture("all", session).arch_id

        # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (non-free)
        # This is HORRIBLY debian specific :-/
        for dtype, section, fn_pattern in \
              [('deb',  None,                        "dists/%s/Contents-%s.gz"),
               ('udeb', "debian-installer",          "dists/%s/Contents-udeb-%s.gz"),
               ('udeb', "non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]:

            overridetype = get_override_type(dtype, session)

            # For udebs, we only look in certain sections (see the for loop above)
            if section is not None:
                section = get_section(section, session)

            # Get our suites
            for suite in which_suites(session):
                # Which architectures do we need to work on
                arch_list = get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=session)

                # Set up our file writer dictionary
                file_writers = {}
                try:
                    # One file writer per arch
                    for arch in arch_list:
                        file_writers[arch.arch_id] = GzippedContentWriter(fn_pattern % (suite.suite_name, arch.arch_string))

                    for r in get_suite_contents(suite, overridetype, section, session=session).fetchall():
                        # don't rebind 'section' here; it still names the override
                        # section used to select udeb contents above
                        filename, pkg_section, package, arch_id = r

                        if arch_id == arch_all_id:
                            # It's arch all, so all contents files get it
                            for writer in file_writers.values():
                                writer.write(filename, pkg_section, package)
                        else:
                            if file_writers.has_key(arch_id):
                                file_writers[arch_id].write(filename, pkg_section, package)

                finally:
                    # close all the files
                    for writer in file_writers.values():
                        writer.finish()

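    # For reference, with the first fn_pattern above and illustrative values the
    # writer target expands to:
    #
    #   "dists/%s/Contents-%s.gz" % ("unstable", "i386")
    #       -> "dists/unstable/Contents-i386.gz"
    #
    # which GzippedContentWriter.open_file() then prefixes with Contents::Root.
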
################################################################################

def main():
    cnf = Config()

    arguments = [('h',"help", "%s::%s" % (options_prefix,"Help")),
                 ('s',"suite", "%s::%s" % (options_prefix,"Suite"),"HasArg"),
                 ('q',"quiet", "%s::%s" % (options_prefix,"Quiet")),
                 ('v',"verbose", "%s::%s" % (options_prefix,"Verbose")),
                ]

    commands = {'generate' : Contents.generate,
                'bootstrap' : Contents.bootstrap,
                'cruft' : Contents.cruft,
                }

    args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments, sys.argv)

    if (len(args) < 1) or not commands.has_key(args[0]):
        usage()

    if cnf.has_key("%s::%s" % (options_prefix,"Help")):
        usage()

    level = logging.INFO
    if cnf.has_key("%s::%s" % (options_prefix,"Quiet")):
        level = logging.ERROR

    elif cnf.has_key("%s::%s" % (options_prefix,"Verbose")):
        level = logging.DEBUG


    logging.basicConfig( level=level,
                         format='%(asctime)s %(levelname)s %(message)s',
                         stream = sys.stderr )

    commands[args[0]](Contents())

def which_suites(session):
    """
    return a list of suites to operate on
    """
    if Config().has_key( "%s::%s" %(options_prefix,"Suite")):
        suites = utils.split_args(Config()[ "%s::%s" %(options_prefix,"Suite")])
    else:
        suites = Config().SubTree("Suite").List()

    return [get_suite(s.lower(), session) for s in suites]


if __name__ == '__main__':
    main()