]> git.decadent.org.uk Git - dak.git/blob - dak/contents.py
dinstall
[dak.git] / dak / contents.py
1 #!/usr/bin/env python
2 """
3 Create all the contents files
4
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
7 @copyright: 2009 Mike O'Connor <stew@debian.org>
8 @license: GNU General Public License version 2 or later
9 """
10
11 ################################################################################
12
13 # This program is free software; you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation; either version 2 of the License, or
16 # (at your option) any later version.
17
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 # GNU General Public License for more details.
22
23 # You should have received a copy of the GNU General Public License
24 # along with this program; if not, write to the Free Software
25 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
27 ################################################################################
28
29 # <Ganneff> there is the idea to slowly replace contents files
30 # <Ganneff> with a new generation of such files.
31 # <Ganneff> having more info.
32
33 # <Ganneff> of course that wont help for now where we need to generate them :)
34
35 ################################################################################
36
37 import sys
38 import os
39 import logging
40 import gzip
41 import threading
42 import traceback
43 import Queue
44 import apt_pkg
45 from daklib import utils
46 from daklib.binary import Binary
47 from daklib.config import Config
48 from daklib.dbconn import *
49
50 ################################################################################
51
def usage(exit_code=0):
    """
    Print the command line help of C{dak contents} and terminate.

    @type exit_code: int
    @param exit_code: status passed to C{sys.exit} after the help text has
                      been written to standard output
    """
    help_text = """Usage: dak contents [options] command [arguments]

COMMANDS
    generate
        generate Contents-$arch.gz files

    bootstrap_bin
        scan the debs in the existing pool and load contents into the bin_contents table

    cruft
        remove files/paths which are no longer referenced by a binary

OPTIONS
     -h, --help
        show this help and exit

     -v, --verbose
        show verbose information messages

     -q, --quiet
        suppress all output but errors

     -s, --suite={stable,testing,unstable,...}
        only operate on a single suite
"""
    # the text is followed by one extra newline, exactly what a print
    # statement would emit after the string
    sys.stdout.write(help_text + "\n")
    sys.exit(exit_code)
79
80 ################################################################################
81
# key in the dak configuration tree below which the options of this
# command are stored
options_prefix = "Contents::Options"

# messages of this module are sent to the root logger
log = logging.getLogger()
88
89 ################################################################################
90
class EndOfContents(object):
    """
    Marker type; an instance put on a writer queue signals that the
    stream of filenames has ended
    """
96
class GzippedContentWriter(object):
    """
    An object which will write contents out to a Contents-$arch.gz
    file on a separate thread

    Entries are handed over with L{write} and put on a queue; the thread
    started by the constructor takes them off the queue and writes them to
    the gzip stream.  L{finish} has to be called once all entries are
    queued, otherwise the writer thread keeps waiting for more.
    """

    # header section of the contents files; a class attribute shared by
    # all writers and filled in on first use by _getHeader()
    header = None

    def __init__(self, filename):
        """
        @type filename: string
        @param filename: the name of the file to write to, appended to the
                         C{Contents::Root} configuration value
        """
        self.queue = Queue.Queue()
        self.current_file = None
        self.first_package = True
        self.output = self.open_file(filename)
        self.thread = threading.Thread(target=self.write_thread,
                                       name='Contents writer')
        self.thread.start()

    def open_file(self, filename):
        """
        opens a gzip stream to the contents file, creating the directory
        holding the file first if it does not exist

        @type filename: string
        @param filename: the name of the file, appended to the
                         C{Contents::Root} configuration value

        @return: the gzip file object, opened for writing
        """
        filepath = Config()["Contents::Root"] + filename
        filedir = os.path.dirname(filepath)
        if not os.path.isdir(filedir):
            os.makedirs(filedir)
        return gzip.open(filepath, "w")

    def write(self, filename, section, package):
        """
        enqueue content to be written to the file on a separate thread

        @param filename: the path which is shipped by the package
        @param section: the section of the package
        @param package: the name of the package
        """
        self.queue.put((filename, section, package))

    def write_thread(self):
        """
        the target of a Thread which will do the actual writing

        Runs until an EndOfContents object is taken off the queue.  The
        output stream is closed when the loop is left, also when writing
        an entry raised an exception.
        """
        try:
            while True:
                item = self.queue.get()
                if isinstance(item, EndOfContents):
                    self.output.write('\n')
                    break
                self._write_entry(*item)
        finally:
            self.output.close()

    def _write_entry(self, filename, section, package):
        """
        write one (filename, section, package) entry to the output stream

        A new line is started whenever the filename differs from the one
        of the previous entry; packages of consecutive entries with the
        same filename are appended to that line, separated by commas.
        """
        if filename != self.current_file:
            # nothing has been written yet, so the header goes first
            if not self.current_file:
                header = self._getHeader()
                # there may be no header at all (not configured, or the
                # header file could not be read)
                if header:
                    self.output.write(header)

            self.output.write('\n%s\t' % filename)
            self.first_package = True
            self.current_file = filename

        if self.first_package:
            self.first_package = False
        else:
            self.output.write(',')
        self.output.write('%s/%s' % (section, package))

    def finish(self):
        """
        enqueue the sentry object so that writers will know to terminate
        """
        self.queue.put(EndOfContents())

    @classmethod
    def _getHeader(cls):
        """
        Internal method to return the header for Contents.gz files

        This is boilerplate which explains the contents of the file and how
        it can be used.  The text is read from the file named by
        C{Contents::Header} in the C{Dir::Templates} directory and kept in
        the C{header} class attribute for later calls.

        @return: the header text, or None if no header is configured or
                 the header file could not be read
        """
        if not GzippedContentWriter.header:
            cnf = Config()
            header = None
            if cnf.has_key("Contents::Header"):
                try:
                    h = open(os.path.join(cnf["Dir::Templates"],
                                          cnf["Contents::Header"]), "r")
                    try:
                        header = h.read()
                    finally:
                        h.close()
                except Exception:
                    # a missing header is not fatal: log it and go on
                    # without one
                    log.error("error opening header file: %s\n%s" % (cnf["Contents::Header"],
                                                                     traceback.format_exc()))
                    header = None
            GzippedContentWriter.header = header

        return GzippedContentWriter.header
191
192
class Contents(object):
    """
    Class capable of generating Contents-$arch.gz files

    Each public method implements one command of C{dak contents} (or a
    helper for it) and opens its own database session.
    """

    def __init__(self):
        self.header = None

    def reject(self, message):
        """
        log a problem as an error; passed as a callback to C{Binary}
        when packages are scanned

        @type message: string
        @param message: description of the problem
        """
        log.error("E: %s" % message)

    def cruft(self):
        """
        remove files/paths from the DB which are no longer referenced
        by binaries and clean the temporary table
        """
        s = DBConn().session()

        # clear out all of the temporarily stored content associations
        # this should be run only after p-a has run.  after a p-a
        # run we should have either accepted or rejected every package
        # so there should no longer be anything in the queue
        s.query(PendingContentAssociation).delete()

        # delete any filenames we are storing which have no binary associated
        # with them
        cafq = s.query(ContentAssociation.filename_id).distinct()
        cfq = s.query(ContentFilename)
        cfq = cfq.filter(~ContentFilename.cafilename_id.in_(cafq))
        cfq.delete()

        # delete any paths we are storing which have no binary associated with
        # them
        capq = s.query(ContentAssociation.filepath_id).distinct()
        cpq = s.query(ContentFilepath)
        cpq = cpq.filter(~ContentFilepath.cafilepath_id.in_(capq))
        cpq.delete()

        s.commit()

    def bootstrap_bin(self):
        """
        scan the existing debs in the pool to populate the bin_contents table
        """
        pooldir = Config()['Dir::Pool']

        s = DBConn().session()

        # NOTE(review): only the first binary returned by the query is
        # handled here, not every binary in the pool as the description
        # says -- confirm whether this should loop over
        # s.query(DBBinary).all()
        binary = s.query(DBBinary).first()
        if binary:
            filename = binary.poolfile.filename
            # Check for existing contents
            existingq = s.execute("select 1 from bin_contents where binary_id=:id", {'id': binary.binary_id})
            if existingq.fetchone():
                log.debug("already imported: %s" % (filename))
            else:
                # We don't have existing contents so import them
                log.debug("scanning: %s" % (filename))

                debfile = os.path.join(pooldir, filename)
                if os.path.exists(debfile):
                    Binary(debfile, self.reject).scan_package(binary.binary_id, True)
                else:
                    log.error("missing .deb: %s" % filename)

    def bootstrap(self):
        """
        scan the existing debs in the pool to populate the contents database tables
        """
        pooldir = Config()['Dir::Pool']

        s = DBConn().session()

        for suite in s.query(Suite).all():
            for arch in get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=s):
                # all binaries associated with this suite for this architecture
                q = s.query(BinAssociation)
                q = q.join(Suite).filter_by(suite_name=suite.suite_name)
                q = q.join(DBBinary).join(Architecture).filter_by(arch_string=arch.arch_string)
                for ba in q:
                    filename = ba.binary.poolfile.filename
                    # Check for existing contents
                    existingq = s.query(ContentAssociation).filter_by(binary_pkg=ba.binary_id).limit(1)
                    if existingq.count() > 0:
                        log.debug("already imported: %s" % (filename))
                    else:
                        # We don't have existing contents so import them
                        log.debug("scanning: %s" % (filename))
                        debfile = os.path.join(pooldir, filename)
                        if os.path.exists(debfile):
                            Binary(debfile, self.reject).scan_package(ba.binary_id, True)
                        else:
                            log.error("missing .deb: %s" % filename)

    def generate(self):
        """
        Generate Contents-$arch.gz files for every available arch in each given suite.

        Three sets of files are written per suite: one for debs and two for
        udebs (sections debian-installer and non-free/debian-installer).
        """
        session = DBConn().session()

        arch_all_id = get_architecture("all", session).arch_id

        # the suites do not depend on the kind of file being generated,
        # so they are looked up only once
        suites = which_suites(session)

        # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free)
        # This is HORRIBLY debian specific :-/
        for dtype, section_name, fn_pattern in \
              [('deb',  None,                        "dists/%s/Contents-%s.gz"),
               ('udeb', "debian-installer",          "dists/%s/Contents-udeb-%s.gz"),
               ('udeb', "non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]:

            overridetype = get_override_type(dtype, session)

            # For udebs, we only look in certain sections (see the for loop above)
            section = None
            if section_name is not None:
                section = get_section(section_name, session)

            # Get our suites
            for suite in suites:
                # Which architectures do we need to work on
                arch_list = get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=session)

                # Set up our file writer dictionary
                file_writers = {}
                try:
                    # One file writer per arch
                    for arch in arch_list:
                        file_writers[arch.arch_id] = GzippedContentWriter(fn_pattern % (suite.suite_name, arch.arch_string))

                    for r in get_suite_contents(suite, overridetype, section, session=session).fetchall():
                        # the section of the row gets a name of its own so
                        # that the section filter used for the query of the
                        # next suite stays untouched
                        filename, pkg_section, package, arch_id = r

                        if arch_id == arch_all_id:
                            # It's arch all, so all contents files get it
                            for writer in file_writers.values():
                                writer.write(filename, pkg_section, package)
                        elif arch_id in file_writers:
                            file_writers[arch_id].write(filename, pkg_section, package)

                finally:
                    # close all the files
                    for writer in file_writers.values():
                        writer.finish()
339
340 ################################################################################
341
def main():
    """
    Entry point of C{dak contents}: parse the command line, configure
    logging and run the requested command.

    The help text is printed and the program exits with status 0 when help
    was requested, and with status 1 when the command is missing or not
    known.
    """
    cnf = Config()

    def option(name):
        # configuration key under which a command line option is stored
        return "%s::%s" % (options_prefix, name)

    arguments = [('h', "help", option("Help")),
                 ('s', "suite", option("Suite"), "HasArg"),
                 ('q', "quiet", option("Quiet")),
                 ('v', "verbose", option("Verbose")),
                ]

    commands = {'generate' : Contents.generate,
                'bootstrap_bin' : Contents.bootstrap_bin,
                'cruft' : Contents.cruft,
                }

    args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments, sys.argv)

    # an explicit request for help wins, even if no command was given
    if cnf.has_key(option("Help")):
        usage()

    # a missing or unknown command is an error, so do not report success
    if len(args) < 1 or args[0] not in commands:
        usage(1)

    # quiet takes precedence over verbose if both are given
    level = logging.INFO
    if cnf.has_key(option("Quiet")):
        level = logging.ERROR
    elif cnf.has_key(option("Verbose")):
        level = logging.DEBUG

    logging.basicConfig(level=level,
                        format='%(asctime)s %(levelname)s %(message)s',
                        stream=sys.stderr)

    commands[args[0]](Contents())
377
def which_suites(session):
    """
    Determine the suites to operate on.

    The suites named by the suite command line option are used if the
    option is set; otherwise every entry of the C{Suite} section of the
    configuration is used.

    @param session: database session handed on to C{get_suite}

    @return: list with the result of C{get_suite} for each lower-cased
             suite name
    """
    suite_key = "%s::%s" % (options_prefix, "Suite")

    if not Config().has_key(suite_key):
        names = Config().SubTree("Suite").List()
    else:
        names = utils.split_args(Config()[suite_key])

    found = []
    for name in names:
        found.append(get_suite(name.lower(), session))
    return found
388
389
# run the command when this file is executed as a script
if __name__ == "__main__":
    main()