3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
28 from daklib.dbconn import *
29 from daklib.config import Config
30 from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter
32 from multiprocessing import Pool
33 from shutil import rmtree
34 from subprocess import Popen, PIPE, check_call
35 from tempfile import mkdtemp
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component):
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        # reuse the session that is already bound to the suite object
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        # overrides may be kept in a different suite (Suite.overridesuite)
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'component':     self.component.component_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        sql = '''
with

newest_binaries as
    (select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc),

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        # list() is clearer than a pass-through comprehension
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':        self.suite.suite_name,
            'component':    self.component.component_name,
            'debtype':      self.overridetype.overridetype,
            'architecture': self.architecture.arch_string,
        }
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # 'with' guarantees the template file handle is closed even if
        # read() raises
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # 'output' instead of 'file' to avoid shadowing the builtin
        output = writer.open()
        output.write(self.get_header())
        for item in self.fetch():
            output.write(item)
        writer.close()
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        self.suite = suite
        self.component = component
        # reuse the session that is already bound to the suite object
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        sql = '''
with
  newest_sources as
    (select distinct on (source) s.id, s.source from source s
        join files f on f.id = s.file
        join location l on l.id = f.location
        where s.id in (select source from src_associations where suite = :suite_id)
            and l.component = :component_id
        order by source, version desc)

select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        # list() is clearer than a pass-through comprehension
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':     self.suite.suite_name,
            'component': self.component.component_name,
        }
        return SourceContentsFileWriter(**values)

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # 'output' instead of 'file' to avoid shadowing the builtin
        output = writer.open()
        for item in self.fetch():
            output.write(item)
        writer.close()
def binary_helper(suite_id, arch_id, overridetype_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    component = Component.get(component_id, session)
    # the returned message is handed to ContentsWriter.log_result via the
    # apply_async callback
    log_message = [suite.suite_name, architecture.arch_string,
                   overridetype.overridetype, component.component_name]
    BinaryContentsWriter(suite, architecture, overridetype, component).write_file()
    session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    # the returned message is handed to ContentsWriter.log_result via the
    # apply_async callback
    log_message = [suite.suite_name, 'source', component.component_name]
    SourceContentsWriter(suite, component).write_file()
    session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = None, component_names = None, force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.
        '''
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        # None (not a shared mutable []) is the default; an empty/absent list
        # means "all suites"
        if suite_names:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        component_query = session.query(Component)
        if component_names:
            component_query = component_query.filter(Component.component_name.in_(component_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            for component in component_query:
                component_id = component.component_id
                # handle source packages
                pool.apply_async(source_helper, (suite_id, component_id),
                    callback = class_.log_result)
                for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                    arch_id = architecture.arch_id
                    # handle 'deb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id),
                        callback = class_.log_result)
                    # handle 'udeb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id),
                        callback = class_.log_result)
        # wait for all workers to finish before closing the session
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if not fileset:
            # store a marker entry so empty packages are not rescanned forever
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # bind the *unbound* count method now; it is called only after the
        # pool has finished so 'remaining' reflects the post-scan state
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.
    '''
    BinaryContentsScanner(binary_id).scan()
def subprocess_setup():
    '''
    Restore the default SIGPIPE disposition in a child process.

    Python installs a SIGPIPE handler by default; that is usually not what
    non-Python subprocesses expect, so reset it before exec.
    '''
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''
    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        temp_directory = mkdtemp(dir = Config()['Dir::TempPath'])
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        check_call(command, preexec_fn = subprocess_setup)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # +1 strips the path separator following the root directory
        prefix_length = len(self.root_directory) + 1
        for dirpath, _, filenames in os.walk(self.root_directory):
            for filename in filenames:
                yield os.path.join(dirpath[prefix_length:], filename)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            return
        # the whole mkdtemp() directory (parent of 'root') is removed
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        fileset = set(source.scan_contents())
        for filename in fileset:
            source.contents.append(SrcContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # bind the *unbound* count method now; it is called only after the
        # pool has finished so 'remaining' reflects the post-scan state
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
479 def source_scan_helper(source_id):
481 This function runs in a subprocess.
484 scanner = SourceContentsScanner(source_id)
486 except Exception as e: