3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
28 from daklib.dbconn import *
29 from daklib.config import Config
30 from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter
32 from multiprocessing import Pool
33 from shutil import rmtree
34 from subprocess import Popen, PIPE, check_call
35 from tempfile import mkdtemp
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component):
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        # overrides may live in a different suite (e.g. for *-updates)
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'component':     self.component.component_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        # keep only the newest version of each package in a temp table so
        # the main query below stays simple
        sql_create_temp = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        sql = '''
with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        # historical Contents layout: filename left-justified to 55 columns
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'archive':      self.suite.archive.path,
            'suite':        self.suite.suite_name,
            'component':    self.component.component_name,
            'debtype':      self.overridetype.overridetype,
            'architecture': self.architecture.arch_string,
        }
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # context manager guarantees the template handle is closed
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        output = writer.open()
        output.write(self.get_header())
        for item in self.fetch():
            output.write(item)
        writer.close()
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        self.suite = suite
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        bind_values = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        # restrict to the newest version of each source package first
        sql_create_temp = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files_archive_map af on s.file = af.file_id
        where s.id in (select source from src_associations where suite = :suite_id)
            and af.component_id = :component_id
        order by source, version desc;'''
        self.session.execute(sql_create_temp, params=bind_values)

        sql = '''
select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        statement = self.session.query("file", "pkglist").from_statement(sql)
        return statement.params(bind_values)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for path, sources in self.query().yield_per(100):
            yield self.formatline(path, sources)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        return SourceContentsFileWriter(
            archive   = self.suite.archive.path,
            suite     = self.suite.suite_name,
            component = self.component.component_name)

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        output = writer.open()
        for line in self.fetch():
            output.write(line)
        writer.close()
def binary_helper(suite_id, arch_id, overridetype_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Returns a log message (list of identifying strings) for the callback.
    '''
    session = DBConn().session(work_mem = 1000)
    try:
        suite = Suite.get(suite_id, session)
        architecture = Architecture.get(arch_id, session)
        overridetype = OverrideType.get(overridetype_id, session)
        component = Component.get(component_id, session)
        log_message = [suite.suite_name, architecture.arch_string,
            overridetype.overridetype, component.component_name]
        contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
        contents_writer.write_file()
    finally:
        # always return the connection to the pool, even if write_file fails
        session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Returns a log message (list of identifying strings) for the callback.
    '''
    session = DBConn().session(work_mem = 1000)
    try:
        suite = Suite.get(suite_id, session)
        component = Component.get(component_id, session)
        log_message = [suite.suite_name, 'source', component.component_name]
        contents_writer = SourceContentsWriter(suite, component)
        contents_writer.write_file()
    finally:
        # always return the connection to the pool, even if write_file fails
        session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, archive_names = None, suite_names = None, component_names = None, force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitely. Untouchable
        suites will be included if the force argument is set to True.

        The name list arguments default to None (meaning "no filter") instead
        of mutable [] defaults, which would be shared between calls.
        '''
        archive_names = archive_names or []
        suite_names = suite_names or []
        component_names = component_names or []
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        if len(archive_names) > 0:
            suite_query = suite_query.join(Suite.archive).filter(Archive.archive_name.in_(archive_names))
        if len(suite_names) > 0:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        component_query = session.query(Component)
        if len(component_names) > 0:
            component_query = component_query.filter(Component.component_name.in_(component_names))
        if not force:
            # skip untouchable suites unless explicitly forced
            suite_query = suite_query.filter(Suite.untouchable == False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            for component in component_query:
                component_id = component.component_id
                # handle source packages
                pool.apply_async(source_helper, (suite_id, component_id),
                    callback = class_.log_result)
                for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                    arch_id = architecture.arch_id
                    # handle 'deb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id), \
                        callback = class_.log_result)
                    # handle 'udeb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id), \
                        callback = class_.log_result)
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        try:
            binary = session.query(DBBinary).get(self.binary_id)
            fileset = set(binary.scan_contents())
            if not fileset:
                # record a marker so empty packages are not rescanned next run
                fileset.add('EMPTY_PACKAGE')
            for filename in fileset:
                binary.contents.append(BinContents(file = filename))
            session.commit()
        finally:
            # always release the connection, even if the scan fails
            session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # keep the bound method; the count is evaluated AFTER the scan so it
        # reflects what is still left to do
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.
    '''
    # Report failures instead of losing them silently in the worker pool;
    # this matches the error handling of source_scan_helper.
    try:
        scanner = BinaryContentsScanner(binary_id)
        scanner.scan()
    except Exception as e:
        print("binary_scan_helper raised an exception: %s" % (e))
def subprocess_setup():
    '''
    Restore the default SIGPIPE disposition in a child process.

    Python installs a SIGPIPE handler by default, which is usually not what
    non-Python subprocesses expect; reset it before exec.
    '''
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convinient function for accessing it.
    '''
    def __init__(self, dscfilename, tmpbasedir=None):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        basedir = tmpbasedir if tmpbasedir else Config()['Dir::TempPath']
        temp_directory = mkdtemp(dir = basedir)
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        check_call(command, preexec_fn = subprocess_setup)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        path = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(path)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # +1 strips the path separator following the root directory
        prefix_length = len(self.root_directory) + 1
        for dirpath, _, filenames in os.walk(self.root_directory):
            for entry in filenames:
                yield os.path.join(dirpath[prefix_length:], entry)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            return
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None

    def __del__(self):
        '''
        Enforce cleanup.
        '''
        self.cleanup()
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        try:
            source = session.query(DBSource).get(self.source_id)
            fileset = set(source.scan_contents())
            for filename in fileset:
                source.contents.append(SrcContents(file = filename))
            session.commit()
        finally:
            # always release the connection, even if the scan fails
            session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # keep the bound method; the count is evaluated AFTER the scan so it
        # reflects what is still left to do
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
500 def source_scan_helper(source_id):
502 This function runs in a subprocess.
505 scanner = SourceContentsScanner(source_id)
507 except Exception as e: