3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 ################################################################################
28 from daklib.dbconn import *
29 from daklib.config import Config
30 from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter
32 from multiprocessing import Pool
33 from shutil import rmtree
34 from subprocess import Popen, PIPE, check_call
35 from tempfile import mkdtemp
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component):
        '''
        The arguments are the database objects (Suite, Architecture,
        OverrideType, Component) selecting which Contents file is written.
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'component':     self.component.component_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        # Select the newest version of every binary package in the suite
        # first; the temp table keeps the aggregate query below simple.
        sql_create_temp = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        sql = '''
with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':        self.suite.suite_name,
            'component':    self.component.component_name,
            'debtype':      self.overridetype.overridetype,
            'architecture': self.architecture.arch_string,
        }
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # 'with' guarantees the template handle is closed again (the previous
        # code leaked the open file object).
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # Named 'output' to avoid shadowing the 'file' builtin.
        output = writer.open()
        output.write(self.get_header())
        for item in self.fetch():
            output.write(item)
        writer.close()
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        '''
        The arguments are the database objects (Suite, Component) selecting
        which Contents-source file is written.
        '''
        self.suite = suite
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        # Select the newest version of every source package in the suite
        # first; the temp table keeps the aggregate query below simple.
        sql_create_temp = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files f on f.id = s.file
        join location l on l.id = f.location
        where s.id in (select source from src_associations where suite = :suite_id)
            and l.component = :component_id
        order by source, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        sql = '''
select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':     self.suite.suite_name,
            'component': self.component.component_name
        }
        return SourceContentsFileWriter(**values)

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # Named 'output' to avoid shadowing the 'file' builtin.
        output = writer.open()
        for item in self.fetch():
            output.write(item)
        writer.close()
def binary_helper(suite_id, arch_id, overridetype_id, component_id):
    '''
    Writes the Contents file for one (suite, architecture, overridetype,
    component) combination. This function is called in a new subprocess and
    multiprocessing wants a top level function.

    Returns the log message components for the parent's log_result callback.
    '''
    session = DBConn().session(work_mem = 1000)
    try:
        suite = Suite.get(suite_id, session)
        architecture = Architecture.get(arch_id, session)
        overridetype = OverrideType.get(overridetype_id, session)
        component = Component.get(component_id, session)
        log_message = [suite.suite_name, architecture.arch_string, \
            overridetype.overridetype, component.component_name]
        contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
        contents_writer.write_file()
    finally:
        # Always return the connection to the pool, even if writing failed.
        session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    Writes the Contents-source file for one (suite, component) combination.
    This function is called in a new subprocess and multiprocessing wants a
    top level function.

    Returns the log message components for the parent's log_result callback.
    '''
    session = DBConn().session(work_mem = 1000)
    try:
        suite = Suite.get(suite_id, session)
        component = Component.get(component_id, session)
        log_message = [suite.suite_name, 'source', component.component_name]
        contents_writer = SourceContentsWriter(suite, component)
        contents_writer.write_file()
    finally:
        # Always return the connection to the pool, even if writing failed.
        session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = None, component_names = None, force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.
        '''
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        # None (the default) or an empty list means "no filter"; mutable
        # default arguments are avoided on purpose.
        if suite_names:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        component_query = session.query(Component)
        if component_names:
            component_query = component_query.filter(Component.component_name.in_(component_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            for component in component_query:
                component_id = component.component_id
                # handle source packages
                pool.apply_async(source_helper, (suite_id, component_id),
                    callback = class_.log_result)
                for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                    arch_id = architecture.arch_id
                    # handle 'deb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id), \
                        callback = class_.log_result)
                    # handle 'udeb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id), \
                        callback = class_.log_result)
        # Wait for all subprocesses before returning.
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        try:
            binary = session.query(DBBinary).get(self.binary_id)
            fileset = set(binary.scan_contents())
            if not fileset:
                # Sentinel entry so empty packages are not rescanned forever.
                fileset.add('EMPTY_PACKAGE')
            for filename in fileset:
                binary.contents.append(BinContents(file = filename))
            session.commit()
        finally:
            # Return the connection to the pool even if the scan failed.
            session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple processes.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # Bind the count method now, but call it only after the workers have
        # finished, so 'remaining' reflects the state after this run.
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.
    '''
    try:
        scanner = BinaryContentsScanner(binary_id)
        scanner.scan()
    except Exception as e:
        # Report the failure instead of letting it vanish silently inside
        # the multiprocessing pool; matches source_scan_helper.
        print(e)
def subprocess_setup():
    '''
    Reset SIGPIPE to its default disposition before exec'ing a child.

    Python installs a SIGPIPE handler by default, which is usually not what
    non-Python subprocesses expect.
    '''
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''
    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        temp_directory = mkdtemp(dir = Config()['Dir::TempPath'])
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        try:
            check_call(command, preexec_fn = subprocess_setup)
        except Exception:
            # Do not leak the temporary directory if the extraction fails.
            rmtree(temp_directory)
            self.root_directory = None
            raise

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            # Keep the documented contract: a missing changelog yields None.
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # +1 strips the path separator following the root directory.
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            return
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        # Mark as cleaned up so a second cleanup() is a no-op.
        self.root_directory = None
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        try:
            source = session.query(DBSource).get(self.source_id)
            fileset = set(source.scan_contents())
            for filename in fileset:
                source.contents.append(SrcContents(file = filename))
            session.commit()
        finally:
            # Return the connection to the pool even if the scan failed.
            session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # Bind the count method now, but call it only after the workers have
        # finished, so 'remaining' reflects the state after this run.
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
494 def source_scan_helper(source_id):
496 This function runs in a subprocess.
499 scanner = SourceContentsScanner(source_id)
501 except Exception as e: