3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
from daklib.dbconn import *
from daklib.config import Config
from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter

from multiprocessing import Pool
from shutil import rmtree
from tempfile import mkdtemp

import daklib.daksubprocess
import os.path
39 class BinaryContentsWriter(object):
41 BinaryContentsWriter writes the Contents-$arch.gz files.
43 def __init__(self, suite, architecture, overridetype, component):
45 self.architecture = architecture
46 self.overridetype = overridetype
47 self.component = component
48 self.session = suite.session()
52 Returns a query object that is doing most of the work.
54 overridesuite = self.suite
55 if self.suite.overridesuite is not None:
56 overridesuite = get_suite(self.suite.overridesuite, self.session)
58 'suite': self.suite.suite_id,
59 'overridesuite': overridesuite.suite_id,
60 'component': self.component.component_id,
61 'arch_all': get_architecture('all', self.session).arch_id,
62 'arch': self.architecture.arch_id,
63 'type_id': self.overridetype.overridetype_id,
64 'type': self.overridetype.overridetype,
68 create temp table newest_binaries (
69 id integer primary key,
72 create index newest_binaries_by_package on newest_binaries (package);
74 insert into newest_binaries (id, package)
75 select distinct on (package) id, package from binaries
76 where type = :type and
77 (architecture = :arch_all or architecture = :arch) and
78 id in (select bin from bin_associations where suite = :suite)
79 order by package, version desc;'''
80 self.session.execute(sql_create_temp, params=params)
86 (select o.package, s.section
87 from override o, section s
88 where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
89 o.component = :component)
91 select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
92 from newest_binaries b, bin_contents bc, unique_override o
93 where b.id = bc.binary_id and o.package = b.package
96 return self.session.query("file", "pkglist").from_statement(sql). \
99 def formatline(self, filename, package_list):
101 Returns a formatted string for the filename argument.
103 return "%-55s %s\n" % (filename, package_list)
107 Yields a new line of the Contents-$arch.gz file in filename order.
109 for filename, package_list in self.query().yield_per(100):
110 yield self.formatline(filename, package_list)
111 # end transaction to return connection to pool
112 self.session.rollback()
116 Returns a list of lines for the Contents-$arch.gz file.
118 return [item for item in self.fetch()]
122 Returns a writer object.
125 'archive': self.suite.archive.path,
126 'suite': self.suite.suite_name,
127 'component': self.component.component_name,
128 'debtype': self.overridetype.overridetype,
129 'architecture': self.architecture.arch_string,
131 return BinaryContentsFileWriter(**values)
133 def get_header(self):
135 Returns the header for the Contents files as a string.
139 filename = os.path.join(Config()['Dir::Templates'], 'contents')
140 header_file = open(filename)
141 return header_file.read()
146 def write_file(self):
148 Write the output file.
150 writer = self.writer()
152 file.write(self.get_header())
153 for item in self.fetch():
158 class SourceContentsWriter(object):
160 SourceContentsWriter writes the Contents-source.gz files.
162 def __init__(self, suite, component):
164 self.component = component
165 self.session = suite.session()
169 Returns a query object that is doing most of the work.
172 'suite_id': self.suite.suite_id,
173 'component_id': self.component.component_id,
176 sql_create_temp = '''
177 create temp table newest_sources (
178 id integer primary key,
181 create index sources_binaries_by_source on newest_sources (source);
183 insert into newest_sources (id, source)
184 select distinct on (source) s.id, s.source from source s
185 join files_archive_map af on s.file = af.file_id
186 where s.id in (select source from src_associations where suite = :suite_id)
187 and af.component_id = :component_id
188 order by source, version desc;'''
189 self.session.execute(sql_create_temp, params=params)
192 select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
193 from newest_sources s, src_contents sc
194 where s.id = sc.source_id group by sc.file'''
196 return self.session.query("file", "pkglist").from_statement(sql). \
199 def formatline(self, filename, package_list):
201 Returns a formatted string for the filename argument.
203 return "%s\t%s\n" % (filename, package_list)
207 Yields a new line of the Contents-source.gz file in filename order.
209 for filename, package_list in self.query().yield_per(100):
210 yield self.formatline(filename, package_list)
211 # end transaction to return connection to pool
212 self.session.rollback()
216 Returns a list of lines for the Contents-source.gz file.
218 return [item for item in self.fetch()]
222 Returns a writer object.
225 'archive': self.suite.archive.path,
226 'suite': self.suite.suite_name,
227 'component': self.component.component_name
229 return SourceContentsFileWriter(**values)
231 def write_file(self):
233 Write the output file.
235 writer = self.writer()
237 for item in self.fetch():
242 def binary_helper(suite_id, arch_id, overridetype_id, component_id):
244 This function is called in a new subprocess and multiprocessing wants a top
247 session = DBConn().session(work_mem = 1000)
248 suite = Suite.get(suite_id, session)
249 architecture = Architecture.get(arch_id, session)
250 overridetype = OverrideType.get(overridetype_id, session)
251 component = Component.get(component_id, session)
252 log_message = [suite.suite_name, architecture.arch_string, \
253 overridetype.overridetype, component.component_name]
254 contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
255 contents_writer.write_file()
259 def source_helper(suite_id, component_id):
261 This function is called in a new subprocess and multiprocessing wants a top
264 session = DBConn().session(work_mem = 1000)
265 suite = Suite.get(suite_id, session)
266 component = Component.get(component_id, session)
267 log_message = [suite.suite_name, 'source', component.component_name]
268 contents_writer = SourceContentsWriter(suite, component)
269 contents_writer.write_file()
273 class ContentsWriter(object):
275 Loop over all suites, architectures, overridetypes, and components to write
279 def log_result(class_, result):
281 Writes a result message to the logfile.
283 class_.logger.log(result)
286 def write_all(class_, logger, archive_names = [], suite_names = [], component_names = [], force = False):
288 Writes all Contents files for suites in list suite_names which defaults
289 to all 'touchable' suites if not specified explicitely. Untouchable
290 suites will be included if the force argument is set to True.
292 class_.logger = logger
293 session = DBConn().session()
294 suite_query = session.query(Suite)
295 if len(archive_names) > 0:
296 suite_query = suite_query.join(Suite.archive).filter(Archive.archive_name.in_(archive_names))
297 if len(suite_names) > 0:
298 suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
299 component_query = session.query(Component)
300 if len(component_names) > 0:
301 component_query = component_query.filter(Component.component_name.in_(component_names))
303 suite_query = suite_query.filter(Suite.untouchable == False)
304 deb_id = get_override_type('deb', session).overridetype_id
305 udeb_id = get_override_type('udeb', session).overridetype_id
307 for suite in suite_query:
308 suite_id = suite.suite_id
309 for component in component_query:
310 component_id = component.component_id
311 # handle source packages
312 pool.apply_async(source_helper, (suite_id, component_id),
313 callback = class_.log_result)
314 for architecture in suite.get_architectures(skipsrc = True, skipall = True):
315 arch_id = architecture.arch_id
316 # handle 'deb' packages
317 pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id), \
318 callback = class_.log_result)
319 # handle 'udeb' packages
320 pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id), \
321 callback = class_.log_result)
327 class BinaryContentsScanner(object):
329 BinaryContentsScanner provides a threadsafe method scan() to scan the
330 contents of a DBBinary object.
332 def __init__(self, binary_id):
334 The argument binary_id is the id of the DBBinary object that
337 self.binary_id = binary_id
339 def scan(self, dummy_arg = None):
341 This method does the actual scan and fills in the associated BinContents
342 property. It commits any changes to the database. The argument dummy_arg
343 is ignored but needed by our threadpool implementation.
345 session = DBConn().session()
346 binary = session.query(DBBinary).get(self.binary_id)
347 fileset = set(binary.scan_contents())
348 if len(fileset) == 0:
349 fileset.add('EMPTY_PACKAGE')
350 for filename in fileset:
351 binary.contents.append(BinContents(file = filename))
356 def scan_all(class_, limit = None):
358 The class method scan_all() scans all binaries using multiple threads.
359 The number of binaries to be scanned can be limited with the limit
360 argument. Returns the number of processed and remaining packages as a
363 session = DBConn().session()
364 query = session.query(DBBinary).filter(DBBinary.contents == None)
365 remaining = query.count
366 if limit is not None:
367 query = query.limit(limit)
368 processed = query.count()
370 for binary in query.yield_per(100):
371 pool.apply_async(binary_scan_helper, (binary.binary_id, ))
374 remaining = remaining()
376 return { 'processed': processed, 'remaining': remaining }
378 def binary_scan_helper(binary_id):
380 This function runs in a subprocess.
382 scanner = BinaryContentsScanner(binary_id)
385 class UnpackedSource(object):
387 UnpackedSource extracts a source package into a temporary location and
388 gives you some convinient function for accessing it.
390 def __init__(self, dscfilename, tmpbasedir=None):
392 The dscfilename is a name of a DSC file that will be extracted.
394 basedir = tmpbasedir if tmpbasedir else Config()['Dir::TempPath']
395 temp_directory = mkdtemp(dir = basedir)
396 self.root_directory = os.path.join(temp_directory, 'root')
397 command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
398 dscfilename, self.root_directory)
399 daklib.daksubprocess.check_call(command)
401 def get_root_directory(self):
403 Returns the name of the package's root directory which is the directory
404 where the debian subdirectory is located.
406 return self.root_directory
408 def get_changelog_file(self):
410 Returns a file object for debian/changelog or None if no such file exists.
412 changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
414 return open(changelog_name)
418 def get_all_filenames(self):
420 Returns an iterator over all filenames. The filenames will be relative
421 to the root directory.
423 skip = len(self.root_directory) + 1
424 for root, _, files in os.walk(self.root_directory):
426 yield os.path.join(root[skip:], name)
430 Removes all temporary files.
432 if self.root_directory is None:
434 parent_directory = os.path.dirname(self.root_directory)
435 rmtree(parent_directory)
436 self.root_directory = None
445 class SourceContentsScanner(object):
447 SourceContentsScanner provides a method scan() to scan the contents of a
450 def __init__(self, source_id):
452 The argument source_id is the id of the DBSource object that
455 self.source_id = source_id
459 This method does the actual scan and fills in the associated SrcContents
460 property. It commits any changes to the database.
462 session = DBConn().session()
463 source = session.query(DBSource).get(self.source_id)
464 fileset = set(source.scan_contents())
465 for filename in fileset:
466 source.contents.append(SrcContents(file = filename))
471 def scan_all(class_, limit = None):
473 The class method scan_all() scans all source using multiple processes.
474 The number of sources to be scanned can be limited with the limit
475 argument. Returns the number of processed and remaining packages as a
478 session = DBConn().session()
479 query = session.query(DBSource).filter(DBSource.contents == None)
480 remaining = query.count
481 if limit is not None:
482 query = query.limit(limit)
483 processed = query.count()
485 for source in query.yield_per(100):
486 pool.apply_async(source_scan_helper, (source.source_id, ))
489 remaining = remaining()
491 return { 'processed': processed, 'remaining': remaining }
493 def source_scan_helper(source_id):
495 This function runs in a subprocess.
498 scanner = SourceContentsScanner(source_id)
500 except Exception as e: