3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
from daklib.dbconn import *
from daklib.config import Config
from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter

from multiprocessing import Pool
from shutil import rmtree
from tempfile import mkdtemp

import daklib.daksubprocess
import os
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component):
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        # Overrides may live in a different suite (e.g. <suite>-overrides).
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite': self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'component': self.component.component_id,
            'arch_all': get_architecture('all', self.session).arch_id,
            'arch': self.architecture.arch_id,
            'type_id': self.overridetype.overridetype_id,
            'type': self.overridetype.overridetype,
        }

        # Pre-select the newest version of each binary package into a temp
        # table so the aggregate query below stays simple and fast.
        sql_create_temp = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        # One output row per file, with a comma-separated, sorted
        # "section/package" list aggregated over all owning packages.
        sql = '''
with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return [item for item in self.fetch()]

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'archive': self.suite.archive.path,
            'suite': self.suite.suite_name,
            'component': self.component.component_name,
            'debtype': self.overridetype.overridetype,
            'architecture': self.architecture.arch_string,
        }
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # Obtain the file handle from the writer and close via the writer so
        # compression/rename finalization happens.
        file = writer.open()
        file.write(self.get_header())
        for item in self.fetch():
            file.write(item)
        writer.close()
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        # self.suite is read by query() and writer(); store it here.
        self.suite = suite
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        params = {
            'suite_id': self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        # Pre-select the newest version of each source package into a temp
        # table, restricted to the requested suite and component.
        sql_create_temp = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files_archive_map af on s.file = af.file_id
        where s.id in (select source from src_associations where suite = :suite_id)
            and af.component_id = :component_id
        order by source, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        # One row per file with a sorted, comma-separated source package list.
        sql = '''
select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return [item for item in self.fetch()]

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'archive': self.suite.archive.path,
            'suite': self.suite.suite_name,
            'component': self.component.component_name
        }
        return SourceContentsFileWriter(**values)

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # Source contents files carry no header; stream the lines and close
        # via the writer so finalization happens.
        file = writer.open()
        for item in self.fetch():
            file.write(item)
        writer.close()
def binary_helper(suite_id, arch_id, overridetype_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Returns the log message (a list of identifying strings) so that the
    apply_async() callback in ContentsWriter.write_all can log it.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, architecture.arch_string,
        overridetype.overridetype, component.component_name]
    contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
    contents_writer.write_file()
    # Close the session and return the message for the logging callback;
    # previously log_message was computed but never returned.
    session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Returns the log message (a list of identifying strings) so that the
    apply_async() callback in ContentsWriter.write_all can log it.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, 'source', component.component_name]
    contents_writer = SourceContentsWriter(suite, component)
    contents_writer.write_file()
    # Close the session and return the message for the logging callback;
    # previously log_message was computed but never returned.
    session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, archive_names = [], suite_names = [], component_names = [], force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitely. Untouchable
        suites will be included if the force argument is set to True.
        '''
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        if len(archive_names) > 0:
            suite_query = suite_query.join(Suite.archive).filter(Archive.archive_name.in_(archive_names))
        if len(suite_names) > 0:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        component_query = session.query(Component)
        if len(component_names) > 0:
            component_query = component_query.filter(Component.component_name.in_(component_names))
        # Only skip untouchable suites when not forced; previously the filter
        # was applied unconditionally, contradicting the docstring.
        if not force:
            suite_query = suite_query.filter(Suite.untouchable == False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        # One worker pool for all suites; each task runs in its own process.
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            for component in component_query:
                component_id = component.component_id
                # handle source packages
                pool.apply_async(source_helper, (suite_id, component_id),
                    callback = class_.log_result)
                for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                    arch_id = architecture.arch_id
                    # handle 'deb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id),
                        callback = class_.log_result)
                    # handle 'udeb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id),
                        callback = class_.log_result)
        # Wait for all scheduled tasks before returning.
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if len(fileset) == 0:
            # Marker row so the package is not rescanned forever.
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file = filename))
        # Persist the new rows as the docstring promises, and release the
        # connection back to the pool.
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # Bind the unlimited count now; it is evaluated after the limited
        # batch has been processed so it reflects the full backlog.
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.
    '''
    scanner = BinaryContentsScanner(binary_id)
    # Actually run the scan; previously the scanner was constructed but
    # scan() was never invoked.
    scanner.scan()
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convinient function for accessing it.
    '''
    def __init__(self, dscfilename, tmpbasedir=None):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        basedir = tmpbasedir if tmpbasedir else Config()['Dir::TempPath']
        temp_directory = mkdtemp(dir = basedir)
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        daklib.daksubprocess.check_call(command)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        # Guard the open() so a missing changelog yields None as documented
        # instead of raising.
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # +1 strips the path separator following the root directory.
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            # Already cleaned up; make repeated calls (and __del__) safe.
            return
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None

    def __del__(self):
        '''
        Enforce cleanup.
        '''
        self.cleanup()
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        fileset = set(source.scan_contents())
        for filename in fileset:
            source.contents.append(SrcContents(file = filename))
        # Persist the new rows as the docstring promises, and release the
        # connection back to the pool.
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # Bind the unlimited count now; it is evaluated after the limited
        # batch has been processed so it reflects the full backlog.
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
488 def source_scan_helper(source_id):
490 This function runs in a subprocess.
493 scanner = SourceContentsScanner(source_id)
495 except Exception as e: