'''
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
'''
################################################################################

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

################################################################################
import os
import signal

from daklib.dbconn import *
from daklib.config import Config
from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter

from multiprocessing import Pool
from shutil import rmtree
from subprocess import Popen, PIPE, check_call
from tempfile import mkdtemp
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component):
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        # overrides may live in a different suite (e.g. for *-updates)
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'component':     self.component.component_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        # Collect only the newest version of each binary package into a
        # temp table so the main query sees one row per package.
        sql_create_temp = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        sql = '''
with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
        group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'archive':      self.suite.archive.path,
            'suite':        self.suite.suite_name,
            'component':    self.component.component_name,
            'debtype':      self.overridetype.overridetype,
            'architecture': self.architecture.arch_string,
        }
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # context manager guarantees the template file is closed again
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        output = writer.open()
        output.write(self.get_header())
        for item in self.fetch():
            output.write(item)
        writer.close()
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        self.suite = suite
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        # Collect only the newest version of each source package into a
        # temp table so the main query sees one row per source.
        sql_create_temp = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files f on f.id = s.file
        join location l on l.id = f.location
        where s.id in (select source from src_associations where suite = :suite_id)
            and l.component = :component_id
        order by source, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        sql = '''
select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'archive':   self.suite.archive.path,
            'suite':     self.suite.suite_name,
            'component': self.component.component_name
        }
        return SourceContentsFileWriter(**values)

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        output = writer.open()
        for item in self.fetch():
            output.write(item)
        writer.close()
def binary_helper(suite_id, arch_id, overridetype_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Returns the log message (list of strings) that the apply_async callback
    hands to ContentsWriter.log_result.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, architecture.arch_string,
        overridetype.overridetype, component.component_name]
    contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
    contents_writer.write_file()
    session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Returns the log message (list of strings) that the apply_async callback
    hands to ContentsWriter.log_result.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, 'source', component.component_name]
    contents_writer = SourceContentsWriter(suite, component)
    contents_writer.write_file()
    session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = None, component_names = None, force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitely. Untouchable
        suites will be included if the force argument is set to True.

        Note: the defaults are None sentinels rather than mutable list
        defaults; passing an explicit list behaves exactly as before.
        '''
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        if suite_names:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        component_query = session.query(Component)
        if component_names:
            component_query = component_query.filter(Component.component_name.in_(component_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            for component in component_query:
                component_id = component.component_id
                # handle source packages
                pool.apply_async(source_helper, (suite_id, component_id),
                    callback = class_.log_result)
                for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                    arch_id = architecture.arch_id
                    # handle 'deb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id),
                        callback = class_.log_result)
                    # handle 'udeb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id),
                        callback = class_.log_result)
        # wait for all subprocesses before returning
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if len(fileset) == 0:
            # marker row so an empty package is not rescanned next run
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # bind the unbound count method now, call it after the scan so the
        # 'remaining' figure reflects what the subprocesses left undone
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.
    '''
    scanner = BinaryContentsScanner(binary_id)
    scanner.scan()
def subprocess_setup():
    '''
    preexec_fn for child processes spawned via subprocess.

    Python installs a SIGPIPE handler by default. This is usually not what
    non-Python subprocesses expect, so restore the default disposition.
    '''
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convinient function for accessing it.
    '''
    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        temp_directory = mkdtemp(dir = Config()['Dir::TempPath'])
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        check_call(command, preexec_fn = subprocess_setup)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            # no changelog in the package; the contract promises None here
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # +1 skips the path separator after the root directory prefix
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            return
        # the mkdtemp() directory is the parent of root; remove it all
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        fileset = set(source.scan_contents())
        for filename in fileset:
            source.contents.append(SrcContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # bind the unbound count method now, call it after the scan so the
        # 'remaining' figure reflects what the subprocesses left undone
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
498 def source_scan_helper(source_id):
500 This function runs in a subprocess.
503 scanner = SourceContentsScanner(source_id)
505 except Exception as e: