"""
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
"""

################################################################################

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

################################################################################
from daklib.dbconn import *
from daklib.config import Config

from multiprocessing import Pool
from shutil import rmtree
from subprocess import Popen, PIPE, check_call
from tempfile import mkdtemp

import os.path
import signal
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component = None):
        '''
        The suite, architecture and overridetype arguments are mapped objects;
        component may be None to generate a suite-wide Contents file.
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        # reuse the session that loaded the suite object
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        The raw SQL builds a temp table of the newest binary of each package
        name for this suite/architecture, then joins it against bin_contents
        and the override table to produce (file, comma-separated package list)
        rows.
        '''
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        if self.component is not None:
            params['component'] = self.component.component_id
            # With a fixed component the (suite, type, component) override is
            # unique, so a plain subselect is sufficient.
            sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        else:
            # Without a component the same package may have overrides in
            # several components; keep the most recently modified one.
            sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with

unique_override as
    (select distinct on (o.package, s.section) o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id
        order by o.package, s.section, o.modified desc)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        # filename is left-justified to 55 columns as in classic Contents files
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return [item for item in self.fetch()]

    def output_filename(self):
        '''
        Returns the name of the output file.
        '''
        values = {
            'root':         Config()['Dir::Root'],
            'suite':        self.suite.suite_name,
            'architecture': self.architecture.arch_string,
        }
        if self.component is None:
            return "%(root)s/dists/%(suite)s/Contents-%(architecture)s.gz" % values
        values['component'] = self.component.component_name
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        header_file = None
        try:
            filename = os.path.join(Config()['Dir::Templates'], 'contents')
            header_file = open(filename)
            return header_file.read()
        finally:
            # make sure the template file handle is not leaked
            if header_file:
                header_file.close()

    def write_file(self):
        '''
        Write the output file: header plus all content lines, piped through
        'gzip --rsyncable', written to a .new file and renamed into place
        atomically.
        '''
        command = ['gzip', '--rsyncable']
        final_filename = self.output_filename()
        temp_filename = final_filename + '.new'
        output_file = open(temp_filename, 'w')
        gzip = Popen(command, stdin = PIPE, stdout = output_file)
        gzip.stdin.write(self.get_header())
        for item in self.fetch():
            gzip.stdin.write(item)
        # close the pipe and wait so the gzip stream is complete on disk
        gzip.stdin.close()
        output_file.close()
        gzip.wait()
        os.chmod(temp_filename, 0o664)
        os.rename(temp_filename, final_filename)
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        self.suite = suite
        self.component = component
        # reuse the session that loaded the suite object
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        Builds a temp table of the newest source of each source package name
        in this suite/component, then joins it against src_contents.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        sql = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files f on f.id = s.file
        join location l on l.id = f.location
        where s.id in (select source from src_associations where suite = :suite_id)
            and l.component = :component_id
        order by source, version desc;

select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return [item for item in self.fetch()]

    def output_filename(self):
        '''
        Returns the name of the output file.
        '''
        values = {
            'root':      Config()['Dir::Root'],
            'suite':     self.suite.suite_name,
            'component': self.component.component_name,
        }
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-source.gz" % values

    def write_file(self):
        '''
        Write the output file via 'gzip --rsyncable' to a .new file and
        rename it into place atomically.
        '''
        command = ['gzip', '--rsyncable']
        final_filename = self.output_filename()
        temp_filename = final_filename + '.new'
        output_file = open(temp_filename, 'w')
        gzip = Popen(command, stdin = PIPE, stdout = output_file)
        for item in self.fetch():
            gzip.stdin.write(item)
        # close the pipe and wait so the gzip stream is complete on disk
        gzip.stdin.close()
        output_file.close()
        gzip.wait()
        os.chmod(temp_filename, 0o664)
        os.rename(temp_filename, final_filename)
def binary_helper(suite_id, arch_id, overridetype_id, component_id = None):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Writes one Contents file for the given suite/architecture/overridetype
    (and optional component) and returns the log message list that the
    apply_async callback passes to the logger.
    '''
    session = DBConn().session()
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    log_message = [suite.suite_name, architecture.arch_string, overridetype.overridetype]
    if component_id is None:
        component = None
    else:
        component = Component.get(component_id, session)
        log_message.append(component.component_name)
    contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
    contents_writer.write_file()
    # return the connection to the pool and report back to the callback
    session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Writes one Contents-source file for the given suite/component and returns
    the log message list that the apply_async callback passes to the logger.
    '''
    session = DBConn().session()
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, 'source', component.component_name]
    contents_writer = SourceContentsWriter(suite, component)
    contents_writer.write_file()
    # return the connection to the pool and report back to the callback
    session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all Contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = [], force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitely. Untouchable
        suites will be included if the force argument is set to True.

        The actual work is fanned out to a multiprocessing pool; each worker
        returns a log message list which is passed to log_result.
        '''
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        if len(suite_names) > 0:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        main_id = get_component('main', session).component_id
        contrib_id = get_component('contrib', session).component_id
        non_free_id = get_component('non-free', session).component_id
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            # handle source packages
            pool.apply_async(source_helper, (suite_id, main_id),
                callback = class_.log_result)
            pool.apply_async(source_helper, (suite_id, contrib_id),
                callback = class_.log_result)
            pool.apply_async(source_helper, (suite_id, non_free_id),
                callback = class_.log_result)
            for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                arch_id = architecture.arch_id
                # handle 'deb' packages
                pool.apply_async(binary_helper, (suite_id, arch_id, deb_id), \
                    callback = class_.log_result)
                # handle 'udeb' packages for 'main' and 'non-free'
                pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, main_id), \
                    callback = class_.log_result)
                pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, non_free_id), \
                    callback = class_.log_result)
        # wait for all workers before returning the session to the pool
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if len(fileset) == 0:
            # record empty packages explicitly so they are not rescanned
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # keep the *unbound* count method; it is called after the workers
        # finish so 'remaining' reflects the post-scan state
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.

    It scans the contents of a single binary package identified by binary_id.
    '''
    scanner = BinaryContentsScanner(binary_id)
    scanner.scan()
def subprocess_setup():
    # Restore the default SIGPIPE disposition for child processes: Python
    # installs its own SIGPIPE handler, which non-Python subprocesses
    # usually do not expect.
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''
    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        temp_directory = mkdtemp(dir = Config()['Dir::TempPath'])
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        # subprocess_setup restores SIGPIPE for the dpkg-source child
        check_call(command, preexec_fn = subprocess_setup)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            return
        # the mkdtemp() directory is the parent of root_directory
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None

    def __del__(self):
        '''
        Enforce cleanup.
        '''
        self.cleanup()
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        fileset = set(source.scan_contents())
        for filename in fileset:
            source.contents.append(SrcContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # keep the *unbound* count method; it is called after the workers
        # finish so 'remaining' reflects the post-scan state
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def source_scan_helper(source_id):
    '''
    This function runs in a subprocess.

    It scans the contents of a single source package identified by source_id.
    '''
    scanner = SourceContentsScanner(source_id)
    scanner.scan()