"""
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
"""
################################################################################

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

################################################################################
import os

from daklib.dbconn import *
from daklib.config import Config

from multiprocessing import Pool
from shutil import rmtree
from subprocess import Popen, PIPE, check_call
from tempfile import mkdtemp
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component = None):
        '''
        The suite, architecture and overridetype objects select the binaries
        to list; component may be None to write a suite-wide Contents file.
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        # reuse the session that is already bound to the suite object
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        # overrides may live in a different suite (Suite.overridesuite)
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        if self.component is not None:
            params['component'] = self.component.component_id
            # per-component variant: the override row must match the component
            sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        else:
            # suite-wide variant: pick the newest override per package/section
            sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with unique_override as
    (select distinct on (o.package, s.section) o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id
        order by o.package, s.section, o.modified desc)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def output_filename(self):
        '''
        Returns the name of the output file.
        '''
        values = {
            'root':         Config()['Dir::Root'],
            'suite':        self.suite.suite_name,
            'architecture': self.architecture.arch_string,
        }
        if self.component is None:
            return "%(root)s/dists/%(suite)s/Contents-%(architecture)s.gz" % values
        values['component'] = self.component.component_name
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # context manager closes the template file even if read() fails
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file: pipe header and lines through
        'gzip --rsyncable' into a temporary file and rename it into place.
        '''
        command = ['gzip', '--rsyncable']
        final_filename = self.output_filename()
        temp_filename = final_filename + '.new'
        output_file = open(temp_filename, 'w')
        gzip = Popen(command, stdin = PIPE, stdout = output_file)
        gzip.stdin.write(self.get_header())
        for item in self.fetch():
            gzip.stdin.write(item)
        # close the pipe and wait for gzip to flush before chmod/rename
        gzip.stdin.close()
        output_file.close()
        gzip.wait()
        os.chmod(temp_filename, 0o664)
        os.rename(temp_filename, final_filename)
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        '''
        The suite and component objects select the sources to be listed.
        '''
        self.suite = suite
        self.component = component
        # reuse the session that is already bound to the suite object
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        sql = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files f on f.id = s.file
        join location l on l.id = f.location
        where s.id in (select source from src_associations where suite = :suite_id)
            and l.component = :component_id
        order by source, version desc;

select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return list(self.fetch())

    def output_filename(self):
        '''
        Returns the name of the output file.
        '''
        values = {
            'root':      Config()['Dir::Root'],
            'suite':     self.suite.suite_name,
            'component': self.component.component_name,
        }
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-source.gz" % values

    def write_file(self):
        '''
        Write the output file: pipe the lines through 'gzip --rsyncable'
        into a temporary file and rename it into place.
        '''
        command = ['gzip', '--rsyncable']
        final_filename = self.output_filename()
        temp_filename = final_filename + '.new'
        output_file = open(temp_filename, 'w')
        gzip = Popen(command, stdin = PIPE, stdout = output_file)
        for item in self.fetch():
            gzip.stdin.write(item)
        # close the pipe and wait for gzip to flush before chmod/rename
        gzip.stdin.close()
        output_file.close()
        gzip.wait()
        os.chmod(temp_filename, 0o664)
        os.rename(temp_filename, final_filename)
def binary_helper(suite_id, arch_id, overridetype_id, component_id = None):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function. It writes one binary Contents file and returns the log
    message (suite, architecture, overridetype and optionally component names)
    that the caller's callback passes to the logger.
    '''
    session = DBConn().session()
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    log_message = [suite.suite_name, architecture.arch_string, overridetype.overridetype]
    if component_id is None:
        component = None
    else:
        component = Component.get(component_id, session)
        log_message.append(component.component_name)
    contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
    contents_writer.write_file()
    session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function. It writes one source Contents file and returns the log
    message (suite and component names) that the caller's callback passes to
    the logger.
    '''
    session = DBConn().session()
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, component.component_name]
    contents_writer = SourceContentsWriter(suite, component)
    contents_writer.write_file()
    session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all Contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = None, force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitely. Untouchable
        suites will be included if the force argument is set to True.
        '''
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        # None (or an empty list) means: process all suites
        if suite_names:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        main_id = get_component('main', session).component_id
        contrib_id = get_component('contrib', session).component_id
        non_free_id = get_component('non-free', session).component_id
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            # handle source packages
            pool.apply_async(source_helper, (suite_id, main_id),
                callback = class_.log_result)
            pool.apply_async(source_helper, (suite_id, contrib_id),
                callback = class_.log_result)
            pool.apply_async(source_helper, (suite_id, non_free_id),
                callback = class_.log_result)
            for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                arch_id = architecture.arch_id
                # handle 'deb' packages
                pool.apply_async(binary_helper, (suite_id, arch_id, deb_id), \
                    callback = class_.log_result)
                # handle 'udeb' packages for 'main' and 'non-free'
                pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, main_id), \
                    callback = class_.log_result)
                pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, non_free_id), \
                    callback = class_.log_result)
        # wait for all workers before returning the connection to the pool
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if len(fileset) == 0:
            # keep a marker row so the package is not rescanned forever
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        # SQLAlchemy requires '== None' here, not 'is None'
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # keep the bound count() method; it is re-evaluated after the scan
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess. It scans the contents of the binary
    package identified by binary_id.
    '''
    scanner = BinaryContentsScanner(binary_id)
    scanner.scan()
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''
    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        self.root_directory = os.path.join(mkdtemp(), 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-x', dscfilename,
            self.root_directory)
        # dpkg-source does not have a --quiet option
        with open(os.devnull, 'w') as devnull:
            check_call(command, stdout = devnull, stderr = devnull)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # +1 to drop the path separator after the root directory prefix
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files. Safe to call more than once.
        '''
        if self.root_directory is None:
            return
        # the temporary directory created by mkdtemp() is the parent of 'root'
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        fileset = set(source.scan_contents())
        for filename in fileset:
            source.contents.append(SrcContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        # SQLAlchemy requires '== None' here, not 'is None'
        query = session.query(DBSource).filter(DBSource.contents == None)
        # keep the bound count() method; it is re-evaluated after the scan
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def source_scan_helper(source_id):
    '''
    This function runs in a subprocess. It scans the contents of the source
    package identified by source_id.
    '''
    scanner = SourceContentsScanner(source_id)
    scanner.scan()