"""
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
"""
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
from daklib.dbconn import *
from daklib.config import Config
from daklib.filewriter import BinaryContentsFileWriter

from multiprocessing import Pool
from shutil import rmtree
from subprocess import Popen, PIPE, check_call
from tempfile import mkdtemp

import os.path
import signal
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''

    def __init__(self, suite, architecture, overridetype, component = None):
        '''
        The lists are generated for the given suite, architecture and
        overridetype ('deb' or 'udeb'); component is optional and restricts
        the override lookup to a single component.
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        # Overrides may live in a different suite (e.g. for *-updates).
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        if self.component is not None:
            params['component'] = self.component.component_id
            # With a component the override is unique per package.
            sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        else:
            # Without a component pick the most recently modified override
            # per (package, section) to keep the list unique.
            sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with

unique_override as
    (select distinct on (o.package, s.section) o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id
        order by o.package, s.section, o.modified desc)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return [item for item in self.fetch()]

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':        self.suite.suite_name,
            'architecture': self.architecture.arch_string,
        }
        if self.component is not None:
            values['component'] = self.component.component_name
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        header_file = None
        try:
            filename = os.path.join(Config()['Dir::Templates'], 'contents')
            header_file = open(filename)
            return header_file.read()
        finally:
            # Close the template file even if read() raises.
            if header_file:
                header_file.close()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        file = writer.open()
        file.write(self.get_header())
        for item in self.fetch():
            file.write(item)
        writer.close()
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''

    def __init__(self, suite, component):
        self.suite = suite
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        sql = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files f on f.id = s.file
        join location l on l.id = f.location
        where s.id in (select source from src_associations where suite = :suite_id)
            and l.component = :component_id
        order by source, version desc;

select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return [item for item in self.fetch()]

    def output_filename(self):
        '''
        Returns the name of the output file.
        '''
        values = {
            'root':      Config()['Dir::Root'],
            'suite':     self.suite.suite_name,
            'component': self.component.component_name
        }
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-source.gz" % values

    def write_file(self):
        '''
        Write the output file.
        '''
        command = ['gzip', '--rsyncable']
        final_filename = self.output_filename()
        temp_filename = final_filename + '.new'
        output_file = open(temp_filename, 'w')
        gzip = Popen(command, stdin = PIPE, stdout = output_file)
        for item in self.fetch():
            gzip.stdin.write(item)
        # Close stdin so gzip sees EOF, then wait for it to finish before
        # renaming the file into place.
        gzip.stdin.close()
        output_file.close()
        gzip.wait()
        os.chmod(temp_filename, 0o664)
        os.rename(temp_filename, final_filename)
def binary_helper(suite_id, arch_id, overridetype_id, component_id = None):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Writes the Contents file for one (suite, architecture, overridetype)
    combination, optionally restricted to one component, and returns the
    log message for the callback.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    log_message = [suite.suite_name, architecture.arch_string, overridetype.overridetype]
    if component_id is None:
        component = None
    else:
        component = Component.get(component_id, session)
        log_message.append(component.component_name)
    contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
    contents_writer.write_file()
    session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Writes the Contents-source file for one (suite, component) combination
    and returns the log message for the callback.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, 'source', component.component_name]
    contents_writer = SourceContentsWriter(suite, component)
    contents_writer.write_file()
    session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all contents files.
    '''

    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = [], force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitely. Untouchable
        suites will be included if the force argument is set to True.
        '''
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        if len(suite_names) > 0:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        main_id = get_component('main', session).component_id
        contrib_id = get_component('contrib', session).component_id
        non_free_id = get_component('non-free', session).component_id
        # Fan the per-suite work out to a process pool; results are logged
        # via the log_result callback.
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            # handle source packages
            pool.apply_async(source_helper, (suite_id, main_id),
                callback = class_.log_result)
            pool.apply_async(source_helper, (suite_id, contrib_id),
                callback = class_.log_result)
            pool.apply_async(source_helper, (suite_id, non_free_id),
                callback = class_.log_result)
            for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                arch_id = architecture.arch_id
                # handle 'deb' packages
                pool.apply_async(binary_helper, (suite_id, arch_id, deb_id), \
                    callback = class_.log_result)
                # handle 'udeb' packages for 'main' and 'non-free'
                pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, main_id), \
                    callback = class_.log_result)
                pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, non_free_id), \
                    callback = class_.log_result)
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''

    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if len(fileset) == 0:
            # Keep a marker row so the package is not rescanned forever.
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # Keep the bound method; it is called *after* the workers committed,
        # so it reports what is still unscanned.
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.

    Scans the contents of the DBBinary object identified by binary_id.
    '''
    scanner = BinaryContentsScanner(binary_id)
    scanner.scan()
def subprocess_setup():
    """Restore the default SIGPIPE disposition in a child process.

    Python installs its own SIGPIPE handler by default, which is usually
    not what non-Python subprocesses expect; reset it before exec.
    """
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient function for accessing it.
    '''

    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        temp_directory = mkdtemp(dir = Config()['Dir::TempPath'])
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        # subprocess_setup resets SIGPIPE so dpkg-source behaves normally.
        check_call(command, preexec_fn = subprocess_setup)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # +1 strips the path separator after the root directory.
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            return
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        # Mark as cleaned up so a second call (or __del__) is a no-op.
        self.root_directory = None

    def __del__(self):
        '''
        Enforce cleanup.
        '''
        self.cleanup()
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''

    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        fileset = set(source.scan_contents())
        for filename in fileset:
            source.contents.append(SrcContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # Keep the bound method; it is called *after* the workers committed,
        # so it reports what is still unscanned.
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
534 def source_scan_helper(source_id):
536 This function runs in a subprocess.
539 scanner = SourceContentsScanner(source_id)