3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
import os
import signal

from multiprocessing import Pool
from shutil import rmtree
from subprocess import Popen, PIPE, check_call
from tempfile import mkdtemp

from daklib.dbconn import *
from daklib.config import Config
from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component):
        '''
        Remember the target suite/architecture/overridetype/component and
        reuse the suite's database session for all queries.
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        # Overrides may live in a different suite (e.g. for *-updates).
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'component':     self.component.component_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        # Fill a session-local temp table with the newest version of each
        # binary package in the suite; the index speeds up the join below.
        sql_create_temp = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        # One output row per contents path, with a comma-separated
        # "section/package" list aggregated over all owning packages.
        sql = '''
with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        # Classic Contents layout: filename left-justified to 55 columns.
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return [item for item in self.fetch()]

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':        self.suite.suite_name,
            'component':    self.component.component_name,
            'debtype':      self.overridetype.overridetype,
            'architecture': self.architecture.arch_string,
        }
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # use a context manager so the template file handle is not leaked
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        output = writer.open()
        output.write(self.get_header())
        for item in self.fetch():
            output.write(item)
        writer.close()
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        '''
        Remember the target suite and component and reuse the suite's
        database session for all queries.
        '''
        self.suite = suite
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        # Fill a session-local temp table with the newest version of each
        # source package in the suite/component; the index speeds up the
        # aggregation query below.
        sql_create_temp = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files f on f.id = s.file
        join location l on l.id = f.location
        where s.id in (select source from src_associations where suite = :suite_id)
            and l.component = :component_id
        order by source, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        sql = '''
select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return [item for item in self.fetch()]

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':     self.suite.suite_name,
            'component': self.component.component_name
        }
        return SourceContentsFileWriter(**values)

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        output = writer.open()
        for item in self.fetch():
            output.write(item)
        writer.close()
def binary_helper(suite_id, arch_id, overridetype_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Writes one Contents-$arch file and returns a log message (list of the
    suite/arch/type/component names) for the caller's logging callback.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, architecture.arch_string, \
        overridetype.overridetype, component.component_name]
    contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
    contents_writer.write_file()
    # close the session so the connection returns to the pool, and hand the
    # log message back to the apply_async() callback
    session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Writes one Contents-source file and returns a log message (list of the
    suite/'source'/component names) for the caller's logging callback.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, 'source', component.component_name]
    contents_writer = SourceContentsWriter(suite, component)
    contents_writer.write_file()
    # close the session so the connection returns to the pool, and hand the
    # log message back to the apply_async() callback
    session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = None, component_names = None, force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitely. Untouchable
        suites will be included if the force argument is set to True.

        The work is fanned out to a multiprocessing Pool; each finished task
        reports back through log_result().
        '''
        # avoid mutable default arguments; None means "no filtering"
        if suite_names is None:
            suite_names = []
        if component_names is None:
            component_names = []
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        if len(suite_names) > 0:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        component_query = session.query(Component)
        if len(component_names) > 0:
            component_query = component_query.filter(Component.component_name.in_(component_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            for component in component_query:
                component_id = component.component_id
                # handle source packages
                pool.apply_async(source_helper, (suite_id, component_id),
                    callback = class_.log_result)
                for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                    arch_id = architecture.arch_id
                    # handle 'deb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id), \
                        callback = class_.log_result)
                    # handle 'udeb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id), \
                        callback = class_.log_result)
        # wait for all queued tasks before returning the connection
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if len(fileset) == 0:
            # insert a sentinel row so the package is not rescanned forever
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # bind the count method now but call it only after scanning, so
        # 'remaining' reflects what is still unscanned at the end
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.

    Scans the contents of the binary package with id binary_id.
    '''
    scanner = BinaryContentsScanner(binary_id)
    scanner.scan()
def subprocess_setup():
    '''
    Restore the default SIGPIPE disposition in a child process.

    Python installs a SIGPIPE handler (SIG_IGN) by default, which is usually
    not what non-Python subprocesses expect; run this as preexec_fn so the
    child starts with SIG_DFL instead.
    '''
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''
    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.

        Raises CalledProcessError if dpkg-source fails to extract the package.
        '''
        temp_directory = mkdtemp(dir = Config()['Dir::TempPath'])
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        check_call(command, preexec_fn = subprocess_setup)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            # missing changelog is an expected condition, not an error
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # +1 strips the path separator after the root directory prefix
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            # already cleaned up; make repeated calls safe
            return
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None

    def __del__(self):
        '''
        Enforce cleanup.
        '''
        self.cleanup()
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        fileset = set(source.scan_contents())
        for filename in fileset:
            source.contents.append(SrcContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # bind the count method now but call it only after scanning, so
        # 'remaining' reflects what is still unscanned at the end
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
496 def source_scan_helper(source_id):
498 This function runs in a subprocess.
501 scanner = SourceContentsScanner(source_id)
503 except Exception as e: