"""
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
"""

################################################################################

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

################################################################################
from daklib.dbconn import *
from daklib.config import Config
from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter

from multiprocessing import Pool
from shutil import rmtree
from subprocess import Popen, PIPE, check_call
from tempfile import mkdtemp

import os.path
import signal
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component):
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        # keep a session bound to the suite's connection for all queries
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        # overrides may live in a different suite (e.g. for *-proposed-updates)
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'component':     self.component.component_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        # newest_binaries keeps only the highest version per package name;
        # the final select joins it with the per-binary contents and the
        # override table to produce one "file -> section/pkg,..." row per path
        sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return [item for item in self.fetch()]

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':        self.suite.suite_name,
            'debtype':      self.overridetype.overridetype,
            'architecture': self.architecture.arch_string,
        }
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # context manager closes the template file; the previous code leaked
        # the file handle
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # 'output' instead of 'file' to avoid shadowing the builtin
        output = writer.open()
        output.write(self.get_header())
        for item in self.fetch():
            output.write(item)
        writer.close()
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        self.suite = suite
        self.component = component
        # keep a session bound to the suite's connection for all queries
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        # newest_sources keeps only the highest version per source name;
        # the final select aggregates source names per contained file path
        sql = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files f on f.id = s.file
        join location l on l.id = f.location
        where s.id in (select source from src_associations where suite = :suite_id)
            and l.component = :component_id
        order by source, version desc;

select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return [item for item in self.fetch()]

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':     self.suite.suite_name,
            'component': self.component.component_name
        }
        return SourceContentsFileWriter(**values)

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # 'output' instead of 'file' to avoid shadowing the builtin
        output = writer.open()
        for item in self.fetch():
            output.write(item)
        writer.close()
def binary_helper(suite_id, arch_id, overridetype_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Returns the log message (list of strings) that ContentsWriter.log_result()
    receives as the apply_async callback argument.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, architecture.arch_string,
        overridetype.overridetype, component.component_name]
    contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
    contents_writer.write_file()
    # release the connection and hand the log message back to the callback
    session.close()
    return log_message
def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.

    Returns the log message (list of strings) that ContentsWriter.log_result()
    receives as the apply_async callback argument.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, 'source', component.component_name]
    contents_writer = SourceContentsWriter(suite, component)
    contents_writer.write_file()
    # release the connection and hand the log message back to the callback
    session.close()
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all Contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = None, component_names = None, force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.

        None defaults replace the former mutable list defaults; passing an
        explicit list behaves exactly as before.
        '''
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        if suite_names:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        component_query = session.query(Component)
        if component_names:
            component_query = component_query.filter(Component.component_name.in_(component_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            for component in component_query:
                component_id = component.component_id
                # handle source packages
                pool.apply_async(source_helper, (suite_id, component_id),
                    callback = class_.log_result)
                for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                    arch_id = architecture.arch_id
                    # handle 'deb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id),
                        callback = class_.log_result)
                    # handle 'udeb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id),
                        callback = class_.log_result)
        # wait for all subprocesses before returning the connection
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if len(fileset) == 0:
            # marker row so the package is not rescanned on every run
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file = filename))
        # persist the new rows and return the connection to the pool
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # bind the count method now but call it only after the pool has
        # finished, so 'remaining' reflects packages still unscanned then
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess. It scans the contents of the binary
    package identified by binary_id.
    '''
    scanner = BinaryContentsScanner(binary_id)
    scanner.scan()
def subprocess_setup():
    '''
    preexec_fn for subprocess calls: restore the default SIGPIPE handler.

    Python installs a SIGPIPE handler by default. This is usually not what
    non-Python subprocesses expect.
    '''
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''
    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.

        Raises CalledProcessError if dpkg-source fails.
        '''
        temp_directory = mkdtemp(dir = Config()['Dir::TempPath'])
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        # subprocess_setup restores SIGPIPE for the child process
        check_call(command, preexec_fn = subprocess_setup)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # +1 strips the path separator following the root directory
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files. Safe to call more than once.
        '''
        if self.root_directory is None:
            return
        # the parent is the mkdtemp() directory that holds 'root'
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        fileset = set(source.scan_contents())
        for filename in fileset:
            source.contents.append(SrcContents(file = filename))
        # persist the new rows and return the connection to the pool
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # bind the count method now but call it only after the pool has
        # finished, so 'remaining' reflects sources still unscanned then
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }
def source_scan_helper(source_id):
    '''
    This function runs in a subprocess. It scans the contents of the source
    package identified by source_id.

    Exceptions are caught and printed rather than propagated, so a single
    broken source does not abort the multiprocessing pool worker.
    '''
    try:
        scanner = SourceContentsScanner(source_id)
        scanner.scan()
    except Exception as e:
        print(e)