"""
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
"""

################################################################################

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

################################################################################

from daklib.dbconn import *
from daklib.config import Config
from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter

from multiprocessing import Pool
from shutil import rmtree
from subprocess import Popen, PIPE, check_call
from tempfile import mkdtemp

import os.path
import signal

class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component):
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that does most of the work.
        '''
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'component':     self.component.component_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }
        sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''
        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

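    # Each row returned by query() pairs a path with a comma separated list of
    # 'section/package' entries for the newest binaries owning it. An
    # illustrative row (values made up):
    #
    #   ('usr/bin/gzip', 'utils/gzip')
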
    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%-55s %s\n" % (filename, package_list)

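    # Worked example (illustrative values): formatline('usr/bin/gzip', 'utils/gzip')
    # returns 'usr/bin/gzip' left-justified in a 55 character wide column,
    # followed by a space, 'utils/gzip' and a newline.
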
    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return [item for item in self.fetch()]

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':        self.suite.suite_name,
            'component':    self.component.component_name,
            'debtype':      self.overridetype.overridetype,
            'architecture': self.architecture.arch_string,
        }
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        file = writer.open()
        file.write(self.get_header())
        for item in self.fetch():
            file.write(item)
        writer.close()

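# Illustrative usage sketch (not executed by this module; the suite,
# architecture and component names are placeholders, and the get_component()
# helper from daklib.dbconn is assumed to be available alongside the getters
# already used above):
#
#   session = DBConn().session()
#   suite = get_suite('unstable', session)
#   architecture = get_architecture('amd64', session)
#   overridetype = get_override_type('deb', session)
#   component = get_component('main', session)
#   BinaryContentsWriter(suite, architecture, overridetype, component).write_file()
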
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''
    def __init__(self, suite, component):
        self.suite = suite
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that does most of the work.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }
        sql = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files f on f.id = s.file
        join location l on l.id = f.location
        where s.id in (select source from src_associations where suite = :suite_id)
            and l.component = :component_id
        order by source, version desc;

select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''
        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        return [item for item in self.fetch()]

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'suite':     self.suite.suite_name,
            'component': self.component.component_name
        }
        return SourceContentsFileWriter(**values)

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        file = writer.open()
        for item in self.fetch():
            file.write(item)
        writer.close()

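# Illustrative usage sketch (same assumptions as the BinaryContentsWriter
# example above):
#
#   session = DBConn().session()
#   suite = get_suite('unstable', session)
#   component = get_component('main', session)
#   SourceContentsWriter(suite, component).write_file()
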
def binary_helper(suite_id, arch_id, overridetype_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, architecture.arch_string, \
        overridetype.overridetype, component.component_name]
    contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
    contents_writer.write_file()
    session.close()
    return log_message

def source_helper(suite_id, component_id):
    '''
    This function is called in a new subprocess and multiprocessing wants a top
    level function.
    '''
    session = DBConn().session(work_mem = 1000)
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, 'source', component.component_name]
    contents_writer = SourceContentsWriter(suite, component)
    contents_writer.write_file()
    session.close()
    return log_message

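# Both helpers are plain top level functions taking integer ids so that
# multiprocessing can pickle them for apply_async(). An illustrative dispatch,
# equivalent to what ContentsWriter.write_all() does below:
#
#   pool = Pool()
#   pool.apply_async(source_helper, (suite_id, component_id))
#   pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id))
#   pool.close()
#   pool.join()
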
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = [], component_names = [], force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.
        '''
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        if len(suite_names) > 0:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        component_query = session.query(Component)
        if len(component_names) > 0:
            component_query = component_query.filter(Component.component_name.in_(component_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        pool = Pool()
        for suite in suite_query:
            suite_id = suite.suite_id
            for component in component_query:
                component_id = component.component_id
                # handle source packages
                pool.apply_async(source_helper, (suite_id, component_id),
                    callback = class_.log_result)
                for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                    arch_id = architecture.arch_id
                    # handle 'deb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id), \
                        callback = class_.log_result)
                    # handle 'udeb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id), \
                        callback = class_.log_result)
        pool.close()
        pool.join()
        session.close()

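# Illustrative invocation sketch; `logger` can be any object that exposes a
# log(message) method (dak itself passes a daklib.daklog logger), and the
# suite/component names are placeholders:
#
#   ContentsWriter.write_all(logger, suite_names = ['unstable'],
#                            component_names = ['main'], force = False)
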
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''
    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        shall be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if len(fileset) == 0:
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file = filename))
        session.commit()
        session.close()

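    # Illustrative single-package use (binary_id is a placeholder primary key
    # of a row in the binaries table):
    #
    #   BinaryContentsScanner(binary_id).scan()
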
    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # keep the bound count method; the remaining count is taken after the
        # pool has finished
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }

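# Illustrative call (numbers made up): BinaryContentsScanner.scan_all(limit = 100)
# might return {'processed': 100, 'remaining': 4223} if 4323 binaries still
# lacked contents before the run.
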
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.
    '''
    scanner = BinaryContentsScanner(binary_id)
    scanner.scan()

def subprocess_setup():
    # Python installs a SIGPIPE handler by default. This is usually not what
    # non-Python subprocesses expect.
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)

class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''
    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        temp_directory = mkdtemp(dir = Config()['Dir::TempPath'])
        self.root_directory = os.path.join(temp_directory, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        check_call(command, preexec_fn = subprocess_setup)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            return
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None

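# Illustrative usage sketch ('/path/to/package.dsc' is a placeholder):
#
#   unpacked = UnpackedSource('/path/to/package.dsc')
#   changelog = unpacked.get_changelog_file()
#   for name in unpacked.get_all_filenames():
#       print name
#   unpacked.cleanup()
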
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        shall be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        fileset = set(source.scan_contents())
        for filename in fileset:
            source.contents.append(SrcContents(file = filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all sources using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # keep the bound count method; the remaining count is taken after the
        # pool has finished
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }

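# Illustrative calls (numbers made up): SourceContentsScanner(source_id).scan()
# scans a single source package, and SourceContentsScanner.scan_all(limit = 50)
# might return {'processed': 50, 'remaining': 812}.
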
def source_scan_helper(source_id):
    '''
    This function runs in a subprocess.
    '''
    scanner = SourceContentsScanner(source_id)
    scanner.scan()