3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
28 from daklib.dbconn import *
29 from daklib.config import Config
30 from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter
32 from multiprocessing import Pool
33 from shutil import rmtree
34 from tempfile import mkdtemp
36 import daklib.daksubprocess
class BinaryContentsWriter(object):
    '''
    BinaryContentsWriter writes the Contents-$arch.gz files.
    '''

    def __init__(self, suite, architecture, overridetype, component):
        '''
        Remember the (suite, architecture, overridetype, component) tuple the
        Contents file is generated for and reuse the suite's session.
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        Builds a temp table holding the newest version of every binary
        package in the suite, then joins it with bin_contents and the
        override table to produce (file, pkglist) rows.
        '''
        overridesuite = self.suite
        # Overrides may live in a different suite (e.g. for *-updates).
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'component':     self.component.component_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        sql_create_temp = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        sql = '''
with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        # list() instead of a pass-through comprehension (same result, clearer).
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'archive':      self.suite.archive.path,
            'suite':        self.suite.suite_name,
            'component':    self.component.component_name,
            'debtype':      self.overridetype.overridetype,
            'architecture': self.architecture.arch_string,
        }
        return BinaryContentsFileWriter(**values)

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # 'output' instead of 'file' to avoid shadowing the builtin.
        output = writer.open()
        output.write(self.get_header())
        for item in self.fetch():
            output.write(item)
        writer.close()
class SourceContentsWriter(object):
    '''
    SourceContentsWriter writes the Contents-source.gz files.
    '''

    def __init__(self, suite, component):
        '''
        Remember suite and component and reuse the suite's session.
        '''
        self.suite = suite
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        Builds a temp table with the newest version of every source package
        in the suite/component, then joins it with src_contents.
        '''
        params = {
            'suite_id':     self.suite.suite_id,
            'component_id': self.component.component_id,
        }

        sql_create_temp = '''
create temp table newest_sources (
    id integer primary key,
    source text);

create index sources_binaries_by_source on newest_sources (source);

insert into newest_sources (id, source)
    select distinct on (source) s.id, s.source from source s
        join files_archive_map af on s.file = af.file_id
        where s.id in (select source from src_associations where suite = :suite_id)
            and af.component_id = :component_id
        order by source, version desc;'''
        self.session.execute(sql_create_temp, params=params)

        sql = '''
select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
    from newest_sources s, src_contents sc
    where s.id = sc.source_id group by sc.file'''

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.
        '''
        return "%s\t%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-source.gz file in filename order.
        '''
        for filename, package_list in self.query().yield_per(100):
            yield self.formatline(filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-source.gz file.
        '''
        # list() instead of a pass-through comprehension (same result, clearer).
        return list(self.fetch())

    def writer(self):
        '''
        Returns a writer object.
        '''
        values = {
            'archive':   self.suite.archive.path,
            'suite':     self.suite.suite_name,
            'component': self.component.component_name
        }
        return SourceContentsFileWriter(**values)

    def write_file(self):
        '''
        Write the output file.
        '''
        writer = self.writer()
        # 'output' instead of 'file' to avoid shadowing the builtin.
        output = writer.open()
        for item in self.fetch():
            output.write(item)
        writer.close()
def binary_helper(suite_id, arch_id, overridetype_id, component_id):
    '''
    Write one binary Contents file. Called in a new subprocess, so
    multiprocessing wants a top level function; all arguments are plain
    database ids that are re-resolved in this process's own session.
    '''
    session = DBConn().session(work_mem=1000)
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    component = Component.get(component_id, session)
    log_message = [
        suite.suite_name,
        architecture.arch_string,
        overridetype.overridetype,
        component.component_name,
    ]
    BinaryContentsWriter(suite, architecture, overridetype, component).write_file()
    session.close()
    # NOTE(review): returned value feeds ContentsWriter.log_result via the
    # pool callback — confirm against the unsampled original tail.
    return log_message
def source_helper(suite_id, component_id):
    '''
    Write one source Contents file. Called in a new subprocess, so
    multiprocessing wants a top level function; arguments are plain
    database ids re-resolved in this process's own session.
    '''
    session = DBConn().session(work_mem=1000)
    suite = Suite.get(suite_id, session)
    component = Component.get(component_id, session)
    log_message = [suite.suite_name, 'source', component.component_name]
    SourceContentsWriter(suite, component).write_file()
    session.close()
    # NOTE(review): returned value feeds ContentsWriter.log_result via the
    # pool callback — confirm against the unsampled original tail.
    return log_message
class ContentsWriter(object):
    '''
    Loop over all suites, architectures, overridetypes, and components to write
    all contents files.
    '''
    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, archive_names=None, suite_names=None, component_names=None, force=False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.

        archive_names, suite_names and component_names default to None,
        meaning "no filtering" for that dimension (an empty list behaves
        the same, keeping the old call sites working).
        '''
        # Mutable-default-argument fix: normalise None to an empty list here
        # instead of using [] defaults shared across calls.
        archive_names = archive_names or []
        suite_names = suite_names or []
        component_names = component_names or []
        class_.logger = logger
        session = DBConn().session()
        suite_query = session.query(Suite)
        if archive_names:
            suite_query = suite_query.join(Suite.archive).filter(Archive.archive_name.in_(archive_names))
        if suite_names:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        component_query = session.query(Component)
        if component_names:
            component_query = component_query.filter(Component.component_name.in_(component_names))
        if not force:
            # SQLAlchemy needs '== False' here, not 'is False'.
            suite_query = suite_query.filter(Suite.untouchable == False)
        deb_id = get_override_type('deb', session).overridetype_id
        udeb_id = get_override_type('udeb', session).overridetype_id
        pool = Pool()
        # Lock tables so that nobody can change things underneath us
        session.execute("LOCK TABLE bin_contents IN SHARE MODE")
        session.execute("LOCK TABLE src_contents IN SHARE MODE")
        for suite in suite_query:
            suite_id = suite.suite_id
            for component in component_query:
                component_id = component.component_id
                # handle source packages
                pool.apply_async(source_helper, (suite_id, component_id),
                    callback=class_.log_result)
                for architecture in suite.get_architectures(skipsrc=True, skipall=True):
                    arch_id = architecture.arch_id
                    # handle 'deb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id),
                        callback=class_.log_result)
                    # handle 'udeb' packages
                    pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id),
                        callback=class_.log_result)
        pool.close()
        pool.join()
        session.close()
class BinaryContentsScanner(object):
    '''
    BinaryContentsScanner provides a threadsafe method scan() to scan the
    contents of a DBBinary object.
    '''

    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg=None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        fileset = set(binary.scan_contents())
        if not fileset:
            # Marker entry so a contents-less package is not rescanned forever.
            fileset.add('EMPTY_PACKAGE')
        for filename in fileset:
            binary.contents.append(BinContents(file=filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit=None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # Deliberately capture the *bound method* (no call) so that the
        # remaining count can be taken after the workers have finished.
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for binary in query.yield_per(100):
            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
        pool.close()
        pool.join()
        remaining = remaining()
        session.close()
        return {'processed': processed, 'remaining': remaining}
def binary_scan_helper(binary_id):
    '''
    This function runs in a subprocess.
    '''
    # NOTE(review): tail of this function is not visible in the sampled
    # source — presumably it just runs the scan; confirm.
    BinaryContentsScanner(binary_id).scan()
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''

    def __init__(self, dscfilename, tmpbasedir=None):
        '''
        The dscfilename is a name of a DSC file that will be extracted.
        '''
        basedir = tmpbasedir or Config()['Dir::TempPath']
        tmpdir = mkdtemp(dir=basedir)
        self.root_directory = os.path.join(tmpdir, 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
            dscfilename, self.root_directory)
        daklib.daksubprocess.check_call(command)

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        prefix_len = len(self.root_directory) + 1
        for dirpath, _, filenames in os.walk(self.root_directory):
            for entry in filenames:
                yield os.path.join(dirpath[prefix_len:], entry)

    def cleanup(self):
        '''
        Removes all temporary files.
        '''
        if self.root_directory is None:
            return
        # The whole mkdtemp() directory (parent of 'root') goes away.
        rmtree(os.path.dirname(self.root_directory))
        self.root_directory = None
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''

    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database.
        '''
        session = DBConn().session()
        source = session.query(DBSource).get(self.source_id)
        for filename in set(source.scan_contents()):
            source.contents.append(SrcContents(file=filename))
        session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit=None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        query = session.query(DBSource).filter(DBSource.contents == None)
        # Bound method captured uncalled: the remaining count is taken only
        # after all worker processes have finished.
        count_remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        pool = Pool()
        for source in query.yield_per(100):
            pool.apply_async(source_scan_helper, (source.source_id, ))
        pool.close()
        pool.join()
        remaining = count_remaining()
        session.close()
        return {'processed': processed, 'remaining': remaining}
493 def source_scan_helper(source_id):
495 This function runs in a subprocess.
498 scanner = SourceContentsScanner(source_id)
500 except Exception as e: