from daklib.dbconn import *
from daklib.config import Config
-from daklib.threadpool import ThreadPool
-from multiprocessing import Pool
-from sqlalchemy import desc, or_
-from subprocess import Popen, PIPE, call
+from multiprocessing import Pool
+from shutil import rmtree
+from subprocess import Popen, PIPE, check_call
+from tempfile import mkdtemp
import os.path
'''
Returns a query object that is doing most of the work.
'''
+ overridesuite = self.suite
+ if self.suite.overridesuite is not None:
+ overridesuite = get_suite(self.suite.overridesuite, self.session)
params = {
- 'suite': self.suite.suite_id,
- 'arch_all': get_architecture('all', self.session).arch_id,
- 'arch': self.architecture.arch_id,
- 'type_id': self.overridetype.overridetype_id,
- 'type': self.overridetype.overridetype,
+ 'suite': self.suite.suite_id,
+ 'overridesuite': overridesuite.suite_id,
+ 'arch_all': get_architecture('all', self.session).arch_id,
+ 'arch': self.architecture.arch_id,
+ 'type_id': self.overridetype.overridetype_id,
+ 'type': self.overridetype.overridetype,
}
if self.component is not None:
unique_override as
(select o.package, s.section
from override o, section s
- where o.suite = :suite and o.type = :type_id and o.section = s.id and
+ where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
o.component = :component)
-select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
+select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
from newest_binaries b, bin_contents bc, unique_override o
where b.id = bc.binary_id and o.package = b.package
- order by bc.file, b.package'''
+ group by bc.file'''
else:
sql = '''
unique_override as
(select distinct on (o.package, s.section) o.package, s.section
from override o, section s
- where o.suite = :suite and o.type = :type_id and o.section = s.id
+ where o.suite = :overridesuite and o.type = :type_id and o.section = s.id
order by o.package, s.section, o.modified desc)
-select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
+select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
from newest_binaries b, bin_contents bc, unique_override o
where b.id = bc.binary_id and o.package = b.package
- order by bc.file, b.package'''
+ group by bc.file'''
- return self.session.query("file", "package").from_statement(sql). \
+ return self.session.query("file", "pkglist").from_statement(sql). \
params(params)
def formatline(self, filename, package_list):
'''
Returns a formatted string for the filename argument.
'''
- package_list = ','.join(package_list)
return "%-55s %s\n" % (filename, package_list)
def fetch(self):
'''
Yields a new line of the Contents-$arch.gz file in filename order.
'''
- last_filename = None
- package_list = []
- for filename, package in self.query().yield_per(100):
- if filename != last_filename:
- if last_filename is not None:
- yield self.formatline(last_filename, package_list)
- last_filename = filename
- package_list = []
- package_list.append(package)
- if last_filename is not None:
- yield self.formatline(last_filename, package_list)
+ for filename, package_list in self.query().yield_per(100):
+ yield self.formatline(filename, package_list)
# end transaction to return connection to pool
self.session.rollback()
Write the output file.
'''
command = ['gzip', '--rsyncable']
- output_file = open(self.output_filename(), 'w')
+ final_filename = self.output_filename()
+ temp_filename = final_filename + '.new'
+ output_file = open(temp_filename, 'w')
gzip = Popen(command, stdin = PIPE, stdout = output_file)
gzip.stdin.write(self.get_header())
for item in self.fetch():
gzip.stdin.close()
output_file.close()
gzip.wait()
+ os.chmod(temp_filename, 0664)
+ os.rename(temp_filename, final_filename)
@classmethod
- def write_all(class_, suite_names = [], force = False):
+ def log_result(class_, result):
+ '''
+ Writes a result message to the logfile.
+ '''
+ class_.logger.log(result)
+
+ @classmethod
+ def write_all(class_, logger, suite_names = [], force = False):
'''
Writes all Contents files for suites in list suite_names which defaults
to all 'touchable' suites if not specified explicitely. Untouchable
suites will be included if the force argument is set to True.
'''
+ class_.logger = logger
session = DBConn().session()
suite_query = session.query(Suite)
if len(suite_names) > 0:
suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
if not force:
suite_query = suite_query.filter_by(untouchable = False)
+ deb_id = get_override_type('deb', session).overridetype_id
+ udeb_id = get_override_type('udeb', session).overridetype_id
+ main_id = get_component('main', session).component_id
+ non_free_id = get_component('non-free', session).component_id
pool = Pool()
for suite in suite_query:
+ suite_id = suite.suite_id
for architecture in suite.get_architectures(skipsrc = True, skipall = True):
+ arch_id = architecture.arch_id
# handle 'deb' packages
- command = ['dak', 'contents', '-s', suite.suite_name, \
- 'generate_helper', architecture.arch_string, 'deb']
- pool.apply_async(call, (command, ))
+ pool.apply_async(generate_helper, (suite_id, arch_id, deb_id), \
+ callback = class_.log_result)
# handle 'udeb' packages for 'main' and 'non-free'
- command = ['dak', 'contents', '-s', suite.suite_name, \
- 'generate_helper', architecture.arch_string, 'udeb', 'main']
- pool.apply_async(call, (command, ))
- command = ['dak', 'contents', '-s', suite.suite_name, \
- 'generate_helper', architecture.arch_string, 'udeb', 'non-free']
- pool.apply_async(call, (command, ))
+ pool.apply_async(generate_helper, (suite_id, arch_id, udeb_id, main_id), \
+ callback = class_.log_result)
+ pool.apply_async(generate_helper, (suite_id, arch_id, udeb_id, non_free_id), \
+ callback = class_.log_result)
pool.close()
pool.join()
session.close()
-
-class ContentsScanner(object):
def generate_helper(suite_id, arch_id, overridetype_id, component_id = None):
    '''
    This function is called in a worker process of the Pool created by
    ContentsWriter.write_all().

    It receives database ids, looks up the matching Suite, Architecture,
    OverrideType and (optionally) Component with a session of its own and
    writes the Contents file for that combination.

    Returns a list with the suite name, the architecture string and the
    override type, followed by the component name if component_id was given.
    '''
    session = DBConn().session()
    try:
        suite = Suite.get(suite_id, session)
        architecture = Architecture.get(arch_id, session)
        overridetype = OverrideType.get(overridetype_id, session)
        log_message = [suite.suite_name, architecture.arch_string, overridetype.overridetype]
        if component_id is None:
            component = None
        else:
            component = Component.get(component_id, session)
            log_message.append(component.component_name)
        contents_writer = ContentsWriter(suite, architecture, overridetype, component)
        contents_writer.write_file()
    finally:
        # Pool workers handle many jobs, so do not leave the session open.
        session.close()
    return log_message
+
+
+class BinaryContentsScanner(object):
'''
- ContentsScanner provides a threadsafe method scan() to scan the contents of
- a DBBinary object.
+ BinaryContentsScanner provides a threadsafe method scan() to scan the
+ contents of a DBBinary object.
'''
- def __init__(self, binary):
+ def __init__(self, binary_id):
'''
- The argument binary is the actual DBBinary object that should be
- scanned.
+ The argument binary_id is the id of the DBBinary object that
+ should be scanned.
'''
- self.binary_id = binary.binary_id
+ self.binary_id = binary_id
def scan(self, dummy_arg = None):
'''
'''
session = DBConn().session()
binary = session.query(DBBinary).get(self.binary_id)
- for filename in binary.scan_contents():
+ fileset = set(binary.scan_contents())
+ if len(fileset) == 0:
+ fileset.add('EMPTY_PACKAGE')
+ for filename in fileset:
binary.contents.append(BinContents(file = filename))
session.commit()
session.close()
if limit is not None:
query = query.limit(limit)
processed = query.count()
- threadpool = ThreadPool()
+ pool = Pool()
for binary in query.yield_per(100):
- threadpool.queueTask(ContentsScanner(binary).scan)
- threadpool.joinAll()
+ pool.apply_async(binary_scan_helper, (binary.binary_id, ))
+ pool.close()
+ pool.join()
+ remaining = remaining()
+ session.close()
+ return { 'processed': processed, 'remaining': remaining }
+
def binary_scan_helper(binary_id):
    '''
    Job function for the worker processes: scans the contents of the binary
    with the id binary_id using a BinaryContentsScanner.
    '''
    BinaryContentsScanner(binary_id).scan()
+
+
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''
    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted with
        dpkg-source into a 'root' directory below a fresh temporary
        directory.

        Raises whatever check_call() raises if dpkg-source cannot be run or
        exits with a non-zero status; the temporary directory is removed
        again in that case.
        '''
        # Set first so that cleanup() and __del__() work if mkdtemp() fails.
        self.root_directory = None
        self.root_directory = os.path.join(mkdtemp(), 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-x', dscfilename,
            self.root_directory)
        try:
            # dpkg-source does not have a --quiet option
            with open(os.devnull, 'w') as devnull:
                check_call(command, stdout = devnull, stderr = devnull)
        except Exception:
            # do not leave a half extracted tree behind
            self.cleanup()
            raise

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if the file cannot
        be opened. The caller is responsible for closing the file object.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # strip the root directory and the path separator that follows it
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    def cleanup(self):
        '''
        Removes all temporary files, i.e. the parent of the root directory
        with everything below it. Calling it again does nothing.
        '''
        # getattr() keeps this safe on a partially initialised object
        if getattr(self, 'root_directory', None) is None:
            return
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None

    def __del__(self):
        '''
        Enforce cleanup.
        '''
        self.cleanup()
+
+
class SourceContentsScanner(object):
    '''
    SourceContentsScanner provides a method scan() to scan the contents of a
    DBSource object.
    '''
    def __init__(self, source_id):
        '''
        The argument source_id is the id of the DBSource object that
        should be scanned.
        '''
        self.source_id = source_id

    def scan(self):
        '''
        This method does the actual scan and fills in the associated SrcContents
        property. It commits any changes to the database. The session is
        closed even if scanning or committing fails.
        '''
        session = DBConn().session()
        try:
            source = session.query(DBSource).get(self.source_id)
            # a set drops filenames that are reported more than once
            fileset = set(source.scan_contents())
            for filename in fileset:
                source.contents.append(SrcContents(file = filename))
            session.commit()
        finally:
            session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all source using multiple processes.
        The number of sources to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict with the keys 'processed' and 'remaining'.

        'processed' is the number of sources selected for this run and
        'remaining' is the number of sources that still have no contents
        after all workers have finished.
        '''
        session = DBConn().session()
        try:
            # "== None" is intentional: it is a query filter expression,
            # not a Python identity test.
            unscanned = session.query(DBSource).filter(DBSource.contents == None)
            query = unscanned
            if limit is not None:
                query = query.limit(limit)
            processed = query.count()
            pool = Pool()
            try:
                for source in query.yield_per(100):
                    pool.apply_async(source_scan_helper, (source.source_id, ))
            finally:
                # wait for the jobs queued so far and reap the workers
                pool.close()
                pool.join()
            # counted without the limit and only after the workers are done
            remaining = unscanned.count()
        finally:
            session.close()
        return { 'processed': processed, 'remaining': remaining }
+
def source_scan_helper(source_id):
    '''
    Job function for the worker processes: scans the source with the id
    source_id. Any exception raised by the scan is printed and not
    propagated.
    '''
    try:
        SourceContentsScanner(source_id).scan()
    except Exception as error:
        print(error)
+