From: Torsten Werner
Date: Thu, 24 Mar 2011 08:36:52 +0000 (+0100)
Subject: Merge branch 'contents' into pkgsrc
X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=3342185a1c804f3fe03822c21bad57ce9db009ed;hp=12440bbf9173956e70800c86e6e773f067f7e2ab;p=dak.git

Merge branch 'contents' into pkgsrc
---

diff --git a/config/debian/cron.unchecked b/config/debian/cron.unchecked
index 7b81e511..10bd930c 100755
--- a/config/debian/cron.unchecked
+++ b/config/debian/cron.unchecked
@@ -106,5 +106,5 @@ if [ ! -z "$changes" ]; then
     do_dists
 fi
 
-dak contents -l 10000 scan
+dak contents -l 10000 binary-scan
 pg_timestamp postunchecked
diff --git a/dak/contents.py b/dak/contents.py
index d763f869..ee904b2a 100755
--- a/dak/contents.py
+++ b/dak/contents.py
@@ -40,7 +40,8 @@ import apt_pkg
 
 from daklib.config import Config
 from daklib.dbconn import *
-from daklib.contents import ContentsScanner, ContentsWriter
+from daklib.contents import BinaryContentsScanner, ContentsWriter, \
+    SourceContentsScanner
 from daklib import daklog
 from daklib import utils
 
@@ -53,8 +54,13 @@ SUBCOMMANDS
     generate
         generate Contents-$arch.gz files
 
-    scan
-        scan the debs in the existing pool and load contents into the bin_contents table
+    scan-source
+        scan the source packages in the existing pool and load contents into
+        the src_contents table
+
+    scan-binary
+        scan the (u)debs in the existing pool and load contents into the
+        bin_contents table
 
 OPTIONS
     -h, --help
@@ -67,7 +73,7 @@ OPTIONS for generate
     -f, --force
         write Contents files for suites marked as untouchable, too
 
-OPTIONS for scan
+OPTIONS for scan-source and scan-binary
     -l, --limit=NUMBER
         maximum number of packages to scan
 """
@@ -82,9 +88,19 @@ def write_all(cnf, suite_names = [], force = None):
 ################################################################################
 
-def scan_all(cnf, limit):
-    Logger = daklog.Logger(cnf.Cnf, 'contents scan')
-    result = ContentsScanner.scan_all(limit)
+def binary_scan_all(cnf, limit):
+    Logger = daklog.Logger(cnf.Cnf, 'contents scan-binary')
+    result = BinaryContentsScanner.scan_all(limit)
+    processed = '%(processed)d packages processed' % result
+    remaining = '%(remaining)d packages remaining' % result
+    Logger.log([processed, remaining])
+    Logger.close()
+
+################################################################################
+
+def source_scan_all(cnf, limit):
+    Logger = daklog.Logger(cnf.Cnf, 'contents scan-source')
+    result = SourceContentsScanner.scan_all(limit)
     processed = '%(processed)d packages processed' % result
     remaining = '%(remaining)d packages remaining' % result
     Logger.log([processed, remaining])
     Logger.close()
@@ -113,8 +129,12 @@ def main():
     if len(options['Limit']) > 0:
         limit = int(options['Limit'])
 
-    if args[0] == 'scan':
-        scan_all(cnf, limit)
+    if args[0] == 'scan-source':
+        source_scan_all(cnf, limit)
+        return
+
+    if args[0] == 'scan-binary':
+        binary_scan_all(cnf, limit)
         return
 
     suite_names = utils.split_args(options['Suite'])
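The dak/contents.py hunks above split the old scan subcommand in two: scan-source drives SourceContentsScanner.scan_all() into the new src_contents table, scan-binary drives BinaryContentsScanner.scan_all() into bin_contents, and main() matches those two literal strings, so callers such as the cron.unchecked line (which spells it binary-scan) have to agree with whatever main() tests for. A standalone sketch of that dispatch shape, with stub handlers standing in for the real *_scan_all() drivers and nothing taken from dak beyond what the hunks show:

# Standalone sketch (not dak code) of the subcommand dispatch added above.
import sys

def binary_scan_all(limit):
    return {'processed': 0, 'remaining': 0}   # stub for BinaryContentsScanner.scan_all(limit)

def source_scan_all(limit):
    return {'processed': 0, 'remaining': 0}   # stub for SourceContentsScanner.scan_all(limit)

HANDLERS = {'scan-binary': binary_scan_all, 'scan-source': source_scan_all}

def main(args, limit=None):
    # args[0] is the subcommand name and must match the table keys exactly
    handler = HANDLERS.get(args[0])
    if handler is None:
        sys.exit('unknown subcommand: %s' % args[0])
    result = handler(limit)
    print('%(processed)d packages processed, %(remaining)d packages remaining' % result)

if __name__ == '__main__':
    main(['scan-source'], limit=10000)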
diff --git a/daklib/contents.py b/daklib/contents.py
index a158e8fc..2a29b2e5 100755
--- a/daklib/contents.py
+++ b/daklib/contents.py
@@ -190,12 +190,8 @@ select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package
         gzip.stdin.close()
         output_file.close()
         gzip.wait()
-        try:
-            os.remove(final_filename)
-        except:
-            pass
+        os.chmod(temp_filename, 0664)
         os.rename(temp_filename, final_filename)
-        os.chmod(final_filename, 0664)
 
     @classmethod
     def log_result(class_, result):
@@ -258,10 +254,10 @@ def generate_helper(suite_id, arch_id, overridetype_id, component_id = None):
     return log_message
 
 
-class ContentsScanner(object):
+class BinaryContentsScanner(object):
     '''
-    ContentsScanner provides a threadsafe method scan() to scan the contents of
-    a DBBinary object.
+    BinaryContentsScanner provides a threadsafe method scan() to scan the
+    contents of a DBBinary object.
     '''
     def __init__(self, binary_id):
         '''
@@ -302,18 +298,18 @@ class ContentsScanner(object):
         processed = query.count()
         pool = Pool()
         for binary in query.yield_per(100):
-            pool.apply_async(scan_helper, (binary.binary_id, ))
+            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
         pool.close()
         pool.join()
         remaining = remaining()
         session.close()
         return { 'processed': processed, 'remaining': remaining }
 
 
-def scan_helper(binary_id):
+def binary_scan_helper(binary_id):
     '''
     This function runs in a subprocess.
     '''
-    scanner = ContentsScanner(binary_id)
+    scanner = BinaryContentsScanner(binary_id)
     scanner.scan()
 
@@ -376,3 +372,63 @@ class UnpackedSource(object):
         Enforce cleanup.
         '''
         self.cleanup()
+
+
+class SourceContentsScanner(object):
+    '''
+    SourceContentsScanner provides a method scan() to scan the contents of a
+    DBSource object.
+    '''
+    def __init__(self, source_id):
+        '''
+        The argument source_id is the id of the DBSource object that
+        should be scanned.
+        '''
+        self.source_id = source_id
+
+    def scan(self):
+        '''
+        This method does the actual scan and fills in the associated SrcContents
+        property. It commits any changes to the database.
+        '''
+        session = DBConn().session()
+        source = session.query(DBSource).get(self.source_id)
+        fileset = set(source.scan_contents())
+        for filename in fileset:
+            source.contents.append(SrcContents(file = filename))
+        session.commit()
+        session.close()
+
+    @classmethod
+    def scan_all(class_, limit = None):
+        '''
+        The class method scan_all() scans all source using multiple processes.
+        The number of sources to be scanned can be limited with the limit
+        argument. Returns the number of processed and remaining packages as a
+        dict.
+        '''
+        session = DBConn().session()
+        query = session.query(DBSource).filter(DBSource.contents == None)
+        remaining = query.count
+        if limit is not None:
+            query = query.limit(limit)
+        processed = query.count()
+        pool = Pool()
+        for source in query.yield_per(100):
+            pool.apply_async(source_scan_helper, (source.source_id, ))
+        pool.close()
+        pool.join()
+        remaining = remaining()
+        session.close()
+        return { 'processed': processed, 'remaining': remaining }
+
+def source_scan_helper(source_id):
+    '''
+    This function runs in a subprocess.
+    '''
+    try:
+        scanner = SourceContentsScanner(source_id)
+        scanner.scan()
+    except Exception, e:
+        print e
+
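Both scanner classes above share the same fan-out pattern in scan_all(): query the ids of not-yet-scanned packages, hand each id to a module-level helper via multiprocessing.Pool.apply_async() (only the integer id crosses the process boundary, each worker opens its own database session), then close()/join() the pool and report processed/remaining counts. A self-contained sketch of that pattern with a trivial work function, assuming nothing from dak:

# Standalone sketch of the Pool-based scan_all() fan-out used above.
from multiprocessing import Pool

def scan_one(item_id):
    # stand-in for binary_scan_helper()/source_scan_helper(): each worker
    # would open its own session and scan a single package by id
    return item_id * item_id

def scan_all(ids, limit=None):
    todo = ids if limit is None else ids[:limit]
    pool = Pool()
    for item_id in todo:
        pool.apply_async(scan_one, (item_id, ))
    pool.close()
    pool.join()
    return {'processed': len(todo), 'remaining': len(ids) - len(todo)}

if __name__ == '__main__':
    print(scan_all(range(10), limit=4))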
diff --git a/daklib/dbconn.py b/daklib/dbconn.py
index 6317b584..d4caf01c 100755
--- a/daklib/dbconn.py
+++ b/daklib/dbconn.py
@@ -2195,6 +2195,18 @@ __all__.append('get_sections')
 
 ################################################################################
 
+class SrcContents(ORMObject):
+    def __init__(self, file = None, source = None):
+        self.file = file
+        self.source = source
+
+    def properties(self):
+        return ['file', 'source']
+
+__all__.append('SrcContents')
+
+################################################################################
+
 from debian.debfile import Deb822
 
 # Temporary Deb822 subclass to fix bugs with : handling; see #597249
@@ -2285,6 +2297,25 @@ class DBSource(ORMObject):
 
     metadata = association_proxy('key', 'value')
 
+    def scan_contents(self):
+        '''
+        Returns a set of names for non directories. The path names are
+        normalized after converting them from either utf-8 or iso8859-1
+        encoding.
+        '''
+        fullpath = self.poolfile.fullpath
+        from daklib.contents import UnpackedSource
+        unpacked = UnpackedSource(fullpath)
+        fileset = set()
+        for name in unpacked.get_all_filenames():
+            # enforce proper utf-8 encoding
+            try:
+                name.decode('utf-8')
+            except UnicodeDecodeError:
+                name = name.decode('iso8859-1').encode('utf-8')
+            fileset.add(name)
+        return fileset
+
 __all__.append('DBSource')
 
 @session_wrapper
@@ -3077,6 +3108,7 @@ class DBConn(object):
             'source_acl',
             'source_metadata',
             'src_associations',
+            'src_contents',
             'src_format',
             'src_uploaders',
             'suite',
@@ -3380,6 +3412,12 @@ class DBConn(object):
                     backref=backref('contents', lazy='dynamic', cascade='all')),
                 file = self.tbl_bin_contents.c.file))
 
+        mapper(SrcContents, self.tbl_src_contents,
+            properties = dict(
+                source = relation(DBSource,
+                    backref=backref('contents', lazy='dynamic', cascade='all')),
+                file = self.tbl_src_contents.c.file))
+
         mapper(MetadataKey, self.tbl_metadata_keys,
             properties = dict(
                 key_id = self.tbl_metadata_keys.c.key_id,
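DBSource.scan_contents() above works on byte strings: each name from the unpacked source tree is checked against UTF-8 first and re-encoded from ISO-8859-1 when that check fails, so every value stored in src_contents ends up as valid UTF-8 without dropping legacy-encoded names. A small self-contained sketch of just that fallback (plain Python, no dak imports):

# Sketch of the encoding fallback used in scan_contents() above: keep names
# that already decode as UTF-8, re-encode everything else from ISO-8859-1.
def normalize_filename(name):
    # name is a byte string for one file in the unpacked tree
    try:
        name.decode('utf-8')
    except UnicodeDecodeError:
        name = name.decode('iso8859-1').encode('utf-8')
    return name

print(normalize_filename(b'debian/rules'))     # valid UTF-8, returned unchanged
print(normalize_filename(b'doc/caf\xe9.txt'))  # ISO-8859-1 bytes become b'doc/caf\xc3\xa9.txt'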
diff --git a/tests/dbtest_contents.py b/tests/dbtest_contents.py
index 90fe4966..e3128161 100755
--- a/tests/dbtest_contents.py
+++ b/tests/dbtest_contents.py
@@ -3,7 +3,8 @@
 from db_test import DBDakTestCase, fixture
 
 from daklib.dbconn import *
-from daklib.contents import ContentsWriter, ContentsScanner, UnpackedSource
+from daklib.contents import ContentsWriter, BinaryContentsScanner, \
+    UnpackedSource, SourceContentsScanner
 
 from os.path import normpath
 from sqlalchemy.exc import FlushError, IntegrityError
@@ -161,13 +162,16 @@ class ContentsTestCase(DBDakTestCase):
         self.session.delete(self.binary['hello_2.2-1_i386'])
         self.session.commit()
 
-    def test_scan_contents(self):
+    def test_binary_scan_contents(self):
+        '''
+        Tests the BinaryContentsScanner.
+        '''
         self.setup_binaries()
         filelist = [f for f in self.binary['hello_2.2-1_i386'].scan_contents()]
         self.assertEqual(['usr/bin/hello', 'usr/share/doc/hello/copyright'],
             filelist)
         self.session.commit()
-        ContentsScanner(self.binary['hello_2.2-1_i386'].binary_id).scan()
+        BinaryContentsScanner(self.binary['hello_2.2-1_i386'].binary_id).scan()
         bin_contents_list = self.binary['hello_2.2-1_i386'].contents.order_by('file').all()
         self.assertEqual(2, len(bin_contents_list))
         self.assertEqual('usr/bin/hello', bin_contents_list[0].file)
@@ -175,10 +179,11 @@ class ContentsTestCase(DBDakTestCase):
 
     def test_unpack(self):
         '''
-        Tests the UnpackedSource class.
+        Tests the UnpackedSource class and the SourceContentsScanner.
         '''
-        self.setup_poolfiles()
-        dscfilename = fixture('ftp/pool/' + self.file['hello_2.2-1.dsc'].filename)
+        self.setup_sources()
+        source = self.source['hello_2.2-1']
+        dscfilename = fixture('ftp/pool/' + source.poolfile.filename)
         unpacked = UnpackedSource(dscfilename)
         self.assertTrue(len(unpacked.get_root_directory()) > 0)
         self.assertEqual('hello (2.2-1) unstable; urgency=low\n',
@@ -186,7 +191,15 @@ class ContentsTestCase(DBDakTestCase):
         all_filenames = set(unpacked.get_all_filenames())
         self.assertEqual(8, len(all_filenames))
         self.assertTrue('debian/rules' in all_filenames)
+        # method scan_contents()
+        self.assertEqual(all_filenames, source.scan_contents())
+        # exception with invalid files
         self.assertRaises(CalledProcessError, lambda: UnpackedSource('invalidname'))
+        # SourceContentsScanner
+        self.session.commit()
+        self.assertTrue(source.contents.count() == 0)
+        SourceContentsScanner(source.source_id).scan()
+        self.assertTrue(source.contents.count() > 0)
 
     def classes_to_clean(self):
         return [Override, Suite, BinContents, DBBinary, DBSource, Architecture, Section, \
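The extended test_unpack() above exercises the full path this branch adds: DBSource.scan_contents() unpacks the .dsc and returns normalized names, SourceContentsScanner.scan() persists them as SrcContents rows, and the contents backref exposes them as a dynamic query. A hypothetical driver along the same lines, assuming the patched dak tree and its configured database are available; source_id and binary_id are placeholders for ids taken from real DBSource/DBBinary rows:

# Hypothetical driver mirroring what the test above and the cron job exercise.
from daklib.contents import BinaryContentsScanner, SourceContentsScanner

def rescan(source_id, binary_id, limit=10000):
    SourceContentsScanner(source_id).scan()   # fills src_contents for one source
    BinaryContentsScanner(binary_id).scan()   # fills bin_contents for one (u)deb
    # bulk runs, as triggered by 'dak contents -l NUMBER scan-source' / 'scan-binary'
    print(SourceContentsScanner.scan_all(limit))
    print(BinaryContentsScanner.scan_all(limit))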