git.decadent.org.uk Git - dak.git/commitdiff
Merge branch 'master' into contents
author     Torsten Werner <twerner@debian.org>
           Thu, 24 Mar 2011 08:35:23 +0000 (09:35 +0100)
committer  Torsten Werner <twerner@debian.org>
           Thu, 24 Mar 2011 08:35:23 +0000 (09:35 +0100)
Conflicts:
daklib/dbconn.py

config/debian/cron.unchecked
dak/contents.py
daklib/contents.py
daklib/dbconn.py
tests/dbtest_contents.py

config/debian/cron.unchecked
index 7b81e511e2878843ae34ef733ace8ca24463b4b9..10bd930c81add3e7aab0ea43f0b8db3467398cdc 100755 (executable)
@@ -106,5 +106,5 @@ if [ ! -z "$changes" ]; then
     do_dists
 fi
 
-dak contents -l 10000 scan
+dak contents -l 10000 scan-binary
 pg_timestamp postunchecked

dak/contents.py
index d763f869ab094bc2feb1aee15c02872756e11478..ee904b2a2467e4f0d100e03e5dca3396e3587b14 100755 (executable)
@@ -40,7 +40,8 @@ import apt_pkg
 
 from daklib.config import Config
 from daklib.dbconn import *
-from daklib.contents import ContentsScanner, ContentsWriter
+from daklib.contents import BinaryContentsScanner, ContentsWriter, \
+    SourceContentsScanner
 from daklib import daklog
 from daklib import utils
 
@@ -53,8 +54,13 @@ SUBCOMMANDS
     generate
         generate Contents-$arch.gz files
 
-    scan
-        scan the debs in the existing pool and load contents into the bin_contents table
+    scan-source
+        scan the source packages in the existing pool and load contents into
+        the src_contents table
+
+    scan-binary
+        scan the (u)debs in the existing pool and load contents into the
+        bin_contents table
 
 OPTIONS
      -h, --help
@@ -67,7 +73,7 @@ OPTIONS for generate
      -f, --force
         write Contents files for suites marked as untouchable, too
 
-OPTIONS for scan
+OPTIONS for scan-source and scan-binary
      -l, --limit=NUMBER
         maximum number of packages to scan
 """
@@ -82,9 +88,19 @@ def write_all(cnf, suite_names = [], force = None):
 
 ################################################################################
 
-def scan_all(cnf, limit):
-    Logger = daklog.Logger(cnf.Cnf, 'contents scan')
-    result = ContentsScanner.scan_all(limit)
+def binary_scan_all(cnf, limit):
+    Logger = daklog.Logger(cnf.Cnf, 'contents scan-binary')
+    result = BinaryContentsScanner.scan_all(limit)
+    processed = '%(processed)d packages processed' % result
+    remaining = '%(remaining)d packages remaining' % result
+    Logger.log([processed, remaining])
+    Logger.close()
+
+################################################################################
+
+def source_scan_all(cnf, limit):
+    Logger = daklog.Logger(cnf.Cnf, 'contents scan-source')
+    result = SourceContentsScanner.scan_all(limit)
     processed = '%(processed)d packages processed' % result
     remaining = '%(remaining)d packages remaining' % result
     Logger.log([processed, remaining])
@@ -113,8 +129,12 @@ def main():
     if len(options['Limit']) > 0:
         limit = int(options['Limit'])
 
-    if args[0] == 'scan':
-        scan_all(cnf, limit)
+    if args[0] == 'scan-source':
+        source_scan_all(cnf, limit)
+        return
+
+    if args[0] == 'scan-binary':
+        binary_scan_all(cnf, limit)
         return
 
     suite_names = utils.split_args(options['Suite'])
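
The dispatch added to main() above routes each scan variant to its own helper; source_scan_all() and binary_scan_all() differ only in the scanner class they call and the log label they use. A minimal sketch of that routing as a table lookup; the dispatch() helper is hypothetical and not part of the commit, which uses plain if-blocks:

# Hypothetical sketch of the subcommand routing performed by main() above.
def dispatch(subcommand, cnf, limit):
    handlers = {
        'scan-source': source_scan_all,   # wraps SourceContentsScanner.scan_all()
        'scan-binary': binary_scan_all,   # wraps BinaryContentsScanner.scan_all()
    }
    handler = handlers.get(subcommand)
    if handler is None:
        return False      # not a scan subcommand; main() continues to 'generate'
    handler(cnf, limit)
    return True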

daklib/contents.py
index a158e8fcd4bb7d2ed27e8547b79b03d1cbd84b8a..2a29b2e55b5080eadc574e559edf38a22ae1615a 100755 (executable)
@@ -190,12 +190,8 @@ select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package
         gzip.stdin.close()
         output_file.close()
         gzip.wait()
-        try:
-            os.remove(final_filename)
-        except:
-            pass
+        os.chmod(temp_filename, 0664)
         os.rename(temp_filename, final_filename)
-        os.chmod(final_filename, 0664)
 
     @classmethod
     def log_result(class_, result):
@@ -258,10 +254,10 @@ def generate_helper(suite_id, arch_id, overridetype_id, component_id = None):
     return log_message
 
 
-class ContentsScanner(object):
+class BinaryContentsScanner(object):
     '''
-    ContentsScanner provides a threadsafe method scan() to scan the contents of
-    a DBBinary object.
+    BinaryContentsScanner provides a threadsafe method scan() to scan the
+    contents of a DBBinary object.
     '''
     def __init__(self, binary_id):
         '''
@@ -302,18 +298,18 @@ class ContentsScanner(object):
         processed = query.count()
         pool = Pool()
         for binary in query.yield_per(100):
-            pool.apply_async(scan_helper, (binary.binary_id, ))
+            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
         pool.close()
         pool.join()
         remaining = remaining()
         session.close()
         return { 'processed': processed, 'remaining': remaining }
 
-def scan_helper(binary_id):
+def binary_scan_helper(binary_id):
     '''
     This function runs in a subprocess.
     '''
-    scanner = ContentsScanner(binary_id)
+    scanner = BinaryContentsScanner(binary_id)
     scanner.scan()
 
 
@@ -376,3 +372,63 @@ class UnpackedSource(object):
         Enforce cleanup.
         '''
         self.cleanup()
+
+
+class SourceContentsScanner(object):
+    '''
+    SourceContentsScanner provides a method scan() to scan the contents of a
+    DBSource object.
+    '''
+    def __init__(self, source_id):
+        '''
+        The argument source_id is the id of the DBSource object that
+        should be scanned.
+        '''
+        self.source_id = source_id
+
+    def scan(self):
+        '''
+        This method does the actual scan and fills in the associated SrcContents
+        property. It commits any changes to the database.
+        '''
+        session = DBConn().session()
+        source = session.query(DBSource).get(self.source_id)
+        fileset = set(source.scan_contents())
+        for filename in fileset:
+            source.contents.append(SrcContents(file = filename))
+        session.commit()
+        session.close()
+
+    @classmethod
+    def scan_all(class_, limit = None):
+        '''
+        The class method scan_all() scans all sources using multiple processes.
+        The number of sources to be scanned can be limited with the limit
+        argument. Returns the number of processed and remaining packages as a
+        dict.
+        '''
+        session = DBConn().session()
+        query = session.query(DBSource).filter(DBSource.contents == None)
+        remaining = query.count
+        if limit is not None:
+            query = query.limit(limit)
+        processed = query.count()
+        pool = Pool()
+        for source in query.yield_per(100):
+            pool.apply_async(source_scan_helper, (source.source_id, ))
+        pool.close()
+        pool.join()
+        remaining = remaining()
+        session.close()
+        return { 'processed': processed, 'remaining': remaining }
+
+def source_scan_helper(source_id):
+    '''
+    This function runs in a subprocess.
+    '''
+    try:
+        scanner = SourceContentsScanner(source_id)
+        scanner.scan()
+    except Exception, e:
+        print e
+
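
Both scanner classes above share the same scan_all() shape: count what is still unscanned, optionally cap the batch with limit, hand each id to a worker process via multiprocessing.Pool, and re-count once the pool has drained. The following standalone sketch shows the same pattern over a plain list; scan_one() and scan_batch() are illustrative names and do not exist in daklib:

from multiprocessing import Pool

def scan_one(item_id):
    # stand-in for SourceContentsScanner(item_id).scan() or
    # BinaryContentsScanner(item_id).scan(); runs in a subprocess
    pass

def scan_batch(pending_ids, limit=None):
    remaining_before = len(pending_ids)
    batch = pending_ids[:limit] if limit is not None else pending_ids
    pool = Pool()
    for item_id in batch:
        pool.apply_async(scan_one, (item_id, ))
    pool.close()
    pool.join()
    # scan_all() re-runs its database count here; with a plain list the
    # arithmetic below is equivalent
    return {'processed': len(batch),
            'remaining': remaining_before - len(batch)}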

daklib/dbconn.py
index 6317b5844467a260865d5c7c67996eda537d7822..d4caf01c9176d60980c13933090950efa477aab9 100755 (executable)
@@ -2195,6 +2195,18 @@ __all__.append('get_sections')
 
 ################################################################################
 
+class SrcContents(ORMObject):
+    def __init__(self, file = None, source = None):
+        self.file = file
+        self.source = source
+
+    def properties(self):
+        return ['file', 'source']
+
+__all__.append('SrcContents')
+
+################################################################################
+
 from debian.debfile import Deb822
 
 # Temporary Deb822 subclass to fix bugs with : handling; see #597249
@@ -2285,6 +2297,25 @@ class DBSource(ORMObject):
 
     metadata = association_proxy('key', 'value')
 
+    def scan_contents(self):
+        '''
+        Returns a set of names for non directories. The path names are
+        normalized after converting them from either utf-8 or iso8859-1
+        encoding.
+        '''
+        fullpath = self.poolfile.fullpath
+        from daklib.contents import UnpackedSource
+        unpacked = UnpackedSource(fullpath)
+        fileset = set()
+        for name in unpacked.get_all_filenames():
+            # enforce proper utf-8 encoding
+            try:
+                name.decode('utf-8')
+            except UnicodeDecodeError:
+                name = name.decode('iso8859-1').encode('utf-8')
+            fileset.add(name)
+        return fileset
+
 __all__.append('DBSource')
 
 @session_wrapper
@@ -3077,6 +3108,7 @@ class DBConn(object):
             'source_acl',
             'source_metadata',
             'src_associations',
+            'src_contents',
             'src_format',
             'src_uploaders',
             'suite',
@@ -3380,6 +3412,12 @@ class DBConn(object):
                     backref=backref('contents', lazy='dynamic', cascade='all')),
                 file = self.tbl_bin_contents.c.file))
 
+        mapper(SrcContents, self.tbl_src_contents,
+            properties = dict(
+                source = relation(DBSource,
+                    backref=backref('contents', lazy='dynamic', cascade='all')),
+                file = self.tbl_src_contents.c.file))
+
         mapper(MetadataKey, self.tbl_metadata_keys,
             properties = dict(
                 key_id = self.tbl_metadata_keys.c.key_id,
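
The scan_contents() method added to DBSource above assumes file names inside a source package may be encoded as either utf-8 or iso8859-1 and normalizes everything to utf-8 bytes before it is stored in src_contents. A self-contained sketch of that rule; normalize_filename() is an illustrative name, not a dak function:

def normalize_filename(name):
    # keep names that already decode as utf-8, otherwise treat the bytes as
    # iso8859-1 and re-encode them, matching DBSource.scan_contents()
    try:
        name.decode('utf-8')
    except UnicodeDecodeError:
        name = name.decode('iso8859-1').encode('utf-8')
    return name

assert normalize_filename(b'debian/rules') == b'debian/rules'
assert normalize_filename(b'caf\xe9') == b'caf\xc3\xa9'   # latin-1 e-acute re-encoded as utf-8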

tests/dbtest_contents.py
index 90fe49666484a6e35ee19ac688fe0a440f7dfc4b..e3128161780893ea6188165a9f91a7e5cfaa768f 100755 (executable)
@@ -3,7 +3,8 @@
 from db_test import DBDakTestCase, fixture
 
 from daklib.dbconn import *
-from daklib.contents import ContentsWriter, ContentsScanner, UnpackedSource
+from daklib.contents import ContentsWriter, BinaryContentsScanner, \
+    UnpackedSource, SourceContentsScanner
 
 from os.path import normpath
 from sqlalchemy.exc import FlushError, IntegrityError
@@ -161,13 +162,16 @@ class ContentsTestCase(DBDakTestCase):
         self.session.delete(self.binary['hello_2.2-1_i386'])
         self.session.commit()
 
-    def test_scan_contents(self):
+    def test_binary_scan_contents(self):
+        '''
+        Tests the BinaryContentsScanner.
+        '''
         self.setup_binaries()
         filelist = [f for f in self.binary['hello_2.2-1_i386'].scan_contents()]
         self.assertEqual(['usr/bin/hello', 'usr/share/doc/hello/copyright'],
             filelist)
         self.session.commit()
-        ContentsScanner(self.binary['hello_2.2-1_i386'].binary_id).scan()
+        BinaryContentsScanner(self.binary['hello_2.2-1_i386'].binary_id).scan()
         bin_contents_list = self.binary['hello_2.2-1_i386'].contents.order_by('file').all()
         self.assertEqual(2, len(bin_contents_list))
         self.assertEqual('usr/bin/hello', bin_contents_list[0].file)
@@ -175,10 +179,11 @@ class ContentsTestCase(DBDakTestCase):
 
     def test_unpack(self):
         '''
-        Tests the UnpackedSource class.
+        Tests the UnpackedSource class and the SourceContentsScanner.
         '''
-        self.setup_poolfiles()
-        dscfilename = fixture('ftp/pool/' + self.file['hello_2.2-1.dsc'].filename)
+        self.setup_sources()
+        source = self.source['hello_2.2-1']
+        dscfilename = fixture('ftp/pool/' + source.poolfile.filename)
         unpacked = UnpackedSource(dscfilename)
         self.assertTrue(len(unpacked.get_root_directory()) > 0)
         self.assertEqual('hello (2.2-1) unstable; urgency=low\n',
@@ -186,7 +191,15 @@ class ContentsTestCase(DBDakTestCase):
         all_filenames = set(unpacked.get_all_filenames())
         self.assertEqual(8, len(all_filenames))
         self.assertTrue('debian/rules' in all_filenames)
+        # method scan_contents()
+        self.assertEqual(all_filenames, source.scan_contents())
+        # exception with invalid files
         self.assertRaises(CalledProcessError, lambda: UnpackedSource('invalidname'))
+        # SourceContentsScanner
+        self.session.commit()
+        self.assertTrue(source.contents.count() == 0)
+        SourceContentsScanner(source.source_id).scan()
+        self.assertTrue(source.contents.count() > 0)
 
     def classes_to_clean(self):
         return [Override, Suite, BinContents, DBBinary, DBSource, Architecture, Section, \
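
Once a source package has been scanned, the new src_contents rows are reachable through the dynamic contents backref that DBConn maps onto DBSource, which is what the test above asserts with source.contents.count(). A hedged usage sketch; the package name 'hello' and the session handling are illustrative:

from daklib.dbconn import DBConn, DBSource
from daklib.contents import SourceContentsScanner

session = DBConn().session()
source = session.query(DBSource).filter_by(source='hello').first()
if source.contents.count() == 0:
    # populate src_contents for this one source package
    SourceContentsScanner(source.source_id).scan()
for entry in source.contents.order_by('file'):
    print(entry.file)          # e.g. 'debian/rules'
session.close()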