]> git.decadent.org.uk Git - dak.git/commitdiff
Add class SourceContentsScanner.
authorTorsten Werner <twerner@debian.org>
Wed, 23 Mar 2011 19:03:33 +0000 (20:03 +0100)
committerTorsten Werner <twerner@debian.org>
Wed, 23 Mar 2011 19:03:33 +0000 (20:03 +0100)
Signed-off-by: Torsten Werner <twerner@debian.org>
daklib/contents.py
daklib/dbconn.py
tests/dbtest_contents.py

index df571e10ac962dd1114008bc19baae4fe0ae704c..dffb425e3cc2e5f8c5245de9e209fdaaa7a1f454 100755 (executable)
@@ -302,14 +302,14 @@ class BinaryContentsScanner(object):
         processed = query.count()
         pool = Pool()
         for binary in query.yield_per(100):
-            pool.apply_async(scan_helper, (binary.binary_id, ))
+            pool.apply_async(binary_scan_helper, (binary.binary_id, ))
         pool.close()
         pool.join()
         remaining = remaining()
         session.close()
         return { 'processed': processed, 'remaining': remaining }
 
-def scan_helper(binary_id):
+def binary_scan_helper(binary_id):
     '''
     This function runs in a subprocess.
     '''
@@ -376,3 +376,60 @@ class UnpackedSource(object):
         Enforce cleanup.
         '''
         self.cleanup()
+
+
+class SourceContentsScanner(object):
+    '''
+    SourceContentsScanner provides a method scan() to scan the contents of a
+    DBSource object.
+    '''
+    def __init__(self, source_id):
+        '''
+        The argument source_id is the id of the DBSource object that
+        should be scanned.
+        '''
+        self.source_id = source_id
+
+    def scan(self):
+        '''
+        This method does the actual scan and fills in the associated SrcContents
+        property. It commits any changes to the database.
+        '''
+        session = DBConn().session()
+        source = session.query(DBSource).get(self.source_id)
+        fileset = set(source.scan_contents())
+        for filename in fileset:
+            source.contents.append(SrcContents(file = filename))
+        session.commit()
+        session.close()
+
+    @classmethod
+    def scan_all(class_, limit = None):
+        '''
+        The class method scan_all() scans all source using multiple processes.
+        The number of sources to be scanned can be limited with the limit
+        argument. Returns the number of processed and remaining packages as a
+        dict.
+        '''
+        session = DBConn().session()
+        query = session.query(DBSource).filter(DBSource.contents == None)
+        remaining = query.count
+        if limit is not None:
+            query = query.limit(limit)
+        processed = query.count()
+        pool = Pool()
+        for source in query.yield_per(100):
+            pool.apply_async(source_scan_helper, (source.source_id, ))
+        pool.close()
+        pool.join()
+        remaining = remaining()
+        session.close()
+        return { 'processed': processed, 'remaining': remaining }
+
+def source_scan_helper(binary_id):
+    '''
+    This function runs in a subprocess.
+    '''
+    scanner = SourceContentsScanner(source_id)
+    scanner.scan()
+
index 6782c081145944e57c22a1bf718872e00c7106c9..014f1082e4b0216f7a74b55c261ff89e95156e6b 100755 (executable)
@@ -2157,6 +2157,18 @@ __all__.append('get_sections')
 
 ################################################################################
 
+class SrcContents(ORMObject):
+    def __init__(self, file = None, source = None):
+        self.file = file
+        self.source = source
+
+    def properties(self):
+        return ['file', 'source']
+
+__all__.append('SrcContents')
+
+################################################################################
+
 class DBSource(ORMObject):
     def __init__(self, source = None, version = None, maintainer = None, \
         changedby = None, poolfile = None, install_date = None):
@@ -2178,6 +2190,25 @@ class DBSource(ORMObject):
 
     metadata = association_proxy('key', 'value')
 
+    def scan_contents(self):
+        '''
+        Returns a set of names for non directories. The path names are
+        normalized after converting them from either utf-8 or iso8859-1
+        encoding.
+        '''
+        fullpath = self.poolfile.fullpath
+        from daklib.contents import UnpackedSource
+        unpacked = UnpackedSource(fullpath)
+        fileset = set()
+        for name in unpacked.get_all_filenames():
+            # enforce proper utf-8 encoding
+            try:
+                name.decode('utf-8')
+            except UnicodeDecodeError:
+                name = name.decode('iso8859-1').encode('utf-8')
+            fileset.add(name)
+        return fileset
+
 __all__.append('DBSource')
 
 @session_wrapper
@@ -2910,6 +2941,7 @@ class DBConn(object):
             'source_acl',
             'source_metadata',
             'src_associations',
+            'src_contents',
             'src_format',
             'src_uploaders',
             'suite',
@@ -3213,6 +3245,12 @@ class DBConn(object):
                     backref=backref('contents', lazy='dynamic', cascade='all')),
                 file = self.tbl_bin_contents.c.file))
 
+        mapper(SrcContents, self.tbl_src_contents,
+            properties = dict(
+                source = relation(DBSource,
+                    backref=backref('contents', lazy='dynamic', cascade='all')),
+                file = self.tbl_src_contents.c.file))
+
         mapper(MetadataKey, self.tbl_metadata_keys,
             properties = dict(
                 key_id = self.tbl_metadata_keys.c.key_id,
index 957307ba4d1bbd9e6b5408426aabbd5bb45716b7..e3128161780893ea6188165a9f91a7e5cfaa768f 100755 (executable)
@@ -3,7 +3,8 @@
 from db_test import DBDakTestCase, fixture
 
 from daklib.dbconn import *
-from daklib.contents import ContentsWriter, BinaryContentsScanner, UnpackedSource
+from daklib.contents import ContentsWriter, BinaryContentsScanner, \
+    UnpackedSource, SourceContentsScanner
 
 from os.path import normpath
 from sqlalchemy.exc import FlushError, IntegrityError
@@ -161,7 +162,10 @@ class ContentsTestCase(DBDakTestCase):
         self.session.delete(self.binary['hello_2.2-1_i386'])
         self.session.commit()
 
-    def test_scan_contents(self):
+    def test_binary_scan_contents(self):
+        '''
+        Tests the BinaryContentsScanner.
+        '''
         self.setup_binaries()
         filelist = [f for f in self.binary['hello_2.2-1_i386'].scan_contents()]
         self.assertEqual(['usr/bin/hello', 'usr/share/doc/hello/copyright'],
@@ -175,10 +179,11 @@ class ContentsTestCase(DBDakTestCase):
 
     def test_unpack(self):
         '''
-        Tests the UnpackedSource class.
+        Tests the UnpackedSource class and the SourceContentsScanner.
         '''
-        self.setup_poolfiles()
-        dscfilename = fixture('ftp/pool/' + self.file['hello_2.2-1.dsc'].filename)
+        self.setup_sources()
+        source = self.source['hello_2.2-1']
+        dscfilename = fixture('ftp/pool/' + source.poolfile.filename)
         unpacked = UnpackedSource(dscfilename)
         self.assertTrue(len(unpacked.get_root_directory()) > 0)
         self.assertEqual('hello (2.2-1) unstable; urgency=low\n',
@@ -186,7 +191,15 @@ class ContentsTestCase(DBDakTestCase):
         all_filenames = set(unpacked.get_all_filenames())
         self.assertEqual(8, len(all_filenames))
         self.assertTrue('debian/rules' in all_filenames)
+        # method scan_contents()
+        self.assertEqual(all_filenames, source.scan_contents())
+        # exception with invalid files
         self.assertRaises(CalledProcessError, lambda: UnpackedSource('invalidname'))
+        # SourceContentsScanner
+        self.session.commit()
+        self.assertTrue(source.contents.count() == 0)
+        SourceContentsScanner(source.source_id).scan()
+        self.assertTrue(source.contents.count() > 0)
 
     def classes_to_clean(self):
         return [Override, Suite, BinContents, DBBinary, DBSource, Architecture, Section, \