processed = query.count()
pool = Pool()
for binary in query.yield_per(100):
- pool.apply_async(scan_helper, (binary.binary_id, ))
+ pool.apply_async(binary_scan_helper, (binary.binary_id, ))
pool.close()
pool.join()
remaining = remaining()
session.close()
return { 'processed': processed, 'remaining': remaining }
-def scan_helper(binary_id):
+def binary_scan_helper(binary_id):
'''
This function runs in a subprocess.
'''
Enforce cleanup.
'''
self.cleanup()
+
+
+class SourceContentsScanner(object):
+ '''
+ SourceContentsScanner provides a method scan() to scan the contents of a
+ DBSource object.
+ '''
+ def __init__(self, source_id):
+ '''
+ The argument source_id is the id of the DBSource object that
+ should be scanned.
+ '''
+ self.source_id = source_id
+
+ def scan(self):
+ '''
+ This method does the actual scan and fills in the associated SrcContents
+ property. It commits any changes to the database.
+ '''
+ session = DBConn().session()
+ source = session.query(DBSource).get(self.source_id)
+ fileset = set(source.scan_contents())
+ for filename in fileset:
+ source.contents.append(SrcContents(file = filename))
+ session.commit()
+ session.close()
+
+ @classmethod
+ def scan_all(class_, limit = None):
+ '''
+ The class method scan_all() scans all source using multiple processes.
+ The number of sources to be scanned can be limited with the limit
+ argument. Returns the number of processed and remaining packages as a
+ dict.
+ '''
+ session = DBConn().session()
+ query = session.query(DBSource).filter(DBSource.contents == None)
+ remaining = query.count
+ if limit is not None:
+ query = query.limit(limit)
+ processed = query.count()
+ pool = Pool()
+ for source in query.yield_per(100):
+ pool.apply_async(source_scan_helper, (source.source_id, ))
+ pool.close()
+ pool.join()
+ remaining = remaining()
+ session.close()
+ return { 'processed': processed, 'remaining': remaining }
+
+def source_scan_helper(binary_id):
+ '''
+ This function runs in a subprocess.
+ '''
+ scanner = SourceContentsScanner(source_id)
+ scanner.scan()
+
################################################################################
+class SrcContents(ORMObject):
+ def __init__(self, file = None, source = None):
+ self.file = file
+ self.source = source
+
+ def properties(self):
+ return ['file', 'source']
+
+__all__.append('SrcContents')
+
+################################################################################
+
class DBSource(ORMObject):
def __init__(self, source = None, version = None, maintainer = None, \
changedby = None, poolfile = None, install_date = None):
metadata = association_proxy('key', 'value')
+ def scan_contents(self):
+ '''
+ Returns a set of names for non directories. The path names are
+ normalized after converting them from either utf-8 or iso8859-1
+ encoding.
+ '''
+ fullpath = self.poolfile.fullpath
+ from daklib.contents import UnpackedSource
+ unpacked = UnpackedSource(fullpath)
+ fileset = set()
+ for name in unpacked.get_all_filenames():
+ # enforce proper utf-8 encoding
+ try:
+ name.decode('utf-8')
+ except UnicodeDecodeError:
+ name = name.decode('iso8859-1').encode('utf-8')
+ fileset.add(name)
+ return fileset
+
__all__.append('DBSource')
@session_wrapper
'source_acl',
'source_metadata',
'src_associations',
+ 'src_contents',
'src_format',
'src_uploaders',
'suite',
backref=backref('contents', lazy='dynamic', cascade='all')),
file = self.tbl_bin_contents.c.file))
+ mapper(SrcContents, self.tbl_src_contents,
+ properties = dict(
+ source = relation(DBSource,
+ backref=backref('contents', lazy='dynamic', cascade='all')),
+ file = self.tbl_src_contents.c.file))
+
mapper(MetadataKey, self.tbl_metadata_keys,
properties = dict(
key_id = self.tbl_metadata_keys.c.key_id,
from db_test import DBDakTestCase, fixture
from daklib.dbconn import *
-from daklib.contents import ContentsWriter, BinaryContentsScanner, UnpackedSource
+from daklib.contents import ContentsWriter, BinaryContentsScanner, \
+ UnpackedSource, SourceContentsScanner
from os.path import normpath
from sqlalchemy.exc import FlushError, IntegrityError
self.session.delete(self.binary['hello_2.2-1_i386'])
self.session.commit()
- def test_scan_contents(self):
+ def test_binary_scan_contents(self):
+ '''
+ Tests the BinaryContentsScanner.
+ '''
self.setup_binaries()
filelist = [f for f in self.binary['hello_2.2-1_i386'].scan_contents()]
self.assertEqual(['usr/bin/hello', 'usr/share/doc/hello/copyright'],
def test_unpack(self):
'''
- Tests the UnpackedSource class.
+ Tests the UnpackedSource class and the SourceContentsScanner.
'''
- self.setup_poolfiles()
- dscfilename = fixture('ftp/pool/' + self.file['hello_2.2-1.dsc'].filename)
+ self.setup_sources()
+ source = self.source['hello_2.2-1']
+ dscfilename = fixture('ftp/pool/' + source.poolfile.filename)
unpacked = UnpackedSource(dscfilename)
self.assertTrue(len(unpacked.get_root_directory()) > 0)
self.assertEqual('hello (2.2-1) unstable; urgency=low\n',
all_filenames = set(unpacked.get_all_filenames())
self.assertEqual(8, len(all_filenames))
self.assertTrue('debian/rules' in all_filenames)
+ # method scan_contents()
+ self.assertEqual(all_filenames, source.scan_contents())
+ # exception with invalid files
self.assertRaises(CalledProcessError, lambda: UnpackedSource('invalidname'))
+ # SourceContentsScanner
+ self.session.commit()
+ self.assertTrue(source.contents.count() == 0)
+ SourceContentsScanner(source.source_id).scan()
+ self.assertTrue(source.contents.count() > 0)
def classes_to_clean(self):
return [Override, Suite, BinContents, DBBinary, DBSource, Architecture, Section, \