do_dists
fi
-dak contents -l 10000 scan
+dak contents -l 10000 binary-scan
pg_timestamp postunchecked
from daklib.config import Config
from daklib.dbconn import *
-from daklib.contents import ContentsScanner, ContentsWriter
+from daklib.contents import BinaryContentsScanner, ContentsWriter, \
+ SourceContentsScanner
from daklib import daklog
from daklib import utils
generate
generate Contents-$arch.gz files
- scan
- scan the debs in the existing pool and load contents into the bin_contents table
+ scan-source
+ scan the source packages in the existing pool and load contents into
+ the src_contents table
+
+ scan-binary
+ scan the (u)debs in the existing pool and load contents into the
+ bin_contents table
OPTIONS
-h, --help
-f, --force
write Contents files for suites marked as untouchable, too
-OPTIONS for scan
+OPTIONS for scan-source and scan-binary
-l, --limit=NUMBER
maximum number of packages to scan
"""
################################################################################
-def scan_all(cnf, limit):
- Logger = daklog.Logger(cnf.Cnf, 'contents scan')
- result = ContentsScanner.scan_all(limit)
+def binary_scan_all(cnf, limit):
+ Logger = daklog.Logger(cnf.Cnf, 'contents scan-binary')
+ result = BinaryContentsScanner.scan_all(limit)
+ processed = '%(processed)d packages processed' % result
+ remaining = '%(remaining)d packages remaining' % result
+ Logger.log([processed, remaining])
+ Logger.close()
+
+################################################################################
+
+def source_scan_all(cnf, limit):
+ Logger = daklog.Logger(cnf.Cnf, 'contents scan-source')
+ result = SourceContentsScanner.scan_all(limit)
processed = '%(processed)d packages processed' % result
remaining = '%(remaining)d packages remaining' % result
Logger.log([processed, remaining])
if len(options['Limit']) > 0:
limit = int(options['Limit'])
- if args[0] == 'scan':
- scan_all(cnf, limit)
+ if args[0] == 'scan-source':
+ source_scan_all(cnf, limit)
+ return
+
+ if args[0] == 'scan-binary':
+ binary_scan_all(cnf, limit)
return
suite_names = utils.split_args(options['Suite'])
gzip.stdin.close()
output_file.close()
gzip.wait()
- try:
- os.remove(final_filename)
- except:
- pass
+ os.chmod(temp_filename, 0664)
os.rename(temp_filename, final_filename)
- os.chmod(final_filename, 0664)
@classmethod
def log_result(class_, result):
return log_message
-class ContentsScanner(object):
+class BinaryContentsScanner(object):
'''
- ContentsScanner provides a threadsafe method scan() to scan the contents of
- a DBBinary object.
+ BinaryContentsScanner provides a threadsafe method scan() to scan the
+ contents of a DBBinary object.
'''
def __init__(self, binary_id):
'''
processed = query.count()
pool = Pool()
for binary in query.yield_per(100):
- pool.apply_async(scan_helper, (binary.binary_id, ))
+ pool.apply_async(binary_scan_helper, (binary.binary_id, ))
pool.close()
pool.join()
remaining = remaining()
session.close()
return { 'processed': processed, 'remaining': remaining }
-def scan_helper(binary_id):
+def binary_scan_helper(binary_id):
'''
This function runs in a subprocess.
'''
- scanner = ContentsScanner(binary_id)
+ scanner = BinaryContentsScanner(binary_id)
scanner.scan()
Enforce cleanup.
'''
self.cleanup()
+
+
+class SourceContentsScanner(object):
+ '''
+ SourceContentsScanner provides a method scan() to scan the contents of a
+ DBSource object.
+ '''
+ def __init__(self, source_id):
+ '''
+ The argument source_id is the id of the DBSource object that
+ should be scanned.
+ '''
+ self.source_id = source_id
+
+ def scan(self):
+ '''
+ This method does the actual scan and fills in the associated SrcContents
+ property. It commits any changes to the database.
+ '''
+ session = DBConn().session()
+ source = session.query(DBSource).get(self.source_id)
+ fileset = set(source.scan_contents())
+ for filename in fileset:
+ source.contents.append(SrcContents(file = filename))
+ session.commit()
+ session.close()
+
+ @classmethod
+ def scan_all(class_, limit = None):
+ '''
+        The class method scan_all() scans all sources using multiple processes.
+ The number of sources to be scanned can be limited with the limit
+ argument. Returns the number of processed and remaining packages as a
+ dict.
+ '''
+ session = DBConn().session()
+ query = session.query(DBSource).filter(DBSource.contents == None)
+ remaining = query.count
+ if limit is not None:
+ query = query.limit(limit)
+ processed = query.count()
+ pool = Pool()
+ for source in query.yield_per(100):
+ pool.apply_async(source_scan_helper, (source.source_id, ))
+ pool.close()
+ pool.join()
+ remaining = remaining()
+ session.close()
+ return { 'processed': processed, 'remaining': remaining }
+
+def source_scan_helper(source_id):
+ '''
+ This function runs in a subprocess.
+ '''
+ try:
+ scanner = SourceContentsScanner(source_id)
+ scanner.scan()
+ except Exception, e:
+ print e
+
################################################################################
+class SrcContents(ORMObject):
+ def __init__(self, file = None, source = None):
+ self.file = file
+ self.source = source
+
+ def properties(self):
+ return ['file', 'source']
+
+__all__.append('SrcContents')
+
+################################################################################
+
from debian.debfile import Deb822
# Temporary Deb822 subclass to fix bugs with : handling; see #597249
metadata = association_proxy('key', 'value')
+ def scan_contents(self):
+ '''
+ Returns a set of names for non directories. The path names are
+ normalized after converting them from either utf-8 or iso8859-1
+ encoding.
+ '''
+ fullpath = self.poolfile.fullpath
+ from daklib.contents import UnpackedSource
+ unpacked = UnpackedSource(fullpath)
+ fileset = set()
+ for name in unpacked.get_all_filenames():
+ # enforce proper utf-8 encoding
+ try:
+ name.decode('utf-8')
+ except UnicodeDecodeError:
+ name = name.decode('iso8859-1').encode('utf-8')
+ fileset.add(name)
+ return fileset
+
__all__.append('DBSource')
@session_wrapper
'source_acl',
'source_metadata',
'src_associations',
+ 'src_contents',
'src_format',
'src_uploaders',
'suite',
backref=backref('contents', lazy='dynamic', cascade='all')),
file = self.tbl_bin_contents.c.file))
+ mapper(SrcContents, self.tbl_src_contents,
+ properties = dict(
+ source = relation(DBSource,
+ backref=backref('contents', lazy='dynamic', cascade='all')),
+ file = self.tbl_src_contents.c.file))
+
mapper(MetadataKey, self.tbl_metadata_keys,
properties = dict(
key_id = self.tbl_metadata_keys.c.key_id,
from db_test import DBDakTestCase, fixture
from daklib.dbconn import *
-from daklib.contents import ContentsWriter, ContentsScanner, UnpackedSource
+from daklib.contents import ContentsWriter, BinaryContentsScanner, \
+ UnpackedSource, SourceContentsScanner
from os.path import normpath
from sqlalchemy.exc import FlushError, IntegrityError
self.session.delete(self.binary['hello_2.2-1_i386'])
self.session.commit()
- def test_scan_contents(self):
+ def test_binary_scan_contents(self):
+ '''
+ Tests the BinaryContentsScanner.
+ '''
self.setup_binaries()
filelist = [f for f in self.binary['hello_2.2-1_i386'].scan_contents()]
self.assertEqual(['usr/bin/hello', 'usr/share/doc/hello/copyright'],
filelist)
self.session.commit()
- ContentsScanner(self.binary['hello_2.2-1_i386'].binary_id).scan()
+ BinaryContentsScanner(self.binary['hello_2.2-1_i386'].binary_id).scan()
bin_contents_list = self.binary['hello_2.2-1_i386'].contents.order_by('file').all()
self.assertEqual(2, len(bin_contents_list))
self.assertEqual('usr/bin/hello', bin_contents_list[0].file)
def test_unpack(self):
'''
- Tests the UnpackedSource class.
+ Tests the UnpackedSource class and the SourceContentsScanner.
'''
- self.setup_poolfiles()
- dscfilename = fixture('ftp/pool/' + self.file['hello_2.2-1.dsc'].filename)
+ self.setup_sources()
+ source = self.source['hello_2.2-1']
+ dscfilename = fixture('ftp/pool/' + source.poolfile.filename)
unpacked = UnpackedSource(dscfilename)
self.assertTrue(len(unpacked.get_root_directory()) > 0)
self.assertEqual('hello (2.2-1) unstable; urgency=low\n',
all_filenames = set(unpacked.get_all_filenames())
self.assertEqual(8, len(all_filenames))
self.assertTrue('debian/rules' in all_filenames)
+ # method scan_contents()
+ self.assertEqual(all_filenames, source.scan_contents())
+ # exception with invalid files
self.assertRaises(CalledProcessError, lambda: UnpackedSource('invalidname'))
+ # SourceContentsScanner
+ self.session.commit()
+ self.assertTrue(source.contents.count() == 0)
+ SourceContentsScanner(source.source_id).scan()
+ self.assertTrue(source.contents.count() > 0)
def classes_to_clean(self):
return [Override, Suite, BinContents, DBBinary, DBSource, Architecture, Section, \