DB
{
- Name "backports";
- Host "";
- Port -1;
+ Service "backports";
+ // PoolSize should be at least ThreadCount + 1
+ PoolSize 5;
+ // MaxOverflow shouldn't exceed postgresql.conf's max_connections - PoolSize
+ MaxOverflow 13;
+ // should be false for encoding == SQL_ASCII
+ Unicode "false";
};
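For reference, a minimal sketch of how these pool settings map onto SQLAlchemy, assuming dak hands them straight to create_engine() (the connection URL below is hypothetical):

    from sqlalchemy import create_engine

    # PoolSize -> pool_size, MaxOverflow -> max_overflow (QueuePool)
    engine = create_engine('postgresql:///backports',
                           pool_size=5, max_overflow=13)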
SuiteMappings
do_dists
fi
-dak contents -l 10000 scan
+dak contents -l 10000 scan-binary
pg_timestamp postunchecked
from daklib.config import Config
from daklib.dbconn import *
-from daklib.contents import ContentsScanner, ContentsWriter
+from daklib.contents import BinaryContentsScanner, ContentsWriter, \
+ SourceContentsScanner
from daklib import daklog
from daklib import utils
generate
generate Contents-$arch.gz files
- scan
- scan the debs in the existing pool and load contents into the bin_contents table
+ scan-source
+ scan the source packages in the existing pool and load contents into
+ the src_contents table
+
+ scan-binary
+ scan the (u)debs in the existing pool and load contents into the
+ bin_contents table
OPTIONS
-h, --help
-f, --force
write Contents files for suites marked as untouchable, too
-OPTIONS for scan
+OPTIONS for scan-source and scan-binary
-l, --limit=NUMBER
maximum number of packages to scan
"""
################################################################################
-def scan_all(cnf, limit):
- Logger = daklog.Logger(cnf.Cnf, 'contents scan')
- result = ContentsScanner.scan_all(limit)
+def binary_scan_all(cnf, limit):
+ Logger = daklog.Logger(cnf.Cnf, 'contents scan-binary')
+ result = BinaryContentsScanner.scan_all(limit)
+ processed = '%(processed)d packages processed' % result
+ remaining = '%(remaining)d packages remaining' % result
+ Logger.log([processed, remaining])
+ Logger.close()
+
+################################################################################
+
+def source_scan_all(cnf, limit):
+ Logger = daklog.Logger(cnf.Cnf, 'contents scan-source')
+ result = SourceContentsScanner.scan_all(limit)
processed = '%(processed)d packages processed' % result
remaining = '%(remaining)d packages remaining' % result
Logger.log([processed, remaining])
if len(options['Limit']) > 0:
limit = int(options['Limit'])
- if args[0] == 'scan':
- scan_all(cnf, limit)
+ if args[0] == 'scan-source':
+ source_scan_all(cnf, limit)
+ return
+
+ if args[0] == 'scan-binary':
+ binary_scan_all(cnf, limit)
return
suite_names = utils.split_args(options['Suite'])
"Generate Packages/Sources files"),
("contents",
"Generate content files"),
+ ("metadata",
+ "Load data for packages/sources files"),
("generate-index-diffs",
"Generate .diff/Index files"),
("clean-suites",
--- /dev/null
+#!/usr/bin/env python
+# coding=utf8
+
+"""
+Permission fixups
+
+@contact: Debian FTP Master <ftpmaster@debian.org>
+@copyright: 2011 Mark Hymers <mhy@debian.org>
+@license: GNU General Public License version 2 or later
+"""
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+import psycopg2
+from daklib.dak_exceptions import DBUpdateError
+from socket import gethostname
+
+################################################################################
+def do_update(self):
+ """
+ Fix up permissions
+ """
+ print __doc__
+ try:
+ c = self.db.cursor()
+
+ c.execute("GRANT SELECT, UPDATE, INSERT ON binaries_metadata TO ftpmaster")
+ c.execute("GRANT SELECT ON binaries_metadata TO public")
+ c.execute("GRANT USAGE ON metadata_keys_key_id_seq TO ftpmaster")
+ c.execute("GRANT SELECT, UPDATE, INSERT ON source_metadata TO ftpmaster")
+ c.execute("GRANT SELECT ON source_metadata TO public")
+ c.execute("GRANT SELECT, UPDATE, INSERT ON metadata_keys TO ftpmaster")
+ c.execute("GRANT SELECT ON metadata_keys TO public")
+ c.execute("GRANT SELECT, UPDATE, INSERT ON extra_src_references TO ftpmaster")
+ c.execute("GRANT SELECT ON extra_src_references TO public")
+ c.execute("GRANT SELECT, UPDATE, INSERT ON src_contents TO ftpmaster")
+ c.execute("GRANT SELECT ON src_contents TO public")
+ c.execute("GRANT USAGE ON changelogs_text_id_seq TO ftpmaster")
+ c.execute("GRANT SELECT ON changes_pending_files_map TO public")
+ c.execute("GRANT SELECT ON config TO public")
+
+ c.execute("UPDATE config SET value = '49' WHERE name = 'db_revision'")
+ self.db.commit()
+
+ except psycopg2.ProgrammingError, msg:
+ self.db.rollback()
+ raise DBUpdateError, 'Unable to apply sick update 49, rollback issued. Error message : %s' % (str(msg))
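A minimal standalone sketch for exercising this update; the wrapper class and DSN are hypothetical stand-ins for dak's own update-db machinery, which normally supplies self.db:

    import psycopg2

    class Updater(object):
        def __init__(self, dsn):
            self.db = psycopg2.connect(dsn)

    do_update(Updater('dbname=projectb'))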
from daklib import utils
import apt_pkg, os, stat, sys
-def fetch(query, args, session):
- return [path + filename for (path, filename) in \
- session.execute(query, args).fetchall()]
-
-def getSources(suite, component, session, timestamp):
- extra_cond = ""
- if timestamp:
- extra_cond = "AND extract(epoch from sa.created) > %d" % timestamp
- query = """
- SELECT l.path, f.filename
- FROM source s
- JOIN src_associations sa
- ON s.id = sa.source AND sa.suite = :suite %s
- JOIN files f
- ON s.file = f.id
- JOIN location l
- ON f.location = l.id AND l.component = :component
- ORDER BY filename
- """ % extra_cond
- args = { 'suite': suite.suite_id,
- 'component': component.component_id }
- return fetch(query, args, session)
-
-def getBinaries(suite, component, architecture, type, session, timestamp):
- extra_cond = ""
- if timestamp:
- extra_cond = "AND extract(epoch from ba.created) > %d" % timestamp
- query = """
-CREATE TEMP TABLE b_candidates (
- source integer,
- file integer,
- architecture integer);
-
-INSERT INTO b_candidates (source, file, architecture)
- SELECT b.source, b.file, b.architecture
- FROM binaries b
- JOIN bin_associations ba ON b.id = ba.bin
- WHERE b.type = :type AND ba.suite = :suite AND
- b.architecture IN (2, :architecture) %s;
-
-CREATE TEMP TABLE gf_candidates (
- filename text,
- path text,
- architecture integer,
- src integer,
- source text);
-
-INSERT INTO gf_candidates (filename, path, architecture, src, source)
- SELECT f.filename, l.path, bc.architecture, bc.source as src, s.source
- FROM b_candidates bc
- JOIN source s ON bc.source = s.id
- JOIN files f ON bc.file = f.id
- JOIN location l ON f.location = l.id
- WHERE l.component = :component;
-
-WITH arch_any AS
-
- (SELECT path, filename FROM gf_candidates
- WHERE architecture > 2),
-
- arch_all_with_any AS
- (SELECT path, filename FROM gf_candidates
- WHERE architecture = 2 AND
- src IN (SELECT src FROM gf_candidates WHERE architecture > 2)),
-
- arch_all_without_any AS
- (SELECT path, filename FROM gf_candidates
- WHERE architecture = 2 AND
- source NOT IN (SELECT DISTINCT source FROM gf_candidates WHERE architecture > 2)),
-
- filelist AS
- (SELECT * FROM arch_any
- UNION
- SELECT * FROM arch_all_with_any
- UNION
- SELECT * FROM arch_all_without_any)
-
- SELECT * FROM filelist ORDER BY filename
- """ % extra_cond
- args = { 'suite': suite.suite_id,
- 'component': component.component_id,
- 'architecture': architecture.arch_id,
- 'type': type }
- return fetch(query, args, session)
+from daklib.lists import getSources, getBinaries
def listPath(suite, component, architecture = None, type = None,
incremental_mode = False):
(file, timestamp) = listPath(suite, component,
incremental_mode = incremental_mode)
session = DBConn().session()
- for filename in getSources(suite, component, session, timestamp):
+ for _, filename in getSources(suite, component, session, timestamp):
file.write(filename + '\n')
session.close()
file.close()
(file, timestamp) = listPath(suite, component, architecture, type,
incremental_mode)
session = DBConn().session()
- for filename in getBinaries(suite, component, architecture, type,
+ for _, filename in getBinaries(suite, component, architecture, type,
session, timestamp):
file.write(filename + '\n')
session.close()
--- /dev/null
+#!/usr/bin/env python
+"""
+Import data for Package/Sources files from .deb and .dsc files
+@copyright: 2011 Torsten Werner <twerner@debian.org>
+@copyright: 2011 Mark Hymers <mhy@debian.org>
+@license: GNU General Public License version 2 or later
+"""
+
+################################################################################
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+# < mvo> that screams for consolidation in libapt at least (that then in turn can
+# use libdpkg ... ) - I guess the "d" means delayed ;)
+
+# (whilst discussing adding xz support to dak, and therefore python-apt, and
+# therefore libapt-pkg)
+
+################################################################################
+
+import sys
+import apt_pkg
+
+from daklib.config import Config
+from daklib.dbconn import *
+from daklib.metadata import MetadataScanner
+from daklib import daklog
+from daklib import utils
+
+################################################################################
+
+def usage (exit_code=0):
+ print """Usage: dak metadata [options] subcommand
+
+SUBCOMMANDS
+ scan-source
+ scan the dsc files in the existing pool and load metadata into the database
+
+ scan-binary
+ scan the deb files in the existing pool and load metadata into the database
+
+OPTIONS
+ -h, --help
+ show this help and exit
+
+OPTIONS for scan-source and scan-binary
+ -l, --limit=NUMBER
+ maximum number of items to scan
+"""
+ sys.exit(exit_code)
+
+################################################################################
+
+def scan_all(cnf, mode, limit):
+ Logger = daklog.Logger(cnf.Cnf, 'metadata scan (%s)' % mode)
+ result = MetadataScanner.scan_all(mode, limit)
+ processed = '%(processed)d %(type)s processed' % result
+ remaining = '%(remaining)d %(type)s remaining' % result
+ Logger.log([processed, remaining])
+ Logger.close()
+
+################################################################################
+
+def main():
+ cnf = Config()
+ cnf['Metadata::Options::Help'] = ''
+ cnf['Metadata::Options::Suite'] = ''
+ cnf['Metadata::Options::Limit'] = ''
+ cnf['Metadata::Options::Force'] = ''
+ arguments = [('h', "help", 'Metadata::Options::Help'),
+ ('s', "suite", 'Metadata::Options::Suite', "HasArg"),
+ ('l', "limit", 'Metadata::Options::Limit', "HasArg"),
+ ('f', "force", 'Metadata::Options::Force'),
+ ]
+ args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments, sys.argv)
+ options = cnf.SubTree('Metadata::Options')
+
+ if (len(args) != 1) or options['Help']:
+ usage()
+
+ limit = None
+ if len(options['Limit']) > 0:
+ limit = int(options['Limit'])
+
+ if args[0] == 'scan-source':
+ scan_all(cnf, 'source', limit)
+ return
+ elif args[0] == 'scan-binary':
+ scan_all(cnf, 'binary', limit)
+ return
+
+ suite_names = utils.split_args(options['Suite'])
+
+ force = bool(options['Force'])
+
+ if args[0] == 'generate':
+ raise NotImplementedError
+
+ usage()
+
+
+if __name__ == '__main__':
+ main()
################################################################################
Cnf = None
-required_database_schema = 48
+required_database_schema = 49
################################################################################
gzip.stdin.close()
output_file.close()
gzip.wait()
- try:
- os.remove(final_filename)
- except:
- pass
+ os.chmod(temp_filename, 0664)
os.rename(temp_filename, final_filename)
- os.chmod(final_filename, 0664)
@classmethod
def log_result(class_, result):
return log_message
-class ContentsScanner(object):
+class BinaryContentsScanner(object):
'''
- ContentsScanner provides a threadsafe method scan() to scan the contents of
- a DBBinary object.
+ BinaryContentsScanner provides a threadsafe method scan() to scan the
+ contents of a DBBinary object.
'''
def __init__(self, binary_id):
'''
processed = query.count()
pool = Pool()
for binary in query.yield_per(100):
- pool.apply_async(scan_helper, (binary.binary_id, ))
+ pool.apply_async(binary_scan_helper, (binary.binary_id, ))
pool.close()
pool.join()
remaining = remaining()
session.close()
return { 'processed': processed, 'remaining': remaining }
-def scan_helper(binary_id):
+def binary_scan_helper(binary_id):
'''
This function runs in a subprocess.
'''
- scanner = ContentsScanner(binary_id)
+ scanner = BinaryContentsScanner(binary_id)
scanner.scan()
Enforce cleanup.
'''
self.cleanup()
+
+
+class SourceContentsScanner(object):
+ '''
+ SourceContentsScanner provides a method scan() to scan the contents of a
+ DBSource object.
+ '''
+ def __init__(self, source_id):
+ '''
+ The argument source_id is the id of the DBSource object that
+ should be scanned.
+ '''
+ self.source_id = source_id
+
+ def scan(self):
+ '''
+ This method does the actual scan and fills in the associated SrcContents
+ property. It commits any changes to the database.
+ '''
+ session = DBConn().session()
+ source = session.query(DBSource).get(self.source_id)
+ fileset = set(source.scan_contents())
+ for filename in fileset:
+ source.contents.append(SrcContents(file = filename))
+ session.commit()
+ session.close()
+
+ @classmethod
+ def scan_all(class_, limit = None):
+ '''
+ The class method scan_all() scans all sources using multiple processes.
+ The number of sources to be scanned can be limited with the limit
+ argument. Returns the number of processed and remaining packages as a
+ dict.
+ '''
+ session = DBConn().session()
+ query = session.query(DBSource).filter(DBSource.contents == None)
+ # grab the bound count() method now; it is called again after the
+ # scan so that 'remaining' reflects the post-scan state
+ remaining = query.count
+ if limit is not None:
+ query = query.limit(limit)
+ processed = query.count()
+ pool = Pool()
+ for source in query.yield_per(100):
+ pool.apply_async(source_scan_helper, (source.source_id, ))
+ pool.close()
+ pool.join()
+ remaining = remaining()
+ session.close()
+ return { 'processed': processed, 'remaining': remaining }
+
+def source_scan_helper(source_id):
+ '''
+ This function runs in a subprocess.
+ '''
+ try:
+ scanner = SourceContentsScanner(source_id)
+ scanner.scan()
+ except Exception, e:
+ print e
+
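A minimal usage sketch for the new scanner, assuming a configured database and a populated pool:

    from daklib.contents import SourceContentsScanner

    result = SourceContentsScanner.scan_all(limit=100)
    print '%(processed)d processed, %(remaining)d remaining' % result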
# list
value = len(value)
elif hasattr(value, 'count'):
- # query
+ # query (but not during validation)
+ if self.in_validation:
+ continue
value = value.count()
else:
raise KeyError('Do not understand property %s.' % property)
validation_message = \
"Validation failed because property '%s' must not be empty in object\n%s"
+ in_validation = False
+
def validate(self):
'''
This function validates the not NULL constraints as returned by
getattr(self, property + '_id') is not None:
continue
if not hasattr(self, property) or getattr(self, property) is None:
- raise DBUpdateError(self.validation_message % \
- (property, str(self)))
+ # str() might lead to races due to a 2nd flush
+ self.in_validation = True
+ message = self.validation_message % (property, str(self))
+ self.in_validation = False
+ raise DBUpdateError(message)
@classmethod
@session_wrapper
self.poolfile = poolfile
self.binarytype = binarytype
+ @property
+ def pkid(self):
+ return self.binary_id
+
def properties(self):
return ['package', 'version', 'maintainer', 'source', 'architecture', \
'poolfile', 'binarytype', 'fingerprint', 'install_date', \
dpkg.stdout.close()
dpkg.wait()
+ def read_control(self):
+ '''
+ Reads the control information from a binary.
+
+ @rtype: text
+ @return: stanza text of the control section.
+ '''
+ import apt_inst
+ fullpath = self.poolfile.fullpath
+ deb_file = open(fullpath, 'r')
+ stanza = apt_inst.debExtractControl(deb_file)
+ deb_file.close()
+
+ return stanza
+
+ def read_control_fields(self):
+ '''
+ Reads the control information from a binary and return
+ as a dictionary.
+
+ @rtype: dict
+ @return: fields of the control section as a dictionary.
+ '''
+ import apt_pkg
+ stanza = self.read_control()
+ return apt_pkg.TagSection(stanza)
+
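A short sketch of the new accessors, assuming an open session and at least one binary in the pool; apt_pkg.TagSection supports dictionary-style access:

    session = DBConn().session()
    binary = session.query(DBBinary).first()
    fields = binary.read_control_fields()
    print fields['Package'], fields['Version']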
__all__.append('DBBinary')
@session_wrapper
################################################################################
+class SrcContents(ORMObject):
+ def __init__(self, file = None, source = None):
+ self.file = file
+ self.source = source
+
+ def properties(self):
+ return ['file', 'source']
+
+__all__.append('SrcContents')
+
+################################################################################
+
+from debian.deb822 import Deb822
+
+# Temporary Deb822 subclass to fix bugs with : handling; see #597249
+class Dak822(Deb822):
+ def _internal_parser(self, sequence, fields=None):
+ # The key is non-whitespace, non-colon characters before any colon.
+ key_part = r"^(?P<key>[^: \t\n\r\f\v]+)\s*:\s*"
+ single = re.compile(key_part + r"(?P<data>\S.*?)\s*$")
+ multi = re.compile(key_part + r"$")
+ multidata = re.compile(r"^\s(?P<data>.+?)\s*$")
+
+ wanted_field = lambda f: fields is None or f in fields
+
+ if isinstance(sequence, basestring):
+ sequence = sequence.splitlines()
+
+ curkey = None
+ content = ""
+ for line in self.gpg_stripped_paragraph(sequence):
+ m = single.match(line)
+ if m:
+ if curkey:
+ self[curkey] = content
+
+ if not wanted_field(m.group('key')):
+ curkey = None
+ continue
+
+ curkey = m.group('key')
+ content = m.group('data')
+ continue
+
+ m = multi.match(line)
+ if m:
+ if curkey:
+ self[curkey] = content
+
+ if not wanted_field(m.group('key')):
+ curkey = None
+ continue
+
+ curkey = m.group('key')
+ content = ""
+ continue
+
+ m = multidata.match(line)
+ if m:
+ content += '\n' + line # XXX not m.group('data')?
+ continue
+
+ if curkey:
+ self[curkey] = content
+
+
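A minimal sketch of parsing a .dsc with Dak822 (the filename is hypothetical); it behaves like Deb822 apart from the colon handling fixed for #597249:

    fields = Dak822(open('hello_2.2-1.dsc', 'r'))
    print fields['Source'], fields['Version']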
class DBSource(ORMObject):
def __init__(self, source = None, version = None, maintainer = None, \
changedby = None, poolfile = None, install_date = None):
self.poolfile = poolfile
self.install_date = install_date
+ @property
+ def pkid(self):
+ return self.source_id
+
def properties(self):
return ['source', 'source_id', 'maintainer', 'changedby', \
'fingerprint', 'poolfile', 'version', 'suites_count', \
return ['source', 'version', 'install_date', 'maintainer', \
'changedby', 'poolfile', 'install_date']
+ def read_control_fields(self):
+ '''
+ Reads the control information from a dsc
+
+ @rtype: Dak822
+ @return: fields of the dsc as a dictionary-like Dak822 object
+ '''
+ fullpath = self.poolfile.fullpath
+ fields = Dak822(open(fullpath, 'r'))
+ return fields
+
metadata = association_proxy('key', 'value')
+ def scan_contents(self):
+ '''
+ Returns a set of names for non directories. The path names are
+ normalized after converting them from either utf-8 or iso8859-1
+ encoding.
+ '''
+ fullpath = self.poolfile.fullpath
+ from daklib.contents import UnpackedSource
+ unpacked = UnpackedSource(fullpath)
+ fileset = set()
+ for name in unpacked.get_all_filenames():
+ # enforce proper utf-8 encoding
+ try:
+ name.decode('utf-8')
+ except UnicodeDecodeError:
+ name = name.decode('iso8859-1').encode('utf-8')
+ fileset.add(name)
+ return fileset
+
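And a sketch of scan_contents() on a source already in the database (package name hypothetical, session assumed open):

    source = session.query(DBSource).filter_by(source='hello').first()
    for name in sorted(source.scan_contents()):
        print name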
__all__.append('DBSource')
@session_wrapper
__all__.append('get_source_in_suite')
+@session_wrapper
+def import_metadata_into_db(obj, session=None):
+ """
+ This routine works on either DBBinary or DBSource objects and imports
+ their metadata into the database
+ """
+ fields = obj.read_control_fields()
+ for k in fields.keys():
+ try:
+ # Try raw ASCII
+ val = str(fields[k])
+ except UnicodeEncodeError:
+ # Fall back to UTF-8
+ try:
+ val = fields[k].encode('utf-8')
+ except UnicodeEncodeError:
+ # Finally try iso8859-1
+ val = fields[k].encode('iso8859-1')
+ # Otherwise we allow the exception to percolate up and we cause
+ # a reject as someone is playing silly buggers
+
+ obj.metadata[get_or_set_metadatakey(k, session)] = val
+
+ session.commit_or_flush()
+
+__all__.append('import_metadata_into_db')
+
+
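A minimal sketch tying the import to the metadata association proxy, assuming an open session and a control stanza that carries a Description field:

    binary = session.query(DBBinary).first()
    import_metadata_into_db(binary, session)
    key = get_or_set_metadatakey('Description', session)
    print binary.metadata[key]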
################################################################################
@session_wrapper
# session.rollback()
# raise MissingContents, "No contents stored for package %s, and couldn't determine contents of %s" % (bin.package, filename)
- return poolfile
+ return bin, poolfile
__all__.append('add_deb_to_db')
'overrides_count']
def not_null_constraints(self):
- return ['suite_name', 'version']
+ return ['suite_name']
def __eq__(self, val):
if isinstance(val, str):
__all__.append('MetadataKey')
+@session_wrapper
+def get_or_set_metadatakey(keyname, session=None):
+ """
+ Returns MetadataKey object for given keyname.
+
+ If no matching keyname is found, a row is inserted.
+
+ @type keyname: string
+ @param keyname: The keyname to add
+
+ @type session: SQLAlchemy
+ @param session: Optional SQL session object (a temporary one will be
+ generated if not supplied). If not passed, a commit will be performed at
+ the end of the function, otherwise the caller is responsible for committing.
+
+ @rtype: MetadataKey
+ @return: the metadatakey object for the given keyname
+ """
+
+ q = session.query(MetadataKey).filter_by(key=keyname)
+
+ try:
+ ret = q.one()
+ except NoResultFound:
+ ret = MetadataKey(keyname)
+ session.add(ret)
+ session.commit_or_flush()
+
+ return ret
+
+__all__.append('get_or_set_metadatakey')
+
################################################################################
class BinaryMetadata(ORMObject):
'source_acl',
'source_metadata',
'src_associations',
+ 'src_contents',
'src_format',
'src_uploaders',
'suite',
backref=backref('contents', lazy='dynamic', cascade='all')),
file = self.tbl_bin_contents.c.file))
+ mapper(SrcContents, self.tbl_src_contents,
+ properties = dict(
+ source = relation(DBSource,
+ backref=backref('contents', lazy='dynamic', cascade='all')),
+ file = self.tbl_src_contents.c.file))
+
mapper(MetadataKey, self.tbl_metadata_keys,
properties = dict(
key_id = self.tbl_metadata_keys.c.key_id,
--- /dev/null
+#!/usr/bin/python
+
+"""
+Helper functions for list generating commands (Packages, Sources).
+
+@contact: Debian FTP Master <ftpmaster@debian.org>
+@copyright: 2009-2011 Torsten Werner <twerner@debian.org>
+@license: GNU General Public License version 2 or later
+"""
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+def fetch(query, args, session):
+ for (id, path, filename) in session.execute(query, args).fetchall():
+ yield (id, path + filename)
+
+def getSources(suite, component, session, timestamp = None):
+ '''
+ Calculates the sources in suite and component optionally limited by
+ sources newer than timestamp. Returns a generator that yields a
+ tuple of source id and full pathname to the dsc file. See function
+ writeSourceList() in dak/generate_filelist.py for an example that
+ uses this function.
+ '''
+ extra_cond = ""
+ if timestamp:
+ extra_cond = "AND extract(epoch from sa.created) > %d" % timestamp
+ query = """
+ SELECT s.id, l.path, f.filename
+ FROM source s
+ JOIN src_associations sa
+ ON s.id = sa.source AND sa.suite = :suite %s
+ JOIN files f
+ ON s.file = f.id
+ JOIN location l
+ ON f.location = l.id AND l.component = :component
+ ORDER BY filename
+ """ % extra_cond
+ args = { 'suite': suite.suite_id,
+ 'component': component.component_id }
+ return fetch(query, args, session)
+
+def getBinaries(suite, component, architecture, type, session, timestamp = None):
+ '''
+ Calculates the binaries in suite and component of architecture and
+ type 'deb' or 'udeb' optionally limited to binaries newer than
+ timestamp. Returns a generator that yields a tuple of binary id and
+ full pathname to the (u)deb file. See function writeBinaryList() in
+ dak/generate_filelist.py for an example that uses this function.
+ '''
+ extra_cond = ""
+ if timestamp:
+ extra_cond = "AND extract(epoch from ba.created) > %d" % timestamp
+ query = """
+CREATE TEMP TABLE b_candidates (
+ id integer,
+ source integer,
+ file integer,
+ architecture integer);
+
+INSERT INTO b_candidates (id, source, file, architecture)
+ SELECT b.id, b.source, b.file, b.architecture
+ FROM binaries b
+ JOIN bin_associations ba ON b.id = ba.bin
+ WHERE b.type = :type AND ba.suite = :suite AND
+ b.architecture IN (2, :architecture) %s;
+
+CREATE TEMP TABLE gf_candidates (
+ id integer,
+ filename text,
+ path text,
+ architecture integer,
+ src integer,
+ source text);
+
+INSERT INTO gf_candidates (id, filename, path, architecture, src, source)
+ SELECT bc.id, f.filename, l.path, bc.architecture, bc.source as src, s.source
+ FROM b_candidates bc
+ JOIN source s ON bc.source = s.id
+ JOIN files f ON bc.file = f.id
+ JOIN location l ON f.location = l.id
+ WHERE l.component = :component;
+
+WITH arch_any AS
+
+ (SELECT id, path, filename FROM gf_candidates
+ WHERE architecture > 2),
+
+ arch_all_with_any AS
+ (SELECT id, path, filename FROM gf_candidates
+ WHERE architecture = 2 AND
+ src IN (SELECT src FROM gf_candidates WHERE architecture > 2)),
+
+ arch_all_without_any AS
+ (SELECT id, path, filename FROM gf_candidates
+ WHERE architecture = 2 AND
+ source NOT IN (SELECT DISTINCT source FROM gf_candidates WHERE architecture > 2)),
+
+ filelist AS
+ (SELECT * FROM arch_any
+ UNION
+ SELECT * FROM arch_all_with_any
+ UNION
+ SELECT * FROM arch_all_without_any)
+
+ SELECT * FROM filelist ORDER BY filename
+ """ % extra_cond
+ args = { 'suite': suite.suite_id,
+ 'component': component.component_id,
+ 'architecture': architecture.arch_id,
+ 'type': type }
+ return fetch(query, args, session)
+
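A minimal usage sketch, assuming the dbconn helpers get_suite() and get_component() resolve the ORM objects:

    from daklib.dbconn import DBConn, get_suite, get_component

    session = DBConn().session()
    suite = get_suite('unstable', session)
    component = get_component('main', session)
    for source_id, pathname in getSources(suite, component, session):
        print source_id, pathname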
--- /dev/null
+#!/usr/bin/env python
+"""
+Helper code for packages and sources generation.
+
+@contact: Debian FTPMaster <ftpmaster@debian.org>
+@copyright: 2011 Torsten Werner <twerner@debian.org>
+@copyright: 2011 Mark Hymers <mhy@debian.org>
+@license: GNU General Public License version 2 or later
+"""
+
+################################################################################
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+from daklib.dbconn import *
+from daklib.config import Config
+
+from multiprocessing import Pool
+from subprocess import Popen, PIPE
+
+import os.path
+
+class MetadataScanner(object):
+ '''
+ MetadataScanner provides a threadsafe method scan() to scan the metadata
+ of a DBSource or DBBinary object, depending on what is passed as dbclass.
+ '''
+
+ def __init__(self, dbclass, pkid, verbose=True):
+ '''
+ The argument pkid is the id of the DBSource or DBBinary object
+ that should be scanned.
+ '''
+ self.verbose = verbose
+ self.dbclass = dbclass
+ self.pkid = pkid
+
+ def scan(self, dummy_arg = None):
+ '''
+ This method does the actual scan and fills in the associated metadata
+ property. It commits any changes to the database. The argument dummy_arg
+ is ignored but needed by our threadpool implementation.
+ '''
+ obj = None
+ fullpath = 'UNKNOWN PATH'
+
+ session = DBConn().session()
+ try:
+ obj = session.query(self.dbclass).get(self.pkid)
+ fullpath = obj.poolfile.fullpath
+ import_metadata_into_db(obj, session=session)
+ if self.verbose:
+ print "Imported %s (%s)" % (self.pkid, fullpath)
+ session.commit()
+ except Exception, e:
+ print "Failed to import %s [id=%s; fullpath=%s]" % (self.dbclass.__name__, self.pkid, fullpath)
+ print "Exception: ", e
+ session.rollback()
+
+ session.close()
+
+ @classmethod
+ def scan_all(class_, scantype='source', limit = None):
+ '''
+ The class method scan_all() scans all sources or binaries using
+ multiple processes. The number of items to be scanned can be
+ limited with the limit
+ argument. Returns the number of processed and remaining files as a
+ dict.
+ '''
+ session = DBConn().session()
+ if scantype == 'source':
+ dbclass = DBSource
+ query = session.query(DBSource).filter(~DBSource.source_id.in_(session.query(SourceMetadata.source_id.distinct())))
+ t = 'sources'
+ else:
+ # Otherwise binary
+ dbclass = DBBinary
+ query = session.query(DBBinary).filter(~DBBinary.binary_id.in_(session.query(BinaryMetadata.binary_id.distinct())))
+ t = 'binaries'
+
+ # grab the bound count() method now; it is called again after the
+ # scan so that 'remaining' reflects the post-scan state
+ remaining = query.count
+ if limit is not None:
+ query = query.limit(limit)
+ processed = query.count()
+ pool = Pool(processes=10)
+ for obj in query.yield_per(100):
+ pool.apply_async(scan_helper, (dbclass, obj.pkid, ))
+ pool.close()
+ pool.join()
+ remaining = remaining()
+ session.close()
+ return { 'processed': processed, 'remaining': remaining, 'type': t }
+
+def scan_helper(dbclass, pkid):
+ '''
+ This function runs in a subprocess.
+ '''
+ scanner = MetadataScanner(dbclass, pkid)
+ scanner.scan()
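A minimal sketch mirroring what 'dak metadata scan-binary' does under the hood:

    result = MetadataScanner.scan_all('binary', limit=50)
    print '%(processed)d %(type)s processed' % result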
print "Installing."
self.logger.log(["installing changes", self.pkg.changes_file])
+ binaries = []
poolfiles = []
# Add the .dsc file to the DB first
# Add .deb / .udeb files to the DB (type is always deb, dbtype is udeb/deb)
for newfile, entry in self.pkg.files.items():
if entry["type"] == "deb":
- poolfiles.append(add_deb_to_db(self, newfile, session))
+ b, pf = add_deb_to_db(self, newfile, session)
+ binaries.append(b)
+ poolfiles.append(pf)
# If this is a sourceful diff only upload that is moving
# cross-component we need to copy the .orig files into the new
# Our SQL session will automatically start a new transaction after
# the last commit
+ # Now ensure that the metadata has been added
+ # This has to be done after we copy the files into the pool
+ # For source if we have it:
+ if self.pkg.changes["architecture"].has_key("source"):
+ import_metadata_into_db(source, session)
+
+ # Now for any of our binaries
+ for b in binaries:
+ import_metadata_into_db(b, session)
+
+ session.commit()
+
# Move the .changes into the 'done' directory
utils.move(self.pkg.changes_file,
os.path.join(cnf["Dir::Queue::Done"], os.path.basename(self.pkg.changes_file)))
<mhy> I wish they wouldnt leave biscuits out, thats just tempting. Damnit.
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-< mvo> that screams for consolidation in libapt at least (that then in turn can use libdpkg … ) - I guess the "d" means delayed ;)
-
-(whilst discussing adding xz support to dak, and therefore python-apt, and
-therefore libapt-pkg)
-
from db_test import DBDakTestCase, fixture
from daklib.dbconn import *
-from daklib.contents import ContentsWriter, ContentsScanner, UnpackedSource
+from daklib.contents import ContentsWriter, BinaryContentsScanner, \
+ UnpackedSource, SourceContentsScanner
from os.path import normpath
from sqlalchemy.exc import FlushError, IntegrityError
self.session.delete(self.binary['hello_2.2-1_i386'])
self.session.commit()
- def test_scan_contents(self):
+ def test_binary_scan_contents(self):
+ '''
+ Tests the BinaryContentsScanner.
+ '''
self.setup_binaries()
filelist = [f for f in self.binary['hello_2.2-1_i386'].scan_contents()]
self.assertEqual(['usr/bin/hello', 'usr/share/doc/hello/copyright'],
filelist)
self.session.commit()
- ContentsScanner(self.binary['hello_2.2-1_i386'].binary_id).scan()
+ BinaryContentsScanner(self.binary['hello_2.2-1_i386'].binary_id).scan()
bin_contents_list = self.binary['hello_2.2-1_i386'].contents.order_by('file').all()
self.assertEqual(2, len(bin_contents_list))
self.assertEqual('usr/bin/hello', bin_contents_list[0].file)
def test_unpack(self):
'''
- Tests the UnpackedSource class.
+ Tests the UnpackedSource class and the SourceContentsScanner.
'''
- self.setup_poolfiles()
- dscfilename = fixture('ftp/pool/' + self.file['hello_2.2-1.dsc'].filename)
+ self.setup_sources()
+ source = self.source['hello_2.2-1']
+ dscfilename = fixture('ftp/pool/' + source.poolfile.filename)
unpacked = UnpackedSource(dscfilename)
self.assertTrue(len(unpacked.get_root_directory()) > 0)
self.assertEqual('hello (2.2-1) unstable; urgency=low\n',
all_filenames = set(unpacked.get_all_filenames())
self.assertEqual(8, len(all_filenames))
self.assertTrue('debian/rules' in all_filenames)
+ # method scan_contents()
+ self.assertEqual(all_filenames, source.scan_contents())
+ # exception with invalid files
self.assertRaises(CalledProcessError, lambda: UnpackedSource('invalidname'))
+ # SourceContentsScanner
+ self.session.commit()
+ self.assertTrue(source.contents.count() == 0)
+ SourceContentsScanner(source.source_id).scan()
+ self.assertTrue(source.contents.count() > 0)
def classes_to_clean(self):
return [Override, Suite, BinContents, DBBinary, DBSource, Architecture, Section, \
from db_test import DBDakTestCase
from daklib.dbconn import Architecture, Suite
+from daklib.dak_exceptions import DBUpdateError
try:
# python >= 2.6
architecture.suites = [sid, squeeze]
self.assertTrue(re.search('"suites_count": 2', str(architecture)))
+ def test_validation(self):
+ suite = Suite()
+ self.session.add(suite)
+ self.assertRaises(DBUpdateError, self.session.flush)
+
if __name__ == '__main__':
unittest.main()
'sha1sum': 'deadbeef',
'sha256sum': 'deadbeef'}
upload = Upload(pkg)
- poolfile = add_deb_to_db(upload, 'hello_2.2-2_i386.deb', self.session)
+ bin, poolfile = add_deb_to_db(upload, 'hello_2.2-2_i386.deb', self.session)
self.session.refresh(poolfile)
self.session.refresh(poolfile.binary)
self.assertEqual('main/h/hello/hello_2.2-2_i386.deb', poolfile.filename)