From: Torsten Werner Date: Thu, 24 Mar 2011 08:36:45 +0000 (+0100) Subject: Merge branch 'pkgsrc' of ftp-master.debian.org:public_html/dak into pkgsrc X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=12440bbf9173956e70800c86e6e773f067f7e2ab;hp=4689163b3fcfaf6ec71bcf321e4245ace8d6879a;p=dak.git Merge branch 'pkgsrc' of ftp-master.debian.org:public_html/dak into pkgsrc --- diff --git a/config/backports/dak.conf b/config/backports/dak.conf index 16745748..d97c52ba 100644 --- a/config/backports/dak.conf +++ b/config/backports/dak.conf @@ -243,9 +243,13 @@ Dir DB { - Name "backports"; - Host ""; - Port -1; + Service "backports"; + // PoolSize should be at least ThreadCount + 1 + PoolSize 5; + // MaxOverflow shouldn't exceed postgresql.conf's max_connections - PoolSize + MaxOverflow 13; + // should be false for encoding == SQL_ASCII + Unicode "false" }; SuiteMappings diff --git a/config/backports/dinstall.functions b/config/backports/dinstall.functions index 27854f8b..0dfe19ec 100644 --- a/config/backports/dinstall.functions +++ b/config/backports/dinstall.functions @@ -322,7 +322,7 @@ function mkfilesindices() { ARCHLIST=$(tempfile) - log "Querying $PGDATABASE..." + log "Querying postgres..." echo 'SELECT l.path, f.filename, a.arch_string FROM location l JOIN files f ON (f.location = l.id) LEFT OUTER JOIN (binaries b JOIN architecture a ON (b.architecture = a.id)) ON (f.id = b.file)' | psql -At | sed 's/|//;s,^/srv/ftp-master.debian.org/ftp,.,' | sort >$ARCHLIST includedirs () { diff --git a/config/debian-security/dak.conf b/config/debian-security/dak.conf index 58f77c11..b03e0f89 100644 --- a/config/debian-security/dak.conf +++ b/config/debian-security/dak.conf @@ -262,10 +262,13 @@ Dir DB { - Name "obscurity"; - Host ""; - Port -1; - + Service "obscurity"; + // PoolSize should be at least ThreadCount + 1 + PoolSize 5; + // MaxOverflow shouldn't exceed postgresql.conf's max_connections - PoolSize + MaxOverflow 13; + // should be false for encoding == SQL_ASCII + Unicode "false" }; Architectures diff --git a/config/debian/dinstall.functions b/config/debian/dinstall.functions index e4de479e..cac7c7c5 100644 --- a/config/debian/dinstall.functions +++ b/config/debian/dinstall.functions @@ -295,7 +295,7 @@ function mkfilesindices() { ARCHLIST=$(tempfile) - log "Querying $PGDATABASE..." 
+ log "Querying postgres" echo 'SELECT l.path, f.filename, a.arch_string FROM location l JOIN files f ON (f.location = l.id) LEFT OUTER JOIN (binaries b JOIN architecture a ON (b.architecture = a.id)) ON (f.id = b.file)' | psql -At | sed 's/|//;s,^/srv/ftp-master.debian.org/ftp,.,' | sort >$ARCHLIST includedirs () { diff --git a/dak/dak.py b/dak/dak.py index 5a659d8c..ad99a5a0 100755 --- a/dak/dak.py +++ b/dak/dak.py @@ -86,6 +86,8 @@ def init(): "Generate Packages/Sources files"), ("contents", "Generate content files"), + ("metadata", + "Load data for packages/sources files"), ("generate-index-diffs", "Generate .diff/Index files"), ("clean-suites", diff --git a/dak/dakdb/update48.py b/dak/dakdb/update48.py new file mode 100755 index 00000000..67ea8c5b --- /dev/null +++ b/dak/dakdb/update48.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# coding=utf8 + +""" +Suite.version can be null + +@contact: Debian FTP Master +@copyright: 2011 Joerg Jaspert +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +import psycopg2 +from daklib.dak_exceptions import DBUpdateError +from socket import gethostname; + +################################################################################ +def do_update(self): + """ + Add table for source contents. + """ + print __doc__ + try: + c = self.db.cursor() + + c.execute("ALTER TABLE suite ALTER COLUMN version DROP NOT NULL") + c.execute("UPDATE suite SET version=NULL WHERE version='-'") + + c.execute("UPDATE config SET value = '48' WHERE name = 'db_revision'") + self.db.commit() + + except psycopg2.ProgrammingError, msg: + self.db.rollback() + raise DBUpdateError, 'Unable to apply sick update 48, rollback issued. Error message : %s' % (str(msg)) diff --git a/dak/dakdb/update49.py b/dak/dakdb/update49.py new file mode 100755 index 00000000..5ff1545c --- /dev/null +++ b/dak/dakdb/update49.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# coding=utf8 + +""" +Permission fixups + +@contact: Debian FTP Master +@copyright: 2011 Mark Hymers +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +import psycopg2 +from daklib.dak_exceptions import DBUpdateError +from socket import gethostname; + +################################################################################ +def do_update(self): + """ + Fix up permissions + """ + print __doc__ + try: + c = self.db.cursor() + + c.execute("GRANT SELECT, UPDATE, INSERT ON binaries_metadata TO ftpmaster") + c.execute("GRANT SELECT ON binaries_metadata TO public") + c.execute("GRANT USAGE ON metadata_keys_key_id_seq TO ftpmaster") + c.execute("GRANT SELECT, UPDATE, INSERT ON source_metadata TO ftpmaster") + c.execute("GRANT SELECT ON source_metadata TO public") + c.execute("GRANT SELECT, UPDATE, INSERT ON metadata_keys TO ftpmaster") + c.execute("GRANT SELECT ON metadata_keys TO public") + c.execute("GRANT SELECT, UPDATE, INSERT ON extra_src_references TO ftpmaster") + c.execute("GRANT SELECT ON extra_src_references TO public") + c.execute("GRANT SELECT, UPDATE, INSERT ON src_contents TO ftpmaster") + c.execute("GRANT SELECT ON src_contents TO public") + c.execute("GRANT USAGE ON changelogs_text_id_seq TO ftpmaster") + c.execute("GRANT SELECT ON changes_pending_files_map TO public") + c.execute("GRANT SELECT ON config TO public") + + c.execute("UPDATE config SET value = '49' WHERE name = 'db_revision'") + self.db.commit() + + except psycopg2.ProgrammingError, msg: + self.db.rollback() + raise DBUpdateError, 'Unable to apply sick update 49, rollback issued. Error message : %s' % (str(msg)) diff --git a/dak/make_changelog.py b/dak/make_changelog.py index 0a76bcc1..398c3526 100755 --- a/dak/make_changelog.py +++ b/dak/make_changelog.py @@ -52,12 +52,12 @@ Generate changelog entry between two suites import os import sys import apt_pkg -from commands import getstatusoutput from glob import glob from shutil import rmtree from daklib.dbconn import * from daklib import utils from daklib.config import Config +from daklib.contents import UnpackedSource from daklib.regexes import re_no_epoch ################################################################################ @@ -202,14 +202,11 @@ def export_files(session, pool, clpool, temppath): pass os.link(os.path.join(path, file), os.path.join(path, link)) - tempdir = utils.temp_dirname(parent=temppath) - os.rmdir(tempdir) - for p in unpack.keys(): package = os.path.splitext(os.path.basename(p))[0].split('_') - cmd = 'dpkg-source --no-check --no-copy -x %s %s' % (p, tempdir) - (result, output) = getstatusoutput(cmd) - if not result: + try: + unpacked = UnpackedSource(p) + tempdir = unpacked.get_root_directory() stats['unpack'] += 1 for file in files: for f in glob(os.path.join(tempdir, 'debian', '*%s' % file)): @@ -227,13 +224,11 @@ def export_files(session, pool, clpool, temppath): pass os.link(version, suite) stats['created'] += 1 - else: - print 'make-changelog: unable to unpack %s_%s: %s' \ - % (package[0], package[1], output) + unpacked.cleanup() + except Exception, e: + print 'make-changelog: unable to unpack %s\n%s' % (p, e) stats['errors'] += 1 - rmtree(tempdir) - for root, dirs, files in os.walk(clpool): if len(files): if root.split('/')[-1] not in sources.keys(): diff --git a/dak/metadata.py b/dak/metadata.py new file mode 100755 index 00000000..f40c9431 --- /dev/null +++ b/dak/metadata.py @@ 
-0,0 +1,117 @@ +#!/usr/bin/env python +""" +Import data for Package/Sources files from .deb and .dsc files +@copyright: 2011 Torsten Werner +@copyright: 2011 Mark Hymers +@license: GNU General Public License version 2 or later +""" + +################################################################################ + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +# < mvo> that screams for consolidation in libapt at least (that then in turn can +# use libdpkg ... ) - I guess the "d" means delayed ;) + +# (whilst discussing adding xz support to dak, and therefore python-apt, and +# therefore libapt-pkg) + +################################################################################ + +import sys +import apt_pkg + +from daklib.config import Config +from daklib.dbconn import * +from daklib.metadata import MetadataScanner +from daklib import daklog +from daklib import utils + +################################################################################ + +def usage (exit_code=0): + print """Usage: dak metadata [options] subcommand + +SUBCOMMANDS + scan-source + scan the dsc files in the existing pool and load metadata into the database + + scan-binary + scan the deb files in the existing pool and load metadata into the database + +OPTIONS + -h, --help + show this help and exit + +OPTIONS for scan + -l, --limit=NUMBER + maximum number of items to scan +""" + sys.exit(exit_code) + +################################################################################ + +def scan_all(cnf, mode, limit): + Logger = daklog.Logger(cnf.Cnf, 'metadata scan (%s)' % mode) + result = MetadataScanner.scan_all(mode, limit) + processed = '%(processed)d %(type)s processed' % result + remaining = '%(remaining)d %(type)s remaining' % result + Logger.log([processed, remaining]) + Logger.close() + +################################################################################ + +def main(): + cnf = Config() + cnf['Metadata::Options::Help'] = '' + cnf['Metadata::Options::Suite'] = '' + cnf['Metadata::Options::Limit'] = '' + cnf['Metadata::Options::Force'] = '' + arguments = [('h', "help", 'Metadata::Options::Help'), + ('s', "suite", 'Metadata::Options::Suite', "HasArg"), + ('l', "limit", 'Metadata::Options::Limit', "HasArg"), + ('f', "force", 'Metadata::Options::Force'), + ] + args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments, sys.argv) + options = cnf.SubTree('Metadata::Options') + + if (len(args) != 1) or options['Help']: + usage() + + limit = None + if len(options['Limit']) > 0: + limit = int(options['Limit']) + + if args[0] == 'scan-source': + scan_all(cnf, 'source', limit) + return + elif args[0] == 'scan-binary': + scan_all(cnf, 'binary', limit) + return + + suite_names = utils.split_args(options['Suite']) + + force = bool(options['Force']) + + if args[0] == 'generate': + 
raise NotImplementError + + usage() + + +if __name__ == '__main__': + main() diff --git a/dak/update_db.py b/dak/update_db.py index 985051ec..77d00976 100755 --- a/dak/update_db.py +++ b/dak/update_db.py @@ -46,7 +46,7 @@ from daklib.daklog import Logger ################################################################################ Cnf = None -required_database_schema = 47 +required_database_schema = 49 ################################################################################ @@ -123,15 +123,18 @@ Updates dak's database schema to the lastest version. You should disable crontab try: # Build a connect string - connect_str = "dbname=%s"% (cnf["DB::Name"]) - if cnf["DB::Host"] != '': connect_str += " host=%s" % (cnf["DB::Host"]) - if cnf["DB::Port"] != '-1': connect_str += " port=%d" % (int(cnf["DB::Port"])) + if cnf.has_key("DB::Service"): + connect_str = "service=%s" % cnf["DB::Service"] + else: + connect_str = "dbname=%s"% (cnf["DB::Name"]) + if cnf["DB::Host"] != '': connect_str += " host=%s" % (cnf["DB::Host"]) + if cnf["DB::Port"] != '-1': connect_str += " port=%d" % (int(cnf["DB::Port"])) self.db = psycopg2.connect(connect_str) except: print "FATAL: Failed connect to database" - pass + sys.exit(1) database_revision = int(self.get_db_rev()) logger.log(['transaction id before update: %s' % self.get_transaction_id()]) diff --git a/daklib/contents.py b/daklib/contents.py index 4a0b3ae2..a158e8fc 100755 --- a/daklib/contents.py +++ b/daklib/contents.py @@ -29,7 +29,9 @@ from daklib.dbconn import * from daklib.config import Config from multiprocessing import Pool -from subprocess import Popen, PIPE +from shutil import rmtree +from subprocess import Popen, PIPE, check_call +from tempfile import mkdtemp import os.path @@ -313,3 +315,64 @@ def scan_helper(binary_id): ''' scanner = ContentsScanner(binary_id) scanner.scan() + + +class UnpackedSource(object): + ''' + UnpackedSource extracts a source package into a temporary location and + gives you some convinient function for accessing it. + ''' + def __init__(self, dscfilename): + ''' + The dscfilename is a name of a DSC file that will be extracted. + ''' + self.root_directory = os.path.join(mkdtemp(), 'root') + command = ('dpkg-source', '--no-copy', '--no-check', '-x', dscfilename, + self.root_directory) + # dpkg-source does not have a --quiet option + devnull = open(os.devnull, 'w') + check_call(command, stdout = devnull, stderr = devnull) + devnull.close() + + def get_root_directory(self): + ''' + Returns the name of the package's root directory which is the directory + where the debian subdirectory is located. + ''' + return self.root_directory + + def get_changelog_file(self): + ''' + Returns a file object for debian/changelog or None if no such file exists. + ''' + changelog_name = os.path.join(self.root_directory, 'debian', 'changelog') + try: + return open(changelog_name) + except IOError: + return None + + def get_all_filenames(self): + ''' + Returns an iterator over all filenames. The filenames will be relative + to the root directory. + ''' + skip = len(self.root_directory) + 1 + for root, _, files in os.walk(self.root_directory): + for name in files: + yield os.path.join(root[skip:], name) + + def cleanup(self): + ''' + Removes all temporary files. + ''' + if self.root_directory is None: + return + parent_directory = os.path.dirname(self.root_directory) + rmtree(parent_directory) + self.root_directory = None + + def __del__(self): + ''' + Enforce cleanup. 
+ ''' + self.cleanup() diff --git a/daklib/dbconn.py b/daklib/dbconn.py index 6782c081..6317b584 100755 --- a/daklib/dbconn.py +++ b/daklib/dbconn.py @@ -204,7 +204,9 @@ class ORMObject(object): # list value = len(value) elif hasattr(value, 'count'): - # query + # query (but not during validation) + if self.in_validation: + continue value = value.count() else: raise KeyError('Do not understand property %s.' % property) @@ -258,6 +260,8 @@ class ORMObject(object): validation_message = \ "Validation failed because property '%s' must not be empty in object\n%s" + in_validation = False + def validate(self): ''' This function validates the not NULL constraints as returned by @@ -272,8 +276,11 @@ class ORMObject(object): getattr(self, property + '_id') is not None: continue if not hasattr(self, property) or getattr(self, property) is None: - raise DBUpdateError(self.validation_message % \ - (property, str(self))) + # str() might lead to races due to a 2nd flush + self.in_validation = True + message = self.validation_message % (property, str(self)) + self.in_validation = False + raise DBUpdateError(message) @classmethod @session_wrapper @@ -492,6 +499,10 @@ class DBBinary(ORMObject): self.poolfile = poolfile self.binarytype = binarytype + @property + def pkid(self): + return self.binary_id + def properties(self): return ['package', 'version', 'maintainer', 'source', 'architecture', \ 'poolfile', 'binarytype', 'fingerprint', 'install_date', \ @@ -529,6 +540,33 @@ class DBBinary(ORMObject): dpkg.stdout.close() dpkg.wait() + def read_control(self): + ''' + Reads the control information from a binary. + + @rtype: text + @return: stanza text of the control section. + ''' + import apt_inst + fullpath = self.poolfile.fullpath + deb_file = open(fullpath, 'r') + stanza = apt_inst.debExtractControl(deb_file) + deb_file.close() + + return stanza + + def read_control_fields(self): + ''' + Reads the control information from a binary and return + as a dictionary. + + @rtype: dict + @return: fields of the control section as a dictionary. + ''' + import apt_pkg + stanza = self.read_control() + return apt_pkg.TagSection(stanza) + __all__.append('DBBinary') @session_wrapper @@ -2157,6 +2195,60 @@ __all__.append('get_sections') ################################################################################ +from debian.debfile import Deb822 + +# Temporary Deb822 subclass to fix bugs with : handling; see #597249 +class Dak822(Deb822): + def _internal_parser(self, sequence, fields=None): + # The key is non-whitespace, non-colon characters before any colon. + key_part = r"^(?P[^: \t\n\r\f\v]+)\s*:\s*" + single = re.compile(key_part + r"(?P\S.*?)\s*$") + multi = re.compile(key_part + r"$") + multidata = re.compile(r"^\s(?P.+?)\s*$") + + wanted_field = lambda f: fields is None or f in fields + + if isinstance(sequence, basestring): + sequence = sequence.splitlines() + + curkey = None + content = "" + for line in self.gpg_stripped_paragraph(sequence): + m = single.match(line) + if m: + if curkey: + self[curkey] = content + + if not wanted_field(m.group('key')): + curkey = None + continue + + curkey = m.group('key') + content = m.group('data') + continue + + m = multi.match(line) + if m: + if curkey: + self[curkey] = content + + if not wanted_field(m.group('key')): + curkey = None + continue + + curkey = m.group('key') + content = "" + continue + + m = multidata.match(line) + if m: + content += '\n' + line # XXX not m.group('data')? 
+ continue + + if curkey: + self[curkey] = content + + class DBSource(ORMObject): def __init__(self, source = None, version = None, maintainer = None, \ changedby = None, poolfile = None, install_date = None): @@ -2167,6 +2259,10 @@ class DBSource(ORMObject): self.poolfile = poolfile self.install_date = install_date + @property + def pkid(self): + return self.source_id + def properties(self): return ['source', 'source_id', 'maintainer', 'changedby', \ 'fingerprint', 'poolfile', 'version', 'suites_count', \ @@ -2176,6 +2272,17 @@ class DBSource(ORMObject): return ['source', 'version', 'install_date', 'maintainer', \ 'changedby', 'poolfile', 'install_date'] + def read_control_fields(self): + ''' + Reads the control information from a dsc + + @rtype: tuple + @return: fields is the dsc information in a dictionary form + ''' + fullpath = self.poolfile.fullpath + fields = Dak822(open(self.poolfile.fullpath, 'r')) + return fields + metadata = association_proxy('key', 'value') __all__.append('DBSource') @@ -2321,6 +2428,34 @@ def get_source_in_suite(source, suite, session=None): __all__.append('get_source_in_suite') +@session_wrapper +def import_metadata_into_db(obj, session=None): + """ + This routine works on either DBBinary or DBSource objects and imports + their metadata into the database + """ + fields = obj.read_control_fields() + for k in fields.keys(): + try: + # Try raw ASCII + val = str(fields[k]) + except UnicodeEncodeError: + # Fall back to UTF-8 + try: + val = fields[k].encode('utf-8') + except UnicodeEncodeError: + # Finally try iso8859-1 + val = fields[k].encode('iso8859-1') + # Otherwise we allow the exception to percolate up and we cause + # a reject as someone is playing silly buggers + + obj.metadata[get_or_set_metadatakey(k, session)] = val + + session.commit_or_flush() + +__all__.append('import_metadata_into_db') + + ################################################################################ @session_wrapper @@ -2497,7 +2632,7 @@ def add_deb_to_db(u, filename, session=None): # session.rollback() # raise MissingContents, "No contents stored for package %s, and couldn't determine contents of %s" % (bin.package, filename) - return poolfile + return bin, poolfile __all__.append('add_deb_to_db') @@ -2564,7 +2699,7 @@ class Suite(ORMObject): 'overrides_count'] def not_null_constraints(self): - return ['suite_name', 'version'] + return ['suite_name'] def __eq__(self, val): if isinstance(val, str): @@ -2820,6 +2955,38 @@ class MetadataKey(ORMObject): __all__.append('MetadataKey') +@session_wrapper +def get_or_set_metadatakey(keyname, session=None): + """ + Returns MetadataKey object for given uidname. + + If no matching keyname is found, a row is inserted. + + @type uidname: string + @param uidname: The keyname to add + + @type session: SQLAlchemy + @param session: Optional SQL session object (a temporary one will be + generated if not supplied). If not passed, a commit will be performed at + the end of the function, otherwise the caller is responsible for commiting. 
+ + @rtype: MetadataKey + @return: the metadatakey object for the given keyname + """ + + q = session.query(MetadataKey).filter_by(key=keyname) + + try: + ret = q.one() + except NoResultFound: + ret = MetadataKey(keyname) + session.add(ret) + session.commit_or_flush() + + return ret + +__all__.append('get_or_set_metadatakey') + ################################################################################ class BinaryMetadata(ORMObject): diff --git a/daklib/metadata.py b/daklib/metadata.py new file mode 100755 index 00000000..d88cf4fa --- /dev/null +++ b/daklib/metadata.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +""" +Helper code for packages and sources generation. + +@contact: Debian FTPMaster +@copyright: 2011 Torsten Werner +@copyright: 2011 Mark Hymers +@license: GNU General Public License version 2 or later +""" + +################################################################################ + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +from daklib.dbconn import * +from daklib.config import Config + +from multiprocessing import Pool +from subprocess import Popen, PIPE + +import os.path + +class MetadataScanner(object): + ''' + MetadataScanner provides a threadsafe method scan() to scan the metadata of + a DBSource or DBBinary object depending on what is passed as dbclass''' + + def __init__(self, dbclass, pkid, verbose=True): + ''' + The argument binary_id is the id of the DBBinary object that + + should be scanned. + ''' + self.verbose = True + self.dbclass = dbclass + self.pkid = pkid + + def scan(self, dummy_arg = None): + ''' + This method does the actual scan and fills in the associated metadata + property. It commits any changes to the database. The argument dummy_arg + is ignored but needed by our threadpool implementation. + ''' + obj = None + fullpath = 'UNKNOWN PATH' + + session = DBConn().session() + try: + obj = session.query(self.dbclass).get(self.pkid) + fullpath = obj.poolfile.fullpath + import_metadata_into_db(obj, session=session) + if self.verbose: + print "Imported %s (%s)" % (self.pkid, fullpath) + session.commit() + except Exception, e: + print "Failed to import %s [id=%s; fullpath=%s]" % (self.dbclass.__name__, self.pkid, fullpath) + print "Exception: ", e + session.rollback() + + session.close() + + @classmethod + def scan_all(class_, scantype='source', limit = None): + ''' + The class method scan_all() scans all sources using multiple threads. + The number of sources to be scanned can be limited with the limit + argument. Returns the number of processed and remaining files as a + dict. 
+ ''' + session = DBConn().session() + if scantype == 'source': + dbclass = DBSource + query = session.query(DBSource).filter(~DBSource.source_id.in_(session.query(SourceMetadata.source_id.distinct()))) + t = 'sources' + else: + # Otherwise binary + dbclass = DBBinary + query = session.query(DBBinary).filter(~DBBinary.binary_id.in_(session.query(BinaryMetadata.binary_id.distinct()))) + t = 'binaries' + + remaining = query.count + if limit is not None: + query = query.limit(limit) + processed = query.count() + pool = Pool(processes=10) + for obj in query.yield_per(100): + pool.apply_async(scan_helper, (dbclass, obj.pkid, )) + pool.close() + pool.join() + remaining = remaining() + session.close() + return { 'processed': processed, 'remaining': remaining , 'type': t} + +def scan_helper(dbclass, source_id): + ''' + This function runs in a subprocess. + ''' + scanner = MetadataScanner(dbclass, source_id) + scanner.scan() diff --git a/daklib/queue.py b/daklib/queue.py index dfbe3685..52483cca 100755 --- a/daklib/queue.py +++ b/daklib/queue.py @@ -54,6 +54,7 @@ from summarystats import SummaryStats from utils import parse_changes, check_dsc_files from textutils import fix_maintainer from lintian import parse_lintian_output, generate_reject_messages +from contents import UnpackedSource # suppress some deprecation warnings in squeeze related to apt_pkg # module @@ -1265,11 +1266,10 @@ class Upload(object): os.symlink(self.pkg.orig_files[orig_file]["path"], dest) # Extract the source - cmd = "dpkg-source -sn -x %s" % (dsc_filename) - (result, output) = commands.getstatusoutput(cmd) - if (result != 0): - self.rejects.append("'dpkg-source -x' failed for %s [return code: %s]." % (dsc_filename, result)) - self.rejects.append(utils.prefix_multi_line_string(output, " [dpkg-source output:] ")) + try: + unpacked = UnpackedSource(dsc_filename) + except: + self.rejects.append("'dpkg-source -x' failed for %s." % dsc_filename) return if not cnf.Find("Dir::Queue::BTSVersionTrack"): @@ -1281,19 +1281,19 @@ class Upload(object): upstr_version = re_strip_revision.sub('', upstr_version) # Ensure the changelog file exists - changelog_filename = "%s-%s/debian/changelog" % (self.pkg.dsc["source"], upstr_version) - if not os.path.exists(changelog_filename): + changelog_file = unpacked.get_changelog_file() + if changelog_file is None: self.rejects.append("%s: debian/changelog not found in extracted source." % (dsc_filename)) return # Parse the changelog self.pkg.dsc["bts changelog"] = "" - changelog_file = utils.open_file(changelog_filename) for line in changelog_file.readlines(): m = re_changelog_versions.match(line) if m: self.pkg.dsc["bts changelog"] += line changelog_file.close() + unpacked.cleanup() # Check we found at least one revision in the changelog if not self.pkg.dsc["bts changelog"]: @@ -2025,6 +2025,7 @@ distribution.""" print "Installing." 
self.logger.log(["installing changes", self.pkg.changes_file]) + binaries = [] poolfiles = [] # Add the .dsc file to the DB first @@ -2037,7 +2038,9 @@ distribution.""" # Add .deb / .udeb files to the DB (type is always deb, dbtype is udeb/deb) for newfile, entry in self.pkg.files.items(): if entry["type"] == "deb": - poolfiles.append(add_deb_to_db(self, newfile, session)) + b, pf = add_deb_to_db(self, newfile, session) + binaries.append(b) + poolfiles.append(pf) # If this is a sourceful diff only upload that is moving # cross-component we need to copy the .orig files into the new @@ -2122,6 +2125,18 @@ distribution.""" # Our SQL session will automatically start a new transaction after # the last commit + # Now ensure that the metadata has been added + # This has to be done after we copy the files into the pool + # For source if we have it: + if self.pkg.changes["architecture"].has_key("source"): + import_metadata_into_db(source, session) + + # Now for any of our binaries + for b in binaries: + import_metadata_into_db(b, session) + + session.commit() + # Move the .changes into the 'done' directory utils.move(self.pkg.changes_file, os.path.join(cnf["Dir::Queue::Done"], os.path.basename(self.pkg.changes_file))) diff --git a/docs/README.quotes b/docs/README.quotes index 6b89fc0f..2b159a8f 100644 --- a/docs/README.quotes +++ b/docs/README.quotes @@ -347,10 +347,3 @@ Canadians: This is a lighthouse. Your call. I wish they wouldnt leave biscuits out, thats just tempting. Damnit. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -< mvo> that screams for consolidation in libapt at least (that then in turn can use libdpkg … ) - I guess the "d" means delayed ;) - -(whilst discussing adding xz support to dak, and therefore python-apt, and -therefore libapt-pkg) - diff --git a/tests/dbtest_contents.py b/tests/dbtest_contents.py index 158ec892..90fe4966 100755 --- a/tests/dbtest_contents.py +++ b/tests/dbtest_contents.py @@ -1,12 +1,13 @@ #!/usr/bin/env python -from db_test import DBDakTestCase +from db_test import DBDakTestCase, fixture from daklib.dbconn import * -from daklib.contents import ContentsWriter, ContentsScanner +from daklib.contents import ContentsWriter, ContentsScanner, UnpackedSource from os.path import normpath from sqlalchemy.exc import FlushError, IntegrityError +from subprocess import CalledProcessError import unittest class ContentsTestCase(DBDakTestCase): @@ -172,6 +173,21 @@ class ContentsTestCase(DBDakTestCase): self.assertEqual('usr/bin/hello', bin_contents_list[0].file) self.assertEqual('usr/share/doc/hello/copyright', bin_contents_list[1].file) + def test_unpack(self): + ''' + Tests the UnpackedSource class. 
+ ''' + self.setup_poolfiles() + dscfilename = fixture('ftp/pool/' + self.file['hello_2.2-1.dsc'].filename) + unpacked = UnpackedSource(dscfilename) + self.assertTrue(len(unpacked.get_root_directory()) > 0) + self.assertEqual('hello (2.2-1) unstable; urgency=low\n', + unpacked.get_changelog_file().readline()) + all_filenames = set(unpacked.get_all_filenames()) + self.assertEqual(8, len(all_filenames)) + self.assertTrue('debian/rules' in all_filenames) + self.assertRaises(CalledProcessError, lambda: UnpackedSource('invalidname')) + def classes_to_clean(self): return [Override, Suite, BinContents, DBBinary, DBSource, Architecture, Section, \ OverrideType, Maintainer, Component, Priority, PoolFile] diff --git a/tests/dbtest_ormobject.py b/tests/dbtest_ormobject.py index d1c72de4..0790e4c7 100755 --- a/tests/dbtest_ormobject.py +++ b/tests/dbtest_ormobject.py @@ -3,6 +3,7 @@ from db_test import DBDakTestCase from daklib.dbconn import Architecture, Suite +from daklib.dak_exceptions import DBUpdateError try: # python >= 2.6 @@ -35,5 +36,10 @@ class ORMObjectTestCase(DBDakTestCase): architecture.suites = [sid, squeeze] self.assertTrue(re.search('"suites_count": 2', str(architecture))) + def test_validation(self): + suite = Suite() + self.session.add(suite) + self.assertRaises(DBUpdateError, self.session.flush) + if __name__ == '__main__': unittest.main() diff --git a/tests/dbtest_packages.py b/tests/dbtest_packages.py index 2b179053..f2587709 100755 --- a/tests/dbtest_packages.py +++ b/tests/dbtest_packages.py @@ -328,7 +328,7 @@ class PackageTestCase(DBDakTestCase): 'sha1sum': 'deadbeef', 'sha256sum': 'deadbeef'} upload = Upload(pkg) - poolfile = add_deb_to_db(upload, 'hello_2.2-2_i386.deb', self.session) + bin, poolfile = add_deb_to_db(upload, 'hello_2.2-2_i386.deb', self.session) self.session.refresh(poolfile) self.session.refresh(poolfile.binary) self.assertEqual('main/h/hello/hello_2.2-2_i386.deb', poolfile.filename) diff --git a/tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1.debian.tar.gz b/tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1.debian.tar.gz new file mode 100644 index 00000000..c185f1bb Binary files /dev/null and b/tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1.debian.tar.gz differ diff --git a/tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1.dsc b/tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1.dsc new file mode 100644 index 00000000..f564ce44 --- /dev/null +++ b/tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1.dsc @@ -0,0 +1,13 @@ +Format: 3.0 (quilt) +Source: hello +Version: 2.2-1 +Maintainer: Mr. Me +Checksums-Sha1: + 9613ac479ddb6bca7f3ec5436b27ab983733b963 147 hello_2.2.orig.tar.gz + 97cfabb792685ac19c1ddc03f7d4aa1022f626e1 462 hello_2.2-1.debian.tar.gz +Checksums-Sha256: + b041547e956f091a46030f133b6e47af15bc836771540118fec98d0913602ce0 147 hello_2.2.orig.tar.gz + da28e21cbae014b915abc8afc4be1c0b8e5148b78802dce815d5342e80cd52e7 462 hello_2.2-1.debian.tar.gz +Files: + cc4b081e2697fca88c87986b1cad905f 147 hello_2.2.orig.tar.gz + d7bdb277cbdbaad4ab700c6d5cee9b54 462 hello_2.2-1.debian.tar.gz diff --git a/tests/fixtures/ftp/pool/main/h/hello/hello_2.2.orig.tar.gz b/tests/fixtures/ftp/pool/main/h/hello/hello_2.2.orig.tar.gz new file mode 100644 index 00000000..f3fbc183 Binary files /dev/null and b/tests/fixtures/ftp/pool/main/h/hello/hello_2.2.orig.tar.gz differ
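A minimal usage sketch for the new UnpackedSource helper in daklib/contents.py. The .dsc path is the fixture added by this commit; the driver code itself is illustrative and not part of the patch. A failing 'dpkg-source -x' surfaces as subprocess.CalledProcessError, as exercised in tests/dbtest_contents.py.

    from subprocess import CalledProcessError
    from daklib.contents import UnpackedSource

    dsc = 'tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1.dsc'
    try:
        # runs 'dpkg-source --no-copy --no-check -x' into a fresh temporary directory
        unpacked = UnpackedSource(dsc)
    except CalledProcessError:
        print 'unable to unpack %s' % dsc
    else:
        print unpacked.get_root_directory()        # directory that contains debian/
        changelog = unpacked.get_changelog_file()  # file object, or None if missing
        if changelog is not None:
            print changelog.readline().rstrip()    # 'hello (2.2-1) unstable; urgency=low'
            changelog.close()
        for name in unpacked.get_all_filenames():  # filenames relative to the root
            print name
        unpacked.cleanup()                         # also enforced by __del__

Compared with the getstatusoutput('dpkg-source ...') calls it replaces in dak/make_changelog.py and daklib/queue.py, the temporary-directory handling and error reporting now live in one place.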
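The Dir::DB changes in both dak.conf files drop the explicit Name/Host/Port settings in favour of a connection-service name, and dak/update_db.py now builds a "service=..." connect string when DB::Service is set. The real connection parameters are therefore expected to come from a libpq service definition; the stanza below is an illustrative pg_service.conf entry, not something shipped in this commit, and the dbname/host/port values are placeholders.

    [backports]
    dbname=backports
    host=localhost
    port=5432

The comments carried in the new config blocks still apply: PoolSize should be at least ThreadCount + 1, and PoolSize plus MaxOverflow must stay within postgresql.conf's max_connections.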
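With required_database_schema raised to 49, the next schema upgrade (via dak/update_db.py, normally invoked as 'dak update-db') applies the new dakdb/update48.py and update49.py scripts: suite.version becomes nullable and the metadata table permissions are fixed up. After that, the new 'dak metadata' subcommand can backfill metadata for packages already in the pool; new uploads are covered automatically, since daklib/queue.py now calls import_metadata_into_db() at install time. An illustrative invocation, using only the options documented in the subcommand's usage text (the limit value is arbitrary):

    dak update-db                          # applies dakdb updates 48 and 49
    dak metadata scan-source --limit=1000  # .dsc files not yet in source_metadata
    dak metadata scan-binary --limit=1000  # .deb files not yet in binaries_metadata

Programmatically, the same work is exposed as MetadataScanner.scan_all('source' or 'binary', limit=...) in daklib/metadata.py and, per package, import_metadata_into_db(obj) in daklib/dbconn.py.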