From: Joerg Jaspert Date: Wed, 23 Mar 2011 21:25:26 +0000 (+0100) Subject: Merge remote-tracking branch 'ansgar/fix-update-db' into merge X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=4f29e33412c9bf42023483ef6f6e8dcf2fbe0dc5;hp=1f1626eaa5ae67f461d38d07e54fb3dba51b314a;p=dak.git Merge remote-tracking branch 'ansgar/fix-update-db' into merge * ansgar/fix-update-db: Fix dak update-db Signed-off-by: Joerg Jaspert --- diff --git a/config/backports/dak.conf b/config/backports/dak.conf index 16745748..d97c52ba 100644 --- a/config/backports/dak.conf +++ b/config/backports/dak.conf @@ -243,9 +243,13 @@ Dir DB { - Name "backports"; - Host ""; - Port -1; + Service "backports"; + // PoolSize should be at least ThreadCount + 1 + PoolSize 5; + // MaxOverflow shouldn't exceed postgresql.conf's max_connections - PoolSize + MaxOverflow 13; + // should be false for encoding == SQL_ASCII + Unicode "false" }; SuiteMappings diff --git a/dak/dak.py b/dak/dak.py index 5a659d8c..ad99a5a0 100755 --- a/dak/dak.py +++ b/dak/dak.py @@ -86,6 +86,8 @@ def init(): "Generate Packages/Sources files"), ("contents", "Generate content files"), + ("metadata", + "Load data for packages/sources files"), ("generate-index-diffs", "Generate .diff/Index files"), ("clean-suites", diff --git a/dak/dakdb/update49.py b/dak/dakdb/update49.py new file mode 100755 index 00000000..5ff1545c --- /dev/null +++ b/dak/dakdb/update49.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# coding=utf8 + +""" +Permission fixups + +@contact: Debian FTP Master +@copyright: 2011 Mark Hymers +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +import psycopg2 +from daklib.dak_exceptions import DBUpdateError +from socket import gethostname; + +################################################################################ +def do_update(self): + """ + Fix up permissions + """ + print __doc__ + try: + c = self.db.cursor() + + c.execute("GRANT SELECT, UPDATE, INSERT ON binaries_metadata TO ftpmaster") + c.execute("GRANT SELECT ON binaries_metadata TO public") + c.execute("GRANT USAGE ON metadata_keys_key_id_seq TO ftpmaster") + c.execute("GRANT SELECT, UPDATE, INSERT ON source_metadata TO ftpmaster") + c.execute("GRANT SELECT ON source_metadata TO public") + c.execute("GRANT SELECT, UPDATE, INSERT ON metadata_keys TO ftpmaster") + c.execute("GRANT SELECT ON metadata_keys TO public") + c.execute("GRANT SELECT, UPDATE, INSERT ON extra_src_references TO ftpmaster") + c.execute("GRANT SELECT ON extra_src_references TO public") + c.execute("GRANT SELECT, UPDATE, INSERT ON src_contents TO ftpmaster") + c.execute("GRANT SELECT ON src_contents TO public") + c.execute("GRANT USAGE ON changelogs_text_id_seq TO ftpmaster") + c.execute("GRANT SELECT ON changes_pending_files_map TO public") + c.execute("GRANT SELECT ON config TO public") + + c.execute("UPDATE config SET value = '49' WHERE name = 'db_revision'") + self.db.commit() + + except psycopg2.ProgrammingError, msg: + self.db.rollback() + raise DBUpdateError, 'Unable to apply sick update 49, rollback issued. 
Error message : %s' % (str(msg)) diff --git a/dak/metadata.py b/dak/metadata.py new file mode 100755 index 00000000..f40c9431 --- /dev/null +++ b/dak/metadata.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +""" +Import data for Package/Sources files from .deb and .dsc files +@copyright: 2011 Torsten Werner +@copyright: 2011 Mark Hymers +@license: GNU General Public License version 2 or later +""" + +################################################################################ + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +# < mvo> that screams for consolidation in libapt at least (that then in turn can +# use libdpkg ... 
) - I guess the "d" means delayed ;) + +# (whilst discussing adding xz support to dak, and therefore python-apt, and +# therefore libapt-pkg) + +################################################################################ + +import sys +import apt_pkg + +from daklib.config import Config +from daklib.dbconn import * +from daklib.metadata import MetadataScanner +from daklib import daklog +from daklib import utils + +################################################################################ + +def usage (exit_code=0): + print """Usage: dak metadata [options] subcommand + +SUBCOMMANDS + scan-source + scan the dsc files in the existing pool and load metadata into the database + + scan-binary + scan the deb files in the existing pool and load metadata into the database + +OPTIONS + -h, --help + show this help and exit + +OPTIONS for scan + -l, --limit=NUMBER + maximum number of items to scan +""" + sys.exit(exit_code) + +################################################################################ + +def scan_all(cnf, mode, limit): + Logger = daklog.Logger(cnf.Cnf, 'metadata scan (%s)' % mode) + result = MetadataScanner.scan_all(mode, limit) + processed = '%(processed)d %(type)s processed' % result + remaining = '%(remaining)d %(type)s remaining' % result + Logger.log([processed, remaining]) + Logger.close() + +################################################################################ + +def main(): + cnf = Config() + cnf['Metadata::Options::Help'] = '' + cnf['Metadata::Options::Suite'] = '' + cnf['Metadata::Options::Limit'] = '' + cnf['Metadata::Options::Force'] = '' + arguments = [('h', "help", 'Metadata::Options::Help'), + ('s', "suite", 'Metadata::Options::Suite', "HasArg"), + ('l', "limit", 'Metadata::Options::Limit', "HasArg"), + ('f', "force", 'Metadata::Options::Force'), + ] + args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments, sys.argv) + options = cnf.SubTree('Metadata::Options') + + if (len(args) != 1) or options['Help']: + usage() + + limit 
= None + if len(options['Limit']) > 0: + limit = int(options['Limit']) + + if args[0] == 'scan-source': + scan_all(cnf, 'source', limit) + return + elif args[0] == 'scan-binary': + scan_all(cnf, 'binary', limit) + return + + suite_names = utils.split_args(options['Suite']) + + force = bool(options['Force']) + + if args[0] == 'generate': + raise NotImplementError + + usage() + + +if __name__ == '__main__': + main() diff --git a/dak/update_db.py b/dak/update_db.py index b2ca7a86..77d00976 100755 --- a/dak/update_db.py +++ b/dak/update_db.py @@ -46,7 +46,7 @@ from daklib.daklog import Logger ################################################################################ Cnf = None -required_database_schema = 48 +required_database_schema = 49 ################################################################################ diff --git a/daklib/dbconn.py b/daklib/dbconn.py index 6782c081..98b6c7d5 100755 --- a/daklib/dbconn.py +++ b/daklib/dbconn.py @@ -492,6 +492,10 @@ class DBBinary(ORMObject): self.poolfile = poolfile self.binarytype = binarytype + @property + def pkid(self): + return self.binary_id + def properties(self): return ['package', 'version', 'maintainer', 'source', 'architecture', \ 'poolfile', 'binarytype', 'fingerprint', 'install_date', \ @@ -529,6 +533,33 @@ class DBBinary(ORMObject): dpkg.stdout.close() dpkg.wait() + def read_control(self): + ''' + Reads the control information from a binary. + + @rtype: text + @return: stanza text of the control section. + ''' + import apt_inst + fullpath = self.poolfile.fullpath + deb_file = open(fullpath, 'r') + stanza = apt_inst.debExtractControl(deb_file) + deb_file.close() + + return stanza + + def read_control_fields(self): + ''' + Reads the control information from a binary and return + as a dictionary. + + @rtype: dict + @return: fields of the control section as a dictionary. 
+ ''' + import apt_pkg + stanza = self.read_control() + return apt_pkg.TagSection(stanza) + __all__.append('DBBinary') @session_wrapper @@ -2157,6 +2188,60 @@ __all__.append('get_sections') ################################################################################ +from debian.debfile import Deb822 + +# Temporary Deb822 subclass to fix bugs with : handling; see #597249 +class Dak822(Deb822): + def _internal_parser(self, sequence, fields=None): + # The key is non-whitespace, non-colon characters before any colon. + key_part = r"^(?P<key>[^: \t\n\r\f\v]+)\s*:\s*" + single = re.compile(key_part + r"(?P<data>\S.*?)\s*$") + multi = re.compile(key_part + r"$") + multidata = re.compile(r"^\s(?P<data>.+?)\s*$") + + wanted_field = lambda f: fields is None or f in fields + + if isinstance(sequence, basestring): + sequence = sequence.splitlines() + + curkey = None + content = "" + for line in self.gpg_stripped_paragraph(sequence): + m = single.match(line) + if m: + if curkey: + self[curkey] = content + + if not wanted_field(m.group('key')): + curkey = None + continue + + curkey = m.group('key') + content = m.group('data') + continue + + m = multi.match(line) + if m: + if curkey: + self[curkey] = content + + if not wanted_field(m.group('key')): + curkey = None + continue + + curkey = m.group('key') + content = "" + continue + + m = multidata.match(line) + if m: + content += '\n' + line # XXX not m.group('data')? 
+ continue + + if curkey: + self[curkey] = content + + class DBSource(ORMObject): def __init__(self, source = None, version = None, maintainer = None, \ changedby = None, poolfile = None, install_date = None): @@ -2167,6 +2252,10 @@ class DBSource(ORMObject): self.poolfile = poolfile self.install_date = install_date + @property + def pkid(self): + return self.source_id + def properties(self): return ['source', 'source_id', 'maintainer', 'changedby', \ 'fingerprint', 'poolfile', 'version', 'suites_count', \ @@ -2176,6 +2265,17 @@ class DBSource(ORMObject): return ['source', 'version', 'install_date', 'maintainer', \ 'changedby', 'poolfile', 'install_date'] + def read_control_fields(self): + ''' + Reads the control information from a dsc + + @rtype: tuple + @return: fields is the dsc information in a dictionary form + ''' + fullpath = self.poolfile.fullpath + fields = Dak822(open(self.poolfile.fullpath, 'r')) + return fields + metadata = association_proxy('key', 'value') __all__.append('DBSource') @@ -2321,6 +2421,34 @@ def get_source_in_suite(source, suite, session=None): __all__.append('get_source_in_suite') +@session_wrapper +def import_metadata_into_db(obj, session=None): + """ + This routine works on either DBBinary or DBSource objects and imports + their metadata into the database + """ + fields = obj.read_control_fields() + for k in fields.keys(): + try: + # Try raw ASCII + val = str(fields[k]) + except UnicodeEncodeError: + # Fall back to UTF-8 + try: + val = fields[k].encode('utf-8') + except UnicodeEncodeError: + # Finally try iso8859-1 + val = fields[k].encode('iso8859-1') + # Otherwise we allow the exception to percolate up and we cause + # a reject as someone is playing silly buggers + + obj.metadata[get_or_set_metadatakey(k, session)] = val + + session.commit_or_flush() + +__all__.append('import_metadata_into_db') + + ################################################################################ @session_wrapper @@ -2497,7 +2625,7 @@ def 
add_deb_to_db(u, filename, session=None): # session.rollback() # raise MissingContents, "No contents stored for package %s, and couldn't determine contents of %s" % (bin.package, filename) - return poolfile + return bin, poolfile __all__.append('add_deb_to_db') @@ -2820,6 +2948,38 @@ class MetadataKey(ORMObject): __all__.append('MetadataKey') +@session_wrapper +def get_or_set_metadatakey(keyname, session=None): + """ + Returns MetadataKey object for given keyname. + + If no matching keyname is found, a row is inserted. + + @type keyname: string + @param keyname: The keyname to add + + @type session: SQLAlchemy + @param session: Optional SQL session object (a temporary one will be + generated if not supplied). If not passed, a commit will be performed at + the end of the function, otherwise the caller is responsible for committing. + + @rtype: MetadataKey + @return: the metadatakey object for the given keyname + """ + + q = session.query(MetadataKey).filter_by(key=keyname) + + try: + ret = q.one() + except NoResultFound: + ret = MetadataKey(keyname) + session.add(ret) + session.commit_or_flush() + + return ret + +__all__.append('get_or_set_metadatakey') + ################################################################################ class BinaryMetadata(ORMObject): diff --git a/daklib/metadata.py b/daklib/metadata.py new file mode 100755 index 00000000..d88cf4fa --- /dev/null +++ b/daklib/metadata.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +""" +Helper code for packages and sources generation. 
+ +@contact: Debian FTPMaster +@copyright: 2011 Torsten Werner +@copyright: 2011 Mark Hymers +@license: GNU General Public License version 2 or later +""" + +################################################################################ + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +from daklib.dbconn import * +from daklib.config import Config + +from multiprocessing import Pool +from subprocess import Popen, PIPE + +import os.path + +class MetadataScanner(object): + ''' + MetadataScanner provides a threadsafe method scan() to scan the metadata of + a DBSource or DBBinary object depending on what is passed as dbclass''' + + def __init__(self, dbclass, pkid, verbose=True): + ''' + The argument pkid is the id of the dbclass (DBSource or DBBinary) object + + that should be scanned. + ''' + self.verbose = True + self.dbclass = dbclass + self.pkid = pkid + + def scan(self, dummy_arg = None): + ''' + This method does the actual scan and fills in the associated metadata + property. It commits any changes to the database. The argument dummy_arg + is ignored but needed by our threadpool implementation. 
+ ''' + obj = None + fullpath = 'UNKNOWN PATH' + + session = DBConn().session() + try: + obj = session.query(self.dbclass).get(self.pkid) + fullpath = obj.poolfile.fullpath + import_metadata_into_db(obj, session=session) + if self.verbose: + print "Imported %s (%s)" % (self.pkid, fullpath) + session.commit() + except Exception, e: + print "Failed to import %s [id=%s; fullpath=%s]" % (self.dbclass.__name__, self.pkid, fullpath) + print "Exception: ", e + session.rollback() + + session.close() + + @classmethod + def scan_all(class_, scantype='source', limit = None): + ''' + The class method scan_all() scans all sources using multiple threads. + The number of sources to be scanned can be limited with the limit + argument. Returns the number of processed and remaining files as a + dict. + ''' + session = DBConn().session() + if scantype == 'source': + dbclass = DBSource + query = session.query(DBSource).filter(~DBSource.source_id.in_(session.query(SourceMetadata.source_id.distinct()))) + t = 'sources' + else: + # Otherwise binary + dbclass = DBBinary + query = session.query(DBBinary).filter(~DBBinary.binary_id.in_(session.query(BinaryMetadata.binary_id.distinct()))) + t = 'binaries' + + remaining = query.count + if limit is not None: + query = query.limit(limit) + processed = query.count() + pool = Pool(processes=10) + for obj in query.yield_per(100): + pool.apply_async(scan_helper, (dbclass, obj.pkid, )) + pool.close() + pool.join() + remaining = remaining() + session.close() + return { 'processed': processed, 'remaining': remaining , 'type': t} + +def scan_helper(dbclass, source_id): + ''' + This function runs in a subprocess. + ''' + scanner = MetadataScanner(dbclass, source_id) + scanner.scan() diff --git a/daklib/queue.py b/daklib/queue.py index b4c62d38..52483cca 100755 --- a/daklib/queue.py +++ b/daklib/queue.py @@ -2025,6 +2025,7 @@ distribution.""" print "Installing." 
self.logger.log(["installing changes", self.pkg.changes_file]) + binaries = [] poolfiles = [] # Add the .dsc file to the DB first @@ -2037,7 +2038,9 @@ distribution.""" # Add .deb / .udeb files to the DB (type is always deb, dbtype is udeb/deb) for newfile, entry in self.pkg.files.items(): if entry["type"] == "deb": - poolfiles.append(add_deb_to_db(self, newfile, session)) + b, pf = add_deb_to_db(self, newfile, session) + binaries.append(b) + poolfiles.append(pf) # If this is a sourceful diff only upload that is moving # cross-component we need to copy the .orig files into the new @@ -2122,6 +2125,18 @@ distribution.""" # Our SQL session will automatically start a new transaction after # the last commit + # Now ensure that the metadata has been added + # This has to be done after we copy the files into the pool + # For source if we have it: + if self.pkg.changes["architecture"].has_key("source"): + import_metadata_into_db(source, session) + + # Now for any of our binaries + for b in binaries: + import_metadata_into_db(b, session) + + session.commit() + # Move the .changes into the 'done' directory utils.move(self.pkg.changes_file, os.path.join(cnf["Dir::Queue::Done"], os.path.basename(self.pkg.changes_file))) diff --git a/docs/README.quotes b/docs/README.quotes index 6b89fc0f..2b159a8f 100644 --- a/docs/README.quotes +++ b/docs/README.quotes @@ -347,10 +347,3 @@ Canadians: This is a lighthouse. Your call. I wish they wouldnt leave biscuits out, thats just tempting. Damnit. 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -< mvo> that screams for consolidation in libapt at least (that then in turn can use libdpkg … ) - I guess the "d" means delayed ;) - -(whilst discussing adding xz support to dak, and therefore python-apt, and -therefore libapt-pkg) - diff --git a/tests/dbtest_packages.py b/tests/dbtest_packages.py index 2b179053..f2587709 100755 --- a/tests/dbtest_packages.py +++ b/tests/dbtest_packages.py @@ -328,7 +328,7 @@ class PackageTestCase(DBDakTestCase): 'sha1sum': 'deadbeef', 'sha256sum': 'deadbeef'} upload = Upload(pkg) - poolfile = add_deb_to_db(upload, 'hello_2.2-2_i386.deb', self.session) + bin, poolfile = add_deb_to_db(upload, 'hello_2.2-2_i386.deb', self.session) self.session.refresh(poolfile) self.session.refresh(poolfile.binary) self.assertEqual('main/h/hello/hello_2.2-2_i386.deb', poolfile.filename)