From: Mike O'Connor Date: Mon, 9 Feb 2009 07:46:48 +0000 (-0500) Subject: Merge branch 'psycopg2' into content_generation X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=cd5b29ddfd8de263c085f494b9573d683913f6f3;hp=574fb13e8dc253e39da7b6259617aafa74c1ec2d;p=dak.git Merge branch 'psycopg2' into content_generation Conflicts: daklib/database.py Signed-off-by: Mike O'Connor --- diff --git a/dak/dak.py b/dak/dak.py index a08f20e0..fc22ec5c 100755 --- a/dak/dak.py +++ b/dak/dak.py @@ -112,6 +112,8 @@ def init(): "Generate package <-> file mapping"), ("generate-releases", "Generate Release files"), + ("generate-contents", + "Generate contest files"), ("generate-index-diffs", "Generate .diff/Index files"), ("clean-suites", @@ -143,6 +145,8 @@ def init(): "Check for users with no packages in the archive"), ("import-archive", "Populate SQL database based from an archive tree"), + ("import-contents", + "Populate SQL database with Contents files"), ("import-keyring", "Populate fingerprint/uid table based on a new/updated keyring"), ("import-ldap-fingerprints", diff --git a/dak/dakdb/update2.py b/dak/dakdb/update2.py index 71b43fa7..e411662c 100755 --- a/dak/dakdb/update2.py +++ b/dak/dakdb/update2.py @@ -26,11 +26,13 @@ import psycopg2, time ################################################################################ def do_update(self): +vvvvvvvvvvvvvvvvvvvv print "Note: to be able to enable the the PL/Perl (plperl) procedural language, we do" print "need postgresql-plperl-$postgres-version installed. Make sure that this is the" print "case before you continue. Interrupt if it isn't, sleeping 5 seconds now." 
print "(We need to be database superuser for this to work!)" time.sleep (5) +^^^^^^^^^^^^^^^^^^^^ try: c = self.db.cursor() diff --git a/dak/dakdb/update4.py b/dak/dakdb/update4.py new file mode 100644 index 00000000..31160766 --- /dev/null +++ b/dak/dakdb/update4.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# coding=utf8 + +""" +Debian Archive Kit Database Update Script +Copyright © 2008 Michael Casadevall +Copyright © 2008 Roger Leigh + +Debian Archive Kit Database Update Script 4 +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +# really, if we want to screw ourselves, let's find a better way. 
+# rm -rf /srv/ftp.debian.org + +################################################################################ + +import psycopg2, time + +################################################################################ + +def do_update(self): + print "Adding content fields to database" + + try: + c = self.db.cursor() + c.execute("""CREATE TABLE content_file_paths ( + id serial primary key not null, + path text unique not null + )""") + + c.execute("""CREATE TABLE content_file_names ( + id serial primary key not null, + file text unique not null + )""") + + c.execute("""CREATE TABLE content_associations ( + id serial not null, + binary_pkg int4 not null references binaries(id) on delete cascade, + filepath int4 not null references content_file_paths(id) on delete cascade, + filename int4 not null references content_file_names(id) on delete cascade + );""") + + c.execute("""CREATE FUNCTION comma_concat(text, text) RETURNS text + AS $_$select case + WHEN $2 is null or $2 = '' THEN $1 + WHEN $1 is null or $1 = '' THEN $2 + ELSE $1 || ',' || $2 + END$_$ + LANGUAGE sql""") + + c.execute("""CREATE AGGREGATE comma_separated_list ( + BASETYPE = text, + SFUNC = comma_concat, + STYPE = text, + INITCOND = '' + );""") + + c.execute("UPDATE config SET value = '2' WHERE name = 'db_revision'") + self.db.commit() + + print "REMINDER: Remember to fully regenerate the Contents files before running import-contents" + print "" + print "Pausing for five seconds ..." + time.sleep (5) + + except psycopg2.ProgrammingError, msg: + self.db.rollback() + print "FATAL: Unable to apply debversion table update 2!" + print "Error Message: " + str(msg) + print "Database changes have been rolled back." 
diff --git a/dak/process_accepted.py b/dak/process_accepted.py index 683b1191..07258018 100755 --- a/dak/process_accepted.py +++ b/dak/process_accepted.py @@ -30,7 +30,7 @@ ############################################################################### import errno, fcntl, os, sys, time, re -import apt_pkg +import apt_pkg, tarfile, commands from daklib import database from daklib import logging from daklib import queue @@ -97,8 +97,10 @@ class Urgency_Log: else: os.unlink(self.log_filename) + ############################################################################### + def reject (str, prefix="Rejected: "): global reject_message if str: @@ -372,6 +374,13 @@ def install (): suite_id = database.get_suite_id(suite) projectB.query("INSERT INTO bin_associations (suite, bin) VALUES (%d, currval('binaries_id_seq'))" % (suite_id)) + # insert contents into the database + contents = utils.generate_contents_information(file) + q = projectB.query("SELECT currval('binaries_id_seq')") + bin_id = int(q.getresult()[0][0]) + for file in contents: + database.insert_content_path(bin_id, file) + # If the .orig.tar.gz is in a legacy directory we need to poolify # it, so that apt-get source (and anything else that goes by the # "Directory:" field in the Sources.gz file) works. 
@@ -434,7 +443,6 @@ def install (): utils.copy(pkg.changes_file, Cnf["Dir::Root"] + dest) for dest in copy_dot_dak.keys(): utils.copy(Upload.pkg.changes_file[:-8]+".dak", dest) - projectB.query("COMMIT WORK") # Move the .changes into the 'done' directory diff --git a/dak/update_db.py b/dak/update_db.py index f9b6e478..8bb88f65 100755 --- a/dak/update_db.py +++ b/dak/update_db.py @@ -37,7 +37,7 @@ from daklib import utils Cnf = None projectB = None -required_database_schema = 3 +required_database_schema = 4 ################################################################################ diff --git a/daklib/database.py b/daklib/database.py index 1882ad8b..3cbb67b7 100755 --- a/daklib/database.py +++ b/daklib/database.py @@ -53,6 +53,11 @@ fingerprint_id_cache = {} #: cache for fingerprints queue_id_cache = {} #: cache for queues uid_id_cache = {} #: cache for uids suite_version_cache = {} #: cache for suite_versions (packages) +suite_bin_version_cache = {} +content_path_id_cache = {} +content_file_id_cache = {} +insert_contents_file_cache = {} +cache_preloaded = False ################################################################################ @@ -387,6 +392,7 @@ def get_suite_version(source, suite): @return: the version for I{source} in I{suite} """ + global suite_version_cache cache_key = "%s_%s" % (source, suite) @@ -409,6 +415,50 @@ def get_suite_version(source, suite): return version +def get_latest_binary_version_id(binary, section, suite, arch): + global suite_bin_version_cache + cache_key = "%s_%s_%s_%s" % (binary, section, suite, arch) + cache_key_all = "%s_%s_%s_%s" % (binary, section, suite, get_architecture_id("all")) + + # Check for the cache hit for its arch, then arch all + if suite_bin_version_cache.has_key(cache_key): + return suite_bin_version_cache[cache_key] + if suite_bin_version_cache.has_key(cache_key_all): + return suite_bin_version_cache[cache_key_all] + if cache_preloaded == True: + return # package does not exist + + q = 
projectB.query("SELECT DISTINCT b.id FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section))) + + if not q.getresult(): + return False + + highest_bid = q.getresult()[0][0] + + suite_bin_version_cache[cache_key] = highest_bid + return highest_bid + +def preload_binary_id_cache(): + global suite_bin_version_cache, cache_preloaded + + # Get suite info + q = projectB.query("SELECT id FROM suite") + suites = q.getresult() + + # Get arch mappings + q = projectB.query("SELECT id FROM architecture") + arches = q.getresult() + + for suite in suites: + for arch in arches: + q = projectB.query("SELECT DISTINCT b.id, b.package, o.section FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.architecture = '%d' AND ba.suite = '%d'" % (int(arch[0]), int(suite[0]))) + + for bi in q.getresult(): + cache_key = "%s_%s_%s_%s" % (bi[1], bi[2], suite[0], arch[0]) + suite_bin_version_cache[cache_key] = int(bi[0]) + + cache_preloaded = True + ################################################################################ def get_or_set_maintainer_id (maintainer): @@ -730,3 +780,59 @@ def get_suites(pkgname, src=False): q = projectB.query(sql) return map(lambda x: x[0], q.getresult()) + +################################################################################ + +def get_or_set_contents_file_id(file): + global content_file_id_cache + + if not content_file_id_cache.has_key(file): + sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % file + q = projectB.query(sql_select) + if not q.getresult(): + # since this can be called within a transaction, we can't use currval + q = projectB.query("INSERT INTO content_file_names VALUES (DEFAULT, '%s') RETURNING id" % (file)) + content_file_id_cache[file] = int(q.getresult()[0][0]) + return 
content_file_id_cache[file] + +################################################################################ + +def get_or_set_contents_path_id(path): + global content_path_id_cache + + if not content_path_id_cache.has_key(path): + sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % path + q = projectB.query(sql_select) + if not q.getresult(): + # since this can be called within a transaction, we can't use currval + q = projectB.query("INSERT INTO content_file_paths VALUES (DEFAULT, '%s') RETURNING id" % (path)) + content_path_id_cache[path] = int(q.getresult()[0][0]) + return content_path_id_cache[path] + +################################################################################ + +def insert_content_path(bin_id, fullpath): + global insert_contents_file_cache + cache_key = "%s_%s" % (bin_id, fullpath) + + # have we seen this contents before? + # probably only relevant during package import + if insert_contents_file_cache.has_key(cache_key): + return + + # split the path into basename, and pathname + (path, file) = os.path.split(fullpath) + + # Get the necessary IDs ... + file_id = get_or_set_contents_file_id(file) + path_id = get_or_set_contents_path_id(path) + + # Determine if we're inserting a duplicate row + q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id)) + if q.getresult(): + # Yes we are, return without doing the insert + return + + # Put them into content_associations + projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id)) + return diff --git a/daklib/projectb.py b/daklib/projectb.py new file mode 100644 index 00000000..8222ab1d --- /dev/null +++ b/daklib/projectb.py @@ -0,0 +1,129 @@ +#!/usr/bin/python + +""" +Class providing access to a projectb database + +This class provides convenience functions for common queries to a +projectb database using psycopg2. 
+ +Copyright (C) 2009 Mike O'Connor +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +import psycopg2 + +################################################################################ + +class Projectb(object): + """ + Object providing methods for accessing the projectb database + """ + def __init__(self,Cnf): + connect_str = "dbname=%s"% (Cnf["DB::Name"]) + if Cnf["DB::Host"] != '': connect_str += " host=%s" % (Cnf["DB::Host"]) + if Cnf["DB::Port"] != '-1': connect_str += " port=%d" % (int(Cnf["DB::Port"])) + + self.dbh = psycopg2.connect(connect_str) + self.suite_id_cache = {} + self.architecture_id_cache = {} + self.section_id_cache = {} + + def get_suite_id(self, suite_name): + """ + return the id for the given suite_name + + @param suite_name: name of a suite such as "unsatble" or "testing" + + @rtype: int + @return: id of given suite or None if suite_name not matched + + >>> Cnf = {'DB::Name' : "projectb","DB::Host":"","DB::Port":'-1' } + >>> pb = Projectb( Cnf ) + >>> pb.get_suite_id("unstable") + 5 + >>> pb.get_suite_id("n'existe pas") + """ + if not self.suite_id_cache.has_key(suite_name): + c = self.dbh.cursor() + c.execute("SELECT id FROM suite WHERE suite_name=%(suite_name)s", + {'suite_name':suite_name}) + r = 
c.fetchone() + if r: + self.suite_id_cache[suite_name] = r[0] + else: + self.suite_id_cache[suite_name] = None + + return self.suite_id_cache[suite_name] + + def get_architecture_id(self, architecture_name): + """ + return the id for the given architecture_name + + @param architecture_name: name of a architecture such as "i386" or "source" + + @rtype: int + @return: id of given architecture or None if architecture_name not matched + + >>> Cnf = {'DB::Name' : "projectb","DB::Host":"","DB::Port":'-1' } + >>> pb = Projectb( Cnf ) + >>> pb.get_architecture_id("i386") + 7 + >>> pb.get_architecture_id("n'existe pas") + """ + if not self.architecture_id_cache.has_key(architecture_name): + c = self.dbh.cursor() + c.execute("SELECT id FROM architecture WHERE arch_string=%(architecture_name)s", + {'architecture_name':architecture_name}) + r = c.fetchone() + if r: + self.architecture_id_cache[architecture_name] = r[0] + else: + self.architecture_id_cache[architecture_name] = None + + return self.architecture_id_cache[architecture_name] + + def get_section_id(self, section_name): + """ + return the id for the given section_name + + @param section_name: name of a section such as "x11" or "non-free/libs" + + @rtype: int + @return: id of given section or None if section_name not matched + + >>> Cnf = {'DB::Name' : "projectb","DB::Host":"","DB::Port":'-1' } + >>> pb = Projectb( Cnf ) + >>> pb.get_section_id("non-free/libs") + 285 + >>> pb.get_section_id("n'existe pas") + """ + if not self.section_id_cache.has_key(section_name): + c = self.dbh.cursor() + c.execute("SELECT id FROM section WHERE section=%(section_name)s", + {'section_name':section_name}) + r = c.fetchone() + if r: + self.section_id_cache[section_name] = r[0] + else: + self.section_id_cache[section_name] = None + + return self.section_id_cache[section_name] + +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/daklib/utils.py b/daklib/utils.py index 7b822b9d..5e362796 100755 --- 
a/daklib/utils.py +++ b/daklib/utils.py @@ -1436,3 +1436,52 @@ if which_conf_file() != default_config: apt_pkg.ReadConfigFileISC(Cnf,which_conf_file()) ################################################################################ + +def generate_contents_information(filename): + """ + Generate a list of files contained in a .deb + + @type filename: string + @param filename: the path to a .deb + + @rtype: list + @return: a list of files in the data.tar.* portion of the .deb + """ + cmd = "ar t %s" % (filename) + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + reject("%s: 'ar t' invocation failed." % (filename)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + + # Ugh ... this is ugly ... Code ripped from process_unchecked.py + chunks = output.split('\n') + + contents = [] + try: + cmd = "ar x %s %s" % (filename, chunks[2]) + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + reject("%s: '%s' invocation failed." % (filename, cmd)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + + # Got deb tarballs, now lets go through and determine what bits + # and pieces the deb had ... + if chunks[2] == "data.tar.gz": + data = tarfile.open("data.tar.gz", "r:gz") + elif data_tar == "data.tar.bz2": + data = tarfile.open("data.tar.bz2", "r:bz2") + else: + os.remove(chunks[2]) + reject("couldn't find data.tar.*") + + for tarinfo in data: + if not tarinfo.isdir(): + contents.append(tarinfo.name[2:]) + + finally: + if os.path.exists( chunks[2] ): + os.remove( chunks[2] ) + + return contents + +############################################################################### diff --git a/docs/README.quotes b/docs/README.quotes index 3568ae7a..c696fbeb 100644 --- a/docs/README.quotes +++ b/docs/README.quotes @@ -344,3 +344,9 @@ Canadians: This is a lighthouse. Your call. 
elmo: I can't believe people pay you to fix computers %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +* Ganneff ponders how to best write the text to -devel. (need to tell em in + case they find more bugs). "We fixed the fucking idiotic broken implementation + to be less so" is probably not the nicest, even if perfect valid, way to say so + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%