From: Mike O'Connor Date: Wed, 18 Feb 2009 13:51:30 +0000 (-0500) Subject: Contents generation should be working now X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=1483a68353fe374254379f4560cc245fbf2523d4;p=dak.git Contents generation should be working now * rename mhy's daklib/Foo.py to daklib/foo.py * add temporary tables which are populated during p-u * copy from the temporary tables during p-a Signed-off-by: Mike O'Connor --- diff --git a/dak/contents.py b/dak/contents.py index 5bb1b147..2e1d05dc 100644 --- a/dak/contents.py +++ b/dak/contents.py @@ -215,6 +215,7 @@ class Contents(object): WHERE ca.id IS NULL)""" ); cursor.execute( "COMMIT" ) + def bootstrap(self): """ scan the existing debs in the pool to populate the contents database tables diff --git a/dak/dak.py b/dak/dak.py index 638f3dfa..aa6efdee 100755 --- a/dak/dak.py +++ b/dak/dak.py @@ -171,6 +171,8 @@ def init(): "Split queue/done into a date-based hierarchy"), ("stats", "Generate statistics"), + ("calculate-shasums", + "Calculate missing sha1sums and sha256sums"), ("bts-categorize", "Categorize uncategorized bugs filed against ftp.debian.org"), ] diff --git a/dak/dakdb/update4.py b/dak/dakdb/update4.py index f6a2db06..f707a311 100644 --- a/dak/dakdb/update4.py +++ b/dak/dakdb/update4.py @@ -56,6 +56,14 @@ def do_update(self): filename int4 not null references content_file_names(id) on delete cascade );""") + c.execute("""CREATE TABLE temp_content_associations ( + id serial not null, + package text not null, + version debversion not null, + filepath int4 not null references content_file_paths(id) on delete cascade, + filename int4 not null references content_file_names(id) on delete cascade + );""") + c.execute("""CREATE FUNCTION comma_concat(text, text) RETURNS text AS $_$select case WHEN $2 is null or $2 = '' THEN $1 diff --git a/dak/process_accepted.py b/dak/process_accepted.py index 941acd95..e597a8e2 100755 --- a/dak/process_accepted.py +++ b/dak/process_accepted.py @@ -374,12 +374,9 @@ def install (): suite_id = database.get_suite_id(suite) projectB.query("INSERT INTO bin_associations (suite, bin) VALUES (%d, currval('binaries_id_seq'))" % (suite_id)) - # insert contents into the database - contents = utils.generate_contents_information(file) - q = projectB.query("SELECT currval('binaries_id_seq')") - bin_id = int(q.getresult()[0][0]) - for file in contents: - database.insert_content_path(bin_id, file) + + if not database.copy_temporary_contents(package, version, files[newfile]): + reject("Missing contents for package") # If the .orig.tar.gz is in a legacy directory we need to poolify # it, so that apt-get source (and anything else that goes by the diff --git a/dak/process_unchecked.py b/dak/process_unchecked.py index 1ec04563..5097b24a 100755 --- a/dak/process_unchecked.py +++ b/dak/process_unchecked.py @@ -28,9 +28,11 @@ ################################################################################ -import commands, errno, fcntl, os, re, shutil, stat, sys, time, tempfile, traceback +import commands, errno, fcntl, os, re, shutil, stat, sys, time, tempfile, traceback, tarfile import apt_inst, apt_pkg -from daklib import database +from debian_bundle import deb822 +from daklib.dbconn import DBConn +from daklib.binary import Binary from daklib import logging from daklib import queue from daklib import utils @@ -121,6 +123,16 @@ def reject (str, prefix="Rejected: "): ################################################################################ +def create_tmpdir(): + """ + Create a temporary directory that can be used for unpacking files into for + checking + """ + tmpdir = tempfile.mkdtemp() + return tmpdir + +################################################################################ + def copy_to_holding(filename): global in_holding @@ -322,33 +334,6 @@ def check_distributions(): ################################################################################ -def check_deb_ar(filename): - """ - Sanity check the ar of a .deb, i.e. that there is: - - 1. debian-binary - 2. control.tar.gz - 3. data.tar.gz or data.tar.bz2 - - in that order, and nothing else. - """ - cmd = "ar t %s" % (filename) - (result, output) = commands.getstatusoutput(cmd) - if result != 0: - reject("%s: 'ar t' invocation failed." % (filename)) - reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") - chunks = output.split('\n') - if len(chunks) != 3: - reject("%s: found %d chunks, expected 3." % (filename, len(chunks))) - if chunks[0] != "debian-binary": - reject("%s: first chunk is '%s', expected 'debian-binary'." % (filename, chunks[0])) - if chunks[1] != "control.tar.gz": - reject("%s: second chunk is '%s', expected 'control.tar.gz'." % (filename, chunks[1])) - if chunks[2] not in [ "data.tar.bz2", "data.tar.gz" ]: - reject("%s: third chunk is '%s', expected 'data.tar.gz' or 'data.tar.bz2'." % (filename, chunks[2])) - -################################################################################ - def check_files(): global reprocess @@ -387,6 +372,19 @@ def check_files(): has_binaries = 0 has_source = 0 + cursor = DBConn().cursor() + # Check for packages that have moved from one component to another + # STU: this should probably be changed to not join on architecture, suite tables but instead to used their cached name->id mappings from DBConn + cursor.execute("""PREPARE moved_pkg_q AS + SELECT c.name FROM binaries b, bin_associations ba, suite s, location l, + component c, architecture a, files f + WHERE b.package = $1 AND s.suite_name = $2 + AND (a.arch_string = $3 OR a.arch_string = 'all') + AND ba.bin = b.id AND ba.suite = s.id AND b.architecture = a.id + AND f.location = l.id + AND l.component = c.id + AND b.file = f.id""") + for f in file_keys: # Ensure the file does not already exist in one of the accepted directories for d in [ "Accepted", "Byhand", "New", "ProposedUpdates", "OldProposedUpdates", "Embargoed", "Unembargoed" ]: @@ -549,7 +547,7 @@ def check_files(): # Check the version and for file overwrites reject(Upload.check_binary_against_db(f),"") - check_deb_ar(f) + Binary(f).scan_package() # Checks for a source package... else: @@ -609,7 +607,7 @@ def check_files(): # Validate the component component = files[f]["component"] - component_id = database.get_component_id(component) + component_id = DBConn().get_component_id(component) if component_id == -1: reject("file '%s' has unknown component '%s'." % (f, component)) continue @@ -624,14 +622,14 @@ def check_files(): # Determine the location location = Cnf["Dir::Pool"] - location_id = database.get_location_id (location, component, archive) + location_id = DBConn().get_location_id(location, component, archive) if location_id == -1: reject("[INTERNAL ERROR] couldn't determine location (Component: %s, Archive: %s)" % (component, archive)) files[f]["location id"] = location_id # Check the md5sum & size against existing files (if any) files[f]["pool name"] = utils.poolify (changes["source"], files[f]["component"]) - files_id = database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["location id"]) + files_id = DBConn().get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["location id"]) if files_id == -1: reject("INTERNAL ERROR, get_files_id() returned multiple matches for %s." % (f)) elif files_id == -2: @@ -639,16 +637,9 @@ def check_files(): files[f]["files id"] = files_id # Check for packages that have moved from one component to another - q = Upload.projectB.query(""" -SELECT c.name FROM binaries b, bin_associations ba, suite s, location l, - component c, architecture a, files f - WHERE b.package = '%s' AND s.suite_name = '%s' - AND (a.arch_string = '%s' OR a.arch_string = 'all') - AND ba.bin = b.id AND ba.suite = s.id AND b.architecture = a.id - AND f.location = l.id AND l.component = c.id AND b.file = f.id""" - % (files[f]["package"], suite, - files[f]["architecture"])) - ql = q.getresult() + files[f]['suite'] = suite + cursor.execute("""EXECUTE moved_pkg_q( %(package)s, %(suite)s, %(architecture)s )""", ( files[f] ) ) + ql = cursor.fetchone() if ql: files[f]["othercomponents"] = ql[0][0] @@ -870,13 +861,7 @@ def check_source(): or pkg.orig_tar_gz == -1: return - # Create a temporary directory to extract the source into - if Options["No-Action"]: - tmpdir = tempfile.mkdtemp() - else: - # We're in queue/holding and can create a random directory. - tmpdir = "%s" % (os.getpid()) - os.mkdir(tmpdir) + tmpdir = create_tmpdir() # Move into the temporary directory cwd = os.getcwd() @@ -997,12 +982,21 @@ def check_timestamps(): ################################################################################ def lookup_uid_from_fingerprint(fpr): - q = Upload.projectB.query("SELECT u.uid, u.name, k.debian_maintainer FROM fingerprint f JOIN keyrings k ON (f.keyring=k.id), uid u WHERE f.uid = u.id AND f.fingerprint = '%s'" % (fpr)) - qs = q.getresult() - if len(qs) == 0: - return (None, None, None) + """ + Return the uid,name,isdm for a given gpg fingerprint + + @ptype fpr: string + @param fpr: a 40 byte GPG fingerprint + + @return (uid, name, isdm) + """ + cursor = DBConn().cursor() + cursor.execute( "SELECT u.uid, u.name, k.debian_maintainer FROM fingerprint f JOIN keyrings k ON (f.keyring=k.id), uid u WHERE f.uid = u.id AND f.fingerprint = '%s'" % (fpr)) + qs = cursor.fetchone() + if qs: + return qs else: - return qs[0] + return (None, None, None) def check_signed_by_key(): """Ensure the .changes is signed by an authorized uploader.""" @@ -1043,12 +1037,16 @@ def check_signed_by_key(): if not sponsored and not may_nmu: source_ids = [] - q = Upload.projectB.query("SELECT s.id, s.version FROM source s JOIN src_associations sa ON (s.id = sa.source) WHERE s.source = '%s' AND s.dm_upload_allowed = 'yes'" % (changes["source"])) + cursor.execute( "SELECT s.id, s.version FROM source s JOIN src_associations sa ON (s.id = sa.source) WHERE s.source = %(source)s AND s.dm_upload_allowed = 'yes'", changes ) highest_sid, highest_version = None, None should_reject = True - for si in q.getresult(): + while True: + si = cursor.fetchone() + if not si: + break + if highest_version == None or apt_pkg.VersionCompare(si[1], highest_version) == 1: highest_sid = si[0] highest_version = si[1] @@ -1056,8 +1054,14 @@ def check_signed_by_key(): if highest_sid == None: reject("Source package %s does not have 'DM-Upload-Allowed: yes' in its most recent version" % changes["source"]) else: - q = Upload.projectB.query("SELECT m.name FROM maintainer m WHERE m.id IN (SELECT su.maintainer FROM src_uploaders su JOIN source s ON (s.id = su.source) WHERE su.source = %s)" % (highest_sid)) - for m in q.getresult(): + + cursor.execute("SELECT m.name FROM maintainer m WHERE m.id IN (SELECT su.maintainer FROM src_uploaders su JOIN source s ON (s.id = su.source) WHERE su.source = %s)" % (highest_sid)) + + while True: + m = cursor.fetchone() + if not m: + break + (rfc822, rfc2047, name, email) = utils.fix_maintainer(m[0]) if email == uid_email or name == uid_name: should_reject=False @@ -1068,9 +1072,14 @@ def check_signed_by_key(): for b in changes["binary"].keys(): for suite in changes["distribution"].keys(): - suite_id = database.get_suite_id(suite) - q = Upload.projectB.query("SELECT DISTINCT s.source FROM source s JOIN binaries b ON (s.id = b.source) JOIN bin_associations ba On (b.id = ba.bin) WHERE b.package = '%s' AND ba.suite = %s" % (b, suite_id)) - for s in q.getresult(): + suite_id = DBConn().get_suite_id(suite) + + cursor.execute("SELECT DISTINCT s.source FROM source s JOIN binaries b ON (s.id = b.source) JOIN bin_associations ba On (b.id = ba.bin) WHERE b.package = %(package)s AND ba.suite = %(suite)s" , {'package':b, 'suite':suite_id} ) + while True: + s = cursor.fetchone() + if not s: + break + if s[0] != changes["source"]: reject("%s may not hijack %s from source package %s in suite %s" % (uid, b, s, suite)) @@ -1214,11 +1223,9 @@ def move_to_dir (dest, perms=0660, changesperms=0664): ################################################################################ def is_unembargo (): - q = Upload.projectB.query( - "SELECT package FROM disembargo WHERE package = '%s' AND version = '%s'" % - (changes["source"], changes["version"])) - ql = q.getresult() - if ql: + cursor = DBConn().cursor() + cursor.execute( "SELECT package FROM disembargo WHERE package = %(source)s AND version = %(version)s", changes ) + if cursor.fetchone(): return 1 oldcwd = os.getcwd() @@ -1230,9 +1237,9 @@ def is_unembargo (): if changes["architecture"].has_key("source"): if Options["No-Action"]: return 1 - Upload.projectB.query( - "INSERT INTO disembargo (package, version) VALUES ('%s', '%s')" % - (changes["source"], changes["version"])) + cursor.execute( "INSERT INTO disembargo (package, version) VALUES ('%(package)s', '%(version)s')", + changes ) + cursor.execute( "COMMIT" ) return 1 return 0 @@ -1290,12 +1297,18 @@ def is_stableupdate (): return 0 if not changes["architecture"].has_key("source"): - pusuite = database.get_suite_id("proposed-updates") - q = Upload.projectB.query( - "SELECT S.source FROM source s JOIN src_associations sa ON (s.id = sa.source) WHERE s.source = '%s' AND s.version = '%s' AND sa.suite = %d" % - (changes["source"], changes["version"], pusuite)) - ql = q.getresult() - if ql: + pusuite = DBConn().get_suite_id("proposed-updates") + cursor = DBConn().cursor() + cursor.execute( """SELECT 1 FROM source s + JOIN src_associations sa ON (s.id = sa.source) + WHERE s.source = %(source)s + AND s.version = '%(version)s' + AND sa.suite = %(suite)d""", + {'source' : changes['source'], + 'version' : changes['version'], + 'suite' : pasuite}) + + if cursor.fetchone(): # source is already in proposed-updates so no need to hold return 0 @@ -1319,13 +1332,17 @@ def is_oldstableupdate (): return 0 if not changes["architecture"].has_key("source"): - pusuite = database.get_suite_id("oldstable-proposed-updates") - q = Upload.projectB.query( - "SELECT S.source FROM source s JOIN src_associations sa ON (s.id = sa.source) WHERE s.source = '%s' AND s.version = '%s' AND sa.suite = %d" % - (changes["source"], changes["version"], pusuite)) - ql = q.getresult() - if ql: - # source is already in oldstable-proposed-updates so no need to hold + pusuite = DBConn().get_suite_id("oldstable-proposed-updates") + cursor = DBConn().cursor() + cursor.execute( """"SELECT 1 FROM source s + JOIN src_associations sa ON (s.id = sa.source) + WHERE s.source = %(source)s + AND s.version = %(version)s + AND sa.suite = %d""", + {'source' : changes['source'], + 'version' : changes['version'], + 'suite' : pasuite}) + if cursor.fetchone(): return 0 return 1 diff --git a/daklib/Config.py b/daklib/Config.py deleted file mode 100644 index 96fdb8f5..00000000 --- a/daklib/Config.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python - -""" -Config access class - -@contact: Debian FTPMaster -@copyright: 2008 Mark Hymers -@license: GNU General Public License version 2 or later -""" - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -################################################################################ - -# mhy, how about "Now with 20% more monty python references" - -################################################################################ - -import apt_pkg -import socket - -from Singleton import Singleton - -################################################################################ - -default_config = "/etc/dak/dak.conf" - -def which_conf_file(Cnf): - res = socket.gethostbyaddr(socket.gethostname()) - if Cnf.get("Config::" + res[0] + "::DakConfig"): - return Cnf["Config::" + res[0] + "::DakConfig"] - else: - return default_config - -class Config(Singleton): - """ - A Config object is a singleton containing - information about the DAK configuration - """ - def __init__(self, *args, **kwargs): - super(Config, self).__init__(*args, **kwargs) - - def _readconf(self): - apt_pkg.init() - - self.Cnf = apt_pkg.newConfiguration() - - apt_pkg.ReadConfigFileISC(self.Cnf, default_config) - - # Check whether our dak.conf was the real one or - # just a pointer to our main one - res = socket.gethostbyaddr(socket.gethostname()) - conffile = self.Cnf.get("Config::" + res[0] + "::DakConfig") - if conffile: - apt_pkg.ReadConfigFileISC(self.Cnf, conffile) - - # Rebind some functions - # TODO: Clean this up - self.get = self.Cnf.get - self.SubTree = self.Cnf.SubTree - self.ValueList = self.Cnf.ValueList - - def _startup(self, *args, **kwargs): - self._readconf() - - def has_key(self, name): - return self.Cnf.has_key(name) - - def __getitem__(self, name): - return self.Cnf[name] - diff --git a/daklib/DBConn.py b/daklib/DBConn.py deleted file mode 100644 index 75afb5a7..00000000 --- a/daklib/DBConn.py +++ /dev/null @@ -1,414 +0,0 @@ -#!/usr/bin/env python - -""" DB access class - -@contact: Debian FTPMaster -@copyright: 2000, 2001, 2002, 2003, 2004, 2006 James Troup -@copyright: 2008-2009 Mark Hymers -@copyright: 2009 Joerg Jaspert -@copyright: 2009 Mike O'Connor -@license: GNU General Public License version 2 or later -""" - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -################################################################################ - -# < mhy> I need a funny comment -# < sgran> two peanuts were walking down a dark street -# < sgran> one was a-salted -# * mhy looks up the definition of "funny" - -################################################################################ - -import os -import psycopg2 - -from Singleton import Singleton -from Config import Config - -################################################################################ - -class Cache(object): - def __init__(self, hashfunc=None): - if hashfunc: - self.hashfunc = hashfunc - else: - self.hashfunc = lambda x: x['value'] - - self.data = {} - - def SetValue(self, keys, value): - self.data[self.hashfunc(keys)] = value - - def GetValue(self, keys): - return self.data.get(self.hashfunc(keys)) - -################################################################################ - -class DBConn(Singleton): - """ - database module init. - """ - def __init__(self, *args, **kwargs): - super(DBConn, self).__init__(*args, **kwargs) - - def _startup(self, *args, **kwargs): - self.__createconn() - self.__init_caches() - - ## Connection functions - def __createconn(self): - cnf = Config() - connstr = "dbname=%s" % cnf["DB::Name"] - if cnf["DB::Host"]: - connstr += " host=%s" % cnf["DB::Host"] - if cnf["DB::Port"] and cnf["DB::Port"] != "-1": - connstr += " port=%s" % cnf["DB::Port"] - - self.db_con = psycopg2.connect(connstr) - - def reconnect(self): - try: - self.db_con.close() - except psycopg2.InterfaceError: - pass - - self.db_con = None - self.__createconn() - - ## Cache functions - def __init_caches(self): - self.caches = {'suite': Cache(), - 'section': Cache(), - 'priority': Cache(), - 'override_type': Cache(), - 'architecture': Cache(), - 'archive': Cache(), - 'component': Cache(), - 'content_path_names': Cache(), - 'content_file_names': Cache(), - 'location': Cache(lambda x: '%s_%s_%s' % (x['location'], x['component'], x['location'])), - 'maintainer': {}, # TODO - 'keyring': {}, # TODO - 'source': Cache(lambda x: '%s_%s_' % (x['source'], x['version'])), - 'files': {}, # TODO - 'maintainer': {}, # TODO - 'fingerprint': {}, # TODO - 'queue': {}, # TODO - 'uid': {}, # TODO - 'suite_version': Cache(lambda x: '%s_%s' % (x['source'], x['suite'])), - } - - def clear_caches(self): - self.__init_caches() - - ## Functions to pass through to the database connector - def cursor(self): - return self.db_con.cursor() - - def commit(self): - return self.db_con.commit() - - ## Get functions - def __get_single_id(self, query, values, cachename=None): - # This is a bit of a hack but it's an internal function only - if cachename is not None: - res = self.caches[cachename].GetValue(values) - if res: - return res - - c = self.db_con.cursor() - c.execute(query, values) - - if c.rowcount != 1: - return None - - res = c.fetchone()[0] - - if cachename is not None: - self.caches[cachename].SetValue(values, res) - - return res - - def __get_id(self, retfield, table, qfield, value): - query = "SELECT %s FROM %s WHERE %s = %%(value)s" % (retfield, table, qfield) - return self.__get_single_id(query, {'value': value}, cachename=table) - - def get_suite_id(self, suite): - """ - Returns database id for given C{suite}. - Results are kept in a cache during runtime to minimize database queries. - - @type suite: string - @param suite: The name of the suite - - @rtype: int - @return: the database id for the given suite - - """ - return self.__get_id('id', 'suite', 'suite_name', suite) - - def get_section_id(self, section): - """ - Returns database id for given C{section}. - Results are kept in a cache during runtime to minimize database queries. - - @type section: string - @param section: The name of the section - - @rtype: int - @return: the database id for the given section - - """ - return self.__get_id('id', 'section', 'section', section) - - def get_priority_id(self, priority): - """ - Returns database id for given C{priority}. - Results are kept in a cache during runtime to minimize database queries. - - @type priority: string - @param priority: The name of the priority - - @rtype: int - @return: the database id for the given priority - - """ - return self.__get_id('id', 'priority', 'priority', priority) - - def get_override_type_id(self, override_type): - """ - Returns database id for given override C{type}. - Results are kept in a cache during runtime to minimize database queries. - - @type type: string - @param type: The name of the override type - - @rtype: int - @return: the database id for the given override type - - """ - return self.__get_id('id', 'override_type', 'override_type', override_type) - - def get_architecture_id(self, architecture): - """ - Returns database id for given C{architecture}. - Results are kept in a cache during runtime to minimize database queries. - - @type architecture: string - @param architecture: The name of the override type - - @rtype: int - @return: the database id for the given architecture - - """ - return self.__get_id('id', 'architecture', 'arch_string', architecture) - - def get_archive_id(self, archive): - """ - returns database id for given c{archive}. - results are kept in a cache during runtime to minimize database queries. - - @type archive: string - @param archive: the name of the override type - - @rtype: int - @return: the database id for the given archive - - """ - return self.__get_id('id', 'archive', 'lower(name)', archive) - - def get_component_id(self, component): - """ - Returns database id for given C{component}. - Results are kept in a cache during runtime to minimize database queries. - - @type component: string - @param component: The name of the override type - - @rtype: int - @return: the database id for the given component - - """ - return self.__get_id('id', 'component', 'lower(name)', component) - - def get_location_id(self, location, component, archive): - """ - Returns database id for the location behind the given combination of - - B{location} - the path of the location, eg. I{/srv/ftp.debian.org/ftp/pool/} - - B{component} - the id of the component as returned by L{get_component_id} - - B{archive} - the id of the archive as returned by L{get_archive_id} - Results are kept in a cache during runtime to minimize database queries. - - @type location: string - @param location: the path of the location - - @type component: int - @param component: the id of the component - - @type archive: int - @param archive: the id of the archive - - @rtype: int - @return: the database id for the location - - """ - - archive_id = self.get_archive_id(archive) - - if not archive_id: - return None - - res = None - - if component: - component_id = self.get_component_id(component) - if component_id: - res = self.__get_single_id("SELECT id FROM location WHERE path=%(location)s AND component=%(component)d AND archive=%(archive)d", - {'location': location, 'archive': archive_id, 'component': component_id}, cachename='location') - else: - res = self.__get_single_id("SELECT id FROM location WHERE path=%(location)s AND archive=%(archive)d", - {'location': location, 'archive': archive_id, 'component': ''}, cachename='location') - - return res - - def get_source_id(self, source, version): - """ - Returns database id for the combination of C{source} and C{version} - - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc} - - B{version} - Results are kept in a cache during runtime to minimize database queries. - - @type source: string - @param source: source package name - - @type version: string - @param version: the source version - - @rtype: int - @return: the database id for the source - - """ - return self.__get_single_id("SELECT id FROM source s WHERE s.source=%(source)s AND s.version=%(version)s", - {'source': source, 'version': version}, cachename='source') - - def get_suite_version(self, source, suite): - """ - Returns database id for a combination of C{source} and C{suite}. - - - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc} - - B{suite} - a suite name, eg. I{unstable} - - Results are kept in a cache during runtime to minimize database queries. - - @type source: string - @param source: source package name - - @type suite: string - @param suite: the suite name - - @rtype: string - @return: the version for I{source} in I{suite} - - """ - return self.__get_single_id(""" - SELECT s.version FROM source s, suite su, src_associations sa - WHERE sa.source=s.id - AND sa.suite=su.id - AND su.suite_name=%(suite)s - AND s.source=%(source)""", {'suite': suite, 'source': source}, cachename='suite_version') - - - def get_or_set_contents_file_id(self, filename): - """ - Returns database id for given filename. - - Results are kept in a cache during runtime to minimize database queries. - If no matching file is found, a row is inserted. - - @type filename: string - @param filename: The filename - - @rtype: int - @return: the database id for the given component - """ - values={'value': filename} - query = "SELECT id FROM content_file_names WHERE file = %(value)s" - id = self.__get_single_id(query, values, cachename='content_file_names') - if not id: - c = self.db_con.cursor() - c.execute( "INSERT INTO content_file_names VALUES (DEFAULT, %(value)s) RETURNING id", - values ) - - id = c.fetchone()[0] - self.caches['content_file_names'].SetValue(values, id) - - return id - - def get_or_set_contents_path_id(self, path): - """ - Returns database id for given path. - - Results are kept in a cache during runtime to minimize database queries. - If no matching file is found, a row is inserted. - - @type path: string - @param path: The filename - - @rtype: int - @return: the database id for the given component - """ - values={'value': path} - query = "SELECT id FROM content_file_paths WHERE path = %(value)s" - id = self.__get_single_id(query, values, cachename='content_path_names') - if not id: - c = self.db_con.cursor() - c.execute( "INSERT INTO content_file_paths VALUES (DEFAULT, %(value)s) RETURNING id", - values ) - - id = c.fetchone()[0] - self.caches['content_path_names'].SetValue(values, id) - - return id - - def insert_content_paths(self, bin_id, fullpaths): - """ - Make sure given path is associated with given binary id - - @type bin_id: int - @param bin_id: the id of the binary - @type fullpath: string - @param fullpath: the path of the file being associated with the binary - """ - - c = self.db_con.cursor() - - for fullpath in fullpaths: - c.execute( "BEGIN WORK" ) - (path, file) = os.path.split(fullpath) - - # Get the necessary IDs ... - file_id = self.get_or_set_contents_file_id(file) - path_id = self.get_or_set_contents_path_id(path) - - # Determine if we're inserting a duplicate row - - c.execute("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id)) - if not c.fetchone(): - # no, we are not, do the insert - - c.execute("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id)) - c.execute( "COMMIT" ) diff --git a/daklib/Singleton.py b/daklib/Singleton.py deleted file mode 100644 index 535a25a3..00000000 --- a/daklib/Singleton.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python -# vim:set et ts=4 sw=4: - -""" -Singleton pattern code - -Inspiration for this very simple ABC was taken from various documents / -tutorials / mailing lists. This may not be thread safe but given that -(as I write) large chunks of dak aren't even type-safe, I'll live with -it for now - -@contact: Debian FTPMaster -@copyright: 2008 Mark Hymers -@license: GNU General Public License version 2 or later -""" - -################################################################################ - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -################################################################################ - -# < sgran> NCommander: in SQL, it's better to join than to repeat information -# < tomv_w> that makes SQL the opposite to Debian mailing lists! - -################################################################################ - -""" -This class set implements objects that may need to be instantiated multiple -times, but we don't want the overhead of actually creating and init'ing -them more than once. It also saves us using globals all over the place -""" - -class Singleton(object): - """This is the ABC for other dak Singleton classes""" - __single = None - def __new__(cls, *args, **kwargs): - # Check to see if a __single exists already for this class - # Compare class types instead of just looking for None so - # that subclasses will create their own __single objects - if cls != type(cls.__single): - cls.__single = object.__new__(cls, *args, **kwargs) - cls.__single._startup(*args, **kwargs) - return cls.__single - - def __init__(self, *args, **kwargs): - if type(self) == "Singleton": - raise NotImplementedError("Singleton is an ABC") - - def _startup(self): - """ - _startup is a private method used instead of __init__ due to the way - we instantiate this object - """ - raise NotImplementedError("Singleton is an ABC") - diff --git a/daklib/binary.py b/daklib/binary.py new file mode 100755 index 00000000..70133db6 --- /dev/null +++ b/daklib/binary.py @@ -0,0 +1,151 @@ +#!/usr/bin/python + +""" +Functions related debian binary packages + +@contact: Debian FTPMaster +@copyright: 2009 Mike O'Connor +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +import os +import shutil +import tempfile +import tarfile +import commands +import traceback +from debian_bundle import deb822 +from dbconn import DBConn + +class Binary(object): + def __init__(self, filename): + self.filename = filename + self.tmpdir = None + self.chunks = None + + def __del__(self): + # we need to remove the temporary directory, if we created one + if self.tmpdir and os.path.exists(self.tmpdir): + shutil.rmtree(self.tmpdir) + + def __scan_ar(self): + # get a list of the ar contents + if not self.chunks: + + cmd = "ar t %s" % (self.filename) + + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + rejected = True + reject("%s: 'ar t' invocation failed." % (self.filename)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + self.chunks = output.split('\n') + + + + def __unpack(self): + # Internal function which extracts the contents of the .ar to + # a temporary directory + + if not self.tmpdir: + tmpdir = tempfile.mkdtemp() + cwd = os.getcwd() + try: + os.chdir( tmpdir ) + cmd = "ar x %s %s %s" % (os.path.join(cwd,self.filename), self.chunks[1], self.chunks[2]) + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + reject("%s: '%s' invocation failed." % (filename, cmd)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + else: + self.tmpdir = tmpdir + + finally: + os.chdir( cwd ) + + def valid_deb(self): + """ + Check deb contents making sure the .deb contains: + 1. debian-binary + 2. control.tar.gz + 3. data.tar.gz or data.tar.bz2 + in that order, and nothing else. + """ + self.__scan_ar() + rejected = not self.chunks + if len(self.chunks) != 3: + rejected = True + reject("%s: found %d chunks, expected 3." % (self.filename, len(self.chunks))) + if self.chunks[0] != "debian-binary": + rejected = True + reject("%s: first chunk is '%s', expected 'debian-binary'." % (self.filename, self.chunks[0])) + if self.chunks[1] != "control.tar.gz": + rejected = True + reject("%s: second chunk is '%s', expected 'control.tar.gz'." % (self.filename, self.chunks[1])) + if self.chunks[2] not in [ "data.tar.bz2", "data.tar.gz" ]: + rejected = True + reject("%s: third chunk is '%s', expected 'data.tar.gz' or 'data.tar.bz2'." % (self.filename, self.chunks[2])) + + return not rejected + + def scan_package(self): + """ + Unpack the .deb, do sanity checking, and gather info from it. + + Currently information gathering consists of getting the contents list. In + the hopefully near future, it should also include gathering info from the + control file. + + @return True if the deb is valid and contents were imported + """ + rejected = not self.valid_deb() + self.__unpack() + + if not rejected and self.tmpdir: + cwd = os.getcwd() + try: + os.chdir(self.tmpdir) + if self.chunks[1] == "control.tar.gz": + control = tarfile.open(os.path.join(self.tmpdir, "control.tar.gz" ), "r:gz") + elif self.chunks[1] == "control.tar.bz2": + control = tarfile.open(os.path.join(self.tmpdir, "control.tar.bz2" ), "r:bz2") + + pkg = deb822.Packages.iter_paragraphs( control.extractfile('./control') ).next() + + if self.chunks[2] == "data.tar.gz": + data = tarfile.open(os.path.join(self.tmpdir, "data.tar.gz"), "r:gz") + elif self.chunks[2] == "data.tar.bz2": + data = tarfile.open(os.path.join(self.tmpdir, "data.tar.bz2" ), "r:bz2") + + return DBConn().insert_content_paths(pkg, [ tarinfo.name for tarinfo in data if tarinfo.isdir()]) + + except: + traceback.print_exc() + + return False + + finally: + os.chdir( cwd ) + + + + +if __name__ == "__main__": + Binary( "/srv/ftp.debian.org/queue/accepted/halevt_0.1.3-2_amd64.deb" ).scan_package() + diff --git a/daklib/config.py b/daklib/config.py new file mode 100755 index 00000000..997a597d --- /dev/null +++ b/daklib/config.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python + +""" +Config access class + +@contact: Debian FTPMaster +@copyright: 2008 Mark Hymers +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +# mhy, how about "Now with 20% more monty python references" + +################################################################################ + +import apt_pkg +import socket + +from singleton import Singleton + +################################################################################ + +default_config = "/etc/dak/dak.conf" + +def which_conf_file(Cnf): + res = socket.gethostbyaddr(socket.gethostname()) + if Cnf.get("Config::" + res[0] + "::DakConfig"): + return Cnf["Config::" + res[0] + "::DakConfig"] + else: + return default_config + +class Config(Singleton): + """ + A Config object is a singleton containing + information about the DAK configuration + """ + def __init__(self, *args, **kwargs): + super(Config, self).__init__(*args, **kwargs) + + def _readconf(self): + apt_pkg.init() + + self.Cnf = apt_pkg.newConfiguration() + + apt_pkg.ReadConfigFileISC(self.Cnf, default_config) + + # Check whether our dak.conf was the real one or + # just a pointer to our main one + res = socket.gethostbyaddr(socket.gethostname()) + conffile = self.Cnf.get("Config::" + res[0] + "::DakConfig") + if conffile: + apt_pkg.ReadConfigFileISC(self.Cnf, conffile) + + # Rebind some functions + # TODO: Clean this up + self.get = self.Cnf.get + self.SubTree = self.Cnf.SubTree + self.ValueList = self.Cnf.ValueList + + def _startup(self, *args, **kwargs): + self._readconf() + + def has_key(self, name): + return self.Cnf.has_key(name) + + def __getitem__(self, name): + return self.Cnf[name] + diff --git a/daklib/database.py b/daklib/database.py index 3cbb67b7..5818733e 100755 --- a/daklib/database.py +++ b/daklib/database.py @@ -54,9 +54,6 @@ queue_id_cache = {} #: cache for queues uid_id_cache = {} #: cache for uids suite_version_cache = {} #: cache for suite_versions (packages) suite_bin_version_cache = {} -content_path_id_cache = {} -content_file_id_cache = {} -insert_contents_file_cache = {} cache_preloaded = False ################################################################################ @@ -781,58 +778,45 @@ def get_suites(pkgname, src=False): q = projectB.query(sql) return map(lambda x: x[0], q.getresult()) -################################################################################ - -def get_or_set_contents_file_id(file): - global content_file_id_cache - - if not content_file_id_cache.has_key(file): - sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % file - q = projectB.query(sql_select) - if not q.getresult(): - # since this can be called within a transaction, we can't use currval - q = projectB.query("INSERT INTO content_file_names VALUES (DEFAULT, '%s') RETURNING id" % (file)) - content_file_id_cache[file] = int(q.getresult()[0][0]) - return content_file_id_cache[file] ################################################################################ -def get_or_set_contents_path_id(path): - global content_path_id_cache +def copy_temporary_contents(package, version, deb): + """ + copy the previously stored contents from the temp table to the permanant one - if not content_path_id_cache.has_key(path): - sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % path - q = projectB.query(sql_select) - if not q.getresult(): - # since this can be called within a transaction, we can't use currval - q = projectB.query("INSERT INTO content_file_paths VALUES (DEFAULT, '%s') RETURNING id" % (path)) - content_path_id_cache[path] = int(q.getresult()[0][0]) - return content_path_id_cache[path] + during process-unchecked, the deb should have been scanned and the + contents stored in temp_content_associations + """ -################################################################################ + # first see if contents exist: -def insert_content_path(bin_id, fullpath): - global insert_contents_file_cache - cache_key = "%s_%s" % (bin_id, fullpath) + exists = projectB.query("""SELECT 1 FROM temp_content_associations + WHERE package='%s' LIMIT 1""" % package ).getresult() - # have we seen this contents before? - # probably only revelant during package import - if insert_contents_file_cache.has_key(cache_key): - return + if not exists: + # This should NOT happen. We should have added contents + # during process-unchecked. if it did, log an error, and send + # an email. + subst = { + "__PACKAGE__": package, + "__VERSION__": version, + "__DAK_ADDRESS__": Cnf["Dinstall::MyEmailAddress"] + } - # split the path into basename, and pathname - (path, file) = os.path.split(fullpath) + message = utils.TemplateSubst(Subst, Cnf["Dir::Templates"]+"/bts-categorize") + utils.send_mail( message ) - # Get the necessary IDs ... - file_id = get_or_set_contents_file_id(file) - path_id = get_or_set_contents_path_id(path) + exists = DBConn().insert_content_path(package, version, deb) - # Determine if we're inserting a duplicate row - q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id)) - if q.getresult(): - # Yes we are, return without doing the insert - return + if exists: + sql = """INSERT INTO content_associations(binary_pkg,filepath,filename) + SELECT currval('binaries_id_seq'), filepath, filename FROM temp_content_associations + WHERE package='%s' + AND version='%s'""" % (package, version) + projectB.query(sql) + projectB.query("""DELETE from temp_content_associations + WHERE package='%s' + AND version='%s'""" % (package, version)) - # Put them into content_assiocations - projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id)) - return + return exists diff --git a/daklib/dbconn.py b/daklib/dbconn.py new file mode 100755 index 00000000..5bd1d765 --- /dev/null +++ b/daklib/dbconn.py @@ -0,0 +1,493 @@ +#!/usr/bin/python + +""" DB access class + +@contact: Debian FTPMaster +@copyright: 2000, 2001, 2002, 2003, 2004, 2006 James Troup +@copyright: 2008-2009 Mark Hymers +@copyright: 2009 Joerg Jaspert +@copyright: 2009 Mike O'Connor +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +# < mhy> I need a funny comment +# < sgran> two peanuts were walking down a dark street +# < sgran> one was a-salted +# * mhy looks up the definition of "funny" + +################################################################################ + +import os +import psycopg2 +import traceback + +from singleton import Singleton +from config import Config + +################################################################################ + +class Cache(object): + def __init__(self, hashfunc=None): + if hashfunc: + self.hashfunc = hashfunc + else: + self.hashfunc = lambda x: x['value'] + + self.data = {} + + def SetValue(self, keys, value): + self.data[self.hashfunc(keys)] = value + + def GetValue(self, keys): + return self.data.get(self.hashfunc(keys)) + +################################################################################ + +class DBConn(Singleton): + """ + database module init. + """ + def __init__(self, *args, **kwargs): + super(DBConn, self).__init__(*args, **kwargs) + + def _startup(self, *args, **kwargs): + self.__createconn() + self.__init_caches() + + ## Connection functions + def __createconn(self): + cnf = Config() + connstr = "dbname=%s" % cnf["DB::Name"] + if cnf["DB::Host"]: + connstr += " host=%s" % cnf["DB::Host"] + if cnf["DB::Port"] and cnf["DB::Port"] != "-1": + connstr += " port=%s" % cnf["DB::Port"] + + self.db_con = psycopg2.connect(connstr) + + def reconnect(self): + try: + self.db_con.close() + except psycopg2.InterfaceError: + pass + + self.db_con = None + self.__createconn() + + ## Cache functions + def __init_caches(self): + self.caches = {'suite': Cache(), + 'section': Cache(), + 'priority': Cache(), + 'override_type': Cache(), + 'architecture': Cache(), + 'archive': Cache(), + 'component': Cache(), + 'content_path_names': Cache(), + 'content_file_names': Cache(), + 'location': Cache(lambda x: '%s_%s_%s' % (x['location'], x['component'], x['location'])), + 'maintainer': {}, # TODO + 'keyring': {}, # TODO + 'source': Cache(lambda x: '%s_%s_' % (x['source'], x['version'])), + 'files': Cache(lambda x: '%s_%s_' % (x['filename'], x['location'])), + 'maintainer': {}, # TODO + 'fingerprint': {}, # TODO + 'queue': {}, # TODO + 'uid': {}, # TODO + 'suite_version': Cache(lambda x: '%s_%s' % (x['source'], x['suite'])), + } + + def clear_caches(self): + self.__init_caches() + + ## Functions to pass through to the database connector + def cursor(self): + return self.db_con.cursor() + + def commit(self): + return self.db_con.commit() + + ## Get functions + def __get_single_id(self, query, values, cachename=None): + # This is a bit of a hack but it's an internal function only + if cachename is not None: + res = self.caches[cachename].GetValue(values) + if res: + return res + + c = self.db_con.cursor() + c.execute(query, values) + + if c.rowcount != 1: + return None + + res = c.fetchone()[0] + + if cachename is not None: + self.caches[cachename].SetValue(values, res) + + return res + + def __get_id(self, retfield, table, qfield, value): + query = "SELECT %s FROM %s WHERE %s = %%(value)s" % (retfield, table, qfield) + return self.__get_single_id(query, {'value': value}, cachename=table) + + def get_suite_id(self, suite): + """ + Returns database id for given C{suite}. + Results are kept in a cache during runtime to minimize database queries. + + @type suite: string + @param suite: The name of the suite + + @rtype: int + @return: the database id for the given suite + + """ + return self.__get_id('id', 'suite', 'suite_name', suite) + + def get_section_id(self, section): + """ + Returns database id for given C{section}. + Results are kept in a cache during runtime to minimize database queries. + + @type section: string + @param section: The name of the section + + @rtype: int + @return: the database id for the given section + + """ + return self.__get_id('id', 'section', 'section', section) + + def get_priority_id(self, priority): + """ + Returns database id for given C{priority}. + Results are kept in a cache during runtime to minimize database queries. + + @type priority: string + @param priority: The name of the priority + + @rtype: int + @return: the database id for the given priority + + """ + return self.__get_id('id', 'priority', 'priority', priority) + + def get_override_type_id(self, override_type): + """ + Returns database id for given override C{type}. + Results are kept in a cache during runtime to minimize database queries. + + @type type: string + @param type: The name of the override type + + @rtype: int + @return: the database id for the given override type + + """ + return self.__get_id('id', 'override_type', 'override_type', override_type) + + def get_architecture_id(self, architecture): + """ + Returns database id for given C{architecture}. + Results are kept in a cache during runtime to minimize database queries. + + @type architecture: string + @param architecture: The name of the override type + + @rtype: int + @return: the database id for the given architecture + + """ + return self.__get_id('id', 'architecture', 'arch_string', architecture) + + def get_archive_id(self, archive): + """ + returns database id for given c{archive}. + results are kept in a cache during runtime to minimize database queries. + + @type archive: string + @param archive: the name of the override type + + @rtype: int + @return: the database id for the given archive + + """ + return self.__get_id('id', 'archive', 'lower(name)', archive) + + def get_component_id(self, component): + """ + Returns database id for given C{component}. + Results are kept in a cache during runtime to minimize database queries. + + @type component: string + @param component: The name of the override type + + @rtype: int + @return: the database id for the given component + + """ + return self.__get_id('id', 'component', 'lower(name)', component) + + def get_location_id(self, location, component, archive): + """ + Returns database id for the location behind the given combination of + - B{location} - the path of the location, eg. I{/srv/ftp.debian.org/ftp/pool/} + - B{component} - the id of the component as returned by L{get_component_id} + - B{archive} - the id of the archive as returned by L{get_archive_id} + Results are kept in a cache during runtime to minimize database queries. + + @type location: string + @param location: the path of the location + + @type component: int + @param component: the id of the component + + @type archive: int + @param archive: the id of the archive + + @rtype: int + @return: the database id for the location + + """ + + archive_id = self.get_archive_id(archive) + + if not archive_id: + return None + + res = None + + if component: + component_id = self.get_component_id(component) + if component_id: + res = self.__get_single_id("SELECT id FROM location WHERE path=%(location)s AND component=%(component)s AND archive=%(archive)s", + {'location': location, + 'archive': int(archive_id), + 'component': component_id}, cachename='location') + else: + res = self.__get_single_id("SELECT id FROM location WHERE path=%(location)s AND archive=%(archive)d", + {'location': location, 'archive': archive_id, 'component': ''}, cachename='location') + + return res + + def get_source_id(self, source, version): + """ + Returns database id for the combination of C{source} and C{version} + - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc} + - B{version} + Results are kept in a cache during runtime to minimize database queries. + + @type source: string + @param source: source package name + + @type version: string + @param version: the source version + + @rtype: int + @return: the database id for the source + + """ + return self.__get_single_id("SELECT id FROM source s WHERE s.source=%(source)s AND s.version=%(version)s", + {'source': source, 'version': version}, cachename='source') + + def get_suite_version(self, source, suite): + """ + Returns database id for a combination of C{source} and C{suite}. + + - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc} + - B{suite} - a suite name, eg. I{unstable} + + Results are kept in a cache during runtime to minimize database queries. + + @type source: string + @param source: source package name + + @type suite: string + @param suite: the suite name + + @rtype: string + @return: the version for I{source} in I{suite} + + """ + return self.__get_single_id(""" + SELECT s.version FROM source s, suite su, src_associations sa + WHERE sa.source=s.id + AND sa.suite=su.id + AND su.suite_name=%(suite)s + AND s.source=%(source)""", {'suite': suite, 'source': source}, cachename='suite_version') + + + def get_files_id (self, filename, size, md5sum, location_id): + """ + Returns -1, -2 or the file_id for filename, if its C{size} and C{md5sum} match an + existing copy. + + The database is queried using the C{filename} and C{location_id}. If a file does exist + at that location, the existing size and md5sum are checked against the provided + parameters. A size or checksum mismatch returns -2. If more than one entry is + found within the database, a -1 is returned, no result returns None, otherwise + the file id. + + Results are kept in a cache during runtime to minimize database queries. + + @type filename: string + @param filename: the filename of the file to check against the DB + + @type size: int + @param size: the size of the file to check against the DB + + @type md5sum: string + @param md5sum: the md5sum of the file to check against the DB + + @type location_id: int + @param location_id: the id of the location as returned by L{get_location_id} + + @rtype: int / None + @return: Various return values are possible: + - -2: size/checksum error + - -1: more than one file found in database + - None: no file found in database + - int: file id + + """ + values = {'filename' : filename, + 'location' : location_id} + + res = self.caches['files'].GetValue( values ) + + if not res: + query = """SELECT id, size, md5sum + FROM files + WHERE filename = %(filename)s AND location = %(location)s""" + + cursor = self.db_con.cursor() + cursor.execute( query, values ) + + if cursor.rowcount == 0: + res = None + + elif cursor.rowcount != 1: + res = -1 + + else: + row = cursor.fetchone() + + if row[1] != size or row[2] != md5sum: + res = -2 + + else: + self.caches[cachename].SetValue(values, row[0]) + res = row[0] + + return res + + + def get_or_set_contents_file_id(self, filename): + """ + Returns database id for given filename. + + Results are kept in a cache during runtime to minimize database queries. + If no matching file is found, a row is inserted. + + @type filename: string + @param filename: The filename + + @rtype: int + @return: the database id for the given component + """ + values={'value': filename} + query = "SELECT id FROM content_file_names WHERE file = %(value)s" + id = self.__get_single_id(query, values, cachename='content_file_names') + if not id: + c = self.db_con.cursor() + c.execute( "INSERT INTO content_file_names VALUES (DEFAULT, %(value)s) RETURNING id", + values ) + + id = c.fetchone()[0] + self.caches['content_file_names'].SetValue(values, id) + + return id + + def get_or_set_contents_path_id(self, path): + """ + Returns database id for given path. + + Results are kept in a cache during runtime to minimize database queries. + If no matching file is found, a row is inserted. + + @type path: string + @param path: The filename + + @rtype: int + @return: the database id for the given component + """ + values={'value': path} + query = "SELECT id FROM content_file_paths WHERE path = %(value)s" + id = self.__get_single_id(query, values, cachename='content_path_names') + if not id: + c = self.db_con.cursor() + c.execute( "INSERT INTO content_file_paths VALUES (DEFAULT, %(value)s) RETURNING id", + values ) + + id = c.fetchone()[0] + self.caches['content_path_names'].SetValue(values, id) + + return id + + def insert_content_paths(self, package, fullpaths): + """ + Make sure given path is associated with given binary id + + @type bin_id: int + @param bin_id: the id of the binary + @type fullpath: string + @param fullpath: the path of the file being associated with the binary + + @return True upon success + """ + + c = self.db_con.cursor() + + c.execute("BEGIN WORK") + try: + + # Remove any already existing recorded files for this package + c.execute("""DELETE FROM temp_content_associations + WHERE package=%(Package)s + AND version=%(Version)s""", package ) + + for fullpath in fullpaths: + (path, file) = os.path.split(fullpath) + + # Get the necessary IDs ... + file_id = self.get_or_set_contents_file_id(file) + path_id = self.get_or_set_contents_path_id(path) + + c.execute("""INSERT INTO temp_content_associations + (package, version, filepath, filename) + VALUES (%%(Package)s, %%(Version)s, '%d', '%d')""" % (path_id, file_id), + package ) + c.execute("COMMIT") + return True + except: + traceback.print_exc() + c.execute("ROLLBACK") + return False diff --git a/daklib/singleton.py b/daklib/singleton.py new file mode 100644 index 00000000..535a25a3 --- /dev/null +++ b/daklib/singleton.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# vim:set et ts=4 sw=4: + +""" +Singleton pattern code + +Inspiration for this very simple ABC was taken from various documents / +tutorials / mailing lists. This may not be thread safe but given that +(as I write) large chunks of dak aren't even type-safe, I'll live with +it for now + +@contact: Debian FTPMaster +@copyright: 2008 Mark Hymers +@license: GNU General Public License version 2 or later +""" + +################################################################################ + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +# < sgran> NCommander: in SQL, it's better to join than to repeat information +# < tomv_w> that makes SQL the opposite to Debian mailing lists! + +################################################################################ + +""" +This class set implements objects that may need to be instantiated multiple +times, but we don't want the overhead of actually creating and init'ing +them more than once. It also saves us using globals all over the place +""" + +class Singleton(object): + """This is the ABC for other dak Singleton classes""" + __single = None + def __new__(cls, *args, **kwargs): + # Check to see if a __single exists already for this class + # Compare class types instead of just looking for None so + # that subclasses will create their own __single objects + if cls != type(cls.__single): + cls.__single = object.__new__(cls, *args, **kwargs) + cls.__single._startup(*args, **kwargs) + return cls.__single + + def __init__(self, *args, **kwargs): + if type(self) == "Singleton": + raise NotImplementedError("Singleton is an ABC") + + def _startup(self): + """ + _startup is a private method used instead of __init__ due to the way + we instantiate this object + """ + raise NotImplementedError("Singleton is an ABC") + diff --git a/daklib/utils.py b/daklib/utils.py index 9582dadb..5cb502d6 100755 --- a/daklib/utils.py +++ b/daklib/utils.py @@ -1443,7 +1443,7 @@ def generate_contents_information(filename): Generate a list of flies contained in a .deb @type filename: string - @param filename: the path to a .deb + @param filename: the path to a data.tar.gz or data.tar.bz2 @rtype: list @return: a list of files in the data.tar.* portion of the .deb @@ -1481,6 +1481,7 @@ def generate_contents_information(filename): finally: if os.path.exists( chunks[2] ): + shutil.rmtree( chunks[2] ) os.remove( chunks[2] ) return contents diff --git a/templates/missing-contents b/templates/missing-contents new file mode 100644 index 00000000..a4477fa7 --- /dev/null +++ b/templates/missing-contents @@ -0,0 +1,14 @@ +From: __DAK_ADDRESS__ +X-Debian: DAK +X-Debian-Package: __PACKAGE__ +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 8bit +Subject: Missing contents for __PACKAGE__ in accepted queue + +While processing the accepted queue, I didn't have contents in the +database for __PACKAGE__ version __VERSION__. These contents should +have been put into the database by process-unchecked when the package +first arrived. + +This is probably stew's fault. \ No newline at end of file diff --git a/templates/process-unchecked.new b/templates/process-unchecked.new index 6c3162fa..22c59759 100644 --- a/templates/process-unchecked.new +++ b/templates/process-unchecked.new @@ -2,7 +2,7 @@ From: __DAK_ADDRESS__ To: __MAINTAINER_TO__ __BCC__ X-Debian: DAK -X-Debian-Package: __SOURCE__ +X-Debian-Package: __PACKAGE__ Precedence: bulk MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8"