#!/usr/bin/env python
-# DB access fucntions
-# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
+""" DB access functions
+@group readonly: get_suite_id, get_section_id, get_priority_id, get_override_type_id,
+ get_architecture_id, get_archive_id, get_component_id, get_location_id,
+ get_source_id, get_suite_version, get_files_id, get_maintainer, get_suites,
+ get_suite_architectures, get_new_comments, has_new_comment
+@group read/write: get_or_set*, set_files_id
+@group writeonly: add_new_comment, delete_new_comments
+
+@contact: Debian FTP Master <ftpmaster@debian.org>
+@copyright: 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
+@copyright: 2009 Joerg Jaspert <joerg@debian.org>
+@license: GNU General Public License version 2 or later
+"""
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
################################################################################
-import os, sys, time, types, apt_pkg
+import sys
+import time
+import types
+import utils
+import pg
+from binary import Binary
################################################################################
-Cnf = None
-projectB = None
-suite_id_cache = {}
-section_id_cache = {}
-priority_id_cache = {}
-override_type_id_cache = {}
-architecture_id_cache = {}
-archive_id_cache = {}
-component_id_cache = {}
-location_id_cache = {}
-maintainer_id_cache = {}
-keyring_id_cache = {}
-source_id_cache = {}
-files_id_cache = {}
-maintainer_cache = {}
-fingerprint_id_cache = {}
-queue_id_cache = {}
-uid_id_cache = {}
-suite_version_cache = {}
+Cnf = None #: Configuration, apt_pkg.Configuration
+projectB = None #: database connection, pgobject
+suite_id_cache = {} #: cache for suites
+section_id_cache = {} #: cache for sections
+priority_id_cache = {} #: cache for priorities
+override_type_id_cache = {} #: cache for overrides
+architecture_id_cache = {} #: cache for architectures
+archive_id_cache = {} #: cache for archives
+component_id_cache = {} #: cache for components
+location_id_cache = {} #: cache for locations
+maintainer_id_cache = {} #: cache for maintainers
+keyring_id_cache = {} #: cache for keyrings
+source_id_cache = {} #: cache for sources
+
+files_id_cache = {} #: cache for files
+maintainer_cache = {} #: cache for maintainer names
+fingerprint_id_cache = {} #: cache for fingerprints
+queue_id_cache = {} #: cache for queues
+uid_id_cache = {} #: cache for uids
+suite_version_cache = {} #: cache for suite_versions (packages)
suite_bin_version_cache = {}
-content_path_id_cache = {}
-content_file_id_cache = {}
-insert_contents_file_cache = {}
+cache_preloaded = False #: set to True by preload_binary_id_cache() once suite_bin_version_cache is filled
################################################################################
def init (config, sql):
+ """
+ Database module init. Stores C{config} and C{sql} in the module-level
+ globals C{Cnf} and C{projectB}, which the query functions of this
+ module read.
+
+ @type config: apt_pkg.Configuration
+ @param config: apt config, see U{http://apt.alioth.debian.org/python-apt-doc/apt_pkg/cache.html#Configuration}
+
+ @type sql: pgobject
+ @param sql: database connection
+
+ """
global Cnf, projectB
Cnf = config
projectB = sql
-def do_query(q):
- sys.stderr.write("query: \"%s\" ... " % (q))
+def do_query(query):
+ """
+ Executes a database query. Writes statistics / timing to stderr.
+
+ @type query: string
+ @param query: database query string, passed unmodified
+
+ @return: db result
+
+ @warning: The query is passed B{unmodified}, so be careful what you use this for.
+ """
+ sys.stderr.write("query: \"%s\" ... " % (query))
before = time.time()
- r = projectB.query(q)
+ r = projectB.query(query)
time_diff = time.time()-before
sys.stderr.write("took %.3f seconds.\n" % (time_diff))
if type(r) is int:
################################################################################
def get_suite_id (suite):
+ """
+ Returns database id for given C{suite}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type suite: string
+ @param suite: The name of the suite
+
+ @rtype: int
+ @return: the database id for the given suite
+
+ """
global suite_id_cache
if suite_id_cache.has_key(suite):
return suite_id
def get_section_id (section):
+ """
+ Returns database id for given C{section}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type section: string
+ @param section: The name of the section
+
+ @rtype: int
+ @return: the database id for the given section
+
+ """
global section_id_cache
if section_id_cache.has_key(section):
return section_id
def get_priority_id (priority):
+ """
+ Returns database id for given C{priority}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type priority: string
+ @param priority: The name of the priority
+
+ @rtype: int
+ @return: the database id for the given priority
+
+ """
global priority_id_cache
if priority_id_cache.has_key(priority):
return priority_id
def get_override_type_id (type):
+ """
+ Returns database id for given override C{type}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type type: string
+ @param type: The name of the override type
+
+ @rtype: int
+ @return: the database id for the given override type
+
+ """
global override_type_id_cache
if override_type_id_cache.has_key(type):
return override_type_id
def get_architecture_id (architecture):
+ """
+ Returns database id for given C{architecture}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type architecture: string
+ @param architecture: The name of the architecture
+
+ @rtype: int
+ @return: the database id for the given architecture
+
+ """
global architecture_id_cache
if architecture_id_cache.has_key(architecture):
return architecture_id
def get_archive_id (archive):
+ """
+ Returns database id for given C{archive}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type archive: string
+ @param archive: The name of the archive
+
+ @rtype: int
+ @return: the database id for the given archive
+
+ """
global archive_id_cache
archive = archive.lower()
return archive_id
def get_component_id (component):
+ """
+ Returns database id for given C{component}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type component: string
+ @param component: The name of the component
+
+ @rtype: int
+ @return: the database id for the given component
+
+ """
global component_id_cache
component = component.lower()
return component_id
def get_location_id (location, component, archive):
+ """
+ Returns database id for the location behind the given combination of
+ - B{location} - the path of the location, eg. I{/srv/ftp.debian.org/ftp/pool/}
+ - B{component} - the id of the component as returned by L{get_component_id}
+ - B{archive} - the id of the archive as returned by L{get_archive_id}
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type location: string
+ @param location: the path of the location
+
+ @type component: int
+ @param component: the id of the component
+
+ @type archive: int
+ @param archive: the id of the archive
+
+ @rtype: int
+ @return: the database id for the location
+
+ """
global location_id_cache
cache_key = location + '_' + component + '_' + location
return location_id
def get_source_id (source, version):
+ """
+ Returns database id for the combination of C{source} and C{version}
+ - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc}
+ - B{version}
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type source: string
+ @param source: source package name
+
+ @type version: string
+ @param version: the source version
+
+ @rtype: int
+ @return: the database id for the source
+
+ """
global source_id_cache
cache_key = source + '_' + version + '_'
return source_id
-def get_suite_version(source, suite, arch):
+def get_suite_version(source, suite):
+ """
+ Returns the version for a combination of C{source} and C{suite}.
+
+ - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc}
+ - B{suite} - a suite name, eg. I{unstable}
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type source: string
+ @param source: source package name
+
+ @type suite: string
+ @param suite: the suite name
+
+ @rtype: string
+ @return: the version for I{source} in I{suite}
+
+ """
+
global suite_version_cache
cache_key = "%s_%s" % (source, suite)
def get_latest_binary_version_id(binary, section, suite, arch):
+ """
+ Returns the database id of the binary C{binary} for the given C{section},
+ C{suite} and C{arch}.
+
+ The cache is checked first for C{arch}, then for architecture I{all}.
+ The database is only queried when the cache has not been preloaded
+ (see L{preload_binary_id_cache}); a result found there is cached.
+
+ @type binary: string
+ @param binary: name of the binary package
+
+ @param section: the id of the section (converted with C{int()})
+
+ @param suite: the id of the suite (converted with C{int()})
+
+ @param arch: the id of the architecture (converted with C{int()})
+
+ @rtype: int / None / False
+ @return: Various return values are possible:
+ - int: the binary id
+ - None: the cache is preloaded and has no entry for the binary
+ - False: the database query returned no row
+ """
global suite_bin_version_cache
cache_key = "%s_%s_%s_%s" % (binary, section, suite, arch)
+ cache_key_all = "%s_%s_%s_%s" % (binary, section, suite, get_architecture_id("all"))
+ # Check for the cache hit for its arch, then arch all
if suite_bin_version_cache.has_key(cache_key):
return suite_bin_version_cache[cache_key]
+ if suite_bin_version_cache.has_key(cache_key_all):
+ return suite_bin_version_cache[cache_key_all]
+ if cache_preloaded:
+ return # package does not exist
- q = projectB.query("SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section)))
+ q = projectB.query("SELECT DISTINCT b.id FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section)))
- highest_bid, highest_version = None, None
+ # Fetch the rows once instead of calling getresult() for the test and again for the value
+ rows = q.getresult()
+ if not rows:
+ return False
- for bi in q.getresult():
- if highest_version == None or apt_pkg.VersionCompare(bi[1], highest_version) == 1:
- highest_bid = bi[0]
- highest_version = bi[1]
+ # NOTE(review): the first row is used as-is; versions are no longer compared here
+ highest_bid = rows[0][0]
suite_bin_version_cache[cache_key] = highest_bid
return highest_bid
+def preload_binary_id_cache():
+    """
+    Load the binary ids of all suites and architectures into
+    C{suite_bin_version_cache} and flag the cache as preloaded.
+
+    One query is run per suite / architecture pair. Every binary returned is
+    stored under a key built from its package name, its override section and
+    the ids of the suite and the architecture, which is the key layout
+    L{get_latest_binary_version_id} looks up.
+    """
+    global suite_bin_version_cache, cache_preloaded
+
+    # Ids of all known suites and architectures
+    suite_rows = projectB.query("SELECT id FROM suite").getresult()
+    arch_rows = projectB.query("SELECT id FROM architecture").getresult()
+
+    for suite_row in suite_rows:
+        suite_id = suite_row[0]
+        for arch_row in arch_rows:
+            arch_id = arch_row[0]
+            binaries = projectB.query("SELECT DISTINCT b.id, b.package, o.section FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.architecture = '%d' AND ba.suite = '%d'" % (int(arch_id), int(suite_id)))
+
+            for (bin_id, package, section) in binaries.getresult():
+                cache_key = "%s_%s_%s_%s" % (package, section, suite_id, arch_id)
+                suite_bin_version_cache[cache_key] = int(bin_id)
+
+    # From now on a cache miss means the package does not exist
+    cache_preloaded = True
+
+def get_suite_architectures(suite):
+    """
+    Returns list of architectures for C{suite}.
+
+    @type suite: string, int
+    @param suite: the suite name or the suite_id
+
+    @rtype: list
+    @return: the list of architecture names for I{suite}, or None if
+    C{suite} is neither a string nor an integer
+    """
+
+    # isinstance() also accepts unicode names and long ids, which an exact
+    # type() comparison would silently turn into a None result.
+    if isinstance(suite, basestring):
+        suite_id = get_suite_id(suite)
+    elif isinstance(suite, (int, long)):
+        suite_id = suite
+    else:
+        return None
+
+    sql = """ SELECT a.arch_string FROM suite_architectures sa
+ JOIN architecture a ON (a.id = sa.architecture)
+ WHERE suite='%s' """ % (suite_id)
+
+    q = projectB.query(sql)
+    return [row[0] for row in q.getresult()]
+
+def get_suite_untouchable(suite):
+    """
+    Returns true if the C{suite} is untouchable, otherwise false.
+
+    @type suite: string, int
+    @param suite: the suite name (lowercased before the lookup) or the suite_id
+
+    @rtype: boolean
+    @return: status of suite, or None if C{suite} is neither a string nor
+    an integer
+
+    @raise IndexError: if the query returns no row for the suite id
+    """
+
+    # isinstance() also accepts unicode names and long ids, which an exact
+    # type() comparison would silently turn into a None result.
+    if isinstance(suite, basestring):
+        suite_id = get_suite_id(suite.lower())
+    elif isinstance(suite, (int, long)):
+        suite_id = suite
+    else:
+        return None
+
+    sql = """ SELECT untouchable FROM suite WHERE id='%s' """ % (suite_id)
+
+    q = projectB.query(sql)
+    # Anything but "f" counts as untouchable
+    return q.getresult()[0][0] != "f"
+
################################################################################
def get_or_set_maintainer_id (maintainer):
+ """
+ If C{maintainer} does not have an entry in the maintainer table yet, create one
+ and return the new id.
+ If C{maintainer} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type maintainer: string
+ @param maintainer: the maintainer name
+
+ @rtype: int
+ @return: the database id for the maintainer
+
+ """
global maintainer_id_cache
if maintainer_id_cache.has_key(maintainer):
################################################################################
def get_or_set_keyring_id (keyring):
+ """
+ If C{keyring} does not have an entry in the C{keyrings} table yet, create one
+ and return the new id.
+ If C{keyring} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type keyring: string
+ @param keyring: the keyring name
+
+ @rtype: int
+ @return: the database id for the keyring
+
+ """
global keyring_id_cache
if keyring_id_cache.has_key(keyring):
################################################################################
def get_or_set_uid_id (uid):
+ """
+ If C{uid} does not have an entry in the uid table yet, create one
+ and return the new id.
+ If C{uid} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type uid: string
+ @param uid: the uid.
+
+ @rtype: int
+ @return: the database id for the uid
+
+ """
+
global uid_id_cache
if uid_id_cache.has_key(uid):
################################################################################
def get_or_set_fingerprint_id (fingerprint):
+ """
+ If C{fingerprint} does not have an entry in the fingerprint table yet, create one
+ and return the new id.
+ If C{fingerprint} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type fingerprint: string
+ @param fingerprint: the fingerprint
+
+ @rtype: int
+ @return: the database id for the fingerprint
+
+ """
global fingerprint_id_cache
if fingerprint_id_cache.has_key(fingerprint):
################################################################################
def get_files_id (filename, size, md5sum, location_id):
+ """
+ Returns -1, -2 or the file_id for filename, if its C{size} and C{md5sum} match an
+ existing copy.
+
+ The database is queried using the C{filename} and C{location_id}. If a file does exist
+ at that location, the existing size and md5sum are checked against the provided
+ parameters. A size or checksum mismatch returns -2. If more than one entry is
+ found within the database, a -1 is returned, no result returns None, otherwise
+ the file id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type filename: string
+ @param filename: the filename of the file to check against the DB
+
+ @type size: int
+ @param size: the size of the file to check against the DB
+
+ @type md5sum: string
+ @param md5sum: the md5sum of the file to check against the DB
+
+ @type location_id: int
+ @param location_id: the id of the location as returned by L{get_location_id}
+
+ @rtype: int / None
+ @return: Various return values are possible:
+ - -2: size/checksum error
+ - -1: more than one file found in database
+ - None: no file found in database
+ - int: file id
+
+ """
global files_id_cache
cache_key = "%s_%d" % (filename, location_id)
################################################################################
def get_or_set_queue_id (queue):
+ """
+ If C{queue} does not have an entry in the queue table yet, create one
+ and return the new id.
+ If C{queue} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type queue: string
+ @param queue: the queue name (no full path)
+
+ @rtype: int
+ @return: the database id for the queue
+
+ """
global queue_id_cache
if queue_id_cache.has_key(queue):
################################################################################
def set_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id):
+ """
+ Insert a new entry into the files table and return its id.
+
+ @type filename: string
+ @param filename: the filename
+
+ @type size: int
+ @param size: the size in bytes
+
+ @type md5sum: string
+ @param md5sum: md5sum of the file
+
+ @type sha1sum: string
+ @param sha1sum: sha1sum of the file
+
+ @type sha256sum: string
+ @param sha256sum: sha256sum of the file
+
+ @type location_id: int
+ @param location_id: the id of the location as returned by L{get_location_id}
+
+ @rtype: int
+ @return: the database id for the new file
+
+ """
global files_id_cache
projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', '%s', '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum, location_id))
################################################################################
def get_maintainer (maintainer_id):
+ """
+ Return the name of the maintainer behind C{maintainer_id}.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type maintainer_id: int
+ @param maintainer_id: the id of the maintainer, eg. from L{get_or_set_maintainer_id}
+
+ @rtype: string
+ @return: the name of the maintainer
+
+ """
global maintainer_cache
if not maintainer_cache.has_key(maintainer_id):
################################################################################
def get_suites(pkgname, src=False):
+ """
+ Return the suites in which C{pkgname} can be found. If C{src} is True query for source
+ package, else binary package.
+
+ @type pkgname: string
+ @param pkgname: name of the package
+
+ @type src: bool
+ @param src: if True look for source packages, false (default) looks for binary.
+
+ @rtype: list
+ @return: list of suite names, or empty list if no match
+
+ @warning: C{pkgname} is inserted into the query B{unescaped}.
+
+ """
if src:
- sql = "select suite_name from source, src_associations,suite where source.id=src_associations.source and source.source='%s' and src_associations.suite = suite.id"%pkgname
+ sql = """
+ SELECT suite_name
+ FROM source,
+ src_associations,
+ suite
+ WHERE source.id = src_associations.source
+ AND source.source = '%s'
+ AND src_associations.suite = suite.id
+ """ % (pkgname)
else:
- sql = "select suite_name from binaries, bin_associations,suite where binaries.id=bin_associations.bin and package='%s' and bin_associations.suite = suite.id"%pkgname
+ sql = """
+ SELECT suite_name
+ FROM binaries,
+ bin_associations,
+ suite
+ WHERE binaries.id = bin_associations.bin
+ AND package = '%s'
+ AND bin_associations.suite = suite.id
+ """ % (pkgname)
+
q = projectB.query(sql)
return map(lambda x: x[0], q.getresult())
+
################################################################################
-def get_or_set_contents_file_id(file):
- global content_file_id_cache
+def get_new_comments(package):
+ """
+ Returns all comments attached to any version of C{package} in NEW,
+ ordered by C{notedate}.
- if not content_file_id_cache.has_key(file):
- sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % file
- q = projectB.query(sql_select)
- if not q.getresult():
- # since this can be called within a transaction, we can't use currval
- q = projectB.query("INSERT INTO content_file_names VALUES (DEFAULT, '%s') RETURNING id" % (file))
- content_file_id_cache[file] = int(q.getresult()[0][0])
- return content_file_id_cache[file]
+ @type package: string
+ @param package: name of the package
-################################################################################
+ @rtype: list
+ @return: list of strings: the formatted comments (author, version, timestamp
+ and text) for all versions from all authors, plus separator lines of 72 dashes
+
+ @warning: C{package} is inserted into the query B{unescaped}.
+ """
-def get_or_set_contents_path_id(path):
- global content_path_id_cache
+ comments = []
+ query = projectB.query(""" SELECT version, comment, author, notedate
+ FROM new_comments
+ WHERE package = '%s'
+ ORDER BY notedate
+ """ % (package))
- if not content_path_id_cache.has_key(path):
- sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % path
- q = projectB.query(sql_select)
- if not q.getresult():
- # since this can be called within a transaction, we can't use currval
- q = projectB.query("INSERT INTO content_file_paths VALUES (DEFAULT, '%s') RETURNING id" % (path))
- content_path_id_cache[path] = int(q.getresult()[0][0])
- return content_path_id_cache[path]
+ # row layout follows the SELECT list: version, comment, author, notedate
+ for row in query.getresult():
+ comments.append("\nAuthor: %s\nVersion: %s\nTimestamp: %s\n\n%s\n" % (row[2], row[0], row[3], row[1]))
+ comments.append("-"*72)
-################################################################################
+ return comments
+
+def has_new_comment(package, version, ignore_trainee=False):
+    """
+    Returns true if the given combination of C{package}, C{version} has a comment.
+    If C{ignore_trainee} is true, comments from a trainee are ignored.
+
+    @type package: string
+    @param package: name of the package
-def insert_content_path(bin_id, fullpath):
- global insert_contents_file_cache
- cache_key = "%s_%s" % (bin_id, fullpath)
+    @type version: string
+    @param version: package version
- # have we seen this contents before?
- # probably only revelant during package import
- if insert_contents_file_cache.has_key(cache_key):
- return
+    @type ignore_trainee: boolean
+    @param ignore_trainee: ignore trainee comments
- # split the path into basename, and pathname
- (path, file) = os.path.split(fullpath)
+    @rtype: boolean
+    @return: true/false
+    """
- # Get the necessary IDs ...
- file_id = get_or_set_contents_file_id(file)
- path_id = get_or_set_contents_path_id(path)
+    # Optional extra condition; stays empty when trainee comments count too
+    trainee = ""
+    if ignore_trainee:
+        trainee = 'AND trainee=false'
- # Determine if we're inserting a duplicate row
- q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id))
- if q.getresult():
- # Yes we are, return without doing the insert
- return
+    # package and version are escaped like the values in add_new_comment, so
+    # a quote in either of them cannot break the statement.
+    rows = projectB.query("""SELECT 1 FROM new_comments
+ WHERE package='%s'
+ AND version='%s'
+ %s
+ LIMIT 1"""
+                          % (pg.escape_string(package), pg.escape_string(version), trainee)).getresult()
+
+    return bool(rows)
+
+def add_new_comment(package, version, comment, author, trainee=False):
+    """
+    Add a new comment for C{package}, C{version} written by C{author}
+
+    @type package: string
+    @param package: name of the package
+
+    @type version: string
+    @param version: package version
+
+    @type comment: string
+    @param comment: the comment
+
+    @type author: string
+    @param author: the authorname
+
+    @type trainee: boolean
+    @param trainee: trainee comment
+    """
+
+    # Escape every string value, not only the free-text ones, so that a
+    # quote in the package name or version cannot break the statement either.
+    projectB.query(""" INSERT INTO new_comments (package, version, comment, author, trainee)
+ VALUES ('%s', '%s', '%s', '%s', '%s')
+ """ % (pg.escape_string(package), pg.escape_string(version),
+                            pg.escape_string(comment), pg.escape_string(author), trainee))
- # Put them into content_assiocations
- projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id))
return
+
+def delete_new_comments(package, version):
+    """
+    Delete all comments for the combination of C{package} and C{version},
+    if any exist
+
+    @type package: string
+    @param package: name of the package
+
+    @type version: string
+    @param version: package version
+    """
+
+    # Escaped like the values in add_new_comment, so a quote in either value
+    # cannot break the statement.
+    projectB.query(""" DELETE FROM new_comments
+ WHERE package = '%s' AND version = '%s'
+ """ % (pg.escape_string(package), pg.escape_string(version)))
+
+def delete_all_new_comments(package):
+    """
+    Delete all comments for C{package}, whatever the version, if they exist
+
+    @type package: string
+    @param package: name of the package
+    """
+
+    # Escaped like the values in add_new_comment, so a quote in the package
+    # name cannot break the statement.
+    projectB.query(""" DELETE FROM new_comments
+ WHERE package = '%s'
+ """ % (pg.escape_string(package)))
+
+################################################################################
+def copy_temporary_contents(package, version, arch, deb, reject):
+ """
+ copy the previously stored contents from the temp table to the permanent one
+
+ during process-unchecked, the deb should have been scanned and the
+ contents stored in pending_content_associations
+
+ @type package: string
+ @param package: name of the package
+
+ @type version: string
+ @param version: package version
+
+ @type arch: string
+ @param arch: architecture name, resolved with L{get_architecture_id}
+
+ @param deb: passed on to C{Binary} together with C{reject}
+
+ @param reject: passed on to C{Binary} together with C{deb}
+
+ @return: the result of the existence query, or the result of
+ C{Binary.scan_package()} when that was run
+ """
+
+ # first see if contents exist:
+
+ arch_id = get_architecture_id (arch)
+
+ exists = projectB.query("""SELECT 1 FROM pending_content_associations
+ WHERE package='%s'
+ AND version='%s'
+ AND architecture=%d LIMIT 1"""
+ % (package, version, arch_id) ).getresult()
+
+ if not exists:
+ # This should NOT happen. We should have added contents
+ # during process-unchecked. if it did, log an error, and send
+ # an email.
+ subst = {
+ "__PACKAGE__": package,
+ "__VERSION__": version,
+ "__ARCH__": arch,
+ "__TO_ADDRESS__": Cnf["Dinstall::MyAdminAddress"],
+ "__DAK_ADDRESS__": Cnf["Dinstall::MyEmailAddress"] }
+
+ message = utils.TemplateSubst(subst, Cnf["Dir::Templates"]+"/missing-contents")
+ utils.send_mail( message )
+
+ # scan the package now; the outcome replaces the query result in exists
+ exists = Binary(deb, reject).scan_package()
+
+ if exists:
+ # NOTE(review): currval('binaries_id_seq') presumably refers to the binary
+ # row inserted earlier in the same session - confirm with the caller
+ sql = """INSERT INTO content_associations(binary_pkg,filepath,filename)
+ SELECT currval('binaries_id_seq'), filepath, filename FROM pending_content_associations
+ WHERE package='%s'
+ AND version='%s'
+ AND architecture=%d""" % (package, version, arch_id)
+ projectB.query(sql)
+ # the pending rows have been copied, remove them from the temp table
+ projectB.query("""DELETE from pending_content_associations
+ WHERE package='%s'
+ AND version='%s'
+ AND architecture=%d""" % (package, version, arch_id))
+
+ return exists