From: Mike O'Connor
Date: Mon, 9 Feb 2009 07:46:48 +0000 (-0500)
Subject: Merge branch 'psycopg2' into content_generation
X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=cd5b29ddfd8de263c085f494b9573d683913f6f3;hp=-c;p=dak.git

Merge branch 'psycopg2' into content_generation

Conflicts:
	daklib/database.py

Signed-off-by: Mike O'Connor
---

cd5b29ddfd8de263c085f494b9573d683913f6f3
diff --combined dak/dak.py
index d04eebc2,a08f20e0..fc22ec5c
--- a/dak/dak.py
+++ b/dak/dak.py
@@@ -1,6 -1,11 +1,11 @@@
 #!/usr/bin/env python
 
-"""Wrapper to launch dak functionality"""
+"""
+Wrapper to launch dak functionality
+
+G{importgraph}
+
+"""
 # Copyright (C) 2005, 2006 Anthony Towns
 # Copyright (C) 2006 James Troup
@@@ -103,10 -108,10 +108,12 @@@ def init()
         ("make-suite-file-list",
          "Generate lists of packages per suite for apt-ftparchive"),
+        ("make-pkg-file-mapping",
+         "Generate package <-> file mapping"),
         ("generate-releases",
          "Generate Release files"),
+        ("generate-contents",
+         "Generate Contents files"),
         ("generate-index-diffs",
          "Generate .diff/Index files"),
         ("clean-suites",
@@@ -138,8 -143,6 +145,8 @@@
          "Check for users with no packages in the archive"),
         ("import-archive",
          "Populate SQL database based from an archive tree"),
+        ("import-contents",
+         "Populate SQL database with Contents files"),
         ("import-keyring",
          "Populate fingerprint/uid table based on a new/updated keyring"),
         ("import-ldap-fingerprints",
@@@ -156,8 -159,6 +163,6 @@@
          "Generates Maintainers file for BTS etc"),
         ("make-overrides",
          "Generates override files"),
-        ("mirror-split",
-         "Split the pool/ by architecture groups"),
         ("poolize",
          "Move packages from dists/ to pool/"),
         ("reject-proposed-updates",
@@@ -168,6 -169,8 +173,8 @@@
          "Split queue/done into a date-based hierarchy"),
         ("stats",
          "Generate statistics"),
+        ("bts-categorize",
+         "Categorize uncategorized bugs filed against ftp.debian.org"),
         ]
     return functionality
diff --combined dak/dakdb/update2.py
index 0cf747e9,71b43fa7..e411662c
--- a/dak/dakdb/update2.py
+++ b/dak/dakdb/update2.py
@@@ -1,7 -1,7 +1,7 @@@
 #!/usr/bin/env python
 # coding=utf8
 
-# Debian Archive Kit Database Update Script
+""" Database Update Script - debversion """
 # Copyright © 2008 Michael Casadevall
 # Copyright © 2008 Roger Leigh
@@@ -26,13 -26,11 +26,13 @@@ import psycopg2, tim
 
 ################################################################################
 
 def do_update(self):
     print "Note: to be able to enable the PL/Perl (plperl) procedural language, we do"
     print "need postgresql-plperl-$postgres-version installed. Make sure that this is the"
     print "case before you continue. Interrupt if it isn't, sleeping 5 seconds now."
print "(We need to be database superuser for this to work!)" time.sleep (5) +^^^^^^^^^^^^^^^^^^^^ try: c = self.db.cursor() diff --combined dak/process_accepted.py index e88e7904,683b1191..07258018 --- a/dak/process_accepted.py +++ b/dak/process_accepted.py @@@ -1,6 -1,6 +1,6 @@@ #!/usr/bin/env python - # Installs Debian packages from queue/accepted into the pool + """ Installs Debian packages from queue/accepted into the pool """ # Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup # This program is free software; you can redistribute it and/or modify @@@ -30,12 -30,13 +30,13 @@@ ############################################################################### import errno, fcntl, os, sys, time, re -import apt_pkg +import apt_pkg, tarfile, commands from daklib import database from daklib import logging from daklib import queue from daklib import utils from daklib.dak_exceptions import * + from daklib.regexes import re_default_answer, re_issource, re_fdnic ############################################################################### @@@ -96,10 -97,8 +97,10 @@@ class Urgency_Log else: os.unlink(self.log_filename) + ############################################################################### + def reject (str, prefix="Rejected: "): global reject_message if str: @@@ -111,32 -110,32 +112,32 @@@ def check(): propogate={} nopropogate={} - for file in files.keys(): + for checkfile in files.keys(): # The .orig.tar.gz can disappear out from under us is it's a # duplicate of one in the archive. - if not files.has_key(file): + if not files.has_key(checkfile): continue # Check that the source still exists - if files[file]["type"] == "deb": - source_version = files[file]["source version"] - source_package = files[file]["source package"] + if files[checkfile]["type"] == "deb": + source_version = files[checkfile]["source version"] + source_package = files[checkfile]["source package"] if not changes["architecture"].has_key("source") \ and not Upload.source_exists(source_package, source_version, changes["distribution"].keys()): - reject("no source found for %s %s (%s)." % (source_package, source_version, file)) + reject("no source found for %s %s (%s)." 
% (source_package, source_version, checkfile)) # Version and file overwrite checks if not installing_to_stable: - if files[file]["type"] == "deb": - reject(Upload.check_binary_against_db(file), "") - elif files[file]["type"] == "dsc": - reject(Upload.check_source_against_db(file), "") - (reject_msg, is_in_incoming) = Upload.check_dsc_against_db(file) + if files[checkfile]["type"] == "deb": + reject(Upload.check_binary_against_db(checkfile), "") + elif files[checkfile]["type"] == "dsc": + reject(Upload.check_source_against_db(checkfile), "") + (reject_msg, is_in_incoming) = Upload.check_dsc_against_db(checkfile) reject(reject_msg, "") # propogate in the case it is in the override tables: if changes.has_key("propdistribution"): for suite in changes["propdistribution"].keys(): - if Upload.in_override_p(files[file]["package"], files[file]["component"], suite, files[file].get("dbtype",""), file): + if Upload.in_override_p(files[checkfile]["package"], files[checkfile]["component"], suite, files[checkfile].get("dbtype",""), checkfile): propogate[suite] = 1 else: nopropogate[suite] = 1 @@@ -146,11 -145,11 +147,11 @@@ continue changes["distribution"][suite] = 1 - for file in files.keys(): + for checkfile in files.keys(): # Check the package is still in the override tables for suite in changes["distribution"].keys(): - if not Upload.in_override_p(files[file]["package"], files[file]["component"], suite, files[file].get("dbtype",""), file): - reject("%s is NEW for %s." % (file, suite)) + if not Upload.in_override_p(files[checkfile]["package"], files[checkfile]["component"], suite, files[checkfile].get("dbtype",""), checkfile): + reject("%s is NEW for %s." % (checkfile, suite)) ############################################################################### @@@ -222,7 -221,7 +223,7 @@@ def action () while prompt.find(answer) == -1: answer = utils.our_raw_input(prompt) - m = queue.re_default_answer.match(prompt) + m = re_default_answer.match(prompt) if answer == "": answer = m.group(1) answer = answer[:1].upper() @@@ -285,8 -284,8 +286,8 @@@ def install () return # Add the .dsc file to the DB - for file in files.keys(): - if files[file]["type"] == "dsc": + for newfile in files.keys(): + if files[newfile]["type"] == "dsc": package = dsc["source"] version = dsc["version"] # NB: not files[file]["version"], that has no epoch maintainer = dsc["maintainer"] @@@ -297,26 -296,26 +298,26 @@@ changedby_id = database.get_or_set_maintainer_id(changedby) fingerprint_id = database.get_or_set_fingerprint_id(dsc["fingerprint"]) install_date = time.strftime("%Y-%m-%d") - filename = files[file]["pool name"] + file - dsc_component = files[file]["component"] - dsc_location_id = files[file]["location id"] + filename = files[newfile]["pool name"] + newfile + dsc_component = files[newfile]["component"] + dsc_location_id = files[newfile]["location id"] if dsc.has_key("dm-upload-allowed") and dsc["dm-upload-allowed"] == "yes": dm_upload_allowed = "true" else: dm_upload_allowed = "false" - if not files[file].has_key("files id") or not files[file]["files id"]: - files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id) + if not files[newfile].has_key("files id") or not files[newfile]["files id"]: + files[newfile]["files id"] = database.set_files_id (filename, files[newfile]["size"], files[newfile]["md5sum"], files[newfile]["sha1sum"], files[newfile]["sha256sum"], dsc_location_id) projectB.query("INSERT INTO source 
(source, version, maintainer, changedby, file, install_date, sig_fpr, dm_upload_allowed) VALUES ('%s', '%s', %d, %d, %d, '%s', %s, %s)" - % (package, version, maintainer_id, changedby_id, files[file]["files id"], install_date, fingerprint_id, dm_upload_allowed)) + % (package, version, maintainer_id, changedby_id, files[newfile]["files id"], install_date, fingerprint_id, dm_upload_allowed)) for suite in changes["distribution"].keys(): suite_id = database.get_suite_id(suite) projectB.query("INSERT INTO src_associations (suite, source) VALUES (%d, currval('source_id_seq'))" % (suite_id)) # Add the source files to the DB (files and dsc_files) - projectB.query("INSERT INTO dsc_files (source, file) VALUES (currval('source_id_seq'), %d)" % (files[file]["files id"])) + projectB.query("INSERT INTO dsc_files (source, file) VALUES (currval('source_id_seq'), %d)" % (files[newfile]["files id"])) for dsc_file in dsc_files.keys(): - filename = files[file]["pool name"] + dsc_file + filename = files[newfile]["pool name"] + dsc_file # If the .orig.tar.gz is already in the pool, it's # files id is stored in dsc_files by check_dsc(). files_id = dsc_files[dsc_file].get("files id", None) @@@ -345,41 -344,34 +346,41 @@@ # Add the .deb files to the DB - for file in files.keys(): - if files[file]["type"] == "deb": - package = files[file]["package"] - version = files[file]["version"] - maintainer = files[file]["maintainer"] + for newfile in files.keys(): + if files[newfile]["type"] == "deb": + package = files[newfile]["package"] + version = files[newfile]["version"] + maintainer = files[newfile]["maintainer"] maintainer = maintainer.replace("'", "\\'") maintainer_id = database.get_or_set_maintainer_id(maintainer) fingerprint_id = database.get_or_set_fingerprint_id(changes["fingerprint"]) - architecture = files[file]["architecture"] + architecture = files[newfile]["architecture"] architecture_id = database.get_architecture_id (architecture) - type = files[file]["dbtype"] - source = files[file]["source package"] - source_version = files[file]["source version"] - filename = files[file]["pool name"] + file - if not files[file].has_key("location id") or not files[file]["location id"]: - files[file]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[file]["component"],utils.where_am_i()) - if not files[file].has_key("files id") or not files[file]["files id"]: - files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], files[file]["location id"]) + filetype = files[newfile]["dbtype"] + source = files[newfile]["source package"] + source_version = files[newfile]["source version"] + filename = files[newfile]["pool name"] + newfile + if not files[newfile].has_key("location id") or not files[newfile]["location id"]: + files[newfile]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[newfile]["component"],utils.where_am_i()) + if not files[newfile].has_key("files id") or not files[newfile]["files id"]: + files[newfile]["files id"] = database.set_files_id (filename, files[newfile]["size"], files[newfile]["md5sum"], files[newfile]["sha1sum"], files[newfile]["sha256sum"], files[newfile]["location id"]) source_id = database.get_source_id (source, source_version) if source_id: projectB.query("INSERT INTO binaries (package, version, maintainer, source, architecture, file, type, sig_fpr) VALUES ('%s', '%s', %d, %d, %d, %d, '%s', %d)" - % (package, version, maintainer_id, source_id, architecture_id, 
files[file]["files id"], type, fingerprint_id)) + % (package, version, maintainer_id, source_id, architecture_id, files[newfile]["files id"], filetype, fingerprint_id)) else: - raise NoSourceFieldError, "Unable to find a source id for %s (%s), %s, file %s, type %s, signed by %s" % (package, version, architecture, file, type, sig_fpr) + raise NoSourceFieldError, "Unable to find a source id for %s (%s), %s, file %s, type %s, signed by %s" % (package, version, architecture, newfile, filetype, changes["fingerprint"]) for suite in changes["distribution"].keys(): suite_id = database.get_suite_id(suite) projectB.query("INSERT INTO bin_associations (suite, bin) VALUES (%d, currval('binaries_id_seq'))" % (suite_id)) + # insert contents into the database + contents = utils.generate_contents_information(file) + q = projectB.query("SELECT currval('binaries_id_seq')") + bin_id = int(q.getresult()[0][0]) + for file in contents: + database.insert_content_path(bin_id, file) + # If the .orig.tar.gz is in a legacy directory we need to poolify # it, so that apt-get source (and anything else that goes by the # "Directory:" field in the Sources.gz file) works. @@@ -395,7 -387,7 +396,7 @@@ continue # First move the files to the new location legacy_filename = qid["path"] + qid["filename"] - pool_location = utils.poolify (changes["source"], files[file]["component"]) + pool_location = utils.poolify (changes["source"], files[newfile]["component"]) pool_filename = pool_location + os.path.basename(qid["filename"]) destination = Cnf["Dir::Pool"] + pool_location utils.move(legacy_filename, destination) @@@ -423,11 -415,11 +424,11 @@@ projectB.query("UPDATE dsc_files SET file = %s WHERE source = %s AND file = %s" % (new_files_id, database.get_source_id(changes["source"], changes["version"]), orig_tar_id)) # Install the files into the pool - for file in files.keys(): - destination = Cnf["Dir::Pool"] + files[file]["pool name"] + file - utils.move(file, destination) - Logger.log(["installed", file, files[file]["type"], files[file]["size"], files[file]["architecture"]]) - install_bytes += float(files[file]["size"]) + for newfile in files.keys(): + destination = Cnf["Dir::Pool"] + files[newfile]["pool name"] + newfile + utils.move(newfile, destination) + Logger.log(["installed", newfile, files[newfile]["type"], files[newfile]["size"], files[newfile]["architecture"]]) + install_bytes += float(files[newfile]["size"]) # Copy the .changes file across for suite which need it. 
     copy_changes = {}
@@@ -442,6 -434,7 +443,6 @@@
         utils.copy(pkg.changes_file, Cnf["Dir::Root"] + dest)
     for dest in copy_dot_dak.keys():
         utils.copy(Upload.pkg.changes_file[:-8]+".dak", dest)
-
     projectB.query("COMMIT WORK")
 
     # Move the .changes into the 'done' directory
@@@ -465,14 -458,14 +466,14 @@@
         dest_dir = Cnf["Dir::QueueBuild"]
         if Cnf.FindB("Dinstall::SecurityQueueBuild"):
             dest_dir = os.path.join(dest_dir, suite)
-        for file in files.keys():
-            dest = os.path.join(dest_dir, file)
+        for newfile in files.keys():
+            dest = os.path.join(dest_dir, newfile)
             # Remove it from the list of packages for later processing by apt-ftparchive
             projectB.query("UPDATE queue_build SET in_queue = 'f', last_used = '%s' WHERE filename = '%s' AND suite = %s" % (now_date, dest, suite_id))
             if not Cnf.FindB("Dinstall::SecurityQueueBuild"):
                 # Update the symlink to point to the new location in the pool
-                pool_location = utils.poolify (changes["source"], files[file]["component"])
-                src = os.path.join(Cnf["Dir::Pool"], pool_location, os.path.basename(file))
+                pool_location = utils.poolify (changes["source"], files[newfile]["component"])
+                src = os.path.join(Cnf["Dir::Pool"], pool_location, os.path.basename(newfile))
                 if os.path.islink(dest):
                     os.unlink(dest)
                 os.symlink(src, dest)
@@@ -501,8 -494,8 +502,8 @@@ def stable_install (summary, short_summ
     projectB.query("BEGIN WORK")
 
     # Add the source to stable (and remove it from proposed-updates)
-    for file in files.keys():
-        if files[file]["type"] == "dsc":
+    for newfile in files.keys():
+        if files[newfile]["type"] == "dsc":
             package = dsc["source"]
             version = dsc["version"];  # NB: not files[file]["version"], that has no epoch
             q = projectB.query("SELECT id FROM source WHERE source = '%s' AND version = '%s'" % (package, version))
@@@ -516,11 -509,11 +517,11 @@@
             projectB.query("INSERT INTO src_associations (suite, source) VALUES ('%s', '%s')" % (suite_id, source_id))
 
     # Add the binaries to stable (and remove it/them from proposed-updates)
-    for file in files.keys():
-        if files[file]["type"] == "deb":
-            package = files[file]["package"]
-            version = files[file]["version"]
-            architecture = files[file]["architecture"]
+    for newfile in files.keys():
+        if files[newfile]["type"] == "deb":
+            package = files[newfile]["package"]
+            version = files[newfile]["version"]
+            architecture = files[newfile]["architecture"]
             q = projectB.query("SELECT b.id FROM binaries b, architecture a WHERE b.package = '%s' AND b.version = '%s' AND (a.arch_string = '%s' OR a.arch_string = 'all') AND b.architecture = a.id" % (package, version, architecture))
             ql = q.getresult()
             if not ql:
@@@ -543,14 -536,14 +544,14 @@@
         os.unlink (new_changelog_filename)
 
     new_changelog = utils.open_file(new_changelog_filename, 'w')
-    for file in files.keys():
-        if files[file]["type"] == "deb":
-            new_changelog.write("stable/%s/binary-%s/%s\n" % (files[file]["component"], files[file]["architecture"], file))
-        elif utils.re_issource.match(file):
-            new_changelog.write("stable/%s/source/%s\n" % (files[file]["component"], file))
+    for newfile in files.keys():
+        if files[newfile]["type"] == "deb":
+            new_changelog.write("stable/%s/binary-%s/%s\n" % (files[newfile]["component"], files[newfile]["architecture"], newfile))
+        elif re_issource.match(newfile):
+            new_changelog.write("stable/%s/source/%s\n" % (files[newfile]["component"], newfile))
         else:
-            new_changelog.write("%s\n" % (file))
-    chop_changes = queue.re_fdnic.sub("\n", changes["changes"])
+            new_changelog.write("%s\n" % (newfile))
+    chop_changes = re_fdnic.sub("\n", changes["changes"])
     new_changelog.write(chop_changes + '\n\n')
     if os.access(changelog_filename, os.R_OK) != 0:
         changelog = utils.open_file(changelog_filename)
diff --combined dak/update_db.py
index ee1e50f8,f9b6e478..8bb88f65
--- a/dak/update_db.py
+++ b/dak/update_db.py
@@@ -1,6 -1,6 +1,6 @@@
 #!/usr/bin/env python
 
-# Debian Archive Kit Database Update Script
+""" Database Update Main Script """
 # Copyright (C) 2008  Michael Casadevall
 
 # This program is free software; you can redistribute it and/or modify
@@@ -29,6 -29,7 +29,7 @@@
 import psycopg2, sys, fcntl, os
 import apt_pkg
 import time
+import errno
 
 from daklib import database
 from daklib import utils
@@@ -36,7 -37,7 +37,7 @@@
 Cnf = None
 projectB = None
-required_database_schema = 3
+required_database_schema = 4
 
 ################################################################################
@@@ -52,7 -53,7 +53,7 @@@ Updates dak's database schema to the la
 ################################################################################
 
 def update_db_to_zero(self):
-    # This function will attempt to update a pre-zero database schema to zero
+    """ This function will attempt to update a pre-zero database schema to zero """
 
     # First, do the sure thing, and create the configuration table
     try:
@@@ -63,7 -64,7 +64,7 @@@
                        name TEXT UNIQUE NOT NULL,
                        value TEXT
                      );""")
-        c.execute("INSERT INTO config VALUES ( nextval('config_id_seq'), 'db_revision', '0')");
+        c.execute("INSERT INTO config VALUES ( nextval('config_id_seq'), 'db_revision', '0')")
         self.db.commit()
 
     except psycopg2.ProgrammingError:
@@@ -84,7 -85,7 +85,7 @@@
 
     try:
         c = self.db.cursor()
-        q = c.execute("SELECT value FROM config WHERE name = 'db_revision';");
+        q = c.execute("SELECT value FROM config WHERE name = 'db_revision';")
         return c.fetchone()[0]
 
     except psycopg2.ProgrammingError:
diff --combined daklib/database.py
index 9cefc381,1882ad8b..3cbb67b7
--- a/daklib/database.py
+++ b/daklib/database.py
@@@ -1,7 -1,16 +1,16 @@@
 #!/usr/bin/env python
 
-# DB access fucntions
-# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006  James Troup
+""" DB access functions
+@group readonly: get_suite_id, get_section_id, get_priority_id, get_override_type_id,
+                 get_architecture_id, get_archive_id, get_component_id, get_location_id,
+                 get_source_id, get_suite_version, get_files_id, get_maintainer, get_suites
+@group read/write: get_or_set*, set_files_id
+
+@contact: Debian FTP Master
+@copyright: 2000, 2001, 2002, 2003, 2004, 2006  James Troup
+@copyright: 2009  Joerg Jaspert
+@license: GNU General Public License version 2 or later
+"""
 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@@ -19,48 -28,65 +28,70 @@@
 
 ################################################################################
 
-import os, sys, time, types, apt_pkg
+import os
+import sys
+import time
+import types
 
 ################################################################################
 
-Cnf = None
-projectB = None
-suite_id_cache = {}
-section_id_cache = {}
-priority_id_cache = {}
-override_type_id_cache = {}
-architecture_id_cache = {}
-archive_id_cache = {}
-component_id_cache = {}
-location_id_cache = {}
-maintainer_id_cache = {}
-keyring_id_cache = {}
-source_id_cache = {}
-files_id_cache = {}
-maintainer_cache = {}
-fingerprint_id_cache = {}
-queue_id_cache = {}
-uid_id_cache = {}
-suite_version_cache = {}
+Cnf = None                      #: Configuration, apt_pkg.Configuration
+projectB = None                 #: database connection, pgobject
+suite_id_cache = {}             #: cache for suites
+section_id_cache = {}           #: cache for sections
+priority_id_cache = {}          #: cache for priorities
+override_type_id_cache = {}     #: cache for overrides
+architecture_id_cache = {}      #: cache for architectures
+archive_id_cache = {}           #: cache for archives
+component_id_cache = {}         #: cache for components
+location_id_cache = {}          #: cache for locations
+maintainer_id_cache = {}        #: cache for maintainers
+keyring_id_cache = {}           #: cache for keyrings
+source_id_cache = {}            #: cache for sources
+files_id_cache = {}             #: cache for files
+maintainer_cache = {}           #: cache for maintainer names
+fingerprint_id_cache = {}       #: cache for fingerprints
+queue_id_cache = {}             #: cache for queues
+uid_id_cache = {}               #: cache for uids
+suite_version_cache = {}        #: cache for suite_versions (packages)
+suite_bin_version_cache = {}
+content_path_id_cache = {}
+content_file_id_cache = {}
+insert_contents_file_cache = {}
+cache_preloaded = False
 
 ################################################################################
 
 def init (config, sql):
+    """
+    database module init.
+
+    @type config: apt_pkg.Configuration
+    @param config: apt config, see U{http://apt.alioth.debian.org/python-apt-doc/apt_pkg/cache.html#Configuration}
+
+    @type sql: pgobject
+    @param sql: database connection
+
+    """
     global Cnf, projectB
 
     Cnf = config
     projectB = sql
 
-def do_query(q):
-    sys.stderr.write("query: \"%s\" ... " % (q))
+def do_query(query):
+    """
+    Executes a database query. Writes statistics / timing to stderr.
+
+    @type query: string
+    @param query: database query string, passed unmodified
+
+    @return: db result
+
+    @warning: The query is passed B{unmodified}, so be careful what you use this for.
+    """
+    sys.stderr.write("query: \"%s\" ... " % (query))
     before = time.time()
-    r = projectB.query(q)
+    r = projectB.query(query)
     time_diff = time.time()-before
     sys.stderr.write("took %.3f seconds.\n" % (time_diff))
     if type(r) is int:
@@@ -74,6 -100,17 +105,17 @@@
 ################################################################################
 
 def get_suite_id (suite):
+    """
+    Returns database id for given C{suite}.
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type suite: string
+    @param suite: The name of the suite
+
+    @rtype: int
+    @return: the database id for the given suite
+
+    """
     global suite_id_cache
 
     if suite_id_cache.has_key(suite):
@@@ -90,6 -127,17 +132,17 @@@
     return suite_id
 
 def get_section_id (section):
+    """
+    Returns database id for given C{section}.
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type section: string
+    @param section: The name of the section
+
+    @rtype: int
+    @return: the database id for the given section
+
+    """
     global section_id_cache
 
     if section_id_cache.has_key(section):
@@@ -106,6 -154,17 +159,17 @@@
     return section_id
 
 def get_priority_id (priority):
+    """
+    Returns database id for given C{priority}.
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type priority: string
+    @param priority: The name of the priority
+
+    @rtype: int
+    @return: the database id for the given priority
+
+    """
     global priority_id_cache
 
     if priority_id_cache.has_key(priority):
@@@ -122,6 -181,17 +186,17 @@@
     return priority_id
 
 def get_override_type_id (type):
+    """
+    Returns database id for given override C{type}.
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type type: string
+    @param type: The name of the override type
+
+    @rtype: int
+    @return: the database id for the given override type
+
+    """
     global override_type_id_cache
 
     if override_type_id_cache.has_key(type):
@@@ -138,6 -208,17 +213,17 @@@
     return override_type_id
 
 def get_architecture_id (architecture):
+    """
+    Returns database id for given C{architecture}.
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type architecture: string
+    @param architecture: The name of the architecture
+
+    @rtype: int
+    @return: the database id for the given architecture
+
+    """
     global architecture_id_cache
 
     if architecture_id_cache.has_key(architecture):
@@@ -154,6 -235,17 +240,17 @@@
     return architecture_id
 
 def get_archive_id (archive):
+    """
+    Returns database id for given C{archive}.
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type archive: string
+    @param archive: The name of the archive
+
+    @rtype: int
+    @return: the database id for the given archive
+
+    """
     global archive_id_cache
 
     archive = archive.lower()
@@@ -172,6 -264,17 +269,17 @@@
     return archive_id
 
 def get_component_id (component):
+    """
+    Returns database id for given C{component}.
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type component: string
+    @param component: The name of the component
+
+    @rtype: int
+    @return: the database id for the given component
+
+    """
     global component_id_cache
 
     component = component.lower()
@@@ -190,6 -293,26 +298,26 @@@
     return component_id
 
 def get_location_id (location, component, archive):
+    """
+    Returns database id for the location behind the given combination of
+      - B{location} - the path of the location, eg. I{/srv/ftp.debian.org/ftp/pool/}
+      - B{component} - the id of the component as returned by L{get_component_id}
+      - B{archive} - the id of the archive as returned by L{get_archive_id}
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type location: string
+    @param location: the path of the location
+
+    @type component: int
+    @param component: the id of the component
+
+    @type archive: int
+    @param archive: the id of the archive
+
+    @rtype: int
+    @return: the database id for the location
+
+    """
     global location_id_cache
 
     cache_key = location + '_' + component + '_' + archive
@@@ -213,6 -336,22 +341,22 @@@
     return location_id
 
 def get_source_id (source, version):
+    """
+    Returns database id for the combination of C{source} and C{version}
+      - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc}
+      - B{version}
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type source: string
+    @param source: source package name
+
+    @type version: string
+    @param version: the source version
+
+    @rtype: int
+    @return: the database id for the source
+
+    """
     global source_id_cache
 
     cache_key = source + '_' + version + '_'
@@@ -229,7 -368,25 +373,26 @@@
     return source_id
 
-def get_suite_version(source, suite, arch):
+def get_suite_version(source, suite):
+    """
+    Returns the version of C{source} in C{suite}.
+
+      - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc}
+      - B{suite} - a suite name, eg. I{unstable}
+
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type source: string
+    @param source: source package name
+
+    @type suite: string
+    @param suite: the suite name
+
+    @rtype: string
+    @return: the version for I{source} in I{suite}
+
+    """
++   global suite_version_cache
     cache_key = "%s_%s" % (source, suite)
 
@@@ -252,53 -409,23 +415,67 @@@
     return version
 
+def get_latest_binary_version_id(binary, section, suite, arch):
+    """ Returns the binaries.id of the newest binary matching the given
+    package, section, suite and architecture, trying the specific
+    architecture first and falling back to arch 'all'. """
+    global suite_bin_version_cache
+    cache_key = "%s_%s_%s_%s" % (binary, section, suite, arch)
+    cache_key_all = "%s_%s_%s_%s" % (binary, section, suite, get_architecture_id("all"))
+
+    # Check for the cache hit for its arch, then arch all
+    if suite_bin_version_cache.has_key(cache_key):
+        return suite_bin_version_cache[cache_key]
+    if suite_bin_version_cache.has_key(cache_key_all):
+        return suite_bin_version_cache[cache_key_all]
+    if cache_preloaded == True:
+        return # package does not exist
+
+    q = projectB.query("SELECT DISTINCT b.id FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section)))
+
+    if not q.getresult():
+        return False
+
+    highest_bid = q.getresult()[0][0]
+
+    suite_bin_version_cache[cache_key] = highest_bid
+    return highest_bid
+
+def preload_binary_id_cache():
+    """ Pre-fills suite_bin_version_cache with an entry for every binary
+    per suite and architecture, so later lookups skip single queries. """
+    global suite_bin_version_cache, cache_preloaded
+
+    # Get suite info
+    q = projectB.query("SELECT id FROM suite")
+    suites = q.getresult()
+
+    # Get arch mappings
+    q = projectB.query("SELECT id FROM architecture")
+    arches = q.getresult()
+
+    for suite in suites:
+        for arch in arches:
+            q = projectB.query("SELECT DISTINCT b.id, b.package, o.section FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.architecture = '%d' AND ba.suite = '%d'" % (int(arch[0]), int(suite[0])))
+
+            for bi in q.getresult():
+                cache_key = "%s_%s_%s_%s" % (bi[1], bi[2], suite[0], arch[0])
+                suite_bin_version_cache[cache_key] = int(bi[0])
+
+    cache_preloaded = True
+
 ################################################################################
 
 def get_or_set_maintainer_id (maintainer):
+    """
+    If C{maintainer} does not have an entry in the maintainer table yet, create one
+    and return the new id.
+    If C{maintainer} already has an entry, simply return the existing id.
+
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type maintainer: string
+    @param maintainer: the maintainer name
+
+    @rtype: int
+    @return: the database id for the maintainer
+
+    """
     global maintainer_id_cache
 
     if maintainer_id_cache.has_key(maintainer):
@@@ -316,6 -443,20 +493,20 @@@
 ################################################################################
 
 def get_or_set_keyring_id (keyring):
+    """
+    If C{keyring} does not have an entry in the C{keyrings} table yet, create one
+    and return the new id.
+    If C{keyring} already has an entry, simply return the existing id.
+
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type keyring: string
+    @param keyring: the keyring name
+
+    @rtype: int
+    @return: the database id for the keyring
+
+    """
     global keyring_id_cache
 
     if keyring_id_cache.has_key(keyring):
@@@ -333,6 -474,21 +524,21 @@@
 ################################################################################
 
 def get_or_set_uid_id (uid):
+    """
+    If C{uid} does not have an entry in the uid table yet, create one
+    and return the new id.
+    If C{uid} already has an entry, simply return the existing id.
+
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type uid: string
+    @param uid: the uid.
+
+    @rtype: int
+    @return: the database id for the uid
+
+    """
+
     global uid_id_cache
 
     if uid_id_cache.has_key(uid):
@@@ -350,6 -506,20 +556,20 @@@
 ################################################################################
 
 def get_or_set_fingerprint_id (fingerprint):
+    """
+    If C{fingerprint} does not have an entry in the fingerprint table yet, create one
+    and return the new id.
+    If C{fingerprint} already has an entry, simply return the existing id.
+
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type fingerprint: string
+    @param fingerprint: the fingerprint
+
+    @rtype: int
+    @return: the database id for the fingerprint
+
+    """
     global fingerprint_id_cache
 
     if fingerprint_id_cache.has_key(fingerprint):
@@@ -367,6 -537,38 +587,38 @@@
 ################################################################################
 
 def get_files_id (filename, size, md5sum, location_id):
+    """
+    Returns -1, -2 or the file_id for filename, if its C{size} and C{md5sum} match an
+    existing copy.
+
+    The database is queried using the C{filename} and C{location_id}. If a file does exist
+    at that location, the existing size and md5sum are checked against the provided
+    parameters. A size or checksum mismatch returns -2. If more than one entry is
+    found within the database, a -1 is returned, no result returns None, otherwise
+    the file id.
+
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type filename: string
+    @param filename: the filename of the file to check against the DB
+
+    @type size: int
+    @param size: the size of the file to check against the DB
+
+    @type md5sum: string
+    @param md5sum: the md5sum of the file to check against the DB
+
+    @type location_id: int
+    @param location_id: the id of the location as returned by L{get_location_id}
+
+    @rtype: int / None
+    @return: Various return values are possible:
+               - -2: size/checksum error
+               - -1: more than one file found in database
+               - None: no file found in database
+               - int: file id
+
+    """
     global files_id_cache
 
     cache_key = "%s_%d" % (filename, location_id)
@@@ -393,6 -595,20 +645,20 @@@
 ################################################################################
 
 def get_or_set_queue_id (queue):
+    """
+    If C{queue} does not have an entry in the queue table yet, create one
+    and return the new id.
+    If C{queue} already has an entry, simply return the existing id.
+
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type queue: string
+    @param queue: the queue name (no full path)
+
+    @rtype: int
+    @return: the database id for the queue
+
+    """
     global queue_id_cache
 
     if queue_id_cache.has_key(queue):
@@@ -410,6 -626,31 +676,31 @@@
 ################################################################################
 
 def set_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id):
+    """
+    Insert a new entry into the files table and return its id.
+
+    @type filename: string
+    @param filename: the filename
+
+    @type size: int
+    @param size: the size in bytes
+
+    @type md5sum: string
+    @param md5sum: md5sum of the file
+
+    @type sha1sum: string
+    @param sha1sum: sha1sum of the file
+
+    @type sha256sum: string
+    @param sha256sum: sha256sum of the file
+
+    @type location_id: int
+    @param location_id: the id of the location as returned by L{get_location_id}
+
+    @rtype: int
+    @return: the database id for the new file
+
+    """
     global files_id_cache
 
     projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', '%s', '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum, location_id))
@@@ -429,6 -670,18 +720,18 @@@
 ################################################################################
 
 def get_maintainer (maintainer_id):
+    """
+    Return the name of the maintainer behind C{maintainer_id}.
+
+    Results are kept in a cache during runtime to minimize database queries.
+
+    @type maintainer_id: int
+    @param maintainer_id: the id of the maintainer, eg. from L{get_or_set_maintainer_id}
+
+    @rtype: string
+    @return: the name of the maintainer
+
+    """
     global maintainer_cache
 
     if not maintainer_cache.has_key(maintainer_id):
@@@ -440,65 -693,40 +743,96 @@@
 ################################################################################
 
 def get_suites(pkgname, src=False):
+    """
+    Return the suites in which C{pkgname} can be found. If C{src} is True query for source
+    package, else binary package.
+
+    @type pkgname: string
+    @param pkgname: name of the package
+
+    @type src: bool
+    @param src: if True look for source packages, false (default) looks for binary.
+
+    @rtype: list
+    @return: list of suites, or empty list if no match
+
+    """
     if src:
-        sql = "select suite_name from source, src_associations,suite where source.id=src_associations.source and source.source='%s' and src_associations.suite = suite.id"%pkgname
+        sql = """
+        SELECT suite_name
+        FROM source,
+             src_associations,
+             suite
+        WHERE source.id = src_associations.source
+        AND source.source = '%s'
+        AND src_associations.suite = suite.id
+        """ % (pkgname)
     else:
-        sql = "select suite_name from binaries, bin_associations,suite where binaries.id=bin_associations.bin and package='%s' and bin_associations.suite = suite.id"%pkgname
+        sql = """
+        SELECT suite_name
+        FROM binaries,
+             bin_associations,
+             suite
+        WHERE binaries.id = bin_associations.bin
+        AND package = '%s'
+        AND bin_associations.suite = suite.id
+        """ % (pkgname)
+
     q = projectB.query(sql)
     return map(lambda x: x[0], q.getresult())
+
+################################################################################
+
+def get_or_set_contents_file_id(file):
+    """ Returns database id for the given filename, creating the
+    content_file_names row first if it does not exist yet. """
+    global content_file_id_cache
+
+    if not content_file_id_cache.has_key(file):
+        sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % file
+        q = projectB.query(sql_select)
+        if not q.getresult():
+            # since this can be called within a transaction, we can't use currval
+            q = projectB.query("INSERT INTO content_file_names VALUES (DEFAULT, '%s') RETURNING id" % (file))
+        content_file_id_cache[file] = int(q.getresult()[0][0])
+    return content_file_id_cache[file]
+
+################################################################################
+
+def get_or_set_contents_path_id(path):
+    """ Returns database id for the given path, creating the
+    content_file_paths row first if it does not exist yet. """
+    global content_path_id_cache
+
+    if not content_path_id_cache.has_key(path):
+        sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % path
+        q = projectB.query(sql_select)
+        if not q.getresult():
+            # since this can be called within a transaction, we can't use currval
+            q = projectB.query("INSERT INTO content_file_paths VALUES (DEFAULT, '%s') RETURNING id" % (path))
+        content_path_id_cache[path] = int(q.getresult()[0][0])
+    return content_path_id_cache[path]
+
+################################################################################
+
+def insert_content_path(bin_id, fullpath):
+    """ Records that binary C{bin_id} ships the file C{fullpath},
+    skipping combinations already seen during this run. """
+    global insert_contents_file_cache
+    cache_key = "%s_%s" % (bin_id, fullpath)
+
+    # have we seen this contents before?
+    # probably only relevant during package import
+    if insert_contents_file_cache.has_key(cache_key):
+        return
+
+    # split fullpath into pathname and basename
+    (path, file) = os.path.split(fullpath)
+
+    # Get the necessary IDs ...
+    file_id = get_or_set_contents_file_id(file)
+    path_id = get_or_set_contents_path_id(path)
+
+    # Determine if we're inserting a duplicate row
+    q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id))
+    if q.getresult():
+        # Yes we are, return without doing the insert
+        return
+
+    # Put them into content_associations
+    projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id))
+    return
diff --combined daklib/utils.py
index 52b902f9,7b822b9d..5e362796
--- a/daklib/utils.py
+++ b/daklib/utils.py
@@@ -1,10 -1,12 +1,12 @@@
 #!/usr/bin/env python
 # vim:set et ts=4 sw=4:
 
-# Utility functions
-# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup
+"""Utility functions
 
-################################################################################
+@contact: Debian FTP Master
+@copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup
+@license: GNU General Public License version 2 or later
+"""
 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@@ -20,58 -22,62 +22,62 @@@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-################################################################################
-
-import codecs, commands, email.Header, os, pwd, re, select, socket, shutil, \
-       sys, tempfile, traceback, stat
+import codecs
+import commands
+import email.Header
+import os
+import pwd
+import select
+import socket
+import shutil
+import sys
+import tarfile
+import tempfile
+import traceback
+import stat
 import apt_pkg
 import database
 import time
 from dak_exceptions import *
+from regexes import re_html_escaping, html_escaping, re_single_line_field, \
+                    re_multi_line_field, re_srchasver, re_verwithext, \
+                    re_parse_maintainer, re_taint_free, re_gpg_uid
 
 ################################################################################
 
-re_comments = re.compile(r"\#.*")
-re_no_epoch = re.compile(r"^\d+\:")
-re_no_revision = re.compile(r"-[^-]+$")
-re_arch_from_filename = re.compile(r"/binary-[^/]+/")
-re_extract_src_version = re.compile (r"(\S+)\s*\((.*)\)")
-re_isadeb = re.compile (r"(.+?)_(.+?)_(.+)\.u?deb$")
-re_issource = re.compile (r"(.+)_(.+?)\.(orig\.tar\.gz|diff\.gz|tar\.gz|dsc)$")
-
-re_single_line_field = re.compile(r"^(\S*)\s*:\s*(.*)")
-re_multi_line_field = re.compile(r"^\s(.*)")
-re_taint_free = re.compile(r"^[-+~/\.\w]+$")
-
-re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\>]+)\>")
-re_gpg_uid = re.compile('^uid.*<([^>]*)>')
-
-re_srchasver = re.compile(r"^(\S+)\s+\((\S+)\)$")
-re_verwithext = re.compile(r"^(\d+)(?:\.(\d+))(?:\s+\((\S+)\))?$")
+default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties
+default_apt_config = "/etc/dak/apt.conf" #: default apt config, not normally used
-
-re_srchasver = re.compile(r"^(\S+)\s+\((\S+)\)$")
-
-html_escaping = {'"':'&quot;', '&':'&amp;', '<':'&lt;', '>':'&gt;'}
-re_html_escaping = re.compile('|'.join(map(re.escape, html_escaping.keys())))
-
-default_config = "/etc/dak/dak.conf"
-default_apt_config = "/etc/dak/apt.conf"
-
-alias_cache = None
-key_uid_email_cache = {}
+alias_cache = None        #: Cache for email alias checks
+key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids
 
 # (hashname, function, earliest_changes_version)
 known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
-                ("sha256", apt_pkg.sha256sum, (1, 8))]
+                ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
 
 ################################################################################
 
 def html_escape(s):
+    """ Escape html chars """
     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 
 ################################################################################
 
 def open_file(filename, mode='r'):
+    """
+    Open C{filename}, return fileobject.
+
+    @type filename: string
+    @param filename: path/filename to open
+
+    @type mode: string
+    @param mode: open mode
+
+    @rtype: fileobject
+    @return: open fileobject
+
+    @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
+
+    """
     try:
         f = open(filename, mode)
     except IOError:
@@@ -201,25 -207,26 +207,26 @@@ def parse_deb822(contents, signing_rule
 ################################################################################
 
 def parse_changes(filename, signing_rules=0):
-    """Parses a changes file and returns a dictionary where each field is a
-    key.  The mandatory first argument is the filename of the .changes
-    file.
-
-    signing_rules is an optional argument:
-
-    o If signing_rules == -1, no signature is required.
-    o If signing_rules == 0 (the default), a signature is required.
-    o If signing_rules == 1, it turns on the same strict format checking
-      as dpkg-source.
-
-    The rules for (signing_rules == 1)-mode are:
-
-    o The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
-      followed by any PGP header data and must end with a blank line.
-
-    o The data section must end with a blank line and must be followed by
-      "-----BEGIN PGP SIGNATURE-----".
-    """
+    """
+    Parses a changes file and returns a dictionary where each field is a
+    key.  The mandatory first argument is the filename of the .changes
+    file.
+
+    signing_rules is an optional argument:
+
+      - If signing_rules == -1, no signature is required.
+      - If signing_rules == 0 (the default), a signature is required.
+      - If signing_rules == 1, it turns on the same strict format checking
+        as dpkg-source.
+
+    The rules for (signing_rules == 1)-mode are:
+
+      - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
+        followed by any PGP header data and must end with a blank line.
+
+      - The data section must end with a blank line and must be followed by
+        "-----BEGIN PGP SIGNATURE-----".
+ """ changes_in = open_file(filename) content = changes_in.read() @@@ -234,9 -241,11 +241,11 @@@ def hash_key(hashname) ################################################################################ def create_hash(where, files, hashname, hashfunc): - """create_hash extends the passed files dict with the given hash by + """ + create_hash extends the passed files dict with the given hash by iterating over all files on disk and passing them to the hashing - function given.""" + function given. + """ rejmsg = [] for f in files.keys(): @@@ -253,9 -262,11 +262,11 @@@ ################################################################################ def check_hash(where, files, hashname, hashfunc): - """check_hash checks the given hash in the files dict against the actual + """ + check_hash checks the given hash in the files dict against the actual files on disk. The hash values need to be present consistently in - all file entries. It does not modify its input in any way.""" + all file entries. It does not modify its input in any way. + """ rejmsg = [] for f in files.keys(): @@@ -286,8 -297,10 +297,10 @@@ ################################################################################ def check_size(where, files): - """check_size checks the file sizes in the passed files dict against the - files on disk.""" + """ + check_size checks the file sizes in the passed files dict against the + files on disk. + """ rejmsg = [] for f in files.keys(): @@@ -309,8 -322,10 +322,10 @@@ ################################################################################ def check_hash_fields(what, manifest): - """check_hash_fields ensures that there are no checksum fields in the - given dict that we do not know about.""" + """ + check_hash_fields ensures that there are no checksum fields in the + given dict that we do not know about. + """ rejmsg = [] hashes = map(lambda x: x[0], known_hashes) @@@ -343,9 -358,11 +358,11 @@@ def _ensure_changes_hash(changes, forma # access the checksums easily. def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc): - """ensure_dsc_hashes' task is to ensure that each and every *present* hash + """ + ensure_dsc_hashes' task is to ensure that each and every *present* hash in the dsc is correct, i.e. identical to the changes file and if necessary - the pool. The latter task is delegated to check_hash.""" + the pool. The latter task is delegated to check_hash. 
+ """ rejmsg = [] if not dsc.has_key('Checksums-%s' % (hashname,)): @@@ -398,25 -415,24 +415,24 @@@ def parse_checksums(where, files, manif field = 'checksums-%s' % hashname if not field in manifest: return rejmsg - input = manifest[field] - for line in input.split('\n'): + for line in manifest[field].split('\n'): if not line: break - hash, size, file = line.strip().split(' ') - if not files.has_key(file): + checksum, size, checkfile = line.strip().split(' ') + if not files.has_key(checkfile): # TODO: check for the file's entry in the original files dict, not # the one modified by (auto)byhand and other weird stuff # rejmsg.append("%s: not present in files but in checksums-%s in %s" % # (file, hashname, where)) continue - if not files[file]["size"] == size: + if not files[checkfile]["size"] == size: rejmsg.append("%s: size differs for files and checksums-%s entry "\ - "in %s" % (file, hashname, where)) + "in %s" % (checkfile, hashname, where)) continue - files[file][hash_key(hashname)] = hash + files[checkfile][hash_key(hashname)] = checksum for f in files.keys(): if not files[f].has_key(hash_key(hashname)): - rejmsg.append("%s: no entry in checksums-%s in %s" % (file, + rejmsg.append("%s: no entry in checksums-%s in %s" % (checkfile, hashname, where)) return rejmsg @@@ -488,8 -504,10 +504,10 @@@ def build_file_list(changes, is_a_dsc=0 ################################################################################ def force_to_utf8(s): - """Forces a string to UTF-8. If the string isn't already UTF-8, - it's assumed to be ISO-8859-1.""" + """ + Forces a string to UTF-8. If the string isn't already UTF-8, + it's assumed to be ISO-8859-1. + """ try: unicode(s, 'utf-8') return s @@@ -498,8 -516,10 +516,10 @@@ return latin1_s.encode('utf-8') def rfc2047_encode(s): - """Encodes a (header) string per RFC2047 if necessary. If the - string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1.""" + """ + Encodes a (header) string per RFC2047 if necessary. If the + string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1. + """ try: codecs.lookup('ascii')[1](s) return s @@@ -520,15 -540,18 +540,18 @@@ # incompatible!' def fix_maintainer (maintainer): - """Parses a Maintainer or Changed-By field and returns: - (1) an RFC822 compatible version, - (2) an RFC2047 compatible version, - (3) the name - (4) the email - - The name is forced to UTF-8 for both (1) and (3). If the name field - contains '.' or ',' (as allowed by Debian policy), (1) and (2) are - switched to 'email (name)' format.""" + """ + Parses a Maintainer or Changed-By field and returns: + 1. an RFC822 compatible version, + 2. an RFC2047 compatible version, + 3. the name + 4. the email + + The name is forced to UTF-8 for both 1. and 3.. If the name field + contains '.' or ',' (as allowed by Debian policy), 1. and 2. are + switched to 'email (name)' format. 
+ + """ maintainer = maintainer.strip() if not maintainer: return ('', '', '', '') @@@ -566,12 -589,12 +589,12 @@@ ################################################################################ - # sendmail wrapper, takes _either_ a message string or a file as arguments def send_mail (message, filename=""): - # If we've been passed a string dump it into a temporary file + """sendmail wrapper, takes _either_ a message string or a file as arguments""" + + # If we've been passed a string dump it into a temporary file if message: - filename = tempfile.mktemp() - fd = os.open(filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, 0700) + (fd, filename) = tempfile.mkstemp() os.write (fd, message) os.close (fd) @@@ -685,13 -708,13 +708,13 @@@ def regex_safe (s) ################################################################################ - # Perform a substition of template def TemplateSubst(map, filename): - file = open_file(filename) - template = file.read() + """ Perform a substition of template """ + templatefile = open_file(filename) + template = templatefile.read() for x in map.keys(): template = template.replace(x,map[x]) - file.close() + templatefile.close() return template ################################################################################ @@@ -732,8 -755,8 +755,8 @@@ def cc_fix_changes (changes) for j in o.split(): changes["architecture"][j] = 1 - # Sort by source name, source version, 'have source', and then by filename def changes_compare (a, b): + """ Sort by source name, source version, 'have source', and then by filename """ try: a_changes = parse_changes(a) except: @@@ -787,13 -810,13 +810,13 @@@ def find_next_free (dest, too_many=100) ################################################################################ def result_join (original, sep = '\t'): - list = [] + resultlist = [] for i in xrange(len(original)): if original[i] == None: - list.append("") + resultlist.append("") else: - list.append(original[i]) - return sep.join(list) + resultlist.append(original[i]) + return sep.join(resultlist) ################################################################################ @@@ -811,18 -834,20 +834,20 @@@ def prefix_multi_line_string(str, prefi ################################################################################ def validate_changes_file_arg(filename, require_changes=1): - """'filename' is either a .changes or .dak file. If 'filename' is a - .dak file, it's changed to be the corresponding .changes file. The - function then checks if the .changes file a) exists and b) is - readable and returns the .changes filename if so. If there's a - problem, the next action depends on the option 'require_changes' - argument: - - o If 'require_changes' == -1, errors are ignored and the .changes - filename is returned. - o If 'require_changes' == 0, a warning is given and 'None' is returned. - o If 'require_changes' == 1, a fatal error is raised. - """ + """ + 'filename' is either a .changes or .dak file. If 'filename' is a + .dak file, it's changed to be the corresponding .changes file. The + function then checks if the .changes file a) exists and b) is + readable and returns the .changes filename if so. If there's a + problem, the next action depends on the option 'require_changes' + argument: + + - If 'require_changes' == -1, errors are ignored and the .changes + filename is returned. + - If 'require_changes' == 0, a warning is given and 'None' is returned. + - If 'require_changes' == 1, a fatal error is raised. 
+ + """ error = None orig_filename = filename @@@ -881,8 -906,8 +906,8 @@@ def get_conf() ################################################################################ - # Handle -a, -c and -s arguments; returns them as SQL constraints def parse_args(Options): + """ Handle -a, -c and -s arguments; returns them as SQL constraints """ # Process suite if Options["Suite"]: suite_ids_list = [] @@@ -978,10 -1003,13 +1003,13 @@@ def try_with_debug(function) ################################################################################ - # Function for use in sorting lists of architectures. - # Sorts normally except that 'source' dominates all others. - def arch_compare_sw (a, b): + """ + Function for use in sorting lists of architectures. + + Sorts normally except that 'source' dominates all others. + """ + if a == "source" and b == "source": return 0 elif a == "source": @@@ -993,13 -1021,15 +1021,15 @@@ ################################################################################ - # Split command line arguments which can be separated by either commas - # or whitespace. If dwim is set, it will complain about string ending - # in comma since this usually means someone did 'dak ls -a i386, m68k - # foo' or something and the inevitable confusion resulting from 'm68k' - # being treated as an argument is undesirable. - def split_args (s, dwim=1): + """ + Split command line arguments which can be separated by either commas + or whitespace. If dwim is set, it will complain about string ending + in comma since this usually means someone did 'dak ls -a i386, m68k + foo' or something and the inevitable confusion resulting from 'm68k' + being treated as an argument is undesirable. + """ + if s.find(",") == -1: return s.split() else: @@@ -1013,9 -1043,12 +1043,12 @@@ def Dict(**dict): return dic ######################################## - # Our very own version of commands.getouputstatus(), hacked to support - # gpgv's status fd. def gpgv_get_status_output(cmd, status_read, status_write): + """ + Our very own version of commands.getouputstatus(), hacked to support + gpgv's status fd. + """ + cmd = ['/bin/sh', '-c', cmd] p2cread, p2cwrite = os.pipe() c2pread, c2pwrite = os.pipe() @@@ -1105,9 -1138,11 +1138,11 @@@ def process_gpgv_output(status) ################################################################################ def retrieve_key (filename, keyserver=None, keyring=None): - """Retrieve the key that signed 'filename' from 'keyserver' and - add it to 'keyring'. Returns nothing on success, or an error message - on error.""" + """ + Retrieve the key that signed 'filename' from 'keyserver' and + add it to 'keyring'. Returns nothing on success, or an error message + on error. + """ # Defaults for keyserver and keyring if not keyserver: @@@ -1120,7 -1155,7 +1155,7 @@@ return "%s: tainted filename" % (filename) # Invoke gpgv on the file - status_read, status_write = os.pipe(); + status_read, status_write = os.pipe() cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename) (_, status, _) = gpgv_get_status_output(cmd, status_read, status_write) @@@ -1157,18 -1192,20 +1192,20 @@@ def gpg_keyring_args(keyrings=None) ################################################################################ def check_signature (sig_filename, reject, data_filename="", keyrings=None, autofetch=None): - """Check the signature of a file and return the fingerprint if the - signature is valid or 'None' if it's not. The first argument is the - filename whose signature should be checked. 
The second argument is a - reject function and is called when an error is found. The reject() - function must allow for two arguments: the first is the error message, - the second is an optional prefix string. It's possible for reject() - to be called more than once during an invocation of check_signature(). - The third argument is optional and is the name of the files the - detached signature applies to. The fourth argument is optional and is - a *list* of keyrings to use. 'autofetch' can either be None, True or - False. If None, the default behaviour specified in the config will be - used.""" + """ + Check the signature of a file and return the fingerprint if the + signature is valid or 'None' if it's not. The first argument is the + filename whose signature should be checked. The second argument is a + reject function and is called when an error is found. The reject() + function must allow for two arguments: the first is the error message, + the second is an optional prefix string. It's possible for reject() + to be called more than once during an invocation of check_signature(). + The third argument is optional and is the name of the files the + detached signature applies to. The fourth argument is optional and is + a *list* of keyrings to use. 'autofetch' can either be None, True or + False. If None, the default behaviour specified in the config will be + used. + """ # Ensure the filename contains no shell meta-characters or other badness if not re_taint_free.match(sig_filename): @@@ -1192,7 -1229,7 +1229,7 @@@ return None # Build the command line - status_read, status_write = os.pipe(); + status_read, status_write = os.pipe() cmd = "gpgv --status-fd %s %s %s %s" % ( status_write, gpg_keyring_args(keyrings), sig_filename, data_filename) @@@ -1349,9 -1386,11 +1386,11 @@@ def wrap(paragraph, max_length, prefix= ################################################################################ - # Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'. - # Returns fixed 'src' def clean_symlink (src, dest, root): + """ + Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'. + Returns fixed 'src' + """ src = src.replace(root, '', 1) dest = dest.replace(root, '', 1) dest = os.path.dirname(dest) @@@ -1360,32 -1399,22 +1399,22 @@@ ################################################################################ - def temp_filename(directory=None, dotprefix=None, perms=0700): - """Return a secure and unique filename by pre-creating it. - If 'directory' is non-null, it will be the directory the file is pre-created in. - If 'dotprefix' is non-null, the filename will be prefixed with a '.'.""" - - if directory: - old_tempdir = tempfile.tempdir - tempfile.tempdir = directory + def temp_filename(directory=None, prefix="dak", suffix=""): + """ + Return a secure and unique filename by pre-creating it. + If 'directory' is non-null, it will be the directory the file is pre-created in. + If 'prefix' is non-null, the filename will be prefixed with it, default is dak. + If 'suffix' is non-null, the filename will end with it. - filename = tempfile.mktemp() + Returns a pair (fd, name). 
+ """ - if dotprefix: - filename = "%s/.%s" % (os.path.dirname(filename), os.path.basename(filename)) - fd = os.open(filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, perms) - os.close(fd) - - if directory: - tempfile.tempdir = old_tempdir - - return filename + return tempfile.mkstemp(suffix, prefix, directory) ################################################################################ - # checks if the user part of the email is listed in the alias file - def is_email_alias(email): + """ checks if the user part of the email is listed in the alias file """ global alias_cache if alias_cache == None: aliasfn = which_alias_file() @@@ -1407,52 -1436,3 +1436,52 @@@ if which_conf_file() != default_config apt_pkg.ReadConfigFileISC(Cnf,which_conf_file()) ################################################################################ + +def generate_contents_information(filename): + """ + Generate a list of flies contained in a .deb + + @type filename: string + @param filename: the path to a .deb + + @rtype: list + @return: a list of files in the data.tar.* portion of the .deb + """ + cmd = "ar t %s" % (filename) + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + reject("%s: 'ar t' invocation failed." % (filename)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + + # Ugh ... this is ugly ... Code ripped from process_unchecked.py + chunks = output.split('\n') + + contents = [] + try: + cmd = "ar x %s %s" % (filename, chunks[2]) + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + reject("%s: '%s' invocation failed." % (filename, cmd)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + + # Got deb tarballs, now lets go through and determine what bits + # and pieces the deb had ... + if chunks[2] == "data.tar.gz": + data = tarfile.open("data.tar.gz", "r:gz") + elif data_tar == "data.tar.bz2": + data = tarfile.open("data.tar.bz2", "r:bz2") + else: + os.remove(chunks[2]) + reject("couldn't find data.tar.*") + + for tarinfo in data: + if not tarinfo.isdir(): + contents.append(tarinfo.name[2:]) + + finally: + if os.path.exists( chunks[2] ): + os.remove( chunks[2] ) + + return contents + +###############################################################################