#!/usr/bin/env python
- """Wrapper to launch dak functionality"""
+ """
+ Wrapper to launch dak functionality
+
+ G{importgraph}
+
+ """
# Copyright (C) 2005, 2006 Anthony Towns <ajt@debian.org>
# Copyright (C) 2006 James Troup <james@nocrew.org>
("make-suite-file-list",
"Generate lists of packages per suite for apt-ftparchive"),
+ ("make-pkg-file-mapping",
+ "Generate package <-> file mapping"),
("generate-releases",
"Generate Release files"),
+ ("generate-contents",
+ "Generate contents files"),
("generate-index-diffs",
"Generate .diff/Index files"),
("clean-suites",
"Check for users with no packages in the archive"),
("import-archive",
"Populate SQL database based from an archive tree"),
+ ("import-contents",
+ "Populate SQL database with Contents files"),
("import-keyring",
"Populate fingerprint/uid table based on a new/updated keyring"),
("import-ldap-fingerprints",
"Generates Maintainers file for BTS etc"),
("make-overrides",
"Generates override files"),
- ("mirror-split",
- "Split the pool/ by architecture groups"),
("poolize",
"Move packages from dists/ to pool/"),
("reject-proposed-updates",
"Split queue/done into a date-based hierarchy"),
("stats",
"Generate statistics"),
+ ("bts-categorize",
+ "Categorize uncategorized bugs filed against ftp.debian.org"),
]
return functionality
#!/usr/bin/env python
# coding=utf8
- # Debian Archive Kit Database Update Script
+ """ Database Update Script - debversion """
# Copyright © 2008 Michael Casadevall <mcasadevall@debian.org>
# Copyright © 2008 Roger Leigh <rleigh@debian.org>
################################################################################
def do_update(self):
print "Note: to be able to enable the PL/Perl (plperl) procedural language, we do"
print "need postgresql-plperl-$postgres-version installed. Make sure that this is the"
print "case before you continue. Interrupt if it isn't, sleeping 5 seconds now."
print "(We need to be database superuser for this to work!)"
time.sleep (5)
try:
c = self.db.cursor()
#!/usr/bin/env python
- # Installs Debian packages from queue/accepted into the pool
+ """ Installs Debian packages from queue/accepted into the pool """
# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
# This program is free software; you can redistribute it and/or modify
###############################################################################
import errno, fcntl, os, sys, time, re
-import apt_pkg
+import apt_pkg, tarfile, commands
from daklib import database
from daklib import logging
from daklib import queue
from daklib import utils
from daklib.dak_exceptions import *
+ from daklib.regexes import re_default_answer, re_issource, re_fdnic
###############################################################################
else:
os.unlink(self.log_filename)
+
###############################################################################
+
def reject (str, prefix="Rejected: "):
global reject_message
if str:
def check():
propogate={}
nopropogate={}
- for file in files.keys():
+ for checkfile in files.keys():
# The .orig.tar.gz can disappear out from under us is it's a
# duplicate of one in the archive.
- if not files.has_key(file):
+ if not files.has_key(checkfile):
continue
# Check that the source still exists
- if files[file]["type"] == "deb":
- source_version = files[file]["source version"]
- source_package = files[file]["source package"]
+ if files[checkfile]["type"] == "deb":
+ source_version = files[checkfile]["source version"]
+ source_package = files[checkfile]["source package"]
if not changes["architecture"].has_key("source") \
and not Upload.source_exists(source_package, source_version, changes["distribution"].keys()):
- reject("no source found for %s %s (%s)." % (source_package, source_version, file))
+ reject("no source found for %s %s (%s)." % (source_package, source_version, checkfile))
# Version and file overwrite checks
if not installing_to_stable:
- if files[file]["type"] == "deb":
- reject(Upload.check_binary_against_db(file), "")
- elif files[file]["type"] == "dsc":
- reject(Upload.check_source_against_db(file), "")
- (reject_msg, is_in_incoming) = Upload.check_dsc_against_db(file)
+ if files[checkfile]["type"] == "deb":
+ reject(Upload.check_binary_against_db(checkfile), "")
+ elif files[checkfile]["type"] == "dsc":
+ reject(Upload.check_source_against_db(checkfile), "")
+ (reject_msg, is_in_incoming) = Upload.check_dsc_against_db(checkfile)
reject(reject_msg, "")
# propogate in the case it is in the override tables:
if changes.has_key("propdistribution"):
for suite in changes["propdistribution"].keys():
- if Upload.in_override_p(files[file]["package"], files[file]["component"], suite, files[file].get("dbtype",""), file):
+ if Upload.in_override_p(files[checkfile]["package"], files[checkfile]["component"], suite, files[checkfile].get("dbtype",""), checkfile):
propogate[suite] = 1
else:
nopropogate[suite] = 1
continue
changes["distribution"][suite] = 1
- for file in files.keys():
+ for checkfile in files.keys():
# Check the package is still in the override tables
for suite in changes["distribution"].keys():
- if not Upload.in_override_p(files[file]["package"], files[file]["component"], suite, files[file].get("dbtype",""), file):
- reject("%s is NEW for %s." % (file, suite))
+ if not Upload.in_override_p(files[checkfile]["package"], files[checkfile]["component"], suite, files[checkfile].get("dbtype",""), checkfile):
+ reject("%s is NEW for %s." % (checkfile, suite))
###############################################################################
while prompt.find(answer) == -1:
answer = utils.our_raw_input(prompt)
- m = queue.re_default_answer.match(prompt)
+ m = re_default_answer.match(prompt)
if answer == "":
answer = m.group(1)
answer = answer[:1].upper()
return
# Add the .dsc file to the DB
- for file in files.keys():
- if files[file]["type"] == "dsc":
+ for newfile in files.keys():
+ if files[newfile]["type"] == "dsc":
package = dsc["source"]
version = dsc["version"] # NB: not files[file]["version"], that has no epoch
maintainer = dsc["maintainer"]
changedby_id = database.get_or_set_maintainer_id(changedby)
fingerprint_id = database.get_or_set_fingerprint_id(dsc["fingerprint"])
install_date = time.strftime("%Y-%m-%d")
- filename = files[file]["pool name"] + file
- dsc_component = files[file]["component"]
- dsc_location_id = files[file]["location id"]
+ filename = files[newfile]["pool name"] + newfile
+ dsc_component = files[newfile]["component"]
+ dsc_location_id = files[newfile]["location id"]
if dsc.has_key("dm-upload-allowed") and dsc["dm-upload-allowed"] == "yes":
dm_upload_allowed = "true"
else:
dm_upload_allowed = "false"
- if not files[file].has_key("files id") or not files[file]["files id"]:
- files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id)
+ if not files[newfile].has_key("files id") or not files[newfile]["files id"]:
+ files[newfile]["files id"] = database.set_files_id (filename, files[newfile]["size"], files[newfile]["md5sum"], files[newfile]["sha1sum"], files[newfile]["sha256sum"], dsc_location_id)
projectB.query("INSERT INTO source (source, version, maintainer, changedby, file, install_date, sig_fpr, dm_upload_allowed) VALUES ('%s', '%s', %d, %d, %d, '%s', %s, %s)"
- % (package, version, maintainer_id, changedby_id, files[file]["files id"], install_date, fingerprint_id, dm_upload_allowed))
+ % (package, version, maintainer_id, changedby_id, files[newfile]["files id"], install_date, fingerprint_id, dm_upload_allowed))
for suite in changes["distribution"].keys():
suite_id = database.get_suite_id(suite)
projectB.query("INSERT INTO src_associations (suite, source) VALUES (%d, currval('source_id_seq'))" % (suite_id))
# Add the source files to the DB (files and dsc_files)
- projectB.query("INSERT INTO dsc_files (source, file) VALUES (currval('source_id_seq'), %d)" % (files[file]["files id"]))
+ projectB.query("INSERT INTO dsc_files (source, file) VALUES (currval('source_id_seq'), %d)" % (files[newfile]["files id"]))
for dsc_file in dsc_files.keys():
- filename = files[file]["pool name"] + dsc_file
+ filename = files[newfile]["pool name"] + dsc_file
# If the .orig.tar.gz is already in the pool, it's
# files id is stored in dsc_files by check_dsc().
files_id = dsc_files[dsc_file].get("files id", None)
# Add the .deb files to the DB
- for file in files.keys():
- if files[file]["type"] == "deb":
- package = files[file]["package"]
- version = files[file]["version"]
- maintainer = files[file]["maintainer"]
+ for newfile in files.keys():
+ if files[newfile]["type"] == "deb":
+ package = files[newfile]["package"]
+ version = files[newfile]["version"]
+ maintainer = files[newfile]["maintainer"]
maintainer = maintainer.replace("'", "\\'")
maintainer_id = database.get_or_set_maintainer_id(maintainer)
fingerprint_id = database.get_or_set_fingerprint_id(changes["fingerprint"])
- architecture = files[file]["architecture"]
+ architecture = files[newfile]["architecture"]
architecture_id = database.get_architecture_id (architecture)
- type = files[file]["dbtype"]
- source = files[file]["source package"]
- source_version = files[file]["source version"]
- filename = files[file]["pool name"] + file
- if not files[file].has_key("location id") or not files[file]["location id"]:
- files[file]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[file]["component"],utils.where_am_i())
- if not files[file].has_key("files id") or not files[file]["files id"]:
- files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], files[file]["location id"])
+ filetype = files[newfile]["dbtype"]
+ source = files[newfile]["source package"]
+ source_version = files[newfile]["source version"]
+ filename = files[newfile]["pool name"] + newfile
+ if not files[newfile].has_key("location id") or not files[newfile]["location id"]:
+ files[newfile]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[newfile]["component"],utils.where_am_i())
+ if not files[newfile].has_key("files id") or not files[newfile]["files id"]:
+ files[newfile]["files id"] = database.set_files_id (filename, files[newfile]["size"], files[newfile]["md5sum"], files[newfile]["sha1sum"], files[newfile]["sha256sum"], files[newfile]["location id"])
source_id = database.get_source_id (source, source_version)
if source_id:
projectB.query("INSERT INTO binaries (package, version, maintainer, source, architecture, file, type, sig_fpr) VALUES ('%s', '%s', %d, %d, %d, %d, '%s', %d)"
- % (package, version, maintainer_id, source_id, architecture_id, files[file]["files id"], type, fingerprint_id))
+ % (package, version, maintainer_id, source_id, architecture_id, files[newfile]["files id"], filetype, fingerprint_id))
else:
- raise NoSourceFieldError, "Unable to find a source id for %s (%s), %s, file %s, type %s, signed by %s" % (package, version, architecture, file, type, sig_fpr)
+ raise NoSourceFieldError, "Unable to find a source id for %s (%s), %s, file %s, type %s, signed by %s" % (package, version, architecture, newfile, filetype, changes["fingerprint"])
for suite in changes["distribution"].keys():
suite_id = database.get_suite_id(suite)
projectB.query("INSERT INTO bin_associations (suite, bin) VALUES (%d, currval('binaries_id_seq'))" % (suite_id))
+ # insert contents into the database
+ contents = utils.generate_contents_information(newfile)
+ q = projectB.query("SELECT currval('binaries_id_seq')")
+ bin_id = int(q.getresult()[0][0])
+ for contentpath in contents:
+ database.insert_content_path(bin_id, contentpath)
+
# If the .orig.tar.gz is in a legacy directory we need to poolify
# it, so that apt-get source (and anything else that goes by the
# "Directory:" field in the Sources.gz file) works.
continue
# First move the files to the new location
legacy_filename = qid["path"] + qid["filename"]
- pool_location = utils.poolify (changes["source"], files[file]["component"])
+ pool_location = utils.poolify (changes["source"], files[newfile]["component"])
pool_filename = pool_location + os.path.basename(qid["filename"])
destination = Cnf["Dir::Pool"] + pool_location
utils.move(legacy_filename, destination)
projectB.query("UPDATE dsc_files SET file = %s WHERE source = %s AND file = %s" % (new_files_id, database.get_source_id(changes["source"], changes["version"]), orig_tar_id))
# Install the files into the pool
- for file in files.keys():
- destination = Cnf["Dir::Pool"] + files[file]["pool name"] + file
- utils.move(file, destination)
- Logger.log(["installed", file, files[file]["type"], files[file]["size"], files[file]["architecture"]])
- install_bytes += float(files[file]["size"])
+ for newfile in files.keys():
+ destination = Cnf["Dir::Pool"] + files[newfile]["pool name"] + newfile
+ utils.move(newfile, destination)
+ Logger.log(["installed", newfile, files[newfile]["type"], files[newfile]["size"], files[newfile]["architecture"]])
+ install_bytes += float(files[newfile]["size"])
# Copy the .changes file across for suite which need it.
copy_changes = {}
utils.copy(pkg.changes_file, Cnf["Dir::Root"] + dest)
for dest in copy_dot_dak.keys():
utils.copy(Upload.pkg.changes_file[:-8]+".dak", dest)
-
projectB.query("COMMIT WORK")
# Move the .changes into the 'done' directory
dest_dir = Cnf["Dir::QueueBuild"]
if Cnf.FindB("Dinstall::SecurityQueueBuild"):
dest_dir = os.path.join(dest_dir, suite)
- for file in files.keys():
- dest = os.path.join(dest_dir, file)
+ for newfile in files.keys():
+ dest = os.path.join(dest_dir, newfile)
# Remove it from the list of packages for later processing by apt-ftparchive
projectB.query("UPDATE queue_build SET in_queue = 'f', last_used = '%s' WHERE filename = '%s' AND suite = %s" % (now_date, dest, suite_id))
if not Cnf.FindB("Dinstall::SecurityQueueBuild"):
# Update the symlink to point to the new location in the pool
- pool_location = utils.poolify (changes["source"], files[file]["component"])
- src = os.path.join(Cnf["Dir::Pool"], pool_location, os.path.basename(file))
+ pool_location = utils.poolify (changes["source"], files[newfile]["component"])
+ src = os.path.join(Cnf["Dir::Pool"], pool_location, os.path.basename(newfile))
if os.path.islink(dest):
os.unlink(dest)
os.symlink(src, dest)
projectB.query("BEGIN WORK")
# Add the source to stable (and remove it from proposed-updates)
- for file in files.keys():
- if files[file]["type"] == "dsc":
+ for newfile in files.keys():
+ if files[newfile]["type"] == "dsc":
package = dsc["source"]
version = dsc["version"]; # NB: not files[file]["version"], that has no epoch
q = projectB.query("SELECT id FROM source WHERE source = '%s' AND version = '%s'" % (package, version))
projectB.query("INSERT INTO src_associations (suite, source) VALUES ('%s', '%s')" % (suite_id, source_id))
# Add the binaries to stable (and remove it/them from proposed-updates)
- for file in files.keys():
- if files[file]["type"] == "deb":
- package = files[file]["package"]
- version = files[file]["version"]
- architecture = files[file]["architecture"]
+ for newfile in files.keys():
+ if files[newfile]["type"] == "deb":
+ package = files[newfile]["package"]
+ version = files[newfile]["version"]
+ architecture = files[newfile]["architecture"]
q = projectB.query("SELECT b.id FROM binaries b, architecture a WHERE b.package = '%s' AND b.version = '%s' AND (a.arch_string = '%s' OR a.arch_string = 'all') AND b.architecture = a.id" % (package, version, architecture))
ql = q.getresult()
if not ql:
os.unlink (new_changelog_filename)
new_changelog = utils.open_file(new_changelog_filename, 'w')
- for file in files.keys():
- if files[file]["type"] == "deb":
- new_changelog.write("stable/%s/binary-%s/%s\n" % (files[file]["component"], files[file]["architecture"], file))
- elif utils.re_issource.match(file):
- new_changelog.write("stable/%s/source/%s\n" % (files[file]["component"], file))
+ for newfile in files.keys():
+ if files[newfile]["type"] == "deb":
+ new_changelog.write("stable/%s/binary-%s/%s\n" % (files[newfile]["component"], files[newfile]["architecture"], newfile))
+ elif re_issource.match(newfile):
+ new_changelog.write("stable/%s/source/%s\n" % (files[newfile]["component"], newfile))
else:
- new_changelog.write("%s\n" % (file))
- chop_changes = queue.re_fdnic.sub("\n", changes["changes"])
+ new_changelog.write("%s\n" % (newfile))
+ chop_changes = re_fdnic.sub("\n", changes["changes"])
new_changelog.write(chop_changes + '\n\n')
if os.access(changelog_filename, os.R_OK) != 0:
changelog = utils.open_file(changelog_filename)
#!/usr/bin/env python
- # Debian Archive Kit Database Update Script
+ """ Database Update Main Script """
# Copyright (C) 2008 Michael Casadevall <mcasadevall@debian.org>
# This program is free software; you can redistribute it and/or modify
import psycopg2, sys, fcntl, os
import apt_pkg
import time
+ import errno
from daklib import database
from daklib import utils
Cnf = None
projectB = None
-required_database_schema = 3
+required_database_schema = 4
################################################################################
################################################################################
def update_db_to_zero(self):
- # This function will attempt to update a pre-zero database schema to zero
+ """ This function will attempt to update a pre-zero database schema to zero """
# First, do the sure thing, and create the configuration table
try:
name TEXT UNIQUE NOT NULL,
value TEXT
);""")
- c.execute("INSERT INTO config VALUES ( nextval('config_id_seq'), 'db_revision', '0')");
+ c.execute("INSERT INTO config VALUES ( nextval('config_id_seq'), 'db_revision', '0')")
self.db.commit()
except psycopg2.ProgrammingError:
try:
c = self.db.cursor()
- q = c.execute("SELECT value FROM config WHERE name = 'db_revision';");
+ q = c.execute("SELECT value FROM config WHERE name = 'db_revision';")
return c.fetchone()[0]
except psycopg2.ProgrammingError:
#!/usr/bin/env python
- # DB access fucntions
- # Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
+ """ DB access functions
+ @group readonly: get_suite_id, get_section_id, get_priority_id, get_override_type_id,
+ get_architecture_id, get_archive_id, get_component_id, get_location_id,
+ get_source_id, get_suite_version, get_files_id, get_maintainer, get_suites
+ @group read/write: get_or_set*, set_files_id
+
+ @contact: Debian FTP Master <ftpmaster@debian.org>
+ @copyright: 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
+ @copyright: 2009 Joerg Jaspert <joerg@debian.org>
+ @license: GNU General Public License version 2 or later
+ """
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
################################################################################
- import os, sys, time, types, apt_pkg
+ import sys
+ import time
+ import types
################################################################################
- Cnf = None
- projectB = None
- suite_id_cache = {}
- section_id_cache = {}
- priority_id_cache = {}
- override_type_id_cache = {}
- architecture_id_cache = {}
- archive_id_cache = {}
- component_id_cache = {}
- location_id_cache = {}
- maintainer_id_cache = {}
- keyring_id_cache = {}
- source_id_cache = {}
- files_id_cache = {}
- maintainer_cache = {}
- fingerprint_id_cache = {}
- queue_id_cache = {}
- uid_id_cache = {}
- suite_version_cache = {}
+ Cnf = None #: Configuration, apt_pkg.Configuration
+ projectB = None #: database connection, pgobject
+ suite_id_cache = {} #: cache for suites
+ section_id_cache = {} #: cache for sections
+ priority_id_cache = {} #: cache for priorities
+ override_type_id_cache = {} #: cache for overrides
+ architecture_id_cache = {} #: cache for architectures
+ archive_id_cache = {} #: cache for archives
+ component_id_cache = {} #: cache for components
+ location_id_cache = {} #: cache for locations
+ maintainer_id_cache = {} #: cache for maintainers
+ keyring_id_cache = {} #: cache for keyrings
+ source_id_cache = {} #: cache for sources
+ files_id_cache = {} #: cache for files
+ maintainer_cache = {} #: cache for maintainer names
+ fingerprint_id_cache = {} #: cache for fingerprints
+ queue_id_cache = {} #: cache for queues
+ uid_id_cache = {} #: cache for uids
+ suite_version_cache = {} #: cache for suite_versions (packages)
+suite_bin_version_cache = {}
+content_path_id_cache = {}
+content_file_id_cache = {}
+insert_contents_file_cache = {}
+cache_preloaded = False
################################################################################
def init (config, sql):
+ """
+ database module init.
+
+ @type config: apt_pkg.Configuration
+ @param config: apt config, see U{http://apt.alioth.debian.org/python-apt-doc/apt_pkg/cache.html#Configuration}
+
+ @type sql: pgobject
+ @param sql: database connection
+
+ """
global Cnf, projectB
Cnf = config
projectB = sql
- def do_query(q):
- sys.stderr.write("query: \"%s\" ... " % (q))
+ def do_query(query):
+ """
+ Executes a database query. Writes statistics / timing to stderr.
+
+ @type query: string
+ @param query: database query string, passed unmodified
+
+ @return: db result
+
+ @warning: The query is passed B{unmodified}, so be careful what you use this for.
+ """
+ sys.stderr.write("query: \"%s\" ... " % (query))
before = time.time()
- r = projectB.query(q)
+ r = projectB.query(query)
time_diff = time.time()-before
sys.stderr.write("took %.3f seconds.\n" % (time_diff))
if type(r) is int:
################################################################################
def get_suite_id (suite):
+ """
+ Returns database id for given C{suite}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type suite: string
+ @param suite: The name of the suite
+
+ @rtype: int
+ @return: the database id for the given suite
+
+ """
global suite_id_cache
if suite_id_cache.has_key(suite):
return suite_id
def get_section_id (section):
+ """
+ Returns database id for given C{section}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type section: string
+ @param section: The name of the section
+
+ @rtype: int
+ @return: the database id for the given section
+
+ """
global section_id_cache
if section_id_cache.has_key(section):
return section_id
def get_priority_id (priority):
+ """
+ Returns database id for given C{priority}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type priority: string
+ @param priority: The name of the priority
+
+ @rtype: int
+ @return: the database id for the given priority
+
+ """
global priority_id_cache
if priority_id_cache.has_key(priority):
return priority_id
def get_override_type_id (type):
+ """
+ Returns database id for given override C{type}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type type: string
+ @param type: The name of the override type
+
+ @rtype: int
+ @return: the database id for the given override type
+
+ """
global override_type_id_cache
if override_type_id_cache.has_key(type):
return override_type_id
def get_architecture_id (architecture):
+ """
+ Returns database id for given C{architecture}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type architecture: string
+ @param architecture: The name of the architecture
+
+ @rtype: int
+ @return: the database id for the given architecture
+
+ """
global architecture_id_cache
if architecture_id_cache.has_key(architecture):
return architecture_id
def get_archive_id (archive):
+ """
+ Returns database id for given C{archive}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type archive: string
+ @param archive: The name of the archive
+
+ @rtype: int
+ @return: the database id for the given archive
+
+ """
global archive_id_cache
archive = archive.lower()
return archive_id
def get_component_id (component):
+ """
+ Returns database id for given C{component}.
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type component: string
+ @param component: The name of the component
+
+ @rtype: int
+ @return: the database id for the given component
+
+ """
global component_id_cache
component = component.lower()
return component_id
def get_location_id (location, component, archive):
+ """
+ Returns database id for the location behind the given combination of
+ - B{location} - the path of the location, eg. I{/srv/ftp.debian.org/ftp/pool/}
+ - B{component} - the name of the component
+ - B{archive} - the name of the archive
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type location: string
+ @param location: the path of the location
+
+ @type component: string
+ @param component: the name of the component
+
+ @type archive: string
+ @param archive: the name of the archive
+
+ @rtype: int
+ @return: the database id for the location
+
+ """
global location_id_cache
cache_key = location + '_' + component + '_' + archive
return location_id
def get_source_id (source, version):
+ """
+ Returns database id for the combination of C{source} and C{version}
+ - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc}
+ - B{version}
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type source: string
+ @param source: source package name
+
+ @type version: string
+ @param version: the source version
+
+ @rtype: int
+ @return: the database id for the source
+
+ """
global source_id_cache
cache_key = source + '_' + version + '_'
return source_id
- def get_suite_version(source, suite, arch):
+ def get_suite_version(source, suite):
+ """
+ Returns database id for a combination of C{source} and C{suite}.
+
+ - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc}
+ - B{suite} - a suite name, eg. I{unstable}
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type source: string
+ @param source: source package name
+
+ @type suite: string
+ @param suite: the suite name
+
+ @rtype: string
+ @return: the version for I{source} in I{suite}
+
+ """
+
global suite_version_cache
cache_key = "%s_%s" % (source, suite)
return version
+def get_latest_binary_version_id(binary, section, suite, arch):
+ global suite_bin_version_cache
+ cache_key = "%s_%s_%s_%s" % (binary, section, suite, arch)
+ cache_key_all = "%s_%s_%s_%s" % (binary, section, suite, get_architecture_id("all"))
+
+ # Check for the cache hit for its arch, then arch all
+ if suite_bin_version_cache.has_key(cache_key):
+ return suite_bin_version_cache[cache_key]
+ if suite_bin_version_cache.has_key(cache_key_all):
+ return suite_bin_version_cache[cache_key_all]
+ if cache_preloaded == True:
+ return # package does not exist
+
+ q = projectB.query("SELECT DISTINCT b.id FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section)))
+
+ if not q.getresult():
+ return False
+
+ highest_bid = q.getresult()[0][0]
+
+ suite_bin_version_cache[cache_key] = highest_bid
+ return highest_bid
+
+def preload_binary_id_cache():
+ global suite_bin_version_cache, cache_preloaded
+
+ # Get suite info
+ q = projectB.query("SELECT id FROM suite")
+ suites = q.getresult()
+
+ # Get arch mappings
+ q = projectB.query("SELECT id FROM architecture")
+ arches = q.getresult()
+
+ for suite in suites:
+ for arch in arches:
+ q = projectB.query("SELECT DISTINCT b.id, b.package, o.section FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.architecture = '%d' AND ba.suite = '%d'" % (int(arch[0]), int(suite[0])))
+
+ for bi in q.getresult():
+ cache_key = "%s_%s_%s_%s" % (bi[1], bi[2], suite[0], arch[0])
+ suite_bin_version_cache[cache_key] = int(bi[0])
+
+ cache_preloaded = True
+
################################################################################
def get_or_set_maintainer_id (maintainer):
+ """
+ If C{maintainer} does not have an entry in the maintainer table yet, create one
+ and return the new id.
+ If C{maintainer} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type maintainer: string
+ @param maintainer: the maintainer name
+
+ @rtype: int
+ @return: the database id for the maintainer
+
+ """
global maintainer_id_cache
if maintainer_id_cache.has_key(maintainer):
################################################################################
def get_or_set_keyring_id (keyring):
+ """
+ If C{keyring} does not have an entry in the C{keyrings} table yet, create one
+ and return the new id.
+ If C{keyring} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type keyring: string
+ @param keyring: the keyring name
+
+ @rtype: int
+ @return: the database id for the keyring
+
+ """
global keyring_id_cache
if keyring_id_cache.has_key(keyring):
################################################################################
def get_or_set_uid_id (uid):
+ """
+ If C{uid} does not have an entry in the uid table yet, create one
+ and return the new id.
+ If C{uid} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type uid: string
+ @param uid: the uid.
+
+ @rtype: int
+ @return: the database id for the uid
+
+ """
+
global uid_id_cache
if uid_id_cache.has_key(uid):
################################################################################
def get_or_set_fingerprint_id (fingerprint):
+ """
+ If C{fingerprint} does not have an entry in the fingerprint table yet, create one
+ and return the new id.
+ If C{fingerprint} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type fingerprint: string
+ @param fingerprint: the fingerprint
+
+ @rtype: int
+ @return: the database id for the fingerprint
+
+ """
global fingerprint_id_cache
if fingerprint_id_cache.has_key(fingerprint):
################################################################################
def get_files_id (filename, size, md5sum, location_id):
+ """
+ Returns -1, -2 or the file_id for filename, if its C{size} and C{md5sum} match an
+ existing copy.
+
+ The database is queried using the C{filename} and C{location_id}. If a file does exist
+ at that location, the existing size and md5sum are checked against the provided
+ parameters. A size or checksum mismatch returns -2. If more than one entry is
+ found within the database, a -1 is returned, no result returns None, otherwise
+ the file id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type filename: string
+ @param filename: the filename of the file to check against the DB
+
+ @type size: int
+ @param size: the size of the file to check against the DB
+
+ @type md5sum: string
+ @param md5sum: the md5sum of the file to check against the DB
+
+ @type location_id: int
+ @param location_id: the id of the location as returned by L{get_location_id}
+
+ @rtype: int / None
+ @return: Various return values are possible:
+ - -2: size/checksum error
+ - -1: more than one file found in database
+ - None: no file found in database
+ - int: file id
+
+ """
global files_id_cache
cache_key = "%s_%d" % (filename, location_id)
################################################################################
def get_or_set_queue_id (queue):
+ """
+ If C{queue} does not have an entry in the queue table yet, create one
+ and return the new id.
+ If C{queue} already has an entry, simply return the existing id.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type queue: string
+ @param queue: the queue name (no full path)
+
+ @rtype: int
+ @return: the database id for the queue
+
+ """
global queue_id_cache
if queue_id_cache.has_key(queue):
################################################################################
def set_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id):
+ """
+ Insert a new entry into the files table and return its id.
+
+ @type filename: string
+ @param filename: the filename
+
+ @type size: int
+ @param size: the size in bytes
+
+ @type md5sum: string
+ @param md5sum: md5sum of the file
+
+ @type sha1sum: string
+ @param sha1sum: sha1sum of the file
+
+ @type sha256sum: string
+ @param sha256sum: sha256sum of the file
+
+ @type location_id: int
+ @param location_id: the id of the location as returned by L{get_location_id}
+
+ @rtype: int
+ @return: the database id for the new file
+
+ """
global files_id_cache
projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', '%s', '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum, location_id))
################################################################################
def get_maintainer (maintainer_id):
+ """
+ Return the name of the maintainer behind C{maintainer_id}.
+
+ Results are kept in a cache during runtime to minimize database queries.
+
+ @type maintainer_id: int
+ @param maintainer_id: the id of the maintainer, eg. from L{get_or_set_maintainer_id}
+
+ @rtype: string
+ @return: the name of the maintainer
+
+ """
global maintainer_cache
if not maintainer_cache.has_key(maintainer_id):
################################################################################
def get_suites(pkgname, src=False):
    """
    Return the suites in which C{pkgname} can be found. If C{src} is True query for source
    package, else binary package.

    @type pkgname: string
    @param pkgname: name of the package

    @type src: bool
    @param src: if True look for source packages, False (default) looks for binary.

    @rtype: list
    @return: list of suites, or empty list if no match

    """
    # Double any single quotes so the package name cannot terminate the SQL
    # string literal (pg's query() offers no parameter binding here).
    pkgname = pkgname.replace("'", "''")

    if src:
        sql = """
        SELECT suite_name
        FROM source,
             src_associations,
             suite
        WHERE source.id = src_associations.source
        AND source.source = '%s'
        AND src_associations.suite = suite.id
        """ % (pkgname)
    else:
        sql = """
        SELECT suite_name
        FROM binaries,
             bin_associations,
             suite
        WHERE binaries.id = bin_associations.bin
        AND package = '%s'
        AND bin_associations.suite = suite.id
        """ % (pkgname)

    q = projectB.query(sql)
    return map(lambda x: x[0], q.getresult())
+
+################################################################################
+
def get_or_set_contents_file_id(file):
    """
    If C{file} does not have an entry in the content_file_names table yet,
    create one and return the new id.
    If C{file} already has an entry, simply return the existing id.

    Results are kept in a cache during runtime to minimize database queries.

    @type file: string
    @param file: the filename (basename, no path component)

    @rtype: int
    @return: the database id for the filename
    """
    global content_file_id_cache

    if not content_file_id_cache.has_key(file):
        # Filenames inside a .deb may legally contain single quotes, which
        # would otherwise terminate the SQL string literal -- double them.
        safe_file = file.replace("'", "''")
        sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % safe_file
        q = projectB.query(sql_select)
        if not q.getresult():
            # since this can be called within a transaction, we can't use currval
            q = projectB.query("INSERT INTO content_file_names VALUES (DEFAULT, '%s') RETURNING id" % (safe_file))
        content_file_id_cache[file] = int(q.getresult()[0][0])
    return content_file_id_cache[file]
+
+################################################################################
+
def get_or_set_contents_path_id(path):
    """
    If C{path} does not have an entry in the content_file_paths table yet,
    create one and return the new id.
    If C{path} already has an entry, simply return the existing id.

    Results are kept in a cache during runtime to minimize database queries.

    @type path: string
    @param path: the directory path (no trailing filename)

    @rtype: int
    @return: the database id for the path
    """
    global content_path_id_cache

    if not content_path_id_cache.has_key(path):
        # Paths inside a .deb may legally contain single quotes, which
        # would otherwise terminate the SQL string literal -- double them.
        safe_path = path.replace("'", "''")
        sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % safe_path
        q = projectB.query(sql_select)
        if not q.getresult():
            # since this can be called within a transaction, we can't use currval
            q = projectB.query("INSERT INTO content_file_paths VALUES (DEFAULT, '%s') RETURNING id" % (safe_path))
        content_path_id_cache[path] = int(q.getresult()[0][0])
    return content_path_id_cache[path]
+
+################################################################################
+
def insert_content_path(bin_id, fullpath):
    """
    Record that binary package C{bin_id} ships the file C{fullpath}, creating
    the filename and path entries as needed. Does nothing if the association
    is already recorded.

    Results are kept in a cache during runtime to minimize database queries.

    @type bin_id: int
    @param bin_id: the database id of the binary package

    @type fullpath: string
    @param fullpath: the full path of the file within the package
    """
    global insert_contents_file_cache
    cache_key = "%s_%s" % (bin_id, fullpath)

    # have we seen this contents before?
    # probably only revelant during package import
    if insert_contents_file_cache.has_key(cache_key):
        return

    # split the path into basename, and pathname
    (path, file) = os.path.split(fullpath)

    # Get the necessary IDs ...
    file_id = get_or_set_contents_file_id(file)
    path_id = get_or_set_contents_path_id(path)

    # Remember the key so we skip the duplicate-row query next time around.
    # (Previously the cache was checked but never filled, so it never hit.)
    insert_contents_file_cache[cache_key] = True

    # Determine if we're inserting a duplicate row
    q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id))
    if q.getresult():
        # Yes we are, return without doing the insert
        return

    # Put them into content_associations; coerce bin_id to int here too,
    # matching the SELECT above.
    projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (int(bin_id), path_id, file_id))
    return
#!/usr/bin/env python
# vim:set et ts=4 sw=4:
- # Utility functions
- # Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup <james@nocrew.org>
+ """Utility functions
- ################################################################################
+ @contact: Debian FTP Master <ftpmaster@debian.org>
+ @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup <james@nocrew.org>
+ @license: GNU General Public License version 2 or later
+ """
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- ################################################################################
-
- import codecs, commands, email.Header, os, pwd, re, select, socket, shutil, \
- sys, tempfile, traceback, stat
+ import codecs
+ import commands
+ import email.Header
+ import os
+ import pwd
+ import select
+ import socket
+ import shutil
+ import sys
+ import tempfile
+ import traceback
+ import stat
import apt_pkg
import database
import time
from dak_exceptions import *
+ from regexes import re_html_escaping, html_escaping, re_single_line_field, \
+ re_multi_line_field, re_srchasver, re_verwithext, \
+ re_parse_maintainer, re_taint_free, re_gpg_uid
################################################################################
- re_comments = re.compile(r"\#.*")
- re_no_epoch = re.compile(r"^\d+\:")
- re_no_revision = re.compile(r"-[^-]+$")
- re_arch_from_filename = re.compile(r"/binary-[^/]+/")
- re_extract_src_version = re.compile (r"(\S+)\s*\((.*)\)")
- re_isadeb = re.compile (r"(.+?)_(.+?)_(.+)\.u?deb$")
- re_issource = re.compile (r"(.+)_(.+?)\.(orig\.tar\.gz|diff\.gz|tar\.gz|dsc)$")
-
- re_single_line_field = re.compile(r"^(\S*)\s*:\s*(.*)")
- re_multi_line_field = re.compile(r"^\s(.*)")
- re_taint_free = re.compile(r"^[-+~/\.\w]+$")
-
- re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\>]+)\>")
- re_gpg_uid = re.compile('^uid.*<([^>]*)>')
-
- re_srchasver = re.compile(r"^(\S+)\s+\((\S+)\)$")
- re_verwithext = re.compile(r"^(\d+)(?:\.(\d+))(?:\s+\((\S+)\))?$")
+ default_config = "/etc/dak/dak.conf" #: default dak config, defines host properties
+ default_apt_config = "/etc/dak/apt.conf" #: default apt config, not normally used
- re_srchasver = re.compile(r"^(\S+)\s+\((\S+)\)$")
-
- html_escaping = {'"':'"', '&':'&', '<':'<', '>':'>'}
- re_html_escaping = re.compile('|'.join(map(re.escape, html_escaping.keys())))
-
- default_config = "/etc/dak/dak.conf"
- default_apt_config = "/etc/dak/apt.conf"
-
- alias_cache = None
- key_uid_email_cache = {}
+ alias_cache = None #: Cache for email alias checks
+ key_uid_email_cache = {} #: Cache for email addresses from gpg key uids
# (hashname, function, earliest_changes_version)
known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
- ("sha256", apt_pkg.sha256sum, (1, 8))]
+ ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
################################################################################
def html_escape(s):
    """ Escape html chars """
    def _replacement(match):
        # Look up the escaped form of the matched character.
        return html_escaping.get(match.group(0))
    return re_html_escaping.sub(_replacement, s)
################################################################################
def open_file(filename, mode='r'):
+ """
+ Open C{file}, return fileobject.
+
+ @type filename: string
+ @param filename: path/filename to open
+
+ @type mode: string
+ @param mode: open mode
+
+ @rtype: fileobject
+ @return: open fileobject
+
+ @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
+
+ """
try:
f = open(filename, mode)
except IOError:
################################################################################
def parse_changes(filename, signing_rules=0):
- """Parses a changes file and returns a dictionary where each field is a
- key. The mandatory first argument is the filename of the .changes
- file.
+ """
+ Parses a changes file and returns a dictionary where each field is a
+ key. The mandatory first argument is the filename of the .changes
+ file.
- signing_rules is an optional argument:
+ signing_rules is an optional argument:
- o If signing_rules == -1, no signature is required.
- o If signing_rules == 0 (the default), a signature is required.
- o If signing_rules == 1, it turns on the same strict format checking
- as dpkg-source.
+ - If signing_rules == -1, no signature is required.
+ - If signing_rules == 0 (the default), a signature is required.
+ - If signing_rules == 1, it turns on the same strict format checking
+ as dpkg-source.
- The rules for (signing_rules == 1)-mode are:
+ The rules for (signing_rules == 1)-mode are:
- o The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
- followed by any PGP header data and must end with a blank line.
+ - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
+ followed by any PGP header data and must end with a blank line.
- o The data section must end with a blank line and must be followed by
- "-----BEGIN PGP SIGNATURE-----".
- """
+ - The data section must end with a blank line and must be followed by
+ "-----BEGIN PGP SIGNATURE-----".
+ """
changes_in = open_file(filename)
content = changes_in.read()
################################################################################
def create_hash(where, files, hashname, hashfunc):
- """create_hash extends the passed files dict with the given hash by
+ """
+ create_hash extends the passed files dict with the given hash by
iterating over all files on disk and passing them to the hashing
- function given."""
+ function given.
+ """
rejmsg = []
for f in files.keys():
################################################################################
def check_hash(where, files, hashname, hashfunc):
- """check_hash checks the given hash in the files dict against the actual
+ """
+ check_hash checks the given hash in the files dict against the actual
files on disk. The hash values need to be present consistently in
- all file entries. It does not modify its input in any way."""
+ all file entries. It does not modify its input in any way.
+ """
rejmsg = []
for f in files.keys():
################################################################################
def check_size(where, files):
- """check_size checks the file sizes in the passed files dict against the
- files on disk."""
+ """
+ check_size checks the file sizes in the passed files dict against the
+ files on disk.
+ """
rejmsg = []
for f in files.keys():
################################################################################
def check_hash_fields(what, manifest):
- """check_hash_fields ensures that there are no checksum fields in the
- given dict that we do not know about."""
+ """
+ check_hash_fields ensures that there are no checksum fields in the
+ given dict that we do not know about.
+ """
rejmsg = []
hashes = map(lambda x: x[0], known_hashes)
# access the checksums easily.
def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
- """ensure_dsc_hashes' task is to ensure that each and every *present* hash
+ """
+ ensure_dsc_hashes' task is to ensure that each and every *present* hash
in the dsc is correct, i.e. identical to the changes file and if necessary
- the pool. The latter task is delegated to check_hash."""
+ the pool. The latter task is delegated to check_hash.
+ """
rejmsg = []
if not dsc.has_key('Checksums-%s' % (hashname,)):
field = 'checksums-%s' % hashname
if not field in manifest:
return rejmsg
- input = manifest[field]
- for line in input.split('\n'):
+ for line in manifest[field].split('\n'):
if not line:
break
- hash, size, file = line.strip().split(' ')
- if not files.has_key(file):
+ checksum, size, checkfile = line.strip().split(' ')
+ if not files.has_key(checkfile):
# TODO: check for the file's entry in the original files dict, not
# the one modified by (auto)byhand and other weird stuff
# rejmsg.append("%s: not present in files but in checksums-%s in %s" %
# (file, hashname, where))
continue
- if not files[file]["size"] == size:
+ if not files[checkfile]["size"] == size:
rejmsg.append("%s: size differs for files and checksums-%s entry "\
- "in %s" % (file, hashname, where))
+ "in %s" % (checkfile, hashname, where))
continue
- files[file][hash_key(hashname)] = hash
+ files[checkfile][hash_key(hashname)] = checksum
for f in files.keys():
if not files[f].has_key(hash_key(hashname)):
- rejmsg.append("%s: no entry in checksums-%s in %s" % (file,
+ rejmsg.append("%s: no entry in checksums-%s in %s" % (checkfile,
hashname, where))
return rejmsg
################################################################################
def force_to_utf8(s):
- """Forces a string to UTF-8. If the string isn't already UTF-8,
- it's assumed to be ISO-8859-1."""
+ """
+ Forces a string to UTF-8. If the string isn't already UTF-8,
+ it's assumed to be ISO-8859-1.
+ """
try:
unicode(s, 'utf-8')
return s
return latin1_s.encode('utf-8')
def rfc2047_encode(s):
- """Encodes a (header) string per RFC2047 if necessary. If the
- string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1."""
+ """
+ Encodes a (header) string per RFC2047 if necessary. If the
+ string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1.
+ """
try:
codecs.lookup('ascii')[1](s)
return s
# incompatible!'
def fix_maintainer (maintainer):
- """Parses a Maintainer or Changed-By field and returns:
- (1) an RFC822 compatible version,
- (2) an RFC2047 compatible version,
- (3) the name
- (4) the email
-
- The name is forced to UTF-8 for both (1) and (3). If the name field
- contains '.' or ',' (as allowed by Debian policy), (1) and (2) are
- switched to 'email (name)' format."""
+ """
+ Parses a Maintainer or Changed-By field and returns:
+ 1. an RFC822 compatible version,
+ 2. an RFC2047 compatible version,
+ 3. the name
+ 4. the email
+
+ The name is forced to UTF-8 for both 1. and 3.. If the name field
+ contains '.' or ',' (as allowed by Debian policy), 1. and 2. are
+ switched to 'email (name)' format.
+
+ """
maintainer = maintainer.strip()
if not maintainer:
return ('', '', '', '')
################################################################################
- # sendmail wrapper, takes _either_ a message string or a file as arguments
def send_mail (message, filename=""):
- # If we've been passed a string dump it into a temporary file
+ """sendmail wrapper, takes _either_ a message string or a file as arguments"""
+
+ # If we've been passed a string dump it into a temporary file
if message:
- filename = tempfile.mktemp()
- fd = os.open(filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, 0700)
+ (fd, filename) = tempfile.mkstemp()
os.write (fd, message)
os.close (fd)
################################################################################
- # Perform a substition of template
def TemplateSubst(map, filename):
    """ Perform a substitution on a template file: every occurrence of a key
    of C{map} in the file's text is replaced by the corresponding value. """
    templatefile = open_file(filename)
    template = templatefile.read()
    for placeholder, replacement in map.items():
        template = template.replace(placeholder, replacement)
    templatefile.close()
    return template
################################################################################
for j in o.split():
changes["architecture"][j] = 1
- # Sort by source name, source version, 'have source', and then by filename
def changes_compare (a, b):
+ """ Sort by source name, source version, 'have source', and then by filename """
try:
a_changes = parse_changes(a)
except:
################################################################################
def result_join (original, sep = '\t'):
    """
    Join the elements of C{original} with C{sep}, substituting the empty
    string for None entries.

    @type original: sequence
    @param original: values to join; may contain None

    @type sep: string
    @param sep: the separator (default: tab)

    @rtype: string
    @return: the joined string
    """
    # Comprehension instead of the old index/xrange append loop: clearer,
    # and works under both Python 2 and Python 3.
    return sep.join([("" if entry is None else entry) for entry in original])
################################################################################
################################################################################
def validate_changes_file_arg(filename, require_changes=1):
- """'filename' is either a .changes or .dak file. If 'filename' is a
- .dak file, it's changed to be the corresponding .changes file. The
- function then checks if the .changes file a) exists and b) is
- readable and returns the .changes filename if so. If there's a
- problem, the next action depends on the option 'require_changes'
- argument:
-
- o If 'require_changes' == -1, errors are ignored and the .changes
- filename is returned.
- o If 'require_changes' == 0, a warning is given and 'None' is returned.
- o If 'require_changes' == 1, a fatal error is raised.
- """
+ """
+ 'filename' is either a .changes or .dak file. If 'filename' is a
+ .dak file, it's changed to be the corresponding .changes file. The
+ function then checks if the .changes file a) exists and b) is
+ readable and returns the .changes filename if so. If there's a
+ problem, the next action depends on the option 'require_changes'
+ argument:
+
+ - If 'require_changes' == -1, errors are ignored and the .changes
+ filename is returned.
+ - If 'require_changes' == 0, a warning is given and 'None' is returned.
+ - If 'require_changes' == 1, a fatal error is raised.
+
+ """
error = None
orig_filename = filename
################################################################################
- # Handle -a, -c and -s arguments; returns them as SQL constraints
def parse_args(Options):
+ """ Handle -a, -c and -s arguments; returns them as SQL constraints """
# Process suite
if Options["Suite"]:
suite_ids_list = []
################################################################################
- # Function for use in sorting lists of architectures.
- # Sorts normally except that 'source' dominates all others.
-
def arch_compare_sw (a, b):
+ """
+ Function for use in sorting lists of architectures.
+
+ Sorts normally except that 'source' dominates all others.
+ """
+
if a == "source" and b == "source":
return 0
elif a == "source":
################################################################################
- # Split command line arguments which can be separated by either commas
- # or whitespace. If dwim is set, it will complain about string ending
- # in comma since this usually means someone did 'dak ls -a i386, m68k
- # foo' or something and the inevitable confusion resulting from 'm68k'
- # being treated as an argument is undesirable.
-
def split_args (s, dwim=1):
+ """
+ Split command line arguments which can be separated by either commas
+ or whitespace. If dwim is set, it will complain about string ending
+ in comma since this usually means someone did 'dak ls -a i386, m68k
+ foo' or something and the inevitable confusion resulting from 'm68k'
+ being treated as an argument is undesirable.
+ """
+
if s.find(",") == -1:
return s.split()
else:
########################################
- # Our very own version of commands.getouputstatus(), hacked to support
- # gpgv's status fd.
def gpgv_get_status_output(cmd, status_read, status_write):
+ """
+ Our very own version of commands.getstatusoutput(), hacked to support
+ gpgv's status fd.
+ """
+
cmd = ['/bin/sh', '-c', cmd]
p2cread, p2cwrite = os.pipe()
c2pread, c2pwrite = os.pipe()
################################################################################
def retrieve_key (filename, keyserver=None, keyring=None):
- """Retrieve the key that signed 'filename' from 'keyserver' and
- add it to 'keyring'. Returns nothing on success, or an error message
- on error."""
+ """
+ Retrieve the key that signed 'filename' from 'keyserver' and
+ add it to 'keyring'. Returns nothing on success, or an error message
+ on error.
+ """
# Defaults for keyserver and keyring
if not keyserver:
return "%s: tainted filename" % (filename)
# Invoke gpgv on the file
- status_read, status_write = os.pipe();
+ status_read, status_write = os.pipe()
cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
(_, status, _) = gpgv_get_status_output(cmd, status_read, status_write)
################################################################################
def check_signature (sig_filename, reject, data_filename="", keyrings=None, autofetch=None):
- """Check the signature of a file and return the fingerprint if the
- signature is valid or 'None' if it's not. The first argument is the
- filename whose signature should be checked. The second argument is a
- reject function and is called when an error is found. The reject()
- function must allow for two arguments: the first is the error message,
- the second is an optional prefix string. It's possible for reject()
- to be called more than once during an invocation of check_signature().
- The third argument is optional and is the name of the files the
- detached signature applies to. The fourth argument is optional and is
- a *list* of keyrings to use. 'autofetch' can either be None, True or
- False. If None, the default behaviour specified in the config will be
- used."""
+ """
+ Check the signature of a file and return the fingerprint if the
+ signature is valid or 'None' if it's not. The first argument is the
+ filename whose signature should be checked. The second argument is a
+ reject function and is called when an error is found. The reject()
+ function must allow for two arguments: the first is the error message,
+ the second is an optional prefix string. It's possible for reject()
+ to be called more than once during an invocation of check_signature().
+ The third argument is optional and is the name of the files the
+ detached signature applies to. The fourth argument is optional and is
+ a *list* of keyrings to use. 'autofetch' can either be None, True or
+ False. If None, the default behaviour specified in the config will be
+ used.
+ """
# Ensure the filename contains no shell meta-characters or other badness
if not re_taint_free.match(sig_filename):
return None
# Build the command line
- status_read, status_write = os.pipe();
+ status_read, status_write = os.pipe()
cmd = "gpgv --status-fd %s %s %s %s" % (
status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)
################################################################################
- # Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.
- # Returns fixed 'src'
def clean_symlink (src, dest, root):
+ """
+ Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.
+ Returns fixed 'src'
+ """
src = src.replace(root, '', 1)
dest = dest.replace(root, '', 1)
dest = os.path.dirname(dest)
################################################################################
def temp_filename(directory=None, prefix="dak", suffix=""):
    """
    Return a secure and unique filename by pre-creating it.

    If 'directory' is non-null, it will be the directory the file is pre-created in.
    If 'prefix' is non-null, the filename will be prefixed with it, default is dak.
    If 'suffix' is non-null, the filename will end with it.

    Returns a pair (fd, name).
    """
    # Delegate entirely to the stdlib's secure temp-file creation.
    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
################################################################################
- # checks if the user part of the email is listed in the alias file
-
def is_email_alias(email):
+ """ checks if the user part of the email is listed in the alias file """
global alias_cache
if alias_cache == None:
aliasfn = which_alias_file()
apt_pkg.ReadConfigFileISC(Cnf,which_conf_file())
################################################################################
+
def generate_contents_information(filename):
    """
    Generate a list of files contained in a .deb

    @type filename: string
    @param filename: the path to a .deb

    @rtype: list
    @return: a list of files in the data.tar.* portion of the .deb
    """
    import tarfile  # local import; tarfile is not used anywhere else in this module

    cmd = "ar t %s" % (filename)
    (result, output) = commands.getstatusoutput(cmd)
    if result != 0:
        # NOTE(review): reject() is not defined in this module -- confirm
        # the intended error handler for these failure paths.
        reject("%s: 'ar t' invocation failed." % (filename))
        reject(prefix_multi_line_string(output, " [ar output:] "), "")

    # Ugh ... this is ugly ... Code ripped from process_unchecked.py
    chunks = output.split('\n')

    contents = []
    try:
        # Extract the data member (third entry in the ar listing) into cwd.
        cmd = "ar x %s %s" % (filename, chunks[2])
        (result, output) = commands.getstatusoutput(cmd)
        if result != 0:
            reject("%s: '%s' invocation failed." % (filename, cmd))
            reject(prefix_multi_line_string(output, " [ar output:] "), "")

        # Got deb tarballs, now lets go through and determine what bits
        # and pieces the deb had ...
        if chunks[2] == "data.tar.gz":
            data = tarfile.open("data.tar.gz", "r:gz")
        elif chunks[2] == "data.tar.bz2":
            data = tarfile.open("data.tar.bz2", "r:bz2")
        else:
            reject("couldn't find data.tar.*")
            # No tarball to inspect; bail out instead of falling through to
            # iterate an undefined 'data' (the finally block cleans up).
            return contents

        for tarinfo in data:
            if not tarinfo.isdir():
                # strip the leading "./" from member names
                contents.append(tarinfo.name[2:])

    finally:
        if os.path.exists(chunks[2]):
            os.remove(chunks[2])

    return contents
+
+###############################################################################