"Check for users with no packages in the archive"),
("import-archive",
"Populate SQL database based from an archive tree"),
+ ("import-contents",
+ "Populate SQL database with Contents files"),
("import-keyring",
"Populate fingerprint/uid table based on a new/updated keyring"),
("import-ldap-fingerprints",
--- /dev/null
+#!/usr/bin/env python
+
+# Debian Archive Kit Database Update Script 2
+# Copyright (C) 2009 Michael Casadevall <mcasadevall@debian.org>
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+# <tomv_w> really, if we want to screw ourselves, let's find a better way.
+# <Ganneff> rm -rf /srv/ftp.debian.org
+
+################################################################################
+
+import psycopg2, time
+
+################################################################################
+
+def do_update(self):
+ print "Adding content fields to database"
+
+ try:
+ c = self.db.cursor()
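+        # The Contents data is normalised into three tables: unique directory
+        # paths, unique file names, and an association table tying each
+        # (path, name) pair to the binary package that ships the file.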
+ c.execute("""CREATE TABLE content_file_paths (
+ id serial primary key not null,
+ path text unique not null
+ )""")
+
+ c.execute("""CREATE TABLE content_file_names (
+ id serial primary key not null,
+ file text unique not null
+ )""")
+
+ c.execute("""CREATE TABLE content_associations (
+ id serial not null,
+ binary_pkg int4 not null references binaries(id) on delete cascade,
+ filepath int4 not null references content_file_paths(id) on delete cascade,
+ filename int4 not null references content_file_names(id) on delete cascade
+ );""")
+
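+        # Illustrative only: with these tables a Contents-style line can be
+        # rebuilt with a join such as
+        #   SELECT p.path || '/' || f.file, b.package
+        #     FROM content_associations ca
+        #     JOIN content_file_paths p ON ca.filepath = p.id
+        #     JOIN content_file_names f ON ca.filename = f.id
+        #     JOIN binaries b ON ca.binary_pkg = b.id;
+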
+ c.execute("""CREATE FUNCTION comma_concat(text, text) RETURNS text
+ AS $_$select case
+ WHEN $2 is null or $2 = '' THEN $1
+ WHEN $1 is null or $1 = '' THEN $2
+ ELSE $1 || ',' || $2
+ END$_$
+ LANGUAGE sql""")
+
+ c.execute("""CREATE AGGREGATE comma_separated_list (
+ BASETYPE = text,
+ SFUNC = comma_concat,
+ STYPE = text,
+ INITCOND = ''
+ );""")
+
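+        # Illustrative only: the aggregate lets a query collapse many rows into
+        # a single comma-separated text value, for example
+        #   SELECT comma_separated_list(f.file) FROM content_file_names f;
+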
+ c.execute("UPDATE config SET value = '2' WHERE name = 'db_revision'")
+ self.db.commit()
+
+        print "REMINDER: Fully regenerate the Contents files before running import-contents"
+ print ""
+ print "Pausing for five seconds ..."
+ time.sleep (5)
+
+ except psycopg2.ProgrammingError, msg:
+ self.db.rollback()
+ print "FATAL: Unable to apply content table update 2!"
+ print "Error Message: " + str(msg)
+ print "Database changes have been rolled back."
h.close()
# Get our suites, and the architectures
- for s in suites:
+ for s in [i.lower() for i in suites]:
suite_id = database.get_suite_id(s)
q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id)
--- /dev/null
+#!/usr/bin/env python
+# Import contents files
+
+# Copyright (C) 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+import sys, pg
+import re, gzip, apt_pkg
+from daklib import database, utils
+from daklib.dak_exceptions import *
+
+################################################################################
+
+Cnf = None
+projectB = None
+out = None
+AptCnf = None
+
+################################################################################
+
+def usage (exit_code=0):
+ print """Usage: dak import-contents
+Import Contents files
+
+ -h, --help show this help and exit
+  -s, --suite=SUITE          only import Contents files for this suite
+"""
+ sys.exit(exit_code)
+
+################################################################################
+
+def import_contents(suites):
+ global projectB, Cnf
+
+ # Start transaction
+ projectB.query("BEGIN WORK")
+
+    # Needed to make sure PostgreSQL doesn't freak out on some of the data
+ projectB.query("SET CLIENT_ENCODING TO 'LATIN1'")
+
+ # Get our suites, and the architectures
+ for s in suites:
+ suite_id = database.get_suite_id(s)
+
+ q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id)
+
+ arch_list = [ ]
+ for r in q.getresult():
+ if r[1] != "source" and r[1] != "all":
+ arch_list.append((r[0], r[1]))
+
+ arch_all_id = database.get_architecture_id("all")
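+        # "all" is skipped in arch_list above; arch: all packages are instead
+        # matched via arch_all_id as a fallback further down.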
+
+ for arch in arch_list:
+ print "Processing %s/%s" % (s, arch[1])
+ arch_id = database.get_architecture_id(arch[1])
+ f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-%s.gz" % (s, arch[1]), "r")
+
+ # Get line count
+ lines = f.readlines()
+ num_of_lines = len(lines)
+
+            # Ok, we have all the lines in memory, now comes the fun 'let's parse' bit
+ lines_processed = 0
+ found_header = False
+
+ for line in lines:
+                if not found_header:
+ if not line:
+ print "Unable to find end of Contents-%s.gz header!" % ( arch[1])
+ sys.exit(255)
+
+ lines_processed += 1
+ p = re.compile('^FILE')
+ if p.match(line):
+ found_header = True
+ continue
+
+ # The format is simple enough, *filename*, *section/package1,section/package2,etc*
+ # Each file appears once per Contents file, so first, use some regex match
+ # to split the two bits
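+                # e.g. "usr/bin/foo    utils/foo,admin/bar" (illustrative line,
+                # not taken from a real Contents file)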
+
+                # Print out progress
+                print "\rProcessed %d lines of %d (%.2f%%)" % (lines_processed, num_of_lines, (float(lines_processed) * 100) / num_of_lines),
+
+ # regex lifted from packages.d.o code
+ p = re.compile('^(.+?)\s+(\S+)$')
+ matchs = p.findall(line)
+ filename = matchs[0][0]
+ packages = matchs[0][1].split(',')
+
+ # Iterate through each file's packages
+ for package in packages:
+ p = re.compile('(\S+)/(\S+)$')
+ matchs = p.findall(package)
+
+ # Needed since the DB is unicode, and these files
+ # are ASCII
+ section_name = matchs[0][0]
+ package_name = matchs[0][1]
+
+ section_id = database.get_section_id(section_name)
+ package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_id)
+
+                    if package_id is None:
+ # Likely got an arch all package
+ package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_all_id)
+
+ database.insert_content_path(package_id, filename)
+
+ lines_processed += 1
+ f.close()
+
+ # Commit work
+ print "Committing to database ..."
+ projectB.query("COMMIT")
+
+################################################################################
+
+def main ():
+ global Cnf, projectB, out
+ out = sys.stdout
+
+ Cnf = utils.get_conf()
+
+ Arguments = [('h',"help","Import-Contents::Options::Help"),
+ ('s',"suite","Import-Contents::Options::Suite","HasArg"),
+ ]
+
+ for i in [ "help", "suite" ]:
+ if not Cnf.has_key("Import-Contents::Options::%s" % (i)):
+ Cnf["Import-Contents::Options::%s" % (i)] = ""
+
+    apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
+ Options = Cnf.SubTree("Import-Contents::Options")
+
+ if Options["Help"]:
+ usage()
+
+ if Options["Suite"]:
+ suites = utils.split_args(Options["Suite"])
+ else:
+ suites = Cnf.SubTree("Suite").List()
+
+ projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
+ database.init(Cnf, projectB)
+
+ import_contents(suites)
+
+################################################################################
+
+if __name__ == '__main__':
+ main()
Cnf = None
projectB = None
-required_database_schema = 1
+required_database_schema = 2
################################################################################
suite_bin_version_cache = {}
content_path_id_cache = {}
content_file_id_cache = {}
+insert_contents_file_cache = {}
################################################################################
return version
-def get_latest_binary_version_id(binary, suite, arch):
+def get_latest_binary_version_id(binary, section, suite, arch):
global suite_bin_version_cache
- cache_key = "%s_%s" % (binary, suite)
+ cache_key = "%s_%s_%s_%s" % (binary, section, suite, arch)
if suite_bin_version_cache.has_key(cache_key):
return suite_bin_version_cache[cache_key]
- q = projectB.query("SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d'" % (binary, int(arch), int(suite)))
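+    # The override join restricts the match to the section named in the
+    # Contents file (import-contents looks packages up by section/package pair).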
+ q = projectB.query("SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section)))
highest_bid, highest_version = None, None
highest_bid = bi[0]
highest_version = bi[1]
+ suite_bin_version_cache[cache_key] = highest_bid
return highest_bid
################################################################################
################################################################################
def insert_content_path(bin_id, fullpath):
+ global insert_contents_file_cache
+ cache_key = "%s_%s" % (bin_id, fullpath)
+
+    # have we seen this contents entry before?
+    # probably only relevant during package import
+ if insert_contents_file_cache.has_key(cache_key):
+ return
+
# split the path into basename, and pathname
(path, file) = os.path.split(fullpath)
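+    # get_or_set_contents_file_id/get_or_set_contents_path_id presumably return
+    # the id of an existing row or create one; results end up in the
+    # content_file_id_cache/content_path_id_cache caches above.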
file_id = get_or_set_contents_file_id(file)
path_id = get_or_set_contents_path_id(path)
+    # Determine if we're inserting a duplicate row
+    q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id))
+    if q.getresult():
+        # Yes we are, remember that and skip the insert
+        print "Skipping duplicate content_associations row"
+        insert_contents_file_cache[cache_key] = 1
+        return
+
+    # Put them into content_associations and remember that we have done so
+    projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id))
+    insert_contents_file_cache[cache_key] = 1
+    return
<helix> elmo: I can't believe people pay you to fix computers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+* Ganneff ponders how to best write the text to -devel. (need to tell em in
+ case they find more bugs). "We fixed the fucking idiotic broken implementation
+ to be less so" is probably not the nicest, even if perfect valid, way to say so
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%