From: Michael Casadevall
Date: Tue, 30 Dec 2008 21:10:35 +0000 (-0500)
Subject: Merge branch 'master' into content_generation
X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=30c2db7d2f47ff65ec9126f03c1e1e893db1c22c;hp=-c;p=dak.git

Merge branch 'master' into content_generation

Signed-off-by: Michael Casadevall
---

30c2db7d2f47ff65ec9126f03c1e1e893db1c22c
diff --combined dak/generate_contents.py
index 9696bb86,00000000..54b70bde
mode 100755,000000..100755
--- a/dak/generate_contents.py
+++ b/dak/generate_contents.py
@@@ -1,139 -1,0 +1,167 @@@
+#!/usr/bin/env python
+# Create all the contents files
+
+# Copyright (C) 2008, 2009 Michael Casadevall
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+
+################################################################################
+# there is the idea to slowly replace contents files
+# with a new generation of such files.
+# having more info.
+# of course that won't help for now where we need to generate them :)
+################################################################################
+
+################################################################################
+
+import sys, os, popen2, tempfile, stat, time, pg
- import apt_pkg
++import gzip, apt_pkg
+from daklib import database, utils
+from daklib.dak_exceptions import *
+
+################################################################################
+
+Cnf = None
+projectB = None
+out = None
+AptCnf = None
+
+################################################################################
+
+def usage (exit_code=0):
+    print """Usage: dak generate-contents
+Generate Contents files
+
+ -h, --help                 show this help and exit
+ -s, --suite=SUITE          only write file lists for this suite
+"""
+    sys.exit(exit_code)
+
+################################################################################
+
- def handle_dup_files(file_list):
-     # Sort the list, and then handle finding dups in the filenames key
- 
-     # Walk the list, seeing if the current entry and the next one are the same
-     # and if so, join them together
- 
- 
-     return file_list
- 
- ################################################################################
- 
+def generate_contents(suites):
+    global projectB, Cnf
+    # Ok, the contents information is in the database
+
+    # We need to work and get the contents, and print it out on a per
+    # architectural basis
+
++    # Read in the contents file header
++    header = False
++    if Cnf.has_key("Generate-Contents::Header"):
++        h = open(Cnf["Generate-Contents::Header"], "r")
++        header = h.read()
++        h.close()
++
+    # Get our suites, and the architectures
+    for s in suites:
+        suite_id = database.get_suite_id(s)
+
-         q = projectB.query("SELECT architecture FROM suite_architectures WHERE suite = '%d'" % suite_id)
++        q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id)
+
+        arch_list = [ ]
+        for r in q.getresult():
-             arch_list.append(r[0])
++            if r[1] != "source" and r[1] != "all":
++                arch_list.append((r[0], r[1]))
+
+        arch_all_id = database.get_architecture_id("all")
+
-         # Got the arch all packages, now we need to get the arch dependent packages
-         # attach the arch all, stick them together, and write out the result
++        # Time for the query from hell. Essentially, we need to get the associations, the filenames, the paths,
++        # and all that fun stuff from the database.
+
+        for arch_id in arch_list:
-             print "SELECT b.package, c.file, s.section FROM contents c JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE (b.architecture = '%d' OR b.architecture = '%d') AND ba.suite = '%d'" % (arch_id, arch_all_id, suite_id)
-             q = projectB.query("SELECT b.package, c.file, s.section FROM contents c JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE (b.architecture = '%d' OR b.architecture = '%d') AND ba.suite = '%d'" % (arch_id, arch_all_id, suite_id))
-             # We need to copy the arch_all packages table into arch packages
++            q = projectB.query("""SELECT p.path||'/'||n.file, comma_separated_list(s.section||'/'||b.package) FROM content_associations c JOIN content_file_paths p ON (c.filepath=p.id) JOIN content_file_names n ON (c.filename=n.id) JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE (b.architecture = '%d' OR b.architecture = '%d') AND ba.suite = '%d' AND b.type = 'deb' GROUP BY (p.path||'/'||n.file)""" % (arch_id[0], arch_all_id, suite_id))
++
++            f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-%s.gz" % (s, arch_id[1]), "w")
++
++            if header:
++                f.write(header)
++
++            for contents in q.getresult():
++                f.write(contents[0] + "\t\t\t" + contents[-1] + "\n")
++
++            f.close()
+
-             # This is for the corner case of arch dependent packages colliding
-             # with arch all packages only on some architectures.
-             # Ugly, I know ...
++            # The MORE fun part. Ok, udebs need their own contents files: udeb, and udeb-nf (non-free)
++            # This is HORRIBLY debian specific :-/
++            # First off, udeb
+
-             arch_packages = []
-             for r in q.getresult():
-                 arch_packages.append((r[1], (r[2] + '/' + r[0])))
++            section_id = database.get_section_id('debian-installer') # all udebs should be here
+
-             arch_packages = handle_dup_files(arch_packages)
++            if section_id != -1:
++                q = projectB.query("""SELECT p.path||'/'||n.file, comma_separated_list(s.section||'/'||b.package) FROM content_associations c JOIN content_file_paths p ON (c.filepath=p.id) JOIN content_file_names n ON (c.filename=n.id) JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE s.id = '%d' AND ba.suite = '%d' AND b.type = 'udeb' GROUP BY (p.path||'/'||n.file)""" % (section_id, suite_id))
+
-             #for contents in arch_packages:
-                 #print contents[0] + '\t\t\t\t' + contents[1]
++                f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-udeb.gz" % (s), "w")
++
++                if header:
++                    f.write(header)
++
++                for contents in q.getresult():
++                    f.write(contents[0] + "\t\t\t" + contents[-1] + "\n")
++
++                f.close()
++
++            # Once more, with non-free
++            section_id = database.get_section_id('non-free/debian-installer') # all udebs should be here
++
++            if section_id != -1:
++                q = projectB.query("""SELECT p.path||'/'||n.file, comma_separated_list(s.section||'/'||b.package) FROM content_associations c JOIN content_file_paths p ON (c.filepath=p.id) JOIN content_file_names n ON (c.filename=n.id) JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE s.id = '%d' AND ba.suite = '%d' AND b.type = 'udeb' GROUP BY (p.path||'/'||n.file)""" % (section_id, suite_id))
++
++                f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-udeb-nf.gz" % (s), "w")
++
++                if header:
++                    f.write(header)
++
++                for contents in q.getresult():
++                    f.write(contents[0] + "\t\t\t" + contents[-1] + "\n")
++
++                f.close()
+
+################################################################################
+
+def main ():
+    global Cnf, projectB, out
+    out = sys.stdout
+
+    Cnf = utils.get_conf()
+
+    Arguments = [('h',"help","Generate-Contents::Options::Help"),
+                 ('s',"suite","Generate-Contents::Options::Suite","HasArg"),
+                ]
++
+    for i in [ "help", "suite" ]:
+        if not Cnf.has_key("Generate-Contents::Options::%s" % (i)):
+            Cnf["Generate-Contents::Options::%s" % (i)] = ""
+
+    suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
+    Options = Cnf.SubTree("Generate-Contents::Options")
+
+    if Options["Help"]:
+        usage()
+
+    if Options["Suite"]:
+        suites = utils.split_args(Options["Suite"])
+    else:
+        suites = Cnf.SubTree("Suite").List()
+
+    projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
+    database.init(Cnf, projectB)
+
+    generate_contents(suites)
+
+#######################################################################################
+
+if __name__ == '__main__':
+    main()
diff --combined dak/process_accepted.py
index b6f8a311,0d5a5387..4dd5b69d
--- a/dak/process_accepted.py
+++ b/dak/process_accepted.py
@@@ -30,7 -30,7 +30,7 @@@
 ###############################################################################
 
 import errno, fcntl, os, sys, time, re
-import apt_pkg
+import apt_pkg, tarfile, commands
 from daklib import database
 from daklib import logging
 from daklib import queue
@@@ -96,43 -96,6 +96,43 @@@ class Urgency_Log
         else:
            os.unlink(self.log_filename)
 
+
+###############################################################################
+
+def generate_contents_information(filename):
+    # Generate all the contents for the database
+    cmd = "ar t %s" % (filename)
+    (result, output) = commands.getstatusoutput(cmd)
+    if result != 0:
+        reject("%s: 'ar t' invocation failed." % (filename))
+        reject(utils.prefix_multi_line_string(output, " [ar output:] "), "")
+
+    # Ugh ... this is ugly ... Code ripped from process_unchecked.py
+    chunks = output.split('\n')
+    cmd = "ar x %s %s" % (filename, chunks[2])
+    (result, output) = commands.getstatusoutput(cmd)
+    if result != 0:
+        reject("%s: 'ar x' invocation failed." % (filename))
+        reject(utils.prefix_multi_line_string(output, " [ar output:] "), "")
+
+    # Got deb tarballs, now let's go through and determine what bits
+    # and pieces the deb had ...
+    if chunks[2] == "data.tar.gz":
+        data = tarfile.open("data.tar.gz", "r:gz")
+    elif chunks[2] == "data.tar.bz2":
+        data = tarfile.open("data.tar.bz2", "r:bz2")
+    else:
+        os.remove(chunks[2])
+        reject("couldn't find data.tar.*")
+
+    contents = []
+    for tarinfo in data:
+        if not tarinfo.isdir():
+            contents.append(tarinfo.name[2:]) # strip the leading "./" tar members carry
+
+    os.remove(chunks[2])
+    return contents
+
 ###############################################################################
 
 def reject (str, prefix="Rejected: "):
@@@ -391,7 -354,6 +391,7 @@@ def install ()
         source = files[file]["source package"]
         source_version = files[file]["source version"]
         filename = files[file]["pool name"] + file
+        contents = generate_contents_information(file)
         if not files[file].has_key("location id") or not files[file]["location id"]:
             files[file]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[file]["component"],utils.where_am_i())
         if not files[file].has_key("files id") or not files[file]["files id"]:
@@@ -406,10 -368,6 +406,12 @@@
             suite_id = database.get_suite_id(suite)
             projectB.query("INSERT INTO bin_associations (suite, bin) VALUES (%d, currval('binaries_id_seq'))" % (suite_id))
 
+        # insert contents into the database
++        q = projectB.query("SELECT currval('binaries_id_seq')")
++        bin_id = int(q.getresult()[0][0])
+        for file in contents:
-             projectB.query("INSERT INTO contents (binary_pkg, file) VALUES (currval('binaries_id_seq'), '%s')" % file)
++            database.insert_content_path(bin_id, file)
+
         # If the .orig.tar.gz is in a legacy directory we need to poolify
         # it, so that apt-get source (and anything else that goes by the
         # "Directory:" field in the Sources.gz file) works.
@@@ -472,6 -430,7 +474,6 @@@
                 utils.copy(pkg.changes_file, Cnf["Dir::Root"] + dest)
             for dest in copy_dot_dak.keys():
                 utils.copy(Upload.pkg.changes_file[:-8]+".dak", dest)
-         projectB.query("COMMIT WORK")
 
     # Move the .changes into the 'done' directory
diff --combined daklib/database.py
index 5c7bd838,5c7bd838..b9bb29dd
--- a/daklib/database.py
+++ b/daklib/database.py
@@@ -19,7 -19,7 +19,7 @@@
 
 ################################################################################
 
--import sys, time, types
++import os, sys, time, types, apt_pkg
 
 ################################################################################
 
@@@ -42,6 -42,6 +42,8 @@@ fingerprint_id_cache = {
 queue_id_cache = {}
 uid_id_cache = {}
 suite_version_cache = {}
++content_path_id_cache = {}
++content_file_id_cache = {}
 
 ################################################################################
 
@@@ -247,6 -247,6 +249,26 @@@ def get_suite_version(source, suite)
 
     return version
 
++def get_latest_binary_version_id(binary, suite):
++    global suite_version_cache
++    cache_key = "%s_%s" % (binary, suite)
++
++    if suite_version_cache.has_key(cache_key):
++        return suite_version_cache[cache_key]
++
++    #print "SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) WHERE b.package = '%s' AND ba.suite = '%d'" % (binary, int(suite))
++    q = projectB.query("SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) WHERE b.package = '%s' AND ba.suite = '%d'" % (binary, int(suite)))
++
++    highest_bid, highest_version = None, None
++
++    for bi in q.getresult():
++        if highest_version == None or apt_pkg.VersionCompare(bi[1], highest_version) == 1:
++            highest_bid = bi[0]
++            highest_version = bi[1]
++
++    suite_version_cache[cache_key] = highest_bid
++    return highest_bid
++
 ################################################################################
 
 def get_or_set_maintainer_id (maintainer):
@@@ -397,3 -397,3 +419,54 @@@ def get_suites(pkgname, src=False)
     sql = "select suite_name from binaries, bin_associations,suite where binaries.id=bin_associations.bin and package='%s' and bin_associations.suite = suite.id"%pkgname
     q = projectB.query(sql)
     return map(lambda x: x[0], q.getresult())
++
++################################################################################
++
++def get_or_set_contents_file_id(file):
++    global content_file_id_cache
++
++    if not content_file_id_cache.has_key(file):
++        sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % file
++        q = projectB.query(sql_select)
++        if not q.getresult():
++            # since this can be called within a transaction, we can't use currval
++            q = projectB.query("SELECT nextval('content_file_names_id_seq')")
++            file_id = int(q.getresult()[0][0])
++            projectB.query("INSERT INTO content_file_names VALUES ('%d', '%s')" % (file_id, file))
++            content_file_id_cache[file] = file_id
++        else:
++            content_file_id_cache[file] = int(q.getresult()[0][0])
++    return content_file_id_cache[file]
++
++################################################################################
++
++def get_or_set_contents_path_id(path):
++    global content_path_id_cache
++
++    if not content_path_id_cache.has_key(path):
++        sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % path
++        q = projectB.query(sql_select)
++        if not q.getresult():
++            # since this can be called within a transaction, we can't use currval
++            q = projectB.query("SELECT nextval('content_file_paths_id_seq')")
++            path_id = int(q.getresult()[0][0])
++            projectB.query("INSERT INTO content_file_paths VALUES ('%d', '%s')" % (path_id, path))
++            content_path_id_cache[path] = path_id
++        else:
++            content_path_id_cache[path] = int(q.getresult()[0][0])
++
++    return content_path_id_cache[path]
++
++################################################################################
++
++def insert_content_path(bin_id, fullpath):
++    # split the path into basename, and pathname
++    (path, file) = os.path.split(fullpath)
++
++    # Get the necessary IDs ...
++    file_id = get_or_set_contents_file_id(file)
++    path_id = get_or_set_contents_path_id(path)
++
++    # Put them into content_associations
++    projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id))
++    return
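
---

How the pieces fit together: process_accepted calls generate_contents_information()
on each installed .deb, and database.insert_content_path() splits every shipped
path into a directory and a basename, interning each distinct value once through
the get_or_set_contents_*_id() caches so that content_associations rows carry
only integer IDs. Below is a minimal in-memory sketch of that interning scheme
in plain Python 2; path_cache, file_cache, associations, and both helper names
are hypothetical stand-ins, not part of daklib:

    #!/usr/bin/env python
    import os

    path_cache = {}    # stand-in for content_file_paths (path -> id)
    file_cache = {}    # stand-in for content_file_names (file -> id)
    associations = []  # stand-in for content_associations rows

    def get_or_set_id(cache, value):
        # get-or-set, mirroring get_or_set_contents_path_id()/_file_id()
        if not cache.has_key(value):
            cache[value] = len(cache) + 1
        return cache[value]

    def record_content_path(bin_id, fullpath):
        # same (path, file) split that insert_content_path() performs
        (path, file) = os.path.split(fullpath)
        associations.append((bin_id, get_or_set_id(path_cache, path),
                             get_or_set_id(file_cache, file)))

    for name in ["usr/bin/dak", "usr/share/doc/dak/README"]:
        record_content_path(42, name)

    # A second package shipping usr/share/doc/dak/README would reuse the
    # existing path and file IDs; only a new association row is added.
    print associations

On the output side, dak generate-contents reassembles p.path||'/'||n.file per
row and writes one line per path followed by the comma_separated_list() of
section/package values (comma_separated_list is a database-side aggregate the
queries assume exists), e.g. "usr/bin/dak", three tabs, "utils/dak", where the
section shown is illustrative.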