From: Michael Casadevall
Date: Tue, 30 Dec 2008 21:10:35 +0000 (-0500)
Subject: Merge branch 'master' into content_generation
X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=30c2db7d2f47ff65ec9126f03c1e1e893db1c22c;hp=-c;p=dak.git

Merge branch 'master' into content_generation

Signed-off-by: Michael Casadevall
---

30c2db7d2f47ff65ec9126f03c1e1e893db1c22c
diff --combined dak/generate_contents.py
index 9696bb86,00000000..54b70bde
mode 100755,000000..100755
--- a/dak/generate_contents.py
+++ b/dak/generate_contents.py
@@@ -1,139 -1,0 +1,167 @@@
+#!/usr/bin/env python
+# Create all the contents files
+
+# Copyright (C) 2008, 2009 Michael Casadevall
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+
+################################################################################
+# there is the idea to slowly replace contents files
+# with a new generation of such files.
+# having more info.
+# of course that won't help for now where we need to generate them :)
+################################################################################
+
+################################################################################
+
+import sys, os, popen2, tempfile, stat, time, pg
- import apt_pkg
++import gzip, apt_pkg
+from daklib import database, utils
+from daklib.dak_exceptions import *
+
+################################################################################
+
+Cnf = None
+projectB = None
+out = None
+AptCnf = None
+
+################################################################################
+
+def usage (exit_code=0):
+    print """Usage: dak generate-contents
+Generate Contents files
+
+ -h, --help                 show this help and exit
+ -s, --suite=SUITE          only write file lists for this suite
+"""
+    sys.exit(exit_code)
+
+################################################################################
+
- def handle_dup_files(file_list):
-     # Sort the list, and then handle finding dups in the filenames key
- 
-     # Walk the list, seeing if the current entry and the next one are the same
-     # and if so, join them together
- 
- 
-     return file_list
- 
- ################################################################################
- 
+def generate_contents(suites):
+    global projectB, Cnf
+    # Ok, the contents information is in the database
+
+    # We need to work and get the contents, and print it out on a per
+    # architectural basis
+
++    # Read in the contents file header
++    header = False
++    if Cnf.has_key("Generate-Contents::Header"):
++        h = open(Cnf["Generate-Contents::Header"], "r")
++        header = h.read()
++        h.close()
++
+    # Get our suites, and the architectures
+    for s in suites:
+        suite_id = database.get_suite_id(s)
+
-         q = projectB.query("SELECT architecture FROM suite_architectures WHERE suite = '%d'" % suite_id)
++        q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id)
+
+        arch_list = [ ]
+        for r in q.getresult():
-             arch_list.append(r[0])
++            if r[1] != "source" and r[1] != "all":
++                arch_list.append((r[0], r[1]))
+
+        arch_all_id = database.get_architecture_id("all")
+
-         # Got the arch all packages, now we need to get the arch dependent packages
-         # attach the arch all, stick them together, and write out the result
++        # Time for the query from hell. Essentially, we need to get the associations, the filenames, the paths,
++        # and all that fun stuff from the database.
+
+        for arch_id in arch_list:
-             print "SELECT b.package, c.file, s.section FROM contents c JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE (b.architecture = '%d' OR b.architecture = '%d') AND ba.suite = '%d'" % (arch_id, arch_all_id, suite_id)
-             q = projectB.query("SELECT b.package, c.file, s.section FROM contents c JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE (b.architecture = '%d' OR b.architecture = '%d') AND ba.suite = '%d'" % (arch_id, arch_all_id, suite_id))
-             # We need to copy the arch_all packages table into arch packages
++            q = projectB.query("""SELECT p.path||'/'||n.file, comma_separated_list(s.section||'/'||b.package) FROM content_associations c JOIN content_file_paths p ON (c.filepath=p.id) JOIN content_file_names n ON (c.filename=n.id) JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE (b.architecture = '%d' OR b.architecture = '%d') AND ba.suite = '%d' AND b.type = 'deb' GROUP BY (p.path||'/'||n.file)""" % (arch_id[0], arch_all_id, suite_id))
++
++            f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-%s.gz" % (s, arch_id[1]), "w")
++
++            if header:
++                f.write(header)
++
++            for contents in q.getresult():
++                f.write(contents[0] + "\t\t\t" + contents[-1] + "\n")
++
++            f.close()
+
-             # This is for the corner case of arch dependent packages colliding
-             # with arch all packages only on some architectures.
-             # Ugly, I know ...
++            # The MORE fun part. Ok, udebs need their own contents files: udeb, and udeb-nf (non-free)
++            # This is HORRIBLY debian specific :-/
++            # First off, udeb
+
-             arch_packages = []
-             for r in q.getresult():
-                 arch_packages.append((r[1], (r[2] + '/' + r[0])))
++            section_id = database.get_section_id('debian-installer') # all udebs should be here
+
-             arch_packages = handle_dup_files(arch_packages)
++            if section_id != -1:
++                q = projectB.query("""SELECT p.path||'/'||n.file, comma_separated_list(s.section||'/'||b.package) FROM content_associations c JOIN content_file_paths p ON (c.filepath=p.id) JOIN content_file_names n ON (c.filename=n.id) JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE s.id = '%d' AND ba.suite = '%d' AND b.type = 'udeb' GROUP BY (p.path||'/'||n.file)""" % (section_id, suite_id))
+
-             #for contents in arch_packages:
-                 #print contents[0] + '\t\t\t\t' + contents[1]
++                f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-udeb.gz" % (s), "w")
++
++                if header:
++                    f.write(header)
++
++                for contents in q.getresult():
++                    f.write(contents[0] + "\t\t\t" + contents[-1] + "\n")
++
++                f.close()
++
++            # Once more, with non-free
++            section_id = database.get_section_id('non-free/debian-installer') # all udebs should be here
++
++            if section_id != -1:
++                q = projectB.query("""SELECT p.path||'/'||n.file, comma_separated_list(s.section||'/'||b.package) FROM content_associations c JOIN content_file_paths p ON (c.filepath=p.id) JOIN content_file_names n ON (c.filename=n.id) JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE s.id = '%d' AND ba.suite = '%d' AND b.type = 'udeb' GROUP BY (p.path||'/'||n.file)""" % (section_id, suite_id))
++
++                f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-udeb-nf.gz" % (s), "w")
++
++                if header:
++                    f.write(header)
++
++                for contents in q.getresult():
++                    f.write(contents[0] + "\t\t\t" + contents[-1] + "\n")
++
++                f.close()
+
+################################################################################
+
+def main ():
+    global Cnf, projectB, out
+    out = sys.stdout
+
+    Cnf = utils.get_conf()
+
+    Arguments = [('h',"help","Generate-Contents::Options::Help"),
+                 ('s',"suite","Generate-Contents::Options::Suite","HasArg"),
+                ]
++
+    for i in [ "help", "suite" ]:
+        if not Cnf.has_key("Generate-Contents::Options::%s" % (i)):
+            Cnf["Generate-Contents::Options::%s" % (i)] = ""
+
+    suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
+    Options = Cnf.SubTree("Generate-Contents::Options")
+
+    if Options["Help"]:
+        usage()
+
+    if Options["Suite"]:
+        suites = utils.split_args(Options["Suite"])
+    else:
+        suites = Cnf.SubTree("Suite").List()
+
+    projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
+    database.init(Cnf, projectB)
+
+    generate_contents(suites)
+
+#######################################################################################
+
+if __name__ == '__main__':
+    main()
diff --combined dak/process_accepted.py
index b6f8a311,0d5a5387..4dd5b69d
--- a/dak/process_accepted.py
+++ b/dak/process_accepted.py
@@@ -30,7 -30,7 +30,7 @@@
 ###############################################################################
 
 import errno, fcntl, os, sys, time, re
-import apt_pkg
+import apt_pkg, tarfile, commands
 from daklib import database
 from daklib import logging
 from daklib import queue
@@@ -96,43 -96,6 +96,43 @@@ class Urgency_Log
         else:
            os.unlink(self.log_filename)
 
+
+###############################################################################
+
+def generate_contents_information(filename):
+    # Generate all the contents for the database
+    cmd = "ar t %s" % (filename)
+    (result, output) = commands.getstatusoutput(cmd)
+    if result != 0:
+        reject("%s: 'ar t' invocation failed." % (filename))
+        reject(utils.prefix_multi_line_string(output, " [ar output:] "), "")
+
+    # Ugh ... this is ugly ... Code ripped from process_unchecked.py
+    chunks = output.split('\n')
+    cmd = "ar x %s %s" % (filename, chunks[2])
+    (result, output) = commands.getstatusoutput(cmd)
+    if result != 0:
+        reject("%s: 'ar x' invocation failed." % (filename))
+        reject(utils.prefix_multi_line_string(output, " [ar output:] "), "")
+
+    # Got deb tarballs, now let's go through and determine what bits
+    # and pieces the deb had ...
+    if chunks[2] == "data.tar.gz":
+        data = tarfile.open("data.tar.gz", "r:gz")
+    elif chunks[2] == "data.tar.bz2":
+        data = tarfile.open("data.tar.bz2", "r:bz2")
+    else:
+        os.remove(chunks[2])
+        reject("couldn't find data.tar.*")
+
+    contents = []
+    for tarinfo in data:
+        if not tarinfo.isdir():
+            contents.append(tarinfo.name[2:]) # strip the leading "./" tar members carry
+
+    os.remove(chunks[2])
+    return contents
+
 ###############################################################################
 
 def reject (str, prefix="Rejected: "):
@@@ -391,7 -354,6 +391,7 @@@ def install ()
         source = files[file]["source package"]
         source_version = files[file]["source version"]
         filename = files[file]["pool name"] + file
+        contents = generate_contents_information(file)
         if not files[file].has_key("location id") or not files[file]["location id"]:
             files[file]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[file]["component"],utils.where_am_i())
         if not files[file].has_key("files id") or not files[file]["files id"]:
@@@ -406,10 -368,6 +406,12 @@@
             suite_id = database.get_suite_id(suite)
             projectB.query("INSERT INTO bin_associations (suite, bin) VALUES (%d, currval('binaries_id_seq'))" % (suite_id))
 
+        # insert contents into the database
++        q = projectB.query("SELECT currval('binaries_id_seq')")
++        bin_id = int(q.getresult()[0][0])
+        for file in contents:
-             projectB.query("INSERT INTO contents (binary_pkg, file) VALUES (currval('binaries_id_seq'), '%s')" % file)
++            database.insert_content_path(bin_id, file)
+
         # If the .orig.tar.gz is in a legacy directory we need to poolify
         # it, so that apt-get source (and anything else that goes by the
         # "Directory:" field in the Sources.gz file) works.
@@@ -472,6 -430,7 +474,6 @@@
                 utils.copy(pkg.changes_file, Cnf["Dir::Root"] + dest)
             for dest in copy_dot_dak.keys():
                 utils.copy(Upload.pkg.changes_file[:-8]+".dak", dest)
-         projectB.query("COMMIT WORK")
 
     # Move the .changes into the 'done' directory
diff --combined daklib/database.py
index 5c7bd838,5c7bd838..b9bb29dd
--- a/daklib/database.py
+++ b/daklib/database.py
@@@ -19,7 -19,7 +19,7 @@@
 
 ################################################################################
 
--import sys, time, types
++import os, sys, time, types, apt_pkg
 
 ################################################################################
 
@@@ -42,6 -42,6 +42,8 @@@ fingerprint_id_cache = {
 queue_id_cache = {}
 uid_id_cache = {}
 suite_version_cache = {}
++content_path_id_cache = {}
++content_file_id_cache = {}
 
 ################################################################################
 
@@@ -247,6 -247,6 +249,26 @@@ def get_suite_version(source, suite)
 
     return version
 
++def get_latest_binary_version_id(binary, suite):
++    global suite_version_cache
++    cache_key = "%s_%s" % (binary, suite)
++
++    if suite_version_cache.has_key(cache_key):
++        return suite_version_cache[cache_key]
++
++    #print "SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) WHERE b.package = '%s' AND ba.suite = '%d'" % (binary, int(suite))
++    q = projectB.query("SELECT b.id, b.version FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) WHERE b.package = '%s' AND ba.suite = '%d'" % (binary, int(suite)))
++
++    highest_bid, highest_version = None, None
++
++    for bi in q.getresult():
++        if highest_version == None or apt_pkg.VersionCompare(bi[1], highest_version) == 1:
++            highest_bid = bi[0]
++            highest_version = bi[1]
++
++    suite_version_cache[cache_key] = highest_bid
++    return highest_bid
++
 ################################################################################
 
 def get_or_set_maintainer_id (maintainer):
@@@ -397,3 -397,3 +419,54 @@@ def get_suites(pkgname, src=False)
     sql = "select suite_name from binaries, bin_associations,suite where binaries.id=bin_associations.bin and package='%s' and bin_associations.suite = suite.id"%pkgname
     q = projectB.query(sql)
     return map(lambda x: x[0], q.getresult())
++
++################################################################################
++
++def get_or_set_contents_file_id(file):
++    global content_file_id_cache
++
++    if not content_file_id_cache.has_key(file):
++        sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % file
++        q = projectB.query(sql_select)
++        if not q.getresult():
++            # since this can be called within a transaction, we can't use currval
++            q = projectB.query("SELECT nextval('content_file_names_id_seq')")
++            file_id = int(q.getresult()[0][0])
++            projectB.query("INSERT INTO content_file_names VALUES ('%d', '%s')" % (file_id, file))
++            content_file_id_cache[file] = file_id
++        else:
++            content_file_id_cache[file] = int(q.getresult()[0][0])
++    return content_file_id_cache[file]
++
++################################################################################
++
++def get_or_set_contents_path_id(path):
++    global content_path_id_cache
++
++    if not content_path_id_cache.has_key(path):
++        sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % path
++        q = projectB.query(sql_select)
++        if not q.getresult():
++            # since this can be called within a transaction, we can't use currval
++            q = projectB.query("SELECT nextval('content_file_paths_id_seq')")
++            path_id = int(q.getresult()[0][0])
++            projectB.query("INSERT INTO content_file_paths VALUES ('%d', '%s')" % (path_id, path))
++            content_path_id_cache[path] = path_id
++        else:
++            content_path_id_cache[path] = int(q.getresult()[0][0])
++
++    return content_path_id_cache[path]
++
++################################################################################
++
++def insert_content_path(bin_id, fullpath):
++    # split the path into basename, and pathname
++    (path, file) = os.path.split(fullpath)
++
++    # Get the necessary IDs ...
++    file_id = get_or_set_contents_file_id(file)
++    path_id = get_or_set_contents_path_id(path)
++
++    # Put them into content_associations
++    projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id))
++    return
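
---

How the pieces fit together: process_accepted calls generate_contents_information()
on each installed .deb, and database.insert_content_path() splits every shipped
path into a directory and a basename, interning each distinct value once through
the get_or_set_contents_*_id() caches so that content_associations rows carry
only integer IDs. Below is a minimal in-memory sketch of that interning scheme
in plain Python 2; path_cache, file_cache, associations, and both helper names
are hypothetical stand-ins, not part of daklib:

    #!/usr/bin/env python
    import os

    path_cache = {}    # stand-in for content_file_paths (path -> id)
    file_cache = {}    # stand-in for content_file_names (file -> id)
    associations = []  # stand-in for content_associations rows

    def get_or_set_id(cache, value):
        # get-or-set, mirroring get_or_set_contents_path_id()/_file_id()
        if not cache.has_key(value):
            cache[value] = len(cache) + 1
        return cache[value]

    def record_content_path(bin_id, fullpath):
        # same (path, file) split that insert_content_path() performs
        (path, file) = os.path.split(fullpath)
        associations.append((bin_id, get_or_set_id(path_cache, path),
                             get_or_set_id(file_cache, file)))

    for name in ["usr/bin/dak", "usr/share/doc/dak/README"]:
        record_content_path(42, name)

    # A second package shipping usr/share/doc/dak/README would reuse the
    # existing path and file IDs; only a new association row is added.
    print associations

On the output side, dak generate-contents reassembles p.path||'/'||n.file per
row and writes one line per path followed by the comma_separated_list() of
section/package values (comma_separated_list is a database-side aggregate the
queries assume exists), e.g. "usr/bin/dak", three tabs, "utils/dak", where the
section shown is illustrative.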