From: Michael Casadevall
Date: Tue, 30 Dec 2008 15:00:33 +0000 (-0500)
Subject: Start of contents scanner
X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=5e1dd4d24cf24e29213ae7d461f3bc8f4f040fb4;p=dak.git

Start of contents scanner

Signed-off-by: Michael Casadevall
---

diff --git a/dak/.generate_contents.py.swp b/dak/.generate_contents.py.swp
new file mode 100644
index 00000000..d4e83290
Binary files /dev/null and b/dak/.generate_contents.py.swp differ
diff --git a/dak/dak.py b/dak/dak.py
index 9dfd026b..77999abe 100755
--- a/dak/dak.py
+++ b/dak/dak.py
@@ -105,6 +105,8 @@ def init():
          "Generate lists of packages per suite for apt-ftparchive"),
         ("generate-releases",
          "Generate Release files"),
+        ("generate-contents",
+         "Generate Contents files"),
         ("generate-index-diffs",
          "Generate .diff/Index files"),
         ("clean-suites",
diff --git a/dak/generate_contents.py b/dak/generate_contents.py
new file mode 100755
index 00000000..9696bb86
--- /dev/null
+++ b/dak/generate_contents.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+# Create all the contents files
+
+# Copyright (C) 2008, 2009 Michael Casadevall
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+# There is the idea to slowly replace Contents files with a new
+# generation of such files that carry more information.
+# Of course that won't help for now, when we still need to generate them :)
+################################################################################
+
+################################################################################
+
+import sys, os, popen2, tempfile, stat, time, pg
+import apt_pkg
+from daklib import database, utils
+from daklib.dak_exceptions import *
+
+################################################################################
+
+Cnf = None
+projectB = None
+out = None
+AptCnf = None
+
+################################################################################
+
+def usage (exit_code=0):
+    print """Usage: dak generate-contents
+Generate Contents files
+
+ -h, --help                 show this help and exit
+ -s, --suite=SUITE          only write file lists for this suite
+"""
+    sys.exit(exit_code)
+
+################################################################################
+
+def handle_dup_files(file_list):
+    # Sort the list, and then handle finding dups in the filenames key
+    file_list.sort()
+
+    # Walk the list, seeing if the current entry and the next one are the same
+    # and if so, join them together
+    result = []
+    for filename, location in file_list:
+        if result and result[-1][0] == filename:
+            # Same file shipped by more than one package: merge the
+            # section/package fields, comma-separated, Contents-style
+            result[-1] = (filename, result[-1][1] + "," + location)
+        else:
+            result.append((filename, location))
+
+    return result
+
+################################################################################
+
+def generate_contents(suites):
+    global projectB, Cnf
+    # OK, the contents information is in the database
+
+    # We need to work out the contents, and print them out on a
+    # per-architecture basis
+
+    # Get our suites, and the architectures
+    for s in suites:
+        suite_id = database.get_suite_id(s)
+
+        q = projectB.query("SELECT architecture FROM suite_architectures WHERE suite = '%d'" % suite_id)
+
+        arch_list = [ ]
+        for r in q.getresult():
+            arch_list.append(r[0])
+
+        arch_all_id = database.get_architecture_id("all")
+
+        # Got the arch all packages; now we need to get the arch-dependent
+        # packages, attach the arch all ones, stick them together, and
+        # write out the result
+
+        for arch_id in arch_list:
+            print "SELECT b.package, c.file, s.section FROM contents c JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE (b.architecture = '%d' OR b.architecture = '%d') AND ba.suite = '%d'" % (arch_id, arch_all_id, suite_id)
+            q = projectB.query("SELECT b.package, c.file, s.section FROM contents c JOIN binaries b ON (b.id=c.binary_pkg) JOIN bin_associations ba ON (b.id=ba.bin) JOIN override o ON (o.package=b.package) JOIN section s ON (s.id=o.section) WHERE (b.architecture = '%d' OR b.architecture = '%d') AND ba.suite = '%d'" % (arch_id, arch_all_id, suite_id))
+
+            # We need to copy the arch_all packages table into arch packages
+
+            # This is for the corner case of arch-dependent packages colliding
+            # with arch all packages only on some architectures.
+            # Ugly, I know ...
+
+            arch_packages = []
+            for r in q.getresult():
+                arch_packages.append((r[1], (r[2] + '/' + r[0])))
+
+            arch_packages = handle_dup_files(arch_packages)
+
+            #for contents in arch_packages:
+            #    print contents[0] + '\t\t\t\t' + contents[1]
+
+################################################################################
+
+def main ():
+    global Cnf, projectB, out
+    out = sys.stdout
+
+    Cnf = utils.get_conf()
+
+    Arguments = [('h',"help","Generate-Contents::Options::Help"),
+                 ('s',"suite","Generate-Contents::Options::Suite","HasArg"),
+                ]
+    for i in [ "help", "suite" ]:
+        if not Cnf.has_key("Generate-Contents::Options::%s" % (i)):
+            Cnf["Generate-Contents::Options::%s" % (i)] = ""
+
+    suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
+    Options = Cnf.SubTree("Generate-Contents::Options")
+
+    if Options["Help"]:
+        usage()
+
+    if Options["Suite"]:
+        suites = utils.split_args(Options["Suite"])
+    else:
+        suites = Cnf.SubTree("Suite").List()
+
+    projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
+    database.init(Cnf, projectB)
+
+    generate_contents(suites)
+
+#######################################################################################
+
+if __name__ == '__main__':
+    main()
diff --git a/dak/process_accepted.py b/dak/process_accepted.py
index 0d5a5387..b6f8a311 100755
--- a/dak/process_accepted.py
+++ b/dak/process_accepted.py
@@ -30,7 +30,7 @@
 ###############################################################################
 
 import errno, fcntl, os, sys, time, re
-import apt_pkg
+import apt_pkg, tarfile, commands
 from daklib import database
 from daklib import logging
 from daklib import queue
@@ -96,6 +96,43 @@ class Urgency_Log:
         else:
             os.unlink(self.log_filename)
 
+
+###############################################################################
+
+def generate_contents_information(filename):
+    # Generate all the contents for the database
+    cmd = "ar t %s" % (filename)
+    (result, output) = commands.getstatusoutput(cmd)
+    if result != 0:
+        reject("%s: 'ar t' invocation failed." % (filename))
+        reject(utils.prefix_multi_line_string(output, " [ar output:] "), "")
+
+    # Ugh ... this is ugly ... Code ripped from process_unchecked.py
+    chunks = output.split('\n')
+    cmd = "ar x %s %s" % (filename, chunks[2])
+    (result, output) = commands.getstatusoutput(cmd)
+    if result != 0:
+        reject("%s: 'ar x' invocation failed." % (filename))
+        reject(utils.prefix_multi_line_string(output, " [ar output:] "), "")
+
+    # Got the deb tarballs, now let's go through and determine what bits
+    # and pieces the deb had ...
+    if chunks[2] == "data.tar.gz":
+        data = tarfile.open("data.tar.gz", "r:gz")
+    elif chunks[2] == "data.tar.bz2":
+        data = tarfile.open("data.tar.bz2", "r:bz2")
+    else:
+        os.remove(chunks[2])
+        reject("couldn't find data.tar.*")
+        return []
+
+    contents = []
+    for tarinfo in data:
+        if not tarinfo.isdir():
+            contents.append(tarinfo.name[2:])
+
+    os.remove(chunks[2])
+    return contents
+
 ###############################################################################
 
 def reject (str, prefix="Rejected: "):
@@ -354,6 +391,7 @@ def install ():
             source = files[file]["source package"]
             source_version = files[file]["source version"]
             filename = files[file]["pool name"] + file
+            contents = generate_contents_information(file)
             if not files[file].has_key("location id") or not files[file]["location id"]:
                 files[file]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[file]["component"],utils.where_am_i())
             if not files[file].has_key("files id") or not files[file]["files id"]:
@@ -368,6 +406,10 @@ def install ():
                 suite_id = database.get_suite_id(suite)
                 projectB.query("INSERT INTO bin_associations (suite, bin) VALUES (%d, currval('binaries_id_seq'))" % (suite_id))
 
+            # insert contents into the database (using 'f', not 'file',
+            # so the enclosing loop's variable is not clobbered)
+            for f in contents:
+                projectB.query("INSERT INTO contents (binary_pkg, file) VALUES (currval('binaries_id_seq'), '%s')" % f)
+
             # If the .orig.tar.gz is in a legacy directory we need to poolify
             # it, so that apt-get source (and anything else that goes by the
             # "Directory:" field in the Sources.gz file) works.
@@ -430,7 +472,6 @@ def install ():
             utils.copy(pkg.changes_file, Cnf["Dir::Root"] + dest)
         for dest in copy_dot_dak.keys():
            utils.copy(Upload.pkg.changes_file[:-8]+".dak", dest)
-        projectB.query("COMMIT WORK")
 
     # Move the .changes into the 'done' directory
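
For readers who want to try the scanning step outside dak: the patch's
generate_contents_information() boils down to listing a .deb's members with
"ar t", extracting the data tarball with "ar x", and walking it with tarfile.
A minimal standalone sketch of the same idea follows, in modern Python with
subprocess in place of the long-deprecated commands module; the function name
and the sample .deb filename are illustrative, not dak's.

    import os, subprocess, tarfile

    def deb_contents(deb_path):
        # 'ar t' lists the .deb members: debian-binary, control.tar.*, data.tar.*
        members = subprocess.check_output(["ar", "t", deb_path]).decode().split()
        data_member = [m for m in members if m.startswith("data.tar")][0]

        # Extract only the data tarball into the working directory
        subprocess.check_call(["ar", "x", deb_path, data_member])
        try:
            # "r:*" lets tarfile pick the compression itself, instead of
            # matching on the filename as the patch does
            with tarfile.open(data_member, "r:*") as data:
                # Member names look like "./usr/bin/foo"; strip the "./" and
                # skip directories, mirroring the patch's tarinfo.name[2:] logic
                return [m.name[2:] for m in data if not m.isdir()]
        finally:
            os.remove(data_member)

    if __name__ == "__main__":
        for path in deb_contents("hello_2.10-2_amd64.deb"):  # any .deb to hand
            print(path)

Matching on the data.tar prefix rather than a fixed member index also sidesteps
the chunks[2] assumption the patch itself flags as ugly.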
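The handle_dup_files() step exists because one path can be shipped by several
packages in the same suite, and a Contents file records all of them on a single
line: the path, then a comma-separated list of section/package locations. A toy
illustration of the intended merge, with invented example data, run as plain
Python:

    rows = [("usr/bin/foo", "utils/foo"),
            ("usr/bin/foo", "otherosfs/foo-ng"),
            ("usr/share/doc/foo/README", "utils/foo")]

    merged = []
    for path, location in sorted(rows):
        if merged and merged[-1][0] == path:
            # Duplicate path: join the locations, Contents-file style
            merged[-1] = (path, merged[-1][1] + "," + location)
        else:
            merged.append((path, location))

    for path, locations in merged:
        print("%-40s %s" % (path, locations))
    # usr/bin/foo                              otherosfs/foo-ng,utils/foo
    # usr/share/doc/foo/README                 utils/foo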