X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=neve;h=a23abb706f51803a34e4967c24d1c1b086825e08;hb=9540d873fa78598454af57f5f8a4875969ed0439;hp=21f7413150405386b6583c8c6b4fca62902611c4;hpb=b34c941cf7245905b1c49abcfb0dfcdf13506e90;p=dak.git diff --git a/neve b/neve index 21f74131..a23abb70 100755 --- a/neve +++ b/neve @@ -1,8 +1,8 @@ #!/usr/bin/env python # Populate the DB -# Copyright (C) 2000, 2001, 2002 James Troup -# $Id: neve,v 1.11 2002-05-18 23:55:15 troup Exp $ +# Copyright (C) 2000, 2001, 2002, 2003, 2004 James Troup +# $Id: neve,v 1.20 2004-06-17 14:59:57 troup Exp $ # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -37,7 +37,7 @@ ############################################################################### -import commands, os, pg, re, select, string, tempfile, time; +import commands, os, pg, re, sys, time; import apt_pkg; import db_access, utils; @@ -53,6 +53,7 @@ files_id_cache = {}; source_cache = {}; arch_all_cache = {}; binary_cache = {}; +location_path_cache = {}; # files_id_serial = 0; source_id_serial = 0; @@ -72,76 +73,22 @@ bin_associations_query_cache = None; source_cache_for_binaries = {}; reject_message = ""; -############################################################################### +################################################################################ -# Our very own version of commands.getouputstatus(), hacked to support -# gpgv's status fd. -def get_status_output(cmd, status_read, status_write): - cmd = ['/bin/sh', '-c', cmd]; - p2cread, p2cwrite = os.pipe(); - c2pread, c2pwrite = os.pipe(); - errout, errin = os.pipe(); - pid = os.fork(); - if pid == 0: - # Child - os.close(0); - os.close(1); - os.dup(p2cread); - os.dup(c2pwrite); - os.close(2); - os.dup(errin); - for i in range(3, 256): - if i != status_write: - try: - os.close(i); - except: - pass; - try: - os.execvp(cmd[0], cmd); - finally: - os._exit(1); - - # parent - os.close(p2cread) - os.dup2(c2pread, c2pwrite); - os.dup2(errout, errin); - - output = status = ""; - while 1: - i, o, e = select.select([c2pwrite, errin, status_read], [], []); - more_data = []; - for fd in i: - r = os.read(fd, 8196); - if len(r) > 0: - more_data.append(fd); - if fd == c2pwrite or fd == errin: - output = output + r; - elif fd == status_read: - status = status + r; - else: - utils.fubar("Unexpected file descriptor [%s] returned from select\n" % (fd)); - if not more_data: - pid, exit_status = os.waitpid(pid, 0) - try: - os.close(status_write); - os.close(status_read); - os.close(c2pwrite); - os.close(p2cwrite); - os.close(errin); - except: - pass; - break; - - return output, status, exit_status; +def usage(exit_code=0): + print """Usage: neve +Initializes a projectB database from an existing archive -############################################################################### + -a, --action actually perform the initalization + -h, --help show this help and exit.""" + sys.exit(exit_code) -def Dict(**dict): return dict +############################################################################### def reject (str, prefix="Rejected: "): global reject_message; if str: - reject_message = reject_message + prefix + str + "\n"; + reject_message += prefix + str + "\n"; ############################################################################### @@ -153,26 +100,26 @@ def check_signature (filename): status_read, status_write = os.pipe(); cmd = "gpgv --status-fd %s --keyring %s --keyring %s %s" \ % (status_write, Cnf["Dinstall::PGPKeyring"], Cnf["Dinstall::GPGKeyring"], filename); - (output, status, exit_status) = get_status_output(cmd, status_read, status_write); + (output, status, exit_status) = utils.gpgv_get_status_output(cmd, status_read, status_write); # Process the status-fd output keywords = {}; bad = internal_error = ""; - for line in string.split(status, '\n'): - line = string.strip(line); + for line in status.split('\n'): + line = line.strip(); if line == "": continue; - split = string.split(line); + split = line.split(); if len(split) < 2: - internal_error = internal_error + "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line); + internal_error += "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line); continue; (gnupg, keyword) = split[:2]; if gnupg != "[GNUPG:]": - internal_error = internal_error + "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg); + internal_error += "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg); continue; args = split[2:]; - if keywords.has_key(keyword) and keyword != "NODATA": - internal_error = internal_error + "found duplicate status token ('%s')." % (keyword); + if keywords.has_key(keyword) and keyword != "NODATA" and keyword != "SIGEXPIRED": + internal_error += "found duplicate status token ('%s').\n" % (keyword); continue; else: keywords[keyword] = args; @@ -186,8 +133,7 @@ def check_signature (filename): # Now check for obviously bad things in the processed output if keywords.has_key("SIGEXPIRED"): - reject("key used to sign %s has expired." % (filename)); - bad = 1; + utils.warn("%s: signing key has expired." % (filename)); if keywords.has_key("KEYREVOKED"): reject("key used to sign %s has been revoked." % (filename)); bad = 1; @@ -219,7 +165,7 @@ def check_signature (filename): # Next check gpgv exited with a zero return code if exit_status and not keywords.has_key("NO_PUBKEY"): reject("gpgv failed while checking %s." % (filename)); - if string.strip(status): + if status.strip(): reject(utils.prefix_multi_line_string(status, " [GPG status-fd output:] "), ""); else: reject(utils.prefix_multi_line_string(output, " [GPG output:] "), ""); @@ -245,13 +191,13 @@ def check_signature (filename): bad = 1; # Finally ensure there's not something we don't recognise - known_keywords = Dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="", - SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="", - NODATA=""); + known_keywords = utils.Dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="", + SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="", + NODATA=""); for keyword in keywords.keys(): if not known_keywords.has_key(keyword): - reject("found unknown status token '%s' from gpgv with args '%s' in %s." % (keyword, repr(keywords[keyword]), filename)); + reject("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], filename)); bad = 1; if bad: @@ -259,7 +205,7 @@ def check_signature (filename): else: return fingerprint; -######################################################################################### +################################################################################ # Prepares a filename or directory (s) to be file.filename by stripping any part of the location (sub) from it. def poolify (s, sub): @@ -305,69 +251,118 @@ def update_suites (): projectB.query("DELETE FROM suite") for suite in Cnf.SubTree("Suite").List(): SubSec = Cnf.SubTree("Suite::%s" %(suite)) - projectB.query("INSERT INTO suite (suite_name) VALUES ('%s')" % string.lower(suite)); + projectB.query("INSERT INTO suite (suite_name) VALUES ('%s')" % suite.lower()); for i in ("Version", "Origin", "Description"): if SubSec.has_key(i): - projectB.query("UPDATE suite SET %s = '%s' WHERE suite_name = '%s'" % (string.lower(i), SubSec[i], string.lower(suite))) + projectB.query("UPDATE suite SET %s = '%s' WHERE suite_name = '%s'" % (i.lower(), SubSec[i], suite.lower())) for architecture in Cnf.ValueList("Suite::%s::Architectures" % (suite)): architecture_id = db_access.get_architecture_id (architecture); projectB.query("INSERT INTO suite_architectures (suite, architecture) VALUES (currval('suite_id_seq'), %d)" % (architecture_id)); -############################################################################### +def update_override_type(): + projectB.query("DELETE FROM override_type"); + for type in Cnf.ValueList("OverrideType"): + projectB.query("INSERT INTO override_type (type) VALUES ('%s')" % (type)); + +def update_priority(): + projectB.query("DELETE FROM priority"); + for priority in Cnf.SubTree("Priority").List(): + projectB.query("INSERT INTO priority (priority, level) VALUES ('%s', %s)" % (priority, Cnf["Priority::%s" % (priority)])); + +def update_section(): + projectB.query("DELETE FROM section"); + for component in Cnf.SubTree("Component").List(): + if Cnf["Natalie::ComponentPosition"] == "prefix": + suffix = ""; + if component != 'main': + prefix = component + '/'; + else: + prefix = ""; + else: + prefix = ""; + component = component.replace("non-US/", ""); + if component != 'main': + suffix = '/' + component; + else: + suffix = ""; + for section in Cnf.ValueList("Section"): + projectB.query("INSERT INTO section (section) VALUES ('%s%s%s')" % (prefix, section, suffix)); + +def get_location_path(directory): + global location_path_cache; + + if location_path_cache.has_key(directory): + return location_path_cache[directory]; + + q = projectB.query("SELECT DISTINCT path FROM location WHERE path ~ '%s'" % (directory)); + try: + path = q.getresult()[0][0]; + except: + utils.fubar("[neve] get_location_path(): Couldn't get path for %s" % (directory)); + location_path_cache[directory] = path; + return path; + +################################################################################ def get_or_set_files_id (filename, size, md5sum, location_id): global files_id_cache, files_id_serial, files_query_cache; - cache_key = string.join((filename, size, md5sum, repr(location_id)), '~') + cache_key = "~".join((filename, size, md5sum, repr(location_id))); if not files_id_cache.has_key(cache_key): - files_id_serial = files_id_serial + 1 - files_query_cache.write("%d\t%s\t%s\t%s\t%d\n" % (files_id_serial, filename, size, md5sum, location_id)); + files_id_serial += 1 + files_query_cache.write("%d\t%s\t%s\t%s\t%d\t\\N\n" % (files_id_serial, filename, size, md5sum, location_id)); files_id_cache[cache_key] = files_id_serial return files_id_cache[cache_key] ############################################################################### -def process_sources (location, filename, suite, component, archive, dsc_dir): +def process_sources (filename, suite, component, archive): global source_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, source_id_serial, src_associations_id_serial, dsc_files_id_serial, source_cache_for_binaries, orig_tar_gz_cache, reject_message; - suite = string.lower(suite); + suite = suite.lower(); suite_id = db_access.get_suite_id(suite); - if suite == 'stable': - testing_id = db_access.get_suite_id("testing"); try: file = utils.open_file (filename); except utils.cant_open_exc: - print "WARNING: can't open '%s'" % (filename); + utils.warn("can't open '%s'" % (filename)); return; Scanner = apt_pkg.ParseTagFile(file); while Scanner.Step() != 0: package = Scanner.Section["package"]; version = Scanner.Section["version"]; - dsc_file = os.path.join(dsc_dir, "%s_%s.dsc" % (package, utils.re_no_epoch.sub('', version))); + directory = Scanner.Section["directory"]; + dsc_file = os.path.join(Cnf["Dir::Root"], directory, "%s_%s.dsc" % (package, utils.re_no_epoch.sub('', version))); + # Sometimes the Directory path is a lie; check in the pool + if not os.path.exists(dsc_file): + if directory.split('/')[0] == "dists": + directory = Cnf["Dir::PoolRoot"] + utils.poolify(package, component); + dsc_file = os.path.join(Cnf["Dir::Root"], directory, "%s_%s.dsc" % (package, utils.re_no_epoch.sub('', version))); + if not os.path.exists(dsc_file): + utils.fubar("%s not found." % (dsc_file)); install_date = time.strftime("%Y-%m-%d", time.localtime(os.path.getmtime(dsc_file))); fingerprint = check_signature(dsc_file); fingerprint_id = db_access.get_or_set_fingerprint_id(fingerprint); if reject_message: utils.fubar("%s: %s" % (dsc_file, reject_message)); maintainer = Scanner.Section["maintainer"] - maintainer = string.replace(maintainer, "'", "\\'"); + maintainer = maintainer.replace("'", "\\'"); maintainer_id = db_access.get_or_set_maintainer_id(maintainer); - directory = Scanner.Section["directory"]; + location = get_location_path(directory.split('/')[0]); location_id = db_access.get_location_id (location, component, archive); - if directory[-1:] != "/": - directory = directory + '/'; + if not directory.endswith("/"): + directory += '/'; directory = poolify (directory, location); - if directory != "" and directory[-1:] != "/": - directory = directory + '/'; + if directory != "" and not directory.endswith("/"): + directory += '/'; no_epoch_version = utils.re_no_epoch.sub('', version); # Add all files referenced by the .dsc to the files table ids = []; - for line in string.split(Scanner.Section["files"],'\n'): + for line in Scanner.Section["files"].split('\n'): id = None; - (md5sum, size, filename) = string.split(string.strip(line)); + (md5sum, size, filename) = line.strip().split(); # Don't duplicate .orig.tar.gz's - if filename[-12:] == ".orig.tar.gz": + if filename.endswith(".orig.tar.gz"): cache_key = "%s~%s~%s" % (filename, size, md5sum); if orig_tar_gz_cache.has_key(cache_key): id = orig_tar_gz_cache[cache_key]; @@ -378,55 +373,49 @@ def process_sources (location, filename, suite, component, archive, dsc_dir): id = get_or_set_files_id (directory + filename, size, md5sum, location_id); ids.append(id); # If this is the .dsc itself; save the ID for later. - if filename[-4:] == ".dsc": + if filename.endswith(".dsc"): files_id = id; filename = directory + package + '_' + no_epoch_version + '.dsc' cache_key = "%s~%s" % (package, version); if not source_cache.has_key(cache_key): nasty_key = "%s~%s" % (package, version) - source_id_serial = source_id_serial + 1; + source_id_serial += 1; if not source_cache_for_binaries.has_key(nasty_key): source_cache_for_binaries[nasty_key] = source_id_serial; tmp_source_id = source_id_serial; source_cache[cache_key] = source_id_serial; source_query_cache.write("%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % (source_id_serial, package, version, maintainer_id, files_id, install_date, fingerprint_id)) for id in ids: - dsc_files_id_serial = dsc_files_id_serial + 1; + dsc_files_id_serial += 1; dsc_files_query_cache.write("%d\t%d\t%d\n" % (dsc_files_id_serial, tmp_source_id,id)); else: tmp_source_id = source_cache[cache_key]; - src_associations_id_serial = src_associations_id_serial + 1; + src_associations_id_serial += 1; src_associations_query_cache.write("%d\t%d\t%d\n" % (src_associations_id_serial, suite_id, tmp_source_id)) - # populate 'testing' with a mirror of 'stable' - if suite == "stable": - src_associations_id_serial = src_associations_id_serial + 1; - src_associations_query_cache.write("%d\t%d\t%d\n" % (src_associations_id_serial, testing_id, tmp_source_id)) file.close(); ############################################################################### -def process_packages (location, filename, suite, component, archive): +def process_packages (filename, suite, component, archive): global arch_all_cache, binary_cache, binaries_id_serial, binaries_query_cache, bin_associations_id_serial, bin_associations_query_cache, reject_message; count_total = 0; count_bad = 0; - suite = string.lower(suite); + suite = suite.lower(); suite_id = db_access.get_suite_id(suite); - if suite == "stable": - testing_id = db_access.get_suite_id("testing"); try: file = utils.open_file (filename); except utils.cant_open_exc: - print "WARNING: can't open '%s'" % (filename); + utils.warn("can't open '%s'" % (filename)); return; Scanner = apt_pkg.ParseTagFile(file); while Scanner.Step() != 0: package = Scanner.Section["package"] version = Scanner.Section["version"] maintainer = Scanner.Section["maintainer"] - maintainer = string.replace(maintainer, "'", "\\'") + maintainer = maintainer.replace("'", "\\'") maintainer_id = db_access.get_or_set_maintainer_id(maintainer); architecture = Scanner.Section["architecture"] architecture_id = db_access.get_architecture_id (architecture); @@ -437,13 +426,14 @@ def process_packages (location, filename, suite, component, archive): else: source = Scanner.Section["source"] source_version = "" - if string.find(source, "(") != -1: + if source.find("(") != -1: m = utils.re_extract_src_version.match(source) source = m.group(1) source_version = m.group(2) if not source_version: source_version = version filename = Scanner.Section["filename"] + location = get_location_path(filename.split('/')[0]); location_id = db_access.get_location_id (location, component, archive) filename = poolify (filename, location) if architecture == "all": @@ -454,29 +444,26 @@ def process_packages (location, filename, suite, component, archive): md5sum = Scanner.Section["md5sum"]; files_id = get_or_set_files_id (filename, size, md5sum, location_id); type = "deb"; # FIXME - cache_key = "%s~%s~%s~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id); + cache_key = "%s~%s~%s~%d~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id, suite_id); if not arch_all_cache.has_key(cache_key): arch_all_cache[cache_key] = 1; cache_key = "%s~%s~%s~%d" % (package, version, repr(source_id), architecture_id); if not binary_cache.has_key(cache_key): if not source_id: source_id = "\N"; - count_bad = count_bad + 1; + count_bad += 1; else: source_id = repr(source_id); - binaries_id_serial = binaries_id_serial + 1; + binaries_id_serial += 1; binaries_query_cache.write("%d\t%s\t%s\t%d\t%s\t%d\t%d\t%s\t%s\n" % (binaries_id_serial, package, version, maintainer_id, source_id, architecture_id, files_id, type, fingerprint_id)); binary_cache[cache_key] = binaries_id_serial; tmp_binaries_id = binaries_id_serial; else: tmp_binaries_id = binary_cache[cache_key]; - bin_associations_id_serial = bin_associations_id_serial + 1; + bin_associations_id_serial += 1; bin_associations_query_cache.write("%d\t%d\t%d\n" % (bin_associations_id_serial, suite_id, tmp_binaries_id)); - if suite == "stable": - bin_associations_id_serial = bin_associations_id_serial + 1; - bin_associations_query_cache.write("%d\t%d\t%d\n" % (bin_associations_id_serial, testing_id, tmp_binaries_id)); - count_total = count_total +1; + count_total += 1; file.close(); if count_bad != 0: @@ -486,24 +473,38 @@ def process_packages (location, filename, suite, component, archive): ############################################################################### -def do_sources(location, prefix, suite, component, server): - temp_filename = tempfile.mktemp(); - fd = os.open(temp_filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, 0700); - os.close(fd); - sources = location + prefix + 'Sources.gz'; +def do_sources(sources, suite, component, server): + temp_filename = utils.temp_filename(); (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (sources, temp_filename)); if (result != 0): utils.fubar("Gunzip invocation failed!\n%s" % (output), result); print 'Processing '+sources+'...'; - process_sources (location, temp_filename, suite, component, server, os.path.dirname(sources)); + process_sources (temp_filename, suite, component, server); os.unlink(temp_filename); ############################################################################### -def main (): +def do_da_do_da (): global Cnf, projectB, query_cache, files_query_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, bin_associations_query_cache, binaries_query_cache; Cnf = utils.get_conf(); + Arguments = [('a', "action", "Neve::Options::Action"), + ('h', "help", "Neve::Options::Help")]; + for i in [ "action", "help" ]: + if not Cnf.has_key("Neve::Options::%s" % (i)): + Cnf["Neve::Options::%s" % (i)] = ""; + + apt_pkg.ParseCommandLine(Cnf, Arguments, sys.argv); + + Options = Cnf.SubTree("Neve::Options") + if Options["Help"]: + usage(); + + if not Options["Action"]: + utils.warn("""no -a/--action given; not doing anything. +Please read the documentation before running this script. +"""); + usage(1); print "Re-Creating DB..." (result, output) = commands.getstatusoutput("psql -f init_pool.sql template1"); @@ -522,6 +523,9 @@ def main (): update_archives(); update_locations(); update_suites(); + update_override_type(); + update_priority(); + update_section(); projectB.query("COMMIT WORK"); files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"files","w"); @@ -538,19 +542,14 @@ def main (): server = SubSec["Archive"]; type = Cnf.Find("Location::%s::Type" % (location)); if type == "legacy-mixed": - prefix = '' + sources = location + 'Sources.gz'; suite = Cnf.Find("Location::%s::Suite" % (location)); - do_sources(location, prefix, suite, "", server); - elif type == "legacy": - for suite in Cnf.SubTree("Location::%s::Suites" % (location)).List(): + do_sources(sources, suite, "", server); + elif type == "legacy" or type == "pool": + for suite in Cnf.ValueList("Location::%s::Suites" % (location)): for component in Cnf.SubTree("Component").List(): - prefix = Cnf.Find("Suite::%s::CodeName" % (suite)) + '/' + component + '/source/' - do_sources(location, prefix, suite, component, server); - elif type == "pool": - continue; -# for component in Cnf.SubTree("Component").List(): -# prefix = component + '/' -# do_sources(location, prefix); + sources = Cnf["Dir::Root"] + "dists/" + Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/source/' + 'Sources.gz'; + do_sources(sources, suite, component, server); else: utils.fubar("Unknown location type ('%s')." % (type)); @@ -564,18 +563,16 @@ def main (): packages = location + 'Packages'; suite = Cnf.Find("Location::%s::Suite" % (location)); print 'Processing '+location+'...'; - process_packages (location, packages, suite, "", server); - elif type == "legacy": + process_packages (packages, suite, "", server); + elif type == "legacy" or type == "pool": for suite in Cnf.ValueList("Location::%s::Suites" % (location)): for component in Cnf.SubTree("Component").List(): - for architecture in Cnf.ValueList("Suite::%s::Architectures" % (suite)): - if architecture == "source" or architecture == "all": - continue; - packages = location + Cnf.Find("Suite::%s::CodeName" % (suite)) + '/' + component + '/binary-' + architecture + '/Packages' + architectures = filter(utils.real_arch, + Cnf.ValueList("Suite::%s::Architectures" % (suite))); + for architecture in architectures: + packages = Cnf["Dir::Root"] + "dists/" + Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/binary-' + architecture + '/Packages' print 'Processing '+packages+'...'; - process_packages (location, packages, suite, component, server); - elif type == "pool": - continue; + process_packages (packages, suite, component, server); files_query_cache.close(); source_query_cache.close(); @@ -609,5 +606,12 @@ def main (): return; +################################################################################ + +def main(): + utils.try_with_debug(do_da_do_da); + +################################################################################ + if __name__ == '__main__': main();