#!/usr/bin/env python
# Populate the DB
-# Copyright (C) 2000, 2001, 2002 James Troup <james@nocrew.org>
-# $Id: neve,v 1.12 2002-06-08 00:23:35 troup Exp $
+# Copyright (C) 2000, 2001, 2002, 2003, 2004 James Troup <james@nocrew.org>
+# $Id: neve,v 1.20 2004-06-17 14:59:57 troup Exp $
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
###############################################################################
-import commands, os, pg, re, select, string, tempfile, time;
+import commands, os, pg, re, sys, time;
import apt_pkg;
import db_access, utils;
source_cache = {};
arch_all_cache = {};
binary_cache = {};
+location_path_cache = {};
#
files_id_serial = 0;
source_id_serial = 0;
source_cache_for_binaries = {};
reject_message = "";
-###############################################################################
+################################################################################
-# Our very own version of commands.getouputstatus(), hacked to support
-# gpgv's status fd.
-def get_status_output(cmd, status_read, status_write):
- cmd = ['/bin/sh', '-c', cmd];
- p2cread, p2cwrite = os.pipe();
- c2pread, c2pwrite = os.pipe();
- errout, errin = os.pipe();
- pid = os.fork();
- if pid == 0:
- # Child
- os.close(0);
- os.close(1);
- os.dup(p2cread);
- os.dup(c2pwrite);
- os.close(2);
- os.dup(errin);
- for i in range(3, 256):
- if i != status_write:
- try:
- os.close(i);
- except:
- pass;
- try:
- os.execvp(cmd[0], cmd);
- finally:
- os._exit(1);
-
- # parent
- os.close(p2cread)
- os.dup2(c2pread, c2pwrite);
- os.dup2(errout, errin);
-
- output = status = "";
- while 1:
- i, o, e = select.select([c2pwrite, errin, status_read], [], []);
- more_data = [];
- for fd in i:
- r = os.read(fd, 8196);
- if len(r) > 0:
- more_data.append(fd);
- if fd == c2pwrite or fd == errin:
- output = output + r;
- elif fd == status_read:
- status = status + r;
- else:
- utils.fubar("Unexpected file descriptor [%s] returned from select\n" % (fd));
- if not more_data:
- pid, exit_status = os.waitpid(pid, 0)
- try:
- os.close(status_write);
- os.close(status_read);
- os.close(c2pwrite);
- os.close(p2cwrite);
- os.close(errin);
- except:
- pass;
- break;
-
- return output, status, exit_status;
+# Print the command-line help text on stdout, then terminate the process
+# with exit_code (0 unless the caller passes something else).
+def usage(exit_code=0):
+    print """Usage: neve
+Initializes a projectB database from an existing archive
-###############################################################################
+ -a, --action actually perform the initialization
+ -h, --help show this help and exit."""
+    sys.exit(exit_code)
-def Dict(**dict): return dict
+###############################################################################
+# Append one message to the module-level reject_message buffer.
+#   str    - the message text; nothing is recorded when it is empty/false
+#   prefix - text placed in front of the message (default "Rejected: ")
+# Every recorded message is terminated with a newline.
def reject (str, prefix="Rejected: "):
global reject_message;
if str:
- reject_message = reject_message + prefix + str + "\n";
+ reject_message += prefix + str + "\n";
###############################################################################
status_read, status_write = os.pipe();
cmd = "gpgv --status-fd %s --keyring %s --keyring %s %s" \
% (status_write, Cnf["Dinstall::PGPKeyring"], Cnf["Dinstall::GPGKeyring"], filename);
- (output, status, exit_status) = get_status_output(cmd, status_read, status_write);
+ (output, status, exit_status) = utils.gpgv_get_status_output(cmd, status_read, status_write);
# Process the status-fd output
keywords = {};
bad = internal_error = "";
- for line in string.split(status, '\n'):
- line = string.strip(line);
+ for line in status.split('\n'):
+ line = line.strip();
if line == "":
continue;
- split = string.split(line);
+ split = line.split();
if len(split) < 2:
- internal_error = internal_error + "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line);
+ internal_error += "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line);
continue;
(gnupg, keyword) = split[:2];
if gnupg != "[GNUPG:]":
- internal_error = internal_error + "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg);
+ internal_error += "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg);
continue;
args = split[2:];
if keywords.has_key(keyword) and keyword != "NODATA" and keyword != "SIGEXPIRED":
- internal_error = internal_error + "found duplicate status token ('%s')." % (keyword);
+ internal_error += "found duplicate status token ('%s').\n" % (keyword);
continue;
else:
keywords[keyword] = args;
# Next check gpgv exited with a zero return code
if exit_status and not keywords.has_key("NO_PUBKEY"):
reject("gpgv failed while checking %s." % (filename));
- if string.strip(status):
+ if status.strip():
reject(utils.prefix_multi_line_string(status, " [GPG status-fd output:] "), "");
else:
reject(utils.prefix_multi_line_string(output, " [GPG output:] "), "");
bad = 1;
# Finally ensure there's not something we don't recognise
- known_keywords = Dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
- SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
- NODATA="");
+ known_keywords = utils.Dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
+ SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
+ NODATA="");
for keyword in keywords.keys():
if not known_keywords.has_key(keyword):
- reject("found unknown status token '%s' from gpgv with args '%s' in %s." % (keyword, repr(keywords[keyword]), filename));
+ reject("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], filename));
bad = 1;
if bad:
else:
return fingerprint;
-#########################################################################################
+################################################################################
# Prepares a filename or directory (s) to be file.filename by stripping any part of the location (sub) from it.
def poolify (s, sub):
projectB.query("DELETE FROM suite")
for suite in Cnf.SubTree("Suite").List():
SubSec = Cnf.SubTree("Suite::%s" %(suite))
- projectB.query("INSERT INTO suite (suite_name) VALUES ('%s')" % string.lower(suite));
+ projectB.query("INSERT INTO suite (suite_name) VALUES ('%s')" % suite.lower());
for i in ("Version", "Origin", "Description"):
if SubSec.has_key(i):
- projectB.query("UPDATE suite SET %s = '%s' WHERE suite_name = '%s'" % (string.lower(i), SubSec[i], string.lower(suite)))
+ projectB.query("UPDATE suite SET %s = '%s' WHERE suite_name = '%s'" % (i.lower(), SubSec[i], suite.lower()))
for architecture in Cnf.ValueList("Suite::%s::Architectures" % (suite)):
architecture_id = db_access.get_architecture_id (architecture);
projectB.query("INSERT INTO suite_architectures (suite, architecture) VALUES (currval('suite_id_seq'), %d)" % (architecture_id));
+# Empty the override_type table and refill it with one row per value of the
+# "OverrideType" configuration list.  The function no longer issues its own
+# BEGIN WORK / COMMIT WORK around these statements.
def update_override_type():
- projectB.query("BEGIN WORK");
projectB.query("DELETE FROM override_type");
for type in Cnf.ValueList("OverrideType"):
projectB.query("INSERT INTO override_type (type) VALUES ('%s')" % (type));
- projectB.query("COMMIT WORK");
+# Empty the priority table and refill it from the "Priority" configuration
+# subtree: each entry name becomes the priority and the value stored at
+# Priority::<name> becomes its level.  The function no longer issues its own
+# BEGIN WORK / COMMIT WORK around these statements.
def update_priority():
- projectB.query("BEGIN WORK");
projectB.query("DELETE FROM priority");
for priority in Cnf.SubTree("Priority").List():
projectB.query("INSERT INTO priority (priority, level) VALUES ('%s', %s)" % (priority, Cnf["Priority::%s" % (priority)]));
- projectB.query("COMMIT WORK");
-###############################################################################
+# Rebuild the section table: one row for every (component, section) pair.
+# Natalie::ComponentPosition selects whether the component is rendered as a
+# "component/" prefix or a "/component" suffix around the section name; the
+# 'main' component contributes neither.
+def update_section():
+    projectB.query("DELETE FROM section");
+    for component in Cnf.SubTree("Component").List():
+        # Start with nothing on either side and only fill in what applies.
+        prefix = suffix = "";
+        if Cnf["Natalie::ComponentPosition"] == "prefix":
+            if component != 'main':
+                prefix = component + '/';
+        else:
+            # In suffix mode "non-US/" is removed from the component name.
+            component = component.replace("non-US/", "");
+            if component != 'main':
+                suffix = '/' + component;
+        for section in Cnf.ValueList("Section"):
+            projectB.query("INSERT INTO section (section) VALUES ('%s%s%s')" % (prefix, section, suffix));
+
+# Return the location path matching `directory`, remembering the answer in
+# location_path_cache so each distinct directory is queried at most once.
+# `directory` is matched as a regular expression (SQL `~`) against
+# location.path and the first row returned is used.
+# Calls utils.fubar() when the query returns no rows.
+def get_location_path(directory):
+    global location_path_cache;
+
+    if location_path_cache.has_key(directory):
+        return location_path_cache[directory];
+
+    q = projectB.query("SELECT DISTINCT path FROM location WHERE path ~ '%s'" % (directory));
+    try:
+        path = q.getresult()[0][0];
+    except IndexError:
+        # Only an empty result set means "no such path"; interrupts and real
+        # database errors must propagate instead of being reported as this.
+        utils.fubar("[neve] get_location_path(): Couldn't get path for %s" % (directory));
+    location_path_cache[directory] = path;
+    return path;
+
+################################################################################
+# Return the files id for (filename, size, md5sum, location_id), allocating
+# a new one the first time that combination is seen.
+# A new id comes from the files_id_serial counter and a tab-separated row is
+# appended to files_query_cache; the id is remembered in files_id_cache, so
+# repeated calls with the same arguments return the same id without writing
+# another row.
+# NOTE(review): the trailing "\N" column is presumably a NULL marker for the
+# bulk loader -- confirm against the files table definition.
def get_or_set_files_id (filename, size, md5sum, location_id):
global files_id_cache, files_id_serial, files_query_cache;
- cache_key = string.join((filename, size, md5sum, repr(location_id)), '~')
+ cache_key = "~".join((filename, size, md5sum, repr(location_id)));
if not files_id_cache.has_key(cache_key):
- files_id_serial = files_id_serial + 1
- files_query_cache.write("%d\t%s\t%s\t%s\t%d\n" % (files_id_serial, filename, size, md5sum, location_id));
+ files_id_serial += 1
+ files_query_cache.write("%d\t%s\t%s\t%s\t%d\t\\N\n" % (files_id_serial, filename, size, md5sum, location_id));
files_id_cache[cache_key] = files_id_serial
return files_id_cache[cache_key]
###############################################################################
-def process_sources (location, filename, suite, component, archive, dsc_dir):
+def process_sources (filename, suite, component, archive):
global source_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, source_id_serial, src_associations_id_serial, dsc_files_id_serial, source_cache_for_binaries, orig_tar_gz_cache, reject_message;
- suite = string.lower(suite);
+ suite = suite.lower();
suite_id = db_access.get_suite_id(suite);
- if suite == 'stable':
- testing_id = db_access.get_suite_id("testing");
try:
file = utils.open_file (filename);
except utils.cant_open_exc:
- print "WARNING: can't open '%s'" % (filename);
+ utils.warn("can't open '%s'" % (filename));
return;
Scanner = apt_pkg.ParseTagFile(file);
while Scanner.Step() != 0:
package = Scanner.Section["package"];
version = Scanner.Section["version"];
- dsc_file = os.path.join(dsc_dir, "%s_%s.dsc" % (package, utils.re_no_epoch.sub('', version)));
+ directory = Scanner.Section["directory"];
+ dsc_file = os.path.join(Cnf["Dir::Root"], directory, "%s_%s.dsc" % (package, utils.re_no_epoch.sub('', version)));
+ # Sometimes the Directory path is a lie; check in the pool
+ if not os.path.exists(dsc_file):
+ if directory.split('/')[0] == "dists":
+ directory = Cnf["Dir::PoolRoot"] + utils.poolify(package, component);
+ dsc_file = os.path.join(Cnf["Dir::Root"], directory, "%s_%s.dsc" % (package, utils.re_no_epoch.sub('', version)));
+ if not os.path.exists(dsc_file):
+ utils.fubar("%s not found." % (dsc_file));
install_date = time.strftime("%Y-%m-%d", time.localtime(os.path.getmtime(dsc_file)));
fingerprint = check_signature(dsc_file);
fingerprint_id = db_access.get_or_set_fingerprint_id(fingerprint);
if reject_message:
utils.fubar("%s: %s" % (dsc_file, reject_message));
maintainer = Scanner.Section["maintainer"]
- maintainer = string.replace(maintainer, "'", "\\'");
+ maintainer = maintainer.replace("'", "\\'");
maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
- directory = Scanner.Section["directory"];
+ location = get_location_path(directory.split('/')[0]);
location_id = db_access.get_location_id (location, component, archive);
- if directory[-1:] != "/":
- directory = directory + '/';
+ if not directory.endswith("/"):
+ directory += '/';
directory = poolify (directory, location);
- if directory != "" and directory[-1:] != "/":
- directory = directory + '/';
+ if directory != "" and not directory.endswith("/"):
+ directory += '/';
no_epoch_version = utils.re_no_epoch.sub('', version);
# Add all files referenced by the .dsc to the files table
ids = [];
- for line in string.split(Scanner.Section["files"],'\n'):
+ for line in Scanner.Section["files"].split('\n'):
id = None;
- (md5sum, size, filename) = string.split(string.strip(line));
+ (md5sum, size, filename) = line.strip().split();
# Don't duplicate .orig.tar.gz's
- if filename[-12:] == ".orig.tar.gz":
+ if filename.endswith(".orig.tar.gz"):
cache_key = "%s~%s~%s" % (filename, size, md5sum);
if orig_tar_gz_cache.has_key(cache_key):
id = orig_tar_gz_cache[cache_key];
id = get_or_set_files_id (directory + filename, size, md5sum, location_id);
ids.append(id);
# If this is the .dsc itself; save the ID for later.
- if filename[-4:] == ".dsc":
+ if filename.endswith(".dsc"):
files_id = id;
filename = directory + package + '_' + no_epoch_version + '.dsc'
cache_key = "%s~%s" % (package, version);
if not source_cache.has_key(cache_key):
nasty_key = "%s~%s" % (package, version)
- source_id_serial = source_id_serial + 1;
+ source_id_serial += 1;
if not source_cache_for_binaries.has_key(nasty_key):
source_cache_for_binaries[nasty_key] = source_id_serial;
tmp_source_id = source_id_serial;
source_cache[cache_key] = source_id_serial;
source_query_cache.write("%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % (source_id_serial, package, version, maintainer_id, files_id, install_date, fingerprint_id))
for id in ids:
- dsc_files_id_serial = dsc_files_id_serial + 1;
+ dsc_files_id_serial += 1;
dsc_files_query_cache.write("%d\t%d\t%d\n" % (dsc_files_id_serial, tmp_source_id,id));
else:
tmp_source_id = source_cache[cache_key];
- src_associations_id_serial = src_associations_id_serial + 1;
+ src_associations_id_serial += 1;
src_associations_query_cache.write("%d\t%d\t%d\n" % (src_associations_id_serial, suite_id, tmp_source_id))
- # populate 'testing' with a mirror of 'stable'
- if suite == "stable":
- src_associations_id_serial = src_associations_id_serial + 1;
- src_associations_query_cache.write("%d\t%d\t%d\n" % (src_associations_id_serial, testing_id, tmp_source_id))
file.close();
###############################################################################
-def process_packages (location, filename, suite, component, archive):
+def process_packages (filename, suite, component, archive):
global arch_all_cache, binary_cache, binaries_id_serial, binaries_query_cache, bin_associations_id_serial, bin_associations_query_cache, reject_message;
count_total = 0;
count_bad = 0;
- suite = string.lower(suite);
+ suite = suite.lower();
suite_id = db_access.get_suite_id(suite);
- if suite == "stable":
- testing_id = db_access.get_suite_id("testing");
try:
file = utils.open_file (filename);
except utils.cant_open_exc:
- print "WARNING: can't open '%s'" % (filename);
+ utils.warn("can't open '%s'" % (filename));
return;
Scanner = apt_pkg.ParseTagFile(file);
while Scanner.Step() != 0:
package = Scanner.Section["package"]
version = Scanner.Section["version"]
maintainer = Scanner.Section["maintainer"]
- maintainer = string.replace(maintainer, "'", "\\'")
+ maintainer = maintainer.replace("'", "\\'")
maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
architecture = Scanner.Section["architecture"]
architecture_id = db_access.get_architecture_id (architecture);
else:
source = Scanner.Section["source"]
source_version = ""
- if string.find(source, "(") != -1:
+ if source.find("(") != -1:
m = utils.re_extract_src_version.match(source)
source = m.group(1)
source_version = m.group(2)
if not source_version:
source_version = version
filename = Scanner.Section["filename"]
+ location = get_location_path(filename.split('/')[0]);
location_id = db_access.get_location_id (location, component, archive)
filename = poolify (filename, location)
if architecture == "all":
md5sum = Scanner.Section["md5sum"];
files_id = get_or_set_files_id (filename, size, md5sum, location_id);
type = "deb"; # FIXME
- cache_key = "%s~%s~%s~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id);
+ cache_key = "%s~%s~%s~%d~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id, suite_id);
if not arch_all_cache.has_key(cache_key):
arch_all_cache[cache_key] = 1;
cache_key = "%s~%s~%s~%d" % (package, version, repr(source_id), architecture_id);
if not binary_cache.has_key(cache_key):
if not source_id:
source_id = "\N";
- count_bad = count_bad + 1;
+ count_bad += 1;
else:
source_id = repr(source_id);
- binaries_id_serial = binaries_id_serial + 1;
+ binaries_id_serial += 1;
binaries_query_cache.write("%d\t%s\t%s\t%d\t%s\t%d\t%d\t%s\t%s\n" % (binaries_id_serial, package, version, maintainer_id, source_id, architecture_id, files_id, type, fingerprint_id));
binary_cache[cache_key] = binaries_id_serial;
tmp_binaries_id = binaries_id_serial;
else:
tmp_binaries_id = binary_cache[cache_key];
- bin_associations_id_serial = bin_associations_id_serial + 1;
+ bin_associations_id_serial += 1;
bin_associations_query_cache.write("%d\t%d\t%d\n" % (bin_associations_id_serial, suite_id, tmp_binaries_id));
- if suite == "stable":
- bin_associations_id_serial = bin_associations_id_serial + 1;
- bin_associations_query_cache.write("%d\t%d\t%d\n" % (bin_associations_id_serial, testing_id, tmp_binaries_id));
- count_total = count_total +1;
+ count_total += 1;
file.close();
if count_bad != 0:
###############################################################################
+# Decompress the gzipped index `sources` into a temporary file with gunzip
+# and hand that file to process_sources() for the given suite, component
+# and server.  Aborts through utils.fubar() when gunzip exits non-zero; the
+# temporary file is unlinked once processing has finished.
+# NOTE(review): `sources` is interpolated into the shell command unquoted --
+# presumably it only ever comes from trusted configuration; confirm.
-def do_sources(location, prefix, suite, component, server):
- temp_filename = tempfile.mktemp();
- fd = os.open(temp_filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, 0700);
- os.close(fd);
- sources = location + prefix + 'Sources.gz';
+def do_sources(sources, suite, component, server):
+ temp_filename = utils.temp_filename();
(result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (sources, temp_filename));
if (result != 0):
utils.fubar("Gunzip invocation failed!\n%s" % (output), result);
print 'Processing '+sources+'...';
- process_sources (location, temp_filename, suite, component, server, os.path.dirname(sources));
+ process_sources (temp_filename, suite, component, server);
os.unlink(temp_filename);
###############################################################################
-def main ():
+def do_da_do_da ():
global Cnf, projectB, query_cache, files_query_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, bin_associations_query_cache, binaries_query_cache;
Cnf = utils.get_conf();
+ Arguments = [('a', "action", "Neve::Options::Action"),
+ ('h', "help", "Neve::Options::Help")];
+ for i in [ "action", "help" ]:
+ if not Cnf.has_key("Neve::Options::%s" % (i)):
+ Cnf["Neve::Options::%s" % (i)] = "";
+
+ apt_pkg.ParseCommandLine(Cnf, Arguments, sys.argv);
+
+ Options = Cnf.SubTree("Neve::Options")
+ if Options["Help"]:
+ usage();
+
+ if not Options["Action"]:
+ utils.warn("""no -a/--action given; not doing anything.
+Please read the documentation before running this script.
+""");
+ usage(1);
print "Re-Creating DB..."
(result, output) = commands.getstatusoutput("psql -f init_pool.sql template1");
update_suites();
update_override_type();
update_priority();
+ update_section();
projectB.query("COMMIT WORK");
files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"files","w");
server = SubSec["Archive"];
type = Cnf.Find("Location::%s::Type" % (location));
if type == "legacy-mixed":
- prefix = ''
+ sources = location + 'Sources.gz';
suite = Cnf.Find("Location::%s::Suite" % (location));
- do_sources(location, prefix, suite, "", server);
- elif type == "legacy":
+ do_sources(sources, suite, "", server);
+ elif type == "legacy" or type == "pool":
for suite in Cnf.ValueList("Location::%s::Suites" % (location)):
for component in Cnf.SubTree("Component").List():
- prefix = Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/source/'
- do_sources(location, prefix, suite, component, server);
- elif type == "pool":
- continue;
-# for component in Cnf.SubTree("Component").List():
-# prefix = component + '/'
-# do_sources(location, prefix);
+ sources = Cnf["Dir::Root"] + "dists/" + Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/source/' + 'Sources.gz';
+ do_sources(sources, suite, component, server);
else:
utils.fubar("Unknown location type ('%s')." % (type));
packages = location + 'Packages';
suite = Cnf.Find("Location::%s::Suite" % (location));
print 'Processing '+location+'...';
- process_packages (location, packages, suite, "", server);
- elif type == "legacy":
+ process_packages (packages, suite, "", server);
+ elif type == "legacy" or type == "pool":
for suite in Cnf.ValueList("Location::%s::Suites" % (location)):
for component in Cnf.SubTree("Component").List():
architectures = filter(utils.real_arch,
Cnf.ValueList("Suite::%s::Architectures" % (suite)));
for architecture in architectures:
- packages = location + Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/binary-' + architecture + '/Packages'
+ packages = Cnf["Dir::Root"] + "dists/" + Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/binary-' + architecture + '/Packages'
print 'Processing '+packages+'...';
- process_packages (location, packages, suite, component, server);
- elif type == "pool":
- continue;
+ process_packages (packages, suite, component, server);
files_query_cache.close();
source_query_cache.close();
return;
+################################################################################
+
+# Script entry point: runs do_da_do_da() through utils.try_with_debug().
+# NOTE(review): try_with_debug presumably adds debugging output when the
+# wrapped function raises -- confirm in utils.
+def main():
+ utils.try_with_debug(do_da_do_da);
+
+################################################################################
+
if __name__ == '__main__':
main();