+#!/usr/bin/env python
+
+# Populate the DB
+# Copyright (C) 2000 James Troup <james@nocrew.org>
+# $Id: fix.1,v 1.1 2000-11-24 00:20:11 troup Exp $
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+# 04:36|<aj> elmo: you're making me waste 5 seconds per architecture!!!!!! YOU BASTARD!!!!!
+
+################################################################################
+
+# This code is a horrible mess for two reasons:
+
+# (o) For Debian's usage, it's doing something like 160k INSERTs,
+# even on auric, that makes the program unusable unless we get
+# involved in all sorts of silly optimization games (local dicts to avoid
+# redundant SELECTs, using COPY FROM rather than INSERTs etc.)
+
+# (o) It's very site specific, because I don't expect to use this
+# script again in a hurry, and I don't want to spend any more time
+# on it than absolutely necessary.
+
+###############################################################################################################
+
+import commands, os, pg, re, sys, string, tempfile
+import apt_pkg
+import db_access, utils
+
+###############################################################################################################
+
# Matches a "binary-<something>" path component (everything up to the next
# "/").  process_packages() substitutes "binary-all" for it when a package's
# architecture is "all".
re_arch_from_filename = re.compile(r"binary-[^/]+")
+
+###############################################################################################################
+
# Module-wide state shared between main() and the helper functions.

# apt_pkg configuration object; assigned by main().
Cnf = None
# Database connection handle; None until a connection has been made.
projectB = None
# "filename~size~md5sum~location_id" -> files id; see get_or_set_files_id().
files_id_cache = {}
# Not referenced by the functions visible in this file.
source_cache = {}
# Full binary record key -> 1; used by process_packages() to skip records it
# has already seen.
arch_all_cache = {}
# "package~version~architecture_id" -> (size, md5sum) of the first record seen;
# see process_packages().
binary_cache = {}
#
# Last id handed out by get_or_set_files_id().
files_id_serial = 0
# The remaining *_id_serial counters are not modified by the visible code.
source_id_serial = 0
src_associations_id_serial = 0
dsc_files_id_serial = 0
# Output file handles; main() opens one per table under Neve::ExportDir.
files_query_cache = None
source_query_cache = None
src_associations_query_cache = None
dsc_files_query_cache = None
# Not referenced by the functions visible in this file.
orig_tar_gz_cache = {}
#
binaries_id_serial = 0
binaries_query_cache = None
bin_associations_id_serial = 0
bin_associations_query_cache = None
#
# "source~source_version" -> source id; read (never filled) by
# process_packages() in the visible code.
source_cache_for_binaries = {}
+
+###############################################################################################################
+
+# Prepares a filename or directory (s) to be file.filename by stripping any part of the location (sub) from it.
def poolify(s, sub):
    """Strip from the front of `s` whatever part of `sub` it starts with.

    The suffixes of `sub` are tried from longest to shortest; the first one
    that `s` begins with is removed from the front of `s`.

    s   -- filename or directory to trim
    sub -- location whose (possibly partial) trailing part should be removed

    Returns the trimmed string, or `s` unchanged when no non-empty suffix of
    `sub` is a prefix of `s` (which includes the case of an empty `sub`).
    """
    for start in range(len(sub)):
        tail = sub[start:]
        if s.startswith(tail):
            return s[len(tail):]
    return s
+
+##############################################################################################################
+
def get_or_set_files_id(filename, size, md5sum, location_id):
    """Return the files id for a file, allocating and exporting one if new.

    A file is identified by the combination of all four arguments.  The first
    time a combination is seen, the next value of the module-level
    `files_id_serial` counter is assigned to it, a tab-separated row
    (id, filename, size, md5sum, location_id) is written to
    `files_query_cache`, and the id is remembered in `files_id_cache`.

    filename    -- file name (string)
    size        -- file size, as a string
    md5sum      -- md5sum, as a string
    location_id -- integer location id

    Returns the integer id associated with the combination.
    """
    global files_id_cache, files_id_serial, files_query_cache

    cache_key = '~'.join((filename, size, md5sum, repr(location_id)))
    if cache_key not in files_id_cache:
        files_id_serial = files_id_serial + 1
        files_query_cache.write("%d\t%s\t%s\t%s\t%d\n" % (files_id_serial, filename, size, md5sum, location_id))
        files_id_cache[cache_key] = files_id_serial

    return files_id_cache[cache_key]
+
+##############################################################################################################
def process_packages(location, filename, suite, component, archive):
    """Scan one Packages file and report binaries with conflicting checksums.

    For every stanza the file entry is registered via get_or_set_files_id().
    The first (size, md5sum) seen for each package/version/architecture is
    remembered in `binary_cache`; a later, distinct record for the same
    package/version/architecture whose size or md5sum differs is printed.

    location  -- location path; stripped from each stanza's Filename with
                 poolify() and used to look up the location id
    filename  -- path of the Packages file to read
    suite     -- suite name (lower-cased before use)
    component -- component name passed to db_access.get_location_id()
    archive   -- archive name passed to db_access.get_location_id()

    Returns None.  If the Packages file cannot be opened a warning is printed
    and nothing else is done.
    """
    global Cnf, projectB
    global arch_all_cache, binary_cache, binaries_id_serial, binaries_query_cache
    global bin_associations_id_serial, bin_associations_query_cache

    # Read the configuration and connect to the database only once; this used
    # to be redone on every call (i.e. once per Packages file).
    if projectB is None:
        apt_pkg.init()
        if Cnf is None:
            Cnf = apt_pkg.newConfiguration()
            apt_pkg.ReadConfigFileISC(Cnf, '/home/troup/katie/katie.conf')
        projectB = pg.connect('projectb', 'localhost', -1, None, None, 'postgres')
        db_access.init(Cnf, projectB)

    count_total = 0
    count_bad = 0
    suite = suite.lower()
    # The results of the next three lookups are not used below.  The calls are
    # kept because whatever effects they have inside db_access / Cnf (e.g.
    # failing for an unknown suite) are not visible from this file.
    suite_id = db_access.get_suite_id(suite)
    if suite == "stable":
        testing_id = db_access.get_suite_id("testing")
    suite_codename = Cnf["Suite::%s::CodeName" % (suite)]

    try:
        packages_file = utils.open_file(filename, "r")
    except utils.cant_open_exc:
        print("WARNING: can't open '%s'" % (filename))
        return

    try:
        Scanner = apt_pkg.ParseTagFile(packages_file)
        while Scanner.Step() != 0:
            section = Scanner.Section
            package = section["package"]
            version = section["version"]
            # Backslash-escape single quotes before handing the name to
            # db_access -- presumably it ends up in a hand-built SQL string;
            # confirm in db_access.
            maintainer = section["maintainer"].replace("'", "\\'")
            maintainer_id = db_access.get_or_set_maintainer_id(maintainer)
            architecture = section["architecture"]
            architecture_id = db_access.get_architecture_id(architecture)

            # A missing Source field means the source is named after the
            # package; "name (version)" carries an explicit source version.
            if "source" in section:
                source = section["source"]
            else:
                source = package
            source_version = ""
            if source.find("(") != -1:
                m = utils.re_extract_src_version.match(source)
                source = m.group(1)
                source_version = m.group(2)
            if not source_version:
                source_version = version

            raw_filename = section["filename"]
            location_id = db_access.get_location_id(location, component, archive)
            pool_filename = poolify(raw_filename, location)
            if architecture == "all":
                pool_filename = re_arch_from_filename.sub("binary-all", pool_filename)

            source_id = source_cache_for_binaries.get("%s~%s" % (source, source_version), None)
            size = section["size"]
            md5sum = section["md5sum"]
            files_id = get_or_set_files_id(pool_filename, size, md5sum, location_id)

            # Only look at each distinct record once.
            record_key = "%s~%s~%s~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id)
            if record_key not in arch_all_cache:
                arch_all_cache[record_key] = 1
                binary_key = "%s~%s~%d" % (package, version, architecture_id)
                if binary_key not in binary_cache:
                    binary_cache[binary_key] = (size, md5sum)
                else:
                    (oldsize, oldmd5sum) = binary_cache[binary_key]
                    if oldsize != size or oldmd5sum != md5sum:
                        print("%s: %s vs. %s and %s vs. %s" % (raw_filename, oldsize, size, oldmd5sum, md5sum))
                        # NOTE(review): count_bad is deliberately not
                        # incremented here (the increment was disabled in the
                        # original), so the summary below never prints.

            count_total = count_total + 1
    finally:
        # Close the Packages file even if a stanza fails to parse.
        packages_file.close()

    if count_bad != 0:
        print("Found %d bad." % (count_bad))
+
+##############################################################################################################
+
def main():
    """Read the katie configuration and process every legacy Packages file.

    Opens the six export files under Cnf["Neve::ExportDir"] (binding them to
    the module-level *_query_cache names), then walks the "Location" section
    of the configuration.  Only locations whose Archive is "ftp-master" are
    handled:

      legacy-mixed -- one Packages file directly under the location
      legacy       -- one Packages file per suite/component/architecture
                      (the "source" and "all" architectures are skipped)
      anything else (including "pool") -- ignored

    The export files are closed on the way out, whether or not processing
    succeeded.  Returns None.
    """
    global Cnf, projectB, query_cache
    global files_query_cache, source_query_cache
    global src_associations_query_cache, dsc_files_query_cache
    global bin_associations_query_cache, binaries_query_cache

    apt_pkg.init()

    Cnf = apt_pkg.newConfiguration()
    apt_pkg.ReadConfigFileISC(Cnf, '/home/troup/katie/katie.conf')

    export_dir = Cnf["Neve::ExportDir"]
    opened = []
    try:
        files_query_cache = utils.open_file(export_dir + "files", "w")
        opened.append(files_query_cache)
        source_query_cache = utils.open_file(export_dir + "source", "w")
        opened.append(source_query_cache)
        src_associations_query_cache = utils.open_file(export_dir + "src_associations", "w")
        opened.append(src_associations_query_cache)
        dsc_files_query_cache = utils.open_file(export_dir + "dsc_files", "w")
        opened.append(dsc_files_query_cache)
        binaries_query_cache = utils.open_file(export_dir + "binaries", "w")
        opened.append(binaries_query_cache)
        bin_associations_query_cache = utils.open_file(export_dir + "bin_associations", "w")
        opened.append(bin_associations_query_cache)

        # Process Packages files to populate `binaries' and friends

        for location in Cnf.SubTree("Location").List():
            SubSec = Cnf.SubTree("Location::%s" % (location))
            server = SubSec["Archive"]
            if server != "ftp-master":  # FIXME, don't hard code
                continue
            location_type = Cnf.Find("Location::%s::Type" % (location))
            if location_type == "legacy-mixed":
                packages = location + 'Packages'
                suite = Cnf.Find("Location::%s::Suite" % (location))
                process_packages(location, packages, suite, "", server)
            elif location_type == "legacy":
                for suite in Cnf.SubTree("Location::%s::Suites" % (location)).List():
                    codename = Cnf.Find("Suite::%s::CodeName" % (suite))
                    for component in Cnf.SubTree("Component").List():
                        for architecture in Cnf.SubTree("Suite::%s::Architectures" % (suite)).List():
                            if architecture == "source" or architecture == "all":
                                continue
                            packages = location + codename + '/' + component + '/binary-' + architecture + '/Packages'
                            process_packages(location, packages, suite, component, server)
            # "pool" (and any other type) locations have no legacy Packages
            # files to process.
    finally:
        # Close whatever was opened, in the order it was opened.
        for handle in opened:
            handle.close()

    return
+
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()