+++ /dev/null
-#!/usr/bin/env python
-
-# Populate the DB
-# Copyright (C) 2000 James Troup <james@nocrew.org>
-# $Id: fix.1,v 1.1 2000-11-24 00:20:11 troup Exp $
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-################################################################################
-
-# 04:36|<aj> elmo: you're making me waste 5 seconds per architecture!!!!!! YOU BASTARD!!!!!
-
-################################################################################
-
-# This code is a horrible mess for two reasons:
-
-# (o) For Debian's usage, it's doing something like 160k INSERTs;
-# even on auric, that makes the program unusable unless we get
-# involved in all sorts of silly optimization games (local dicts to avoid
-# redundant SELECTs, using COPY FROM rather than INSERTs, etc.)
-
-# (o) It's very site specific, because I don't expect to use this
-# script again in a hurry, and I don't want to spend any more time
-# on it than absolutely necessary.
-
-###############################################################################################################
-
-import commands, os, pg, re, sys, string, tempfile
-import apt_pkg
-import db_access, utils
-
-###############################################################################################################
-
-# Matches the text "binary-" followed by one or more characters other than
-# "/", i.e. one "binary-<something>" path component. process_packages()
-# substitutes "binary-all" for it when a stanza's architecture is "all".
-re_arch_from_filename = re.compile(r"binary-[^/]+")
-
-###############################################################################################################
-
-# Module-level state shared by the functions in this file.
-# Cnf is assigned in main(); process_packages() builds its own local Cnf
-# and projectB rather than using these two.
-Cnf = None;
-projectB = None;
-# files_id_cache maps a "~"-joined key to a files id and is filled by
-# get_or_set_files_id(). arch_all_cache and binary_cache are filled by
-# process_packages(). source_cache is not referenced by the functions
-# visible in this file.
-files_id_cache = {};
-source_cache = {};
-arch_all_cache = {};
-binary_cache = {};
-#
-# Running id counters. Only files_id_serial is incremented by the code
-# visible here (in get_or_set_files_id()).
-files_id_serial = 0;
-source_id_serial = 0;
-src_associations_id_serial = 0;
-dsc_files_id_serial = 0;
-# Output file handles; None until main() opens them. Only
-# files_query_cache is written to by the code visible here.
-files_query_cache = None;
-source_query_cache = None;
-src_associations_query_cache = None;
-dsc_files_query_cache = None;
-# Not referenced by the functions visible in this file.
-orig_tar_gz_cache = {};
-#
-binaries_id_serial = 0;
-binaries_query_cache = None;
-bin_associations_id_serial = 0;
-bin_associations_query_cache = None;
-#
-# Looked up (via .get) in process_packages() with "source~version" keys;
-# nothing visible in this file ever stores into it.
-source_cache_for_binaries = {};
-
-###############################################################################################################
-
-# Prepares a filename or directory (s) to be file.filename by stripping any part of the location (sub) from it.
-def poolify (s, sub):
- # Returns s with its leading part removed when that leading part equals
- # a trailing slice of sub; returns s unchanged when no trailing slice of
- # sub matches the start of s (including when sub is empty, since the
- # loop then never runs).
- # i = 0 compares the whole of sub against the start of s; each later
- # pass drops one more leading character from sub, so the longest
- # matching tail of sub is the one that gets stripped.
- for i in xrange(len(sub)):
- if sub[i:] == s[0:len(sub)-i]:
- return s[len(sub)-i:];
- return s;
-
-##############################################################################################################
-
-def get_or_set_files_id (filename, size, md5sum, location_id):
- # Returns the id recorded for this (filename, size, md5sum, location_id)
- # combination. The first time a combination is seen, the next serial
- # number is allocated and one row is appended to files_query_cache.
- # filename, size and md5sum are handed to string.join() and %s as-is;
- # location_id goes through repr() for the key and %d for the row.
- # files_query_cache is None at module load, so it must already have been
- # opened (main() does this) before the first call.
- global files_id_cache, files_id_serial, files_query_cache;
-
- cache_key = string.join((filename, size, md5sum, repr(location_id)), '~')
- if not files_id_cache.has_key(cache_key):
- # New combination: take the next serial and write one tab-separated
- # row (id, filename, size, md5sum, location_id).
- files_id_serial = files_id_serial + 1
- files_query_cache.write("%d\t%s\t%s\t%s\t%d\n" % (files_id_serial, filename, size, md5sum, location_id));
- files_id_cache[cache_key] = files_id_serial
-
- return files_id_cache[cache_key]
-
-##############################################################################################################
-def process_packages (location, filename, suite, component, archive):
- # Reads the tag file named by `filename` stanza by stanza, registers each
- # stanza's file through get_or_set_files_id(), and prints a line when the
- # same package/version/architecture shows up again with a different size
- # or md5sum. Returns None; if the file cannot be opened a warning is
- # printed and the function returns early.
- # NOTE(review): indentation is flattened in this copy, so the nesting of
- # the if/else blocks below cannot be read off the text; the comments
- # describe individual statements only.
- # Of the names declared global here, only arch_all_cache and binary_cache
- # are referenced in this function.
- global arch_all_cache, binary_cache, binaries_id_serial, binaries_query_cache, bin_associations_id_serial, bin_associations_query_cache;
-
- apt_pkg.init();
-
- # Cnf and projectB are not in the global statement above, so these
- # assignments create function-local names. The config path and the
- # database connection parameters are hard-coded.
- Cnf = apt_pkg.newConfiguration();
- apt_pkg.ReadConfigFileISC(Cnf,'/home/troup/katie/katie.conf');
-
- projectB = pg.connect('projectb', 'localhost', -1, None, None, 'postgres')
-
- db_access.init (Cnf, projectB);
-
- count_total = 0;
- count_bad = 0;
- suite = string.lower(suite);
- # suite_id, testing_id and suite_codename are assigned here but not
- # referenced again in this function.
- suite_id = db_access.get_suite_id(suite);
- if suite == "stable":
- testing_id = db_access.get_suite_id("testing");
- suite_codename = Cnf["Suite::%s::CodeName" % (suite)];
- try:
- file = utils.open_file (filename, "r")
- except utils.cant_open_exc:
- print "WARNING: can't open '%s'" % (filename);
- return;
- Scanner = apt_pkg.ParseTagFile(file);
- # Loop while Step() reports another stanza (non-zero return).
- while Scanner.Step() != 0:
- package = Scanner.Section["package"]
- version = Scanner.Section["version"]
- maintainer = Scanner.Section["maintainer"]
- # Backslash-escape single quotes before the maintainer string is handed
- # to db_access. maintainer_id is not referenced again in this function.
- maintainer = string.replace(maintainer, "'", "\\'")
- maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
- architecture = Scanner.Section["architecture"]
- architecture_id = db_access.get_architecture_id (architecture);
- # Source package name: the "source" field when present, otherwise the
- # binary package name.
- if not Scanner.Section.has_key("source"):
- source = package
- else:
- source = Scanner.Section["source"]
- source_version = ""
- # A "(" in the source field means a version is embedded in it; split it
- # out with utils.re_extract_src_version (groups 1 and 2).
- # NOTE(review): m is used without checking that the match succeeded.
- if string.find(source, "(") != -1:
- m = utils.re_extract_src_version.match(source)
- source = m.group(1)
- source_version = m.group(2)
- # No embedded source version: fall back to the binary's version.
- if not source_version:
- source_version = version
- # xfilename keeps the stanza's filename untouched for the report below;
- # `filename` (the function parameter) is rebound from here on.
- xfilename = Scanner.Section["filename"]
- filename = xfilename
- # The arguments to this lookup do not change inside the loop.
- location_id = db_access.get_location_id (location, component, archive)
- filename = poolify (filename, location)
- if architecture == "all":
- filename = re_arch_from_filename.sub("binary-all", filename);
- # source_id is None whenever the key is missing; nothing visible in this
- # file populates source_cache_for_binaries.
- cache_key = "%s~%s" % (source, source_version);
- source_id = source_cache_for_binaries.get(cache_key, None);
- size = Scanner.Section["size"];
- md5sum = Scanner.Section["md5sum"];
- files_id = get_or_set_files_id (filename, size, md5sum, location_id);
- # arch_all_cache records each distinct package/version/source/arch/
- # location/file combination; binary_cache remembers the (size, md5sum)
- # first seen for each package/version/architecture so a later differing
- # pair can be reported.
- cache_key = "%s~%s~%s~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id);
- if not arch_all_cache.has_key(cache_key):
- arch_all_cache[cache_key] = 1;
- cache_key = "%s~%s~%d" % (package, version, architecture_id);
- if not binary_cache.has_key(cache_key):
- binary_cache[cache_key] = (size, md5sum);
- else:
- (oldsize, oldmd5sum) = binary_cache[cache_key];
- if oldsize != size or oldmd5sum != md5sum:
- #print "/org/ftp.debian.org/ftp/%s" % (xfilename);
- print "%s: %s vs. %s and %s vs. %s" % (xfilename, oldsize, size, oldmd5sum, md5sum);
- # The increment below is commented out, so count_bad stays at 0.
- #count_bad = count_bad + 1;
-
- # count_total is incremented but not read anywhere in this function.
- count_total = count_total +1;
-
- file.close();
- # With the count_bad increment disabled above, this condition is never
- # true as the code stands.
- if count_bad != 0:
- print "Found %d bad." % (count_bad)
-
-##############################################################################################################
-
-def main ():
- # Loads the configuration, opens the six export files, runs
- # process_packages() for the Packages file(s) of each configured
- # location that belongs to the "ftp-master" archive, then closes the
- # export files. Returns None.
- # NOTE(review): query_cache is declared global here but has no
- # module-level definition in the code visible in this file, and projectB
- # is declared global but not assigned in this function.
- global Cnf, projectB, query_cache, files_query_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, bin_associations_query_cache, binaries_query_cache;
-
- apt_pkg.init();
-
- # The configuration file path is hard-coded.
- Cnf = apt_pkg.newConfiguration();
- apt_pkg.ReadConfigFileISC(Cnf,'/home/troup/katie/katie.conf');
-
- # Each export file name is appended directly to Neve::ExportDir with no
- # separator added. Of these handles, only files_query_cache is written
- # to by the code visible in this file (in get_or_set_files_id()).
- files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"files","w");
- source_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"source","w");
- src_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"src_associations","w");
- dsc_files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"dsc_files","w");
- binaries_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"binaries","w");
- bin_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"bin_associations","w");
-
- # Process Packages files to populate `binaries' and friends
-
- for location in Cnf.SubTree("Location").List():
- SubSec = Cnf.SubTree("Location::%s" % (location));
- server = SubSec["Archive"];
- # Locations belonging to any other archive are skipped.
- if server != "ftp-master": # FIXME, don't hard code
- continue;
- type = Cnf.Find("Location::%s::Type" % (location));
- # "legacy-mixed": one Packages file named by appending 'Packages' to the
- # location string, with an empty component.
- if type == "legacy-mixed":
- packages = location + 'Packages';
- suite = Cnf.Find("Location::%s::Suite" % (location));
- process_packages (location, packages, suite, "", server);
- # "legacy": one Packages file per suite / component / architecture,
- # built as <location><codename>/<component>/binary-<arch>/Packages.
- # The "source" and "all" architectures are skipped.
- elif type == "legacy":
- for suite in Cnf.SubTree("Location::%s::Suites" % (location)).List():
- for component in Cnf.SubTree("Component").List():
- for architecture in Cnf.SubTree("Suite::%s::Architectures" % (suite)).List():
- if architecture == "source" or architecture == "all":
- continue;
- packages = location + Cnf.Find("Suite::%s::CodeName" % (suite)) + '/' + component + '/binary-' + architecture + '/Packages'
- process_packages (location, packages, suite, component, server);
- # "pool" locations are not processed here.
- elif type == "pool":
- continue;
-
- files_query_cache.close();
- source_query_cache.close();
- src_associations_query_cache.close();
- dsc_files_query_cache.close();
- binaries_query_cache.close();
- bin_associations_query_cache.close();
-
- return;
-
-# Script entry point: call main() only when this file is run directly.
-if __name__ == '__main__':
- main()