#!/usr/bin/env python

# Populate the DB
# Copyright (C) 2000 James Troup
# $Id: fix.1,v 1.1 2000-11-24 00:20:11 troup Exp $

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

################################################################################

# 04:36| elmo: you're making me waste 5 seconds per architecture!!!!!! YOU BASTARD!!!!!

################################################################################

# This code is a horrible mess for two reasons:

#   (o) For Debian's usage, it's doing something like 160k INSERTs,
#   even on auric, that makes the program unusable unless we get
#   involved in all sorts of silly optimization games (local dicts to
#   avoid redundant SELECTS, using COPY FROM rather than INSERTS etc.)

#   (o) It's very site specific, because I don't expect to use this
#   script again in a hurry, and I don't want to spend any more time
#   on it than absolutely necessary.

###############################################################################################################

import commands, os, pg, re, sys, string, tempfile
import apt_pkg
import db_access, utils

###############################################################################################################

# Matches the "binary-<arch>" path component of a Packages filename entry;
# process_packages() uses it to rewrite that component to "binary-all".
re_arch_from_filename = re.compile(r"binary-[^/]+")

###############################################################################################################

# Configuration object and database connection; main() assigns Cnf.
Cnf = None
projectB = None
# Maps "filename~size~md5sum~location_id" to the serial allocated for that
# file by get_or_set_files_id().
files_id_cache = {}
source_cache = {}
# Keys already seen by process_packages() (see the cache_key formats there).
arch_all_cache = {}
# Maps "package~version~architecture_id" to the (size, md5sum) first seen.
binary_cache = {}
#
# Serial counters and export-file handles.  The *_query_cache handles are
# opened by main(); files_id_serial is advanced by get_or_set_files_id().
files_id_serial = 0
source_id_serial = 0
src_associations_id_serial = 0
dsc_files_id_serial = 0
files_query_cache = None
source_query_cache = None
src_associations_query_cache = None
dsc_files_query_cache = None
orig_tar_gz_cache = {}
#
binaries_id_serial = 0
binaries_query_cache = None
bin_associations_id_serial = 0
bin_associations_query_cache = None
#
# Looked up by process_packages() with "source~source_version" keys.
source_cache_for_binaries = {}

###############################################################################################################

# Prepares a filename or directory (s) to be file.filename by stripping any part of the location (sub) from it.
def poolify (s, sub):
    """Strip from the front of `s' the longest suffix of `sub' it starts with.

    `sub' is scanned from its full length down to its last character; the
    first (i.e. longest) suffix of `sub' that is also a prefix of `s' is
    removed from `s'.  If no suffix matches, or `sub' is empty, `s' is
    returned unchanged.
    """
    sub_len = len(sub)
    for i in range(sub_len):
        overlap = sub_len - i
        if sub[i:] == s[:overlap]:
            return s[overlap:]
    return s

##############################################################################################################

def get_or_set_files_id (filename, size, md5sum, location_id):
    """Return the serial id for a file, allocating and exporting one if new.

    The file is identified by (filename, size, md5sum, location_id).  The
    first time a combination is seen, the module-level `files_id_serial' is
    incremented, a tab-separated row is written to `files_query_cache' and
    the id is remembered in `files_id_cache'.  `filename', `size' and
    `md5sum' must be strings; `location_id' must be an integer.
    """
    global files_id_cache, files_id_serial, files_query_cache

    cache_key = '~'.join((filename, size, md5sum, repr(location_id)))
    if cache_key not in files_id_cache:
        files_id_serial = files_id_serial + 1
        files_query_cache.write("%d\t%s\t%s\t%s\t%d\n" % (files_id_serial, filename, size, md5sum, location_id))
        files_id_cache[cache_key] = files_id_serial

    return files_id_cache[cache_key]

##############################################################################################################

def process_packages (location, filename, suite, component, archive):
    """Scan one Packages file and report size/md5sum disagreements.

    Every stanza of the Packages file `filename' is registered through
    get_or_set_files_id().  The first (size, md5sum) seen for each
    package/version/architecture is remembered in `binary_cache'; a later
    stanza with the same key but a different size or md5sum is printed.

    `location' is the archive root that poolify() strips from each stanza's
    Filename; `suite', `component' and `archive' are passed to the db_access
    lookups.  Returns None; if the file cannot be opened a warning is
    printed and nothing else happens.
    """
    global arch_all_cache, binary_cache, binaries_id_serial, binaries_query_cache, bin_associations_id_serial, bin_associations_query_cache

    # NOTE(review): Cnf and projectB are *locals* here (they are not in the
    # global statement above), so every call re-reads the configuration and
    # opens a new database connection that is never explicitly closed.
    apt_pkg.init()
    Cnf = apt_pkg.newConfiguration()
    apt_pkg.ReadConfigFileISC(Cnf, '/home/troup/katie/katie.conf')
    projectB = pg.connect('projectb', 'localhost', -1, None, None, 'postgres')
    db_access.init(Cnf, projectB)

    count_total = 0
    count_bad = 0
    suite = suite.lower()
    # The results of the next three lookups are not used below; the calls
    # are kept because their side effects (if any) are not visible from here.
    suite_id = db_access.get_suite_id(suite)
    if suite == "stable":
        testing_id = db_access.get_suite_id("testing")
    suite_codename = Cnf["Suite::%s::CodeName" % (suite)]

    try:
        packages_file = utils.open_file(filename, "r")
    except utils.cant_open_exc:
        print("WARNING: can't open '%s'" % (filename))
        return

    # Close the Packages file even if a stanza is malformed or a db_access
    # lookup raises part-way through.
    try:
        Scanner = apt_pkg.ParseTagFile(packages_file)
        while Scanner.Step() != 0:
            section = Scanner.Section
            package = section["package"]
            version = section["version"]
            maintainer = section["maintainer"]
            maintainer = maintainer.replace("'", "\\'")
            maintainer_id = db_access.get_or_set_maintainer_id(maintainer)
            architecture = section["architecture"]
            architecture_id = db_access.get_architecture_id(architecture)

            # Source defaults to the binary package name and may carry its
            # own version in the form "name (version)".
            if "source" not in section:
                source = package
            else:
                source = section["source"]
            source_version = ""
            if "(" in source:
                m = utils.re_extract_src_version.match(source)
                # Guard against a Source field that contains "(" but does
                # not match the expected pattern; fall through to defaults.
                if m:
                    source = m.group(1)
                    source_version = m.group(2)
            if not source_version:
                source_version = version

            xfilename = section["filename"]
            location_id = db_access.get_location_id(location, component, archive)
            pool_filename = poolify(xfilename, location)
            if architecture == "all":
                pool_filename = re_arch_from_filename.sub("binary-all", pool_filename)

            source_key = "%s~%s" % (source, source_version)
            source_id = source_cache_for_binaries.get(source_key, None)
            size = section["size"]
            md5sum = section["md5sum"]
            files_id = get_or_set_files_id(pool_filename, size, md5sum, location_id)

            # Skip stanzas that are exact repeats of one already processed.
            seen_key = "%s~%s~%s~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id)
            if seen_key not in arch_all_cache:
                arch_all_cache[seen_key] = 1
                binary_key = "%s~%s~%d" % (package, version, architecture_id)
                if binary_key not in binary_cache:
                    binary_cache[binary_key] = (size, md5sum)
                else:
                    (oldsize, oldmd5sum) = binary_cache[binary_key]
                    if oldsize != size or oldmd5sum != md5sum:
                        print("%s: %s vs. %s and %s vs. %s" % (xfilename, oldsize, size, oldmd5sum, md5sum))
                        # Counting of mismatches is deliberately left
                        # disabled, as in the original code, so count_bad
                        # stays 0 and the summary below never fires.

            count_total = count_total + 1
    finally:
        packages_file.close()

    if count_bad != 0:
        print("Found %d bad." % (count_bad))

##############################################################################################################

def main ():
    """Walk the configured locations and run process_packages() on each.

    Reads the katie configuration, opens the six export files under
    Neve::ExportDir, then visits every Location whose Archive is
    "ftp-master":

      legacy-mixed  a single <location>Packages file
      legacy        one Packages file per suite/component/architecture,
                    skipping the "source" and "all" pseudo-architectures
      pool          ignored

    Of the export files, only `files' is written to by the code in this
    part of the file (via get_or_set_files_id()).
    """
    global Cnf, projectB, query_cache, files_query_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, bin_associations_query_cache, binaries_query_cache

    apt_pkg.init()
    Cnf = apt_pkg.newConfiguration()
    apt_pkg.ReadConfigFileISC(Cnf, '/home/troup/katie/katie.conf')

    export_dir = Cnf["Neve::ExportDir"]
    files_query_cache = utils.open_file(export_dir + "files", "w")
    source_query_cache = utils.open_file(export_dir + "source", "w")
    src_associations_query_cache = utils.open_file(export_dir + "src_associations", "w")
    dsc_files_query_cache = utils.open_file(export_dir + "dsc_files", "w")
    binaries_query_cache = utils.open_file(export_dir + "binaries", "w")
    bin_associations_query_cache = utils.open_file(export_dir + "bin_associations", "w")

    # Make sure the export files are flushed and closed even if processing
    # one of the Packages files raises.
    try:
        # Process Packages files to populate `binaries' and friends
        for location in Cnf.SubTree("Location").List():
            SubSec = Cnf.SubTree("Location::%s" % (location))
            server = SubSec["Archive"]
            if server != "ftp-master": # FIXME, don't hard code
                continue
            location_type = Cnf.Find("Location::%s::Type" % (location))
            if location_type == "legacy-mixed":
                packages = location + 'Packages'
                suite = Cnf.Find("Location::%s::Suite" % (location))
                process_packages(location, packages, suite, "", server)
            elif location_type == "legacy":
                for suite in Cnf.SubTree("Location::%s::Suites" % (location)).List():
                    for component in Cnf.SubTree("Component").List():
                        for architecture in Cnf.SubTree("Suite::%s::Architectures" % (suite)).List():
                            if architecture == "source" or architecture == "all":
                                continue
                            packages = location + Cnf.Find("Suite::%s::CodeName" % (suite)) + '/' + component + '/binary-' + architecture + '/Packages'
                            process_packages(location, packages, suite, component, server)
            elif location_type == "pool":
                continue
    finally:
        files_query_cache.close()
        source_query_cache.close()
        src_associations_query_cache.close()
        dsc_files_query_cache.close()
        binaries_query_cache.close()
        bin_associations_query_cache.close()

    return

if __name__ == '__main__':
    main()