4 # Copyright (C) 2000 James Troup <james@nocrew.org>
5 # $Id: fix.1,v 1.1 2000-11-24 00:20:11 troup Exp $
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 ################################################################################
23 # 04:36|<aj> elmo: you're making me waste 5 seconds per architecture!!!!!! YOU BASTARD!!!!!
25 ################################################################################
27 # This code is a horrible mess for two reasons:
29 # (o) For Debian's usage, it's doing something like 160k INSERTs;
30 # even on auric, that makes the program unusable unless we get
31 # involved in all sorts of silly optimization games (local dicts to avoid
32 # redundant SELECTs, using COPY FROM rather than INSERTs, etc.)
34 # (o) It's very site specific, because I don't expect to use this
35 # script again in a hurry, and I don't want to spend any more time
36 # on it than absolutely necessary.
38 ###############################################################################################################
40 import commands, os, pg, re, sys, string, tempfile
42 import db_access, utils
44 ###############################################################################################################
# Matches a "binary-<something>" path component (everything up to the next
# '/').  process_packages uses it to rewrite that component to "binary-all"
# for packages whose architecture is "all".
46 re_arch_from_filename = re.compile(r"binary-[^/]+")
48 ###############################################################################################################
# Module-level state shared by the functions below.
#
# *_id_serial     : running row counters, all starting at 0.
# *_query_cache   : export file handles; they stay None until the start-up
#                   code opens the files under Cnf["Neve::ExportDir"].
# remaining names : plain dictionaries used as lookup caches.

# -- source side ------------------------------------------------------------
src_associations_id_serial = 0
dsc_files_id_serial = 0
files_query_cache = None
source_query_cache = None
src_associations_query_cache = None
dsc_files_query_cache = None
# Not referenced in the visible part of this file.
orig_tar_gz_cache = dict()

# -- binary side ------------------------------------------------------------
binaries_id_serial = 0
binaries_query_cache = None
bin_associations_id_serial = 0
bin_associations_query_cache = None

# Maps "<source>~<source_version>" to a source id; read by process_packages.
source_cache_for_binaries = dict()
74 ###############################################################################################################
# Prepares a filename or directory (s) to be file.filename by stripping any
# part of the location (sub) from it.
#
# Every suffix of `sub' is tried, longest first; the first one that `s'
# starts with is cut off the front of `s' and the remainder is returned.
#
#   s   - the path to clean up
#   sub - the location whose (possibly partial) tail prefixes `s'
#
# NOTE(review): the `def' line and the final `return' were missing from this
# copy of the file.  The parameter names and order are taken from the comment
# above and from the call `poolify (filename, location)'; returning `s'
# unchanged when nothing matches is an assumption -- confirm against the
# pristine source.
def poolify (s, sub):
    for i in range(len(sub)):
        # sub[i:] is the tail of the location; compare it with the
        # equally long head of s.
        if sub[i:] == s[0:len(sub)-i]:
            return s[len(sub)-i:]
    # No tail of `sub' prefixes `s' (or `sub' is empty): nothing to strip.
    return s
83 ##############################################################################################################
# Return the id assigned to the file described by (filename, size, md5sum,
# location_id).
#
# The first time a combination is seen the next value of files_id_serial is
# allocated, one tab-separated row
#     id <TAB> filename <TAB> size <TAB> md5sum <TAB> location_id
# is written to files_query_cache, and the id is remembered in
# files_id_cache.  Later calls with the same combination return the
# remembered id and write nothing.
#
#   filename, size, md5sum - strings (they are joined into the cache key)
#   location_id            - integer (formatted with %d)
def get_or_set_files_id (filename, size, md5sum, location_id):
    # Only the serial is rebound here; the cache dictionary and the export
    # file are merely used, so they need no `global' declaration.
    global files_id_serial

    cache_key = "~".join((filename, size, md5sum, repr(location_id)))
    files_id = files_id_cache.get(cache_key)
    if files_id is None:
        # Serials start at 1, so None can never collide with a real id.
        files_id_serial = files_id_serial + 1
        files_id = files_id_serial
        files_query_cache.write("%d\t%s\t%s\t%s\t%d\n" % (files_id, filename, size, md5sum, location_id))
        files_id_cache[cache_key] = files_id

    return files_id
96 ##############################################################################################################
# Scan one Packages file and report binaries that were already seen with a
# different size or md5sum.
#
#   location  - archive location; passed to db_access.get_location_id and
#               stripped from file paths by poolify
#   filename  - path of the Packages file to read
#   suite     - suite name (lower-cased before use)
#   component - component name passed to db_access.get_location_id
#   archive   - archive name passed to db_access.get_location_id
#
# For every stanza the file is registered through get_or_set_files_id, and
# the (size, md5sum) pair is remembered in binary_cache under
# "<package>~<version>~<architecture_id>"; when that key already holds a
# different pair a line is printed.
#
# NOTE(review): this copy of the function is damaged.  Indentation is gone
# and the numbers embedded at the start of each line skip values, i.e. whole
# lines are missing.  The notes below mark the gaps that affect how the code
# reads; none of the missing statements has been reconstructed here.
97 def process_packages (location, filename, suite, component, archive):
98 global arch_all_cache, binary_cache, binaries_id_serial, binaries_query_cache, bin_associations_id_serial, bin_associations_query_cache;
# The configuration is re-read and a new database connection is opened on
# every call.
# NOTE(review): apt_pkg is not among the visible import lines -- confirm it
# is imported.
102 Cnf = apt_pkg.newConfiguration();
103 apt_pkg.ReadConfigFileISC(Cnf,'/home/troup/katie/katie.conf');
105 projectB = pg.connect('projectb', 'localhost', -1, None, None, 'postgres')
107 db_access.init (Cnf, projectB);
# NOTE(review): count_total and count_bad are used further down but no
# initialisation is visible; presumably it was on the missing lines here.
111 suite = string.lower(suite);
112 suite_id = db_access.get_suite_id(suite);
# testing_id is only assigned for the "stable" suite; neither it nor
# suite_codename is read again in the visible code.
113 if suite == "stable":
114 testing_id = db_access.get_suite_id("testing");
115 suite_codename = Cnf["Suite::%s::CodeName" % (suite)];
# NOTE(review): the `try:' belonging to the `except' below is missing, and
# nothing visible leaves the function after the warning -- presumably a
# `return' followed it; confirm.
117 file = utils.open_file (filename, "r")
118 except utils.cant_open_exc:
119 print "WARNING: can't open '%s'" % (filename);
# One iteration per stanza of the Packages file.
121 Scanner = apt_pkg.ParseTagFile(file);
122 while Scanner.Step() != 0:
123 package = Scanner.Section["package"]
124 version = Scanner.Section["version"]
125 maintainer = Scanner.Section["maintainer"]
# Backslash-escape single quotes in the maintainer string (presumably for
# use inside a quoted SQL literal -- confirm against db_access).
126 maintainer = string.replace(maintainer, "'", "\\'")
127 maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
128 architecture = Scanner.Section["architecture"]
129 architecture_id = db_access.get_architecture_id (architecture);
# NOTE(review): the body of this `if' and the matching `else:' are missing;
# presumably `source' falls back to the package name when the stanza has no
# "source" field -- confirm.
130 if not Scanner.Section.has_key("source"):
133 source = Scanner.Section["source"]
# A "source" value containing "(" is matched against
# utils.re_extract_src_version and group 2 is taken as the source version.
# NOTE(review): lines are missing before and inside this `if' (presumably
# the default for source_version and the assignment of group 1 to `source').
135 if string.find(source, "(") != -1:
136 m = utils.re_extract_src_version.match(source)
138 source_version = m.group(2)
# An empty source version means "same as the binary version".
139 if not source_version:
140 source_version = version
141 xfilename = Scanner.Section["filename"]
# NOTE(review): as visible, poolify is applied to `filename' (the Packages
# path argument, overwritten on each pass) rather than to `xfilename';
# a missing line before this point may rebind `filename' -- confirm.
143 location_id = db_access.get_location_id (location, component, archive)
144 filename = poolify (filename, location)
# Architecture "all" packages get their binary-<arch> path component
# rewritten to binary-all.
145 if architecture == "all":
146 filename = re_arch_from_filename.sub("binary-all", filename);
# source_id is None when the source/version pair is not in the cache.
147 cache_key = "%s~%s" % (source, source_version);
148 source_id = source_cache_for_binaries.get(cache_key, None);
149 size = Scanner.Section["size"];
150 md5sum = Scanner.Section["md5sum"];
151 files_id = get_or_set_files_id (filename, size, md5sum, location_id);
# arch_all_cache records each full combination of package, version, source
# id, architecture, location and file id that has been seen.
152 cache_key = "%s~%s~%s~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id);
153 if not arch_all_cache.has_key(cache_key):
154 arch_all_cache[cache_key] = 1;
# binary_cache remembers (size, md5sum) per package/version/architecture.
# NOTE(review): the `else:' between the store and the comparison below is
# missing, and the nesting relative to the arch_all_cache test cannot be
# recovered from this copy.
155 cache_key = "%s~%s~%d" % (package, version, architecture_id);
156 if not binary_cache.has_key(cache_key):
157 binary_cache[cache_key] = (size, md5sum);
159 (oldsize, oldmd5sum) = binary_cache[cache_key];
160 if oldsize != size or oldmd5sum != md5sum:
161 #print "/org/ftp.debian.org/ftp/%s" % (xfilename);
162 print "%s: %s vs. %s and %s vs. %s" % (xfilename, oldsize, size, oldmd5sum, md5sum);
# The count_bad increment is commented out, so in the visible code the
# total printed at the end is never incremented.
163 #count_bad = count_bad + 1;
165 count_total = count_total +1;
# NOTE(review): no close of `file' is visible, and lines are missing just
# before the summary (possibly a guard around it) -- confirm.
169 print "Found %d bad." % (count_bad)
171 ##############################################################################################################
# Script driver: reads the configuration, opens the six export files, runs
# process_packages over the Packages file(s) of every configured location,
# then closes the export files.
#
# NOTE(review): the `def' line of this function is not present in this copy
# (the numbers embedded in the lines skip it), indentation is gone, and the
# bodies of two `if' statements below are missing.  Nothing has been
# reconstructed here.
174 global Cnf, projectB, query_cache, files_query_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, bin_associations_query_cache, binaries_query_cache;
# NOTE(review): apt_pkg is not among the visible import lines -- confirm it
# is imported.
178 Cnf = apt_pkg.newConfiguration();
179 apt_pkg.ReadConfigFileISC(Cnf,'/home/troup/katie/katie.conf');
# One export file per table, created under Cnf["Neve::ExportDir"] (the
# value is used as a plain string prefix, so it must end with a separator).
181 files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"files","w");
182 source_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"source","w");
183 src_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"src_associations","w");
184 dsc_files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"dsc_files","w");
185 binaries_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"binaries","w");
186 bin_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"bin_associations","w");
188 # Process Packages files to populate `binaries' and friends
190 for location in Cnf.SubTree("Location").List():
191 SubSec = Cnf.SubTree("Location::%s" % (location));
192 server = SubSec["Archive"];
# NOTE(review): the body of this `if' is missing; presumably locations of
# any archive other than "ftp-master" are skipped with `continue' -- confirm.
193 if server != "ftp-master": # FIXME, don't hard code
# `type' shadows the builtin of the same name inside this function.
195 type = Cnf.Find("Location::%s::Type" % (location));
# "legacy-mixed": a single Packages file directly under the location, with
# an empty component.
196 if type == "legacy-mixed":
197 packages = location + 'Packages';
198 suite = Cnf.Find("Location::%s::Suite" % (location));
199 process_packages (location, packages, suite, "", server);
# "legacy": one Packages file per suite / component / architecture, at
# <location><codename>/<component>/binary-<architecture>/Packages.
200 elif type == "legacy":
201 for suite in Cnf.SubTree("Location::%s::Suites" % (location)).List():
202 for component in Cnf.SubTree("Component").List():
203 for architecture in Cnf.SubTree("Suite::%s::Architectures" % (suite)).List():
# NOTE(review): the body of this `if' is missing; presumably the "source"
# and "all" pseudo-architectures are skipped with `continue' -- confirm.
204 if architecture == "source" or architecture == "all":
206 packages = location + Cnf.Find("Suite::%s::CodeName" % (suite)) + '/' + component + '/binary-' + architecture + '/Packages'
207 process_packages (location, packages, suite, component, server);
# The export files are closed only when the loop finishes normally; an
# exception raised above leaves them open until the interpreter exits.
211 files_query_cache.close();
212 source_query_cache.close();
213 src_associations_query_cache.close();
214 dsc_files_query_cache.close();
215 binaries_query_cache.close();
216 bin_associations_query_cache.close();
# Script entry guard.
# NOTE(review): the body of this `if' is not present in this copy of the
# file; presumably it calls the driver function above -- confirm against the
# pristine source.
220 if __name__ == '__main__':