4 # Copyright (C) 2000, 2001 James Troup <james@nocrew.org>
5 # $Id: neve,v 1.3 2001-03-20 00:28:11 troup Exp $
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 ################################################################################
23 # 04:36|<aj> elmo: you're making me waste 5 seconds per architecture!!!!!! YOU BASTARD!!!!!
25 ################################################################################
27 # This code is a horrible mess for two reasons:
29 # (o) For Debian's usage, it's doing something like 160k INSERTs,
30 # even on auric, that makes the program unusable unless we get
31 # involved in sorts of silly optimization games (local dicts to avoid
32 # redundant SELECTS, using COPY FROM rather than INSERTS etc.)
34 # (o) It's very site specific, because I don't expect to use this
35 # script again in a hurry, and I don't want to spend any more time
36 # on it than absolutely necessary.
38 ###############################################################################################################
40 import commands, os, pg, re, sys, string, tempfile
42 import db_access, utils
44 ###############################################################################################################
# Matches the "binary-<arch>" component of a Packages file path; used in
# process_packages() to rewrite the pool path of an "Architecture: all"
# package to the binary-all tree.
re_arch_from_filename = re.compile(r"binary-[^/]+")
48 ###############################################################################################################
# Module-global state shared by the processing functions below.  The
# *_id_serial counters hand out primary-key values locally so rows can be
# bulk-loaded with COPY FROM instead of per-row INSERTs; the *_query_cache
# variables are later opened as the temporary dump files those COPY lines
# are written to.
# NOTE(review): companion globals referenced elsewhere in this file
# (files_id_cache, files_id_serial, source_id_serial, source_cache,
# arch_all_cache, binary_cache) appear to be initialized on lines missing
# from this chunk.
src_associations_id_serial = 0;
dsc_files_id_serial = 0;
files_query_cache = None;
source_query_cache = None;
src_associations_query_cache = None;
dsc_files_query_cache = None;
# Maps "<filename>~<size>~<md5sum>" to a files id so a .orig.tar.gz shared
# by several .dsc's only gets one row in the files table.
orig_tar_gz_cache = {};

binaries_id_serial = 0;
binaries_query_cache = None;
bin_associations_id_serial = 0;
bin_associations_query_cache = None;

# Maps "<source>~<version>" to a source id; used by process_packages() to
# link binaries back to their source package.
source_cache_for_binaries = {};
74 ###############################################################################################################
# Prepares a filename or directory (s) to be file.filename by stripping any part of the location (sub) from it.
def poolify (s, sub):
    # Find the longest suffix of `sub' that is a prefix of `s' and strip
    # that overlap from `s', e.g.
    #   poolify("pool/main/p/pkg/x.dsc", "/org/ftp/pool/") -> "main/p/pkg/x.dsc"
    # NOTE(review): the `def' line was missing from this chunk and has been
    # reconstructed from the comment above and the call sites
    # (poolify(directory, location) / poolify(filename, location)).
    # NOTE(review): if no suffix of `sub' matches, the function falls off
    # the end and returns None -- callers appear to assume a match.
    for i in range(len(sub)):
        if sub[i:] == s[0:len(sub)-i]:
            return s[len(sub)-i:];
def update_archives ():
    # Rebuild the `archive' table from the Archive section of the
    # configuration file.
    projectB.query("DELETE FROM archive")
    for archive in Cnf.SubTree("Archive").List():
        archive_config = Cnf.SubTree("Archive::%s" % (archive));
        query = "INSERT INTO archive (name, origin_server, description) VALUES ('%s', '%s', '%s')" % (archive, archive_config["OriginServer"], archive_config["Description"]);
        projectB.query(query);
def update_components ():
    # Rebuild the `component' table from the Component section of the
    # configuration file.
    projectB.query("DELETE FROM component")
    for component in Cnf.SubTree("Component").List():
        component_config = Cnf.SubTree("Component::%s" % (component));
        query = "INSERT INTO component (name, description, meets_dfsg) VALUES ('%s', '%s', '%s')" % (component, component_config["Description"], component_config["MeetsDFSG"]);
        projectB.query(query);
def update_locations ():
    # Rebuild the `location' table from the Location section of the
    # configuration file: legacy-mixed locations get a single row, other
    # locations get one row per component.
    projectB.query("DELETE FROM location")
    for location in Cnf.SubTree("Location").List():
        SubSec = Cnf.SubTree("Location::%s" % (location));
        archive_id = db_access.get_archive_id(SubSec["archive"]);
        type = SubSec.Find("type");
        if type == "legacy-mixed":
            projectB.query("INSERT INTO location (path, archive, type) VALUES ('%s', %d, '%s')" % (location, archive_id, SubSec["type"]));
        # NOTE(review): an `else:'/`elif' line appears to be missing from
        # this chunk -- as written, legacy-mixed locations would also get
        # the per-component rows below; confirm against the original file.
        for component in Cnf.SubTree("Component").List():
            component_id = db_access.get_component_id(component);
            projectB.query("INSERT INTO location (path, component, archive, type) VALUES ('%s', %d, %d, '%s')" %
                           (location, component_id, archive_id, SubSec["type"]));
def update_architectures ():
    # Rebuild the `architecture' table from the Architectures list in the
    # configuration file.
    projectB.query("DELETE FROM architecture")
    for arch in Cnf.SubTree("Architectures").List():
        arch_description = Cnf["Architectures::%s" % (arch)];
        projectB.query("INSERT INTO architecture (arch_string, description) VALUES ('%s', '%s')" % (arch, arch_description))
def update_suites ():
    # Rebuild the `suite' table (and its suite_architectures links) from
    # the Suite section of the configuration file.
    projectB.query("DELETE FROM suite")
    for suite in Cnf.SubTree("Suite").List():
        suite_config = Cnf.SubTree("Suite::%s" %(suite))
        query = "INSERT INTO suite (suite_name, version, origin, description) VALUES ('%s', '%s', '%s', '%s')" % (suite.lower(), suite_config["Version"], suite_config["Origin"], suite_config["Description"])
        projectB.query(query)
        # Attach every architecture of the suite to the row just created.
        for architecture in Cnf.SubTree("Suite::%s::Architectures" % (suite)).List():
            arch_id = db_access.get_architecture_id (architecture);
            projectB.query("INSERT INTO suite_architectures (suite, architecture) VALUES (currval('suite_id_seq'), %d)" % (arch_id));
126 ##############################################################################################################
def get_or_set_files_id (filename, size, md5sum, location_id):
    # Return the files-table id for this file, allocating a fresh serial
    # (and queueing a COPY line in files_query_cache) the first time a
    # given filename/size/md5sum/location combination is seen.
    global files_id_cache, files_id_serial, files_query_cache;

    cache_key = '~'.join((filename, size, md5sum, repr(location_id)));
    if cache_key not in files_id_cache:
        files_id_serial = files_id_serial + 1;
        files_query_cache.write("%d\t%s\t%s\t%s\t%d\n" % (files_id_serial, filename, size, md5sum, location_id));
        files_id_cache[cache_key] = files_id_serial;
    return files_id_cache[cache_key];
139 ##############################################################################################################
def process_sources (location, filename, suite, component, archive):
    # Parse a Sources file and queue COPY lines for the `source',
    # `src_associations' and `dsc_files' tables, caching ids locally to
    # avoid redundant SELECTs.
    # NOTE(review): several lines of this function are missing from this
    # chunk (the `try:' before open_file, `else:' branches, the per-.dsc
    # file-id bookkeeping); the notes below mark the apparent gaps.
    global source_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, source_id_serial, src_associations_id_serial, dsc_files_id_serial, source_cache_for_binaries, orig_tar_gz_cache;

    suite = string.lower(suite)
    suite_id = db_access.get_suite_id(suite);
    if suite == 'stable':
        testing_id = db_access.get_suite_id("testing");
    # NOTE(review): a `try:' line is missing from this chunk.
    file = utils.open_file (filename, "r")
    except utils.cant_open_exc:
        print "WARNING: can't open '%s'" % (filename);
        # NOTE(review): the early exit after this warning is missing from
        # this chunk.
    Scanner = apt_pkg.ParseTagFile(file)
    while Scanner.Step() != 0:
        package = Scanner.Section["package"]
        version = Scanner.Section["version"]
        maintainer = Scanner.Section["maintainer"]
        # Escape single quotes for the SQL COPY data.
        maintainer = string.replace(maintainer, "'", "\\'")
        maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
        directory = Scanner.Section["directory"]
        location_id = db_access.get_location_id (location, component, archive)
        if directory[-1:] != "/":
            directory = directory + '/';
        directory = poolify (directory, location);
        if directory != "" and directory[-1:] != "/":
            directory = directory + '/';
        no_epoch_version = utils.re_no_epoch.sub('', version)
        # Add all files referenced by the .dsc to the files table
        for line in string.split(Scanner.Section["files"],'\n'):
            (md5sum, size, filename) = string.split(string.strip(line));
            # Don't duplicate .orig.tar.gz's
            if filename[-12:] == ".orig.tar.gz":
                cache_key = "%s~%s~%s" % (filename, size, md5sum);
                if orig_tar_gz_cache.has_key(cache_key):
                    id = orig_tar_gz_cache[cache_key];
                # NOTE(review): an `else:' appears to be missing here.
                id = get_or_set_files_id (directory + filename, size, md5sum, location_id);
                orig_tar_gz_cache[cache_key] = id;
            # NOTE(review): an `else:' (the non-.orig.tar.gz case) appears
            # to be missing here.
            id = get_or_set_files_id (directory + filename, size, md5sum, location_id);
            # If this is the .dsc itself; save the ID for later.
            if filename[-4:] == ".dsc":
                # NOTE(review): the line recording the .dsc's files id
                # (presumably `files_id = id;', used below) is missing from
                # this chunk.
        filename = directory + package + '_' + no_epoch_version + '.dsc'
        cache_key = "%s~%s" % (package, version)
        if not source_cache.has_key(cache_key):
            nasty_key = "%s~%s" % (package, version)
            source_id_serial = source_id_serial + 1;
            if not source_cache_for_binaries.has_key(nasty_key):
                source_cache_for_binaries[nasty_key] = source_id_serial;
            tmp_source_id = source_id_serial;
            source_cache[cache_key] = source_id_serial;
            source_query_cache.write("%d\t%s\t%s\t%d\t%d\n" % (source_id_serial, package, version, maintainer_id, files_id))
            # NOTE(review): a loop over the per-.dsc file ids appears to be
            # missing before this write.
            dsc_files_id_serial = dsc_files_id_serial + 1;
            dsc_files_query_cache.write("%d\t%d\t%d\n" % (dsc_files_id_serial, tmp_source_id,id));
        # NOTE(review): an `else:' appears to be missing here.
            tmp_source_id = source_cache[cache_key];
        src_associations_id_serial = src_associations_id_serial + 1;
        src_associations_query_cache.write("%d\t%d\t%d\n" % (src_associations_id_serial, suite_id, tmp_source_id))
        # populate 'testing' with a mirror of 'stable'
        if suite == "stable":
            src_associations_id_serial = src_associations_id_serial + 1;
            src_associations_query_cache.write("%d\t%d\t%d\n" % (src_associations_id_serial, testing_id, tmp_source_id))
212 ##############################################################################################################
def process_packages (location, filename, suite, component, archive):
    # Parse a Packages file and queue COPY lines for the `binaries' and
    # `bin_associations' tables, linking each binary to its source package
    # via source_cache_for_binaries.
    # NOTE(review): several lines are missing from this chunk (counter
    # initialization, `try:'/`else:'/branch lines); the notes below mark
    # the apparent gaps.
    global arch_all_cache, binary_cache, binaries_id_serial, binaries_query_cache, bin_associations_id_serial, bin_associations_query_cache;

    # NOTE(review): the initialization of count_total / count_bad (used
    # below) is missing from this chunk.
    suite = string.lower(suite);
    suite_id = db_access.get_suite_id(suite);
    if suite == "stable":
        testing_id = db_access.get_suite_id("testing");
    # NOTE(review): a `try:' line is missing from this chunk.
    file = utils.open_file (filename, "r")
    except utils.cant_open_exc:
        print "WARNING: can't open '%s'" % (filename);
    Scanner = apt_pkg.ParseTagFile(file);
    while Scanner.Step() != 0:
        package = Scanner.Section["package"]
        version = Scanner.Section["version"]
        maintainer = Scanner.Section["maintainer"]
        # Escape single quotes for the SQL COPY data.
        maintainer = string.replace(maintainer, "'", "\\'")
        maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
        architecture = Scanner.Section["architecture"]
        architecture_id = db_access.get_architecture_id (architecture);
        if not Scanner.Section.has_key("source"):
            # NOTE(review): the no-"Source:"-field branch (and the `else:'
            # before the next line) is missing from this chunk.
        source = Scanner.Section["source"]
        # Handle the "Source: name (version)" syntax.
        if string.find(source, "(") != -1:
            m = utils.re_extract_src_version.match(source)
            source_version = m.group(2)
        if not source_version:
            source_version = version
        filename = Scanner.Section["filename"]
        location_id = db_access.get_location_id (location, component, archive)
        filename = poolify (filename, location)
        if architecture == "all":
            # Arch: all packages are filed under binary-all in the pool.
            filename = re_arch_from_filename.sub("binary-all", filename);
        # Look up the source id recorded by process_sources(); None when
        # the binary has no matching source package.
        cache_key = "%s~%s" % (source, source_version);
        source_id = source_cache_for_binaries.get(cache_key, None);
        size = Scanner.Section["size"];
        md5sum = Scanner.Section["md5sum"];
        files_id = get_or_set_files_id (filename, size, md5sum, location_id);
        type = "deb"; # FIXME
        cache_key = "%s~%s~%s~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id);
        if not arch_all_cache.has_key(cache_key):
            arch_all_cache[cache_key] = 1;
            cache_key = "%s~%s~%s~%d" % (package, version, repr(source_id), architecture_id);
            if not binary_cache.has_key(cache_key):
                # NOTE(review): the condition introducing this branch
                # (presumably a `source_id == None' check with a fallback)
                # is missing from this chunk.
                count_bad = count_bad + 1;
                source_id = repr(source_id);
                binaries_id_serial = binaries_id_serial + 1;
                binaries_query_cache.write("%d\t%s\t%s\t%d\t%s\t%d\t%d\t%s\n" % (binaries_id_serial, package, version, maintainer_id, source_id, architecture_id, files_id, type));
                binary_cache[cache_key] = binaries_id_serial;
                tmp_binaries_id = binaries_id_serial;
            # NOTE(review): an `else:' appears to be missing here.
                tmp_binaries_id = binary_cache[cache_key];
            bin_associations_id_serial = bin_associations_id_serial + 1;
            bin_associations_query_cache.write("%d\t%d\t%d\n" % (bin_associations_id_serial, suite_id, tmp_binaries_id));
            # Mirror stable's associations into testing.
            if suite == "stable":
                bin_associations_id_serial = bin_associations_id_serial + 1;
                bin_associations_query_cache.write("%d\t%d\t%d\n" % (bin_associations_id_serial, testing_id, tmp_binaries_id));
            count_total = count_total +1;
    # NOTE(review): the `if'/`else:' wrapper choosing between these two
    # summary prints (guarding the division by count_total) is missing
    # from this chunk.
    print "%d binary packages processed; %d with no source match which is %.2f%%" % (count_total, count_bad, (float(count_bad)/count_total)*100);
    print "%d binary packages processed; 0 with no source match which is 0%%" % (count_total);
289 ##############################################################################################################
def do_sources(location, prefix, suite, component, server):
    # Uncompress the Sources.gz for one location/suite/component into a
    # temporary file and hand it to process_sources().
    # NOTE(review): tempfile.mktemp() is race-prone; the O_EXCL open below
    # reserves the name but the returned fd is never used or closed --
    # gunzip re-opens the path by name.
    temp_filename = tempfile.mktemp();
    fd = os.open(temp_filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, 0700);
    sources = location + prefix + 'Sources.gz';
    (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (sources, temp_filename));
    # NOTE(review): the `if (result != 0):' guard (and the exit after the
    # error report) appears to be missing from this chunk.
    sys.stderr.write("Gunzip invocation failed!\n%s\n" % (output));
    print 'Processing '+sources+'...';
    process_sources (location, temp_filename, suite, component, server);
    os.unlink(temp_filename);
304 ##############################################################################################################
# NOTE(review): the statements below appear to be the body of a main()
# function whose `def' line is missing from this chunk, along with various
# other lines (error handling after shell commands, the remaining
# update_*() calls, `continue' statements, and the final main() call).
global Cnf, projectB, query_cache, files_query_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, bin_associations_query_cache, binaries_query_cache;

# Load the archive configuration file.
Cnf = apt_pkg.newConfiguration();
apt_pkg.ReadConfigFileISC(Cnf,utils.which_conf_file());

print "Re-Creating DB..."
(result, output) = commands.getstatusoutput("psql -f init_pool.sql")
# NOTE(review): the check of `result' is missing from this chunk.

projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]), None, None, 'postgres')
db_access.init (Cnf, projectB);

# The static tables are small, so plain INSERTs inside one transaction
# are fine here.
print "Adding static tables from conf file..."
projectB.query("BEGIN WORK");
update_architectures();
# NOTE(review): the other update_*() calls appear to be missing from this
# chunk.
projectB.query("COMMIT WORK");

# Open the per-table dump files that COPY FROM bulk-loads further down.
files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"files","w");
source_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"source","w");
src_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"src_associations","w");
dsc_files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"dsc_files","w");
binaries_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"binaries","w");
bin_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"bin_associations","w");

projectB.query("BEGIN WORK");
# Process Sources files to populate `source' and friends
for location in Cnf.SubTree("Location").List():
    SubSec = Cnf.SubTree("Location::%s" % (location));
    server = SubSec["Archive"];
    type = Cnf.Find("Location::%s::Type" % (location));
    if type == "legacy-mixed":
        # NOTE(review): the line setting `prefix' for this branch is
        # missing from this chunk.
        suite = Cnf.Find("Location::%s::Suite" % (location));
        do_sources(location, prefix, suite, "", server);
    elif type == "legacy":
        for suite in Cnf.SubTree("Location::%s::Suites" % (location)).List():
            for component in Cnf.SubTree("Component").List():
                prefix = Cnf.Find("Suite::%s::CodeName" % (suite)) + '/' + component + '/source/'
                do_sources(location, prefix, suite, component, server);
    # NOTE(review): the branch introducing the commented-out code below
    # and the `else:' before the error report are missing from this chunk.
# for component in Cnf.SubTree("Component").List():
# prefix = component + '/'
# do_sources(location, prefix);
        sys.stderr.write("Unknown location type ('%s').\n" % (type));

# Process Packages files to populate `binaries' and friends
for location in Cnf.SubTree("Location").List():
    SubSec = Cnf.SubTree("Location::%s" % (location));
    server = SubSec["Archive"];
    type = Cnf.Find("Location::%s::Type" % (location));
    if type == "legacy-mixed":
        packages = location + 'Packages';
        suite = Cnf.Find("Location::%s::Suite" % (location));
        print 'Processing '+location+'...';
        process_packages (location, packages, suite, "", server);
    elif type == "legacy":
        for suite in Cnf.SubTree("Location::%s::Suites" % (location)).List():
            for component in Cnf.SubTree("Component").List():
                for architecture in Cnf.SubTree("Suite::%s::Architectures" % (suite)).List():
                    # Sources are handled above; there is no real
                    # binary-all Packages file.
                    if architecture == "source" or architecture == "all":
                        # NOTE(review): the `continue' for this guard is
                        # missing from this chunk.
                    packages = location + Cnf.Find("Suite::%s::CodeName" % (suite)) + '/' + component + '/binary-' + architecture + '/Packages'
                    print 'Processing '+packages+'...';
                    process_packages (location, packages, suite, component, server);

# Flush the dump files, then bulk-load each table with COPY FROM.
files_query_cache.close();
source_query_cache.close();
src_associations_query_cache.close();
dsc_files_query_cache.close();
binaries_query_cache.close();
bin_associations_query_cache.close();
print "Writing data to `files' table...";
projectB.query("COPY files FROM '%s'" % (Cnf["Neve::ExportDir"]+"files"));
print "Writing data to `source' table...";
projectB.query("COPY source FROM '%s'" % (Cnf["Neve::ExportDir"]+"source"));
print "Writing data to `src_associations' table...";
projectB.query("COPY src_associations FROM '%s'" % (Cnf["Neve::ExportDir"]+"src_associations"));
print "Writing data to `dsc_files' table...";
projectB.query("COPY dsc_files FROM '%s'" % (Cnf["Neve::ExportDir"]+"dsc_files"));
print "Writing data to `binaries' table...";
projectB.query("COPY binaries FROM '%s'" % (Cnf["Neve::ExportDir"]+"binaries"));
print "Writing data to `bin_associations' table...";
projectB.query("COPY bin_associations FROM '%s'" % (Cnf["Neve::ExportDir"]+"bin_associations"));
print "Committing...";
projectB.query("COMMIT WORK");

# Add the constraints and otherwise generally clean up the database.
# See add_constraints.sql for more details...

print "Running add_constraints.sql...";
(result, output) = commands.getstatusoutput("psql projectb < add_constraints.sql");
# NOTE(review): the `if (result != 0):' guard is missing from this chunk.
sys.stderr.write("psql invocation failed!\n");

if __name__ == '__main__':
    # NOTE(review): the guarded call (presumably main()) is missing from
    # this chunk.