# Copyright (C) 2000, 2001, 2002 James Troup <james@nocrew.org>
# $Id: neve,v 1.12 2002-06-08 00:23:35 troup Exp $

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

###############################################################################

# 04:36|<aj> elmo: you're making me waste 5 seconds per architecture!!!!!! YOU BASTARD!!!!!

###############################################################################
# This code is a horrible mess for two reasons:
#
#   (o) For Debian's usage, it's doing something like 160k INSERTs;
#       even on auric, that makes the program unusable unless we get
#       involved in all sorts of silly optimization games (local dicts
#       to avoid redundant SELECTs, using COPY FROM rather than
#       INSERTs, etc.)
#
#   (o) It's very site specific, because I don't expect to use this
#       script again in a hurry, and I don't want to spend any more
#       time on it than absolutely necessary.
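#
# Both tricks show up below: every get_or_set_*_id() helper keeps a
# module-level dict so each distinct row costs at most one database
# round-trip, and the large tables are written to tab-separated dump
# files that main() later bulk-loads with COPY FROM.  A minimal sketch
# of the pattern (hypothetical names, for illustration only):
#
#   id_cache = {};
#   id_serial = 0;
#   def get_or_set_id(key, dump_file):
#       global id_serial;
#       if not id_cache.has_key(key):
#           id_serial = id_serial + 1;
#           dump_file.write("%d\t%s\n" % (id_serial, key));  # COPY text format
#           id_cache[key] = id_serial;
#       return id_cache[key];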
###############################################################################
import commands, os, pg, re, select, string, tempfile, time;

import apt_pkg;

import db_access, utils;
###############################################################################
re_arch_from_filename = re.compile(r"binary-[^/]+")
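
# Matches the per-architecture path component of a package's filename, so
# "Architecture: all" .debs can be refiled under binary-all.  For example
# (illustrative path):
#
#   >>> re_arch_from_filename.sub("binary-all", "dists/sid/main/binary-i386/base/foo_1.0.deb")
#   'dists/sid/main/binary-all/base/foo_1.0.deb'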
###############################################################################
Cnf = None;
projectB = None;
files_id_cache = {};
source_cache = {};
arch_all_cache = {};
binary_cache = {};
#
files_id_serial = 0;
source_id_serial = 0;
src_associations_id_serial = 0;
dsc_files_id_serial = 0;
files_query_cache = None;
source_query_cache = None;
src_associations_query_cache = None;
dsc_files_query_cache = None;
orig_tar_gz_cache = {};
#
binaries_id_serial = 0;
binaries_query_cache = None;
bin_associations_id_serial = 0;
bin_associations_query_cache = None;
#
source_cache_for_binaries = {};
reject_message = "";

###############################################################################
# Our very own version of commands.getstatusoutput(), hacked to support
# gpgv's status fd.
def get_status_output(cmd, status_read, status_write):
    cmd = ['/bin/sh', '-c', cmd];
    p2cread, p2cwrite = os.pipe();
    c2pread, c2pwrite = os.pipe();
    errout, errin = os.pipe();
    pid = os.fork();
    if pid == 0:
        # Child: attach the pipes to stdin/stdout/stderr, close every
        # other fd bar the status fd, and exec the command.
        os.close(0); os.dup(p2cread);
        os.close(1); os.dup(c2pwrite);
        os.close(2); os.dup(errin);
        for i in range(3, 256):
            if i != status_write:
                try: os.close(i);
                except: pass;
        try: os.execvp(cmd[0], cmd);
        finally: os._exit(1);

    # Parent: multiplex reads on stdout/stderr and the status fd.
    os.close(p2cread);
    os.dup2(c2pread, c2pwrite);
    os.dup2(errout, errin);

    output = status = "";
    while 1:
        i, o, e = select.select([c2pwrite, errin, status_read], [], []);
        more_data = [];
        for fd in i:
            r = os.read(fd, 8196);
            if len(r) > 0:
                more_data.append(fd);
                if fd == c2pwrite or fd == errin:
                    output = output + r;
                elif fd == status_read:
                    status = status + r;
                else:
                    utils.fubar("Unexpected file descriptor [%s] returned from select\n" % (fd));
        if not more_data:
            pid, exit_status = os.waitpid(pid, 0);
            break;

    os.close(status_write);
    os.close(status_read);
    return output, status, exit_status;
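
# Callers create a pipe, pass the write end to gpgv via --status-fd and
# the read end to this function; they get back (stdout+stderr, status-fd
# output, exit status).  E.g. (illustrative keyring path):
#
#   status_read, status_write = os.pipe();
#   cmd = "gpgv --status-fd %s --keyring /path/to/keyring file.dsc" % (status_write);
#   (output, status, exit_status) = get_status_output(cmd, status_read, status_write);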
###############################################################################
def Dict(**dict): return dict

def reject (str, prefix="Rejected: "):
    global reject_message;

    reject_message = reject_message + prefix + str + "\n";

###############################################################################
def check_signature (filename):
    if not utils.re_taint_free.match(os.path.basename(filename)):
        reject("!!WARNING!! tainted filename: '%s'." % (filename));
        return None;

    status_read, status_write = os.pipe();
    cmd = "gpgv --status-fd %s --keyring %s --keyring %s %s" \
          % (status_write, Cnf["Dinstall::PGPKeyring"], Cnf["Dinstall::GPGKeyring"], filename);
    (output, status, exit_status) = get_status_output(cmd, status_read, status_write);

    # Process the status-fd output
    keywords = {};
    bad = internal_error = "";
    for line in string.split(status, '\n'):
        line = string.strip(line);
        if line == "":
            continue;
        split = string.split(line);
        if len(split) < 2:
            internal_error = internal_error + "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line);
            continue;
        (gnupg, keyword) = split[:2];
        if gnupg != "[GNUPG:]":
            internal_error = internal_error + "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg);
            continue;
        args = split[2:];
        if keywords.has_key(keyword) and keyword != "NODATA" and keyword != "SIGEXPIRED":
            internal_error = internal_error + "found duplicate status token ('%s').\n" % (keyword);
            continue;
        else:
            keywords[keyword] = args;

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        reject("internal error while performing signature check on %s." % (filename));
        reject(internal_error, "");
        reject("Please report the above errors to the Archive maintainers by replying to this mail.", "");
        return None;

    # Now check for obviously bad things in the processed output
    if keywords.has_key("SIGEXPIRED"):
        utils.warn("%s: signing key has expired." % (filename));
    if keywords.has_key("KEYREVOKED"):
        reject("key used to sign %s has been revoked." % (filename));
        bad = 1;
    if keywords.has_key("BADSIG"):
        reject("bad signature on %s." % (filename));
        bad = 1;
    if keywords.has_key("ERRSIG") and not keywords.has_key("NO_PUBKEY"):
        reject("failed to check signature on %s." % (filename));
        bad = 1;
    if keywords.has_key("NO_PUBKEY"):
        args = keywords["NO_PUBKEY"];
        if len(args) < 1:
            reject("internal error while checking signature on %s." % (filename));
            bad = 1;
        else:
            fingerprint = args[0];
    if keywords.has_key("BADARMOR"):
        reject("ascii armour of signature was corrupt in %s." % (filename));
        bad = 1;
    if keywords.has_key("NODATA"):
        utils.warn("no signature found for %s." % (filename));
        return "NOSIG";
        #reject("no signature found in %s." % (filename));
        #bad = 1;

    if bad:
        return None;

    # Next check gpgv exited with a zero return code
    if exit_status and not keywords.has_key("NO_PUBKEY"):
        reject("gpgv failed while checking %s." % (filename));
        if string.strip(status):
            reject(utils.prefix_multi_line_string(status, " [GPG status-fd output:] "), "");
        else:
            reject(utils.prefix_multi_line_string(output, " [GPG output:] "), "");
        return None;

    # Sanity check the good stuff we expect
    if not keywords.has_key("VALIDSIG"):
        if not keywords.has_key("NO_PUBKEY"):
            reject("signature on %s does not appear to be valid [No VALIDSIG]." % (filename));
            bad = 1;
    else:
        args = keywords["VALIDSIG"];
        if len(args) < 1:
            reject("internal error while checking signature on %s." % (filename));
            bad = 1;
        else:
            fingerprint = args[0];
    if not keywords.has_key("GOODSIG") and not keywords.has_key("NO_PUBKEY"):
        reject("signature on %s does not appear to be valid [No GOODSIG]." % (filename));
        bad = 1;
    if not keywords.has_key("SIG_ID") and not keywords.has_key("NO_PUBKEY"):
        reject("signature on %s does not appear to be valid [No SIG_ID]." % (filename));
        bad = 1;

    # Finally ensure there's not something we don't recognise
    known_keywords = Dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
                          SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
                          NODATA="");

    for keyword in keywords.keys():
        if not known_keywords.has_key(keyword):
            reject("found unknown status token '%s' from gpgv with args '%s' in %s." % (keyword, repr(keywords[keyword]), filename));
            bad = 1;

    if bad:
        return None;
    else:
        return fingerprint;
#########################################################################################

# Prepare a filename or directory (s) for files.filename by stripping any
# overlap with the location path (sub) from the front of it.
def poolify (s, sub):
    for i in xrange(len(sub)):
        if sub[i:] == s[0:len(sub)-i]:
            return s[len(sub)-i:];
    return s;
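
# For example (illustrative paths):
#
#   >>> poolify("/org/ftp.debian.org/ftp/dists/potato/main/source/", "/org/ftp.debian.org/ftp/")
#   'dists/potato/main/source/'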
def update_archives ():
    projectB.query("DELETE FROM archive")
    for archive in Cnf.SubTree("Archive").List():
        SubSec = Cnf.SubTree("Archive::%s" % (archive));
        projectB.query("INSERT INTO archive (name, origin_server, description) VALUES ('%s', '%s', '%s')"
                       % (archive, SubSec["OriginServer"], SubSec["Description"]));
def update_components ():
    projectB.query("DELETE FROM component")
    for component in Cnf.SubTree("Component").List():
        SubSec = Cnf.SubTree("Component::%s" % (component));
        projectB.query("INSERT INTO component (name, description, meets_dfsg) VALUES ('%s', '%s', '%s')" %
                       (component, SubSec["Description"], SubSec["MeetsDFSG"]));
def update_locations ():
    projectB.query("DELETE FROM location")
    for location in Cnf.SubTree("Location").List():
        SubSec = Cnf.SubTree("Location::%s" % (location));
        archive_id = db_access.get_archive_id(SubSec["archive"]);
        type = SubSec.Find("type");
        if type == "legacy-mixed":
            projectB.query("INSERT INTO location (path, archive, type) VALUES ('%s', %d, '%s')" % (location, archive_id, SubSec["type"]));
        else:
            for component in Cnf.SubTree("Component").List():
                component_id = db_access.get_component_id(component);
                projectB.query("INSERT INTO location (path, component, archive, type) VALUES ('%s', %d, %d, '%s')" %
                               (location, component_id, archive_id, SubSec["type"]));
def update_architectures ():
    projectB.query("DELETE FROM architecture")
    for arch in Cnf.SubTree("Architectures").List():
        projectB.query("INSERT INTO architecture (arch_string, description) VALUES ('%s', '%s')" % (arch, Cnf["Architectures::%s" % (arch)]))
def update_suites ():
    projectB.query("DELETE FROM suite")
    for suite in Cnf.SubTree("Suite").List():
        SubSec = Cnf.SubTree("Suite::%s" % (suite))
        projectB.query("INSERT INTO suite (suite_name) VALUES ('%s')" % string.lower(suite));
        for i in ("Version", "Origin", "Description"):
            if SubSec.has_key(i):
                projectB.query("UPDATE suite SET %s = '%s' WHERE suite_name = '%s'" % (string.lower(i), SubSec[i], string.lower(suite)))
        for architecture in Cnf.ValueList("Suite::%s::Architectures" % (suite)):
            architecture_id = db_access.get_architecture_id (architecture);
            projectB.query("INSERT INTO suite_architectures (suite, architecture) VALUES (currval('suite_id_seq'), %d)" % (architecture_id));
def update_override_type():
    projectB.query("BEGIN WORK");
    projectB.query("DELETE FROM override_type");
    for type in Cnf.ValueList("OverrideType"):
        projectB.query("INSERT INTO override_type (type) VALUES ('%s')" % (type));
    projectB.query("COMMIT WORK");
def update_priority():
    projectB.query("BEGIN WORK");
    projectB.query("DELETE FROM priority");
    for priority in Cnf.SubTree("Priority").List():
        projectB.query("INSERT INTO priority (priority, level) VALUES ('%s', %s)" % (priority, Cnf["Priority::%s" % (priority)]));
    projectB.query("COMMIT WORK");
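
# The update_*() functions above simply mirror the static parts of the
# apt.conf-style configuration into the database.  A sketch of the kind
# of stanzas they read (hypothetical values, for illustration only):
#
#   Suite
#   {
#     Unstable
#     {
#       CodeName "sid";
#       Architectures "i386 m68k ...";
#       Version "3.0";
#     };
#   };
#
#   Priority
#   {
#     required "1";
#     important "2";
#   };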
###############################################################################
def get_or_set_files_id (filename, size, md5sum, location_id):
    global files_id_cache, files_id_serial, files_query_cache;

    cache_key = string.join((filename, size, md5sum, repr(location_id)), '~')
    if not files_id_cache.has_key(cache_key):
        files_id_serial = files_id_serial + 1
        files_query_cache.write("%d\t%s\t%s\t%s\t%d\n" % (files_id_serial, filename, size, md5sum, location_id));
        files_id_cache[cache_key] = files_id_serial

    return files_id_cache[cache_key]
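
# Each line written above is one row in PostgreSQL's tab-separated COPY
# text format, matching the "COPY files FROM ..." load in main().  The
# ids are assigned client-side from files_id_serial, so later rows can
# reference them before anything touches the database.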
###############################################################################

def process_sources (location, filename, suite, component, archive, dsc_dir):
    global source_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, source_id_serial, src_associations_id_serial, dsc_files_id_serial, source_cache_for_binaries, orig_tar_gz_cache, reject_message;

    suite = string.lower(suite);
    suite_id = db_access.get_suite_id(suite);
    if suite == 'stable':
        testing_id = db_access.get_suite_id("testing");
    try:
        file = utils.open_file (filename);
    except utils.cant_open_exc:
        print "WARNING: can't open '%s'" % (filename);
        return;
    Scanner = apt_pkg.ParseTagFile(file);
    while Scanner.Step() != 0:
        package = Scanner.Section["package"];
        version = Scanner.Section["version"];
        dsc_file = os.path.join(dsc_dir, "%s_%s.dsc" % (package, utils.re_no_epoch.sub('', version)));
        install_date = time.strftime("%Y-%m-%d", time.localtime(os.path.getmtime(dsc_file)));
        fingerprint = check_signature(dsc_file);
        fingerprint_id = db_access.get_or_set_fingerprint_id(fingerprint);
        if reject_message:
            utils.fubar("%s: %s" % (dsc_file, reject_message));
        maintainer = Scanner.Section["maintainer"]
        maintainer = string.replace(maintainer, "'", "\\'");
        maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
        directory = Scanner.Section["directory"];
        location_id = db_access.get_location_id (location, component, archive);
        if directory[-1:] != "/":
            directory = directory + '/';
        directory = poolify (directory, location);
        if directory != "" and directory[-1:] != "/":
            directory = directory + '/';
        no_epoch_version = utils.re_no_epoch.sub('', version);
        # Add all files referenced by the .dsc to the files table
        ids = [];
        for line in string.split(Scanner.Section["files"],'\n'):
            id = None;
            (md5sum, size, filename) = string.split(string.strip(line));
            # Don't duplicate .orig.tar.gz's
            if filename[-12:] == ".orig.tar.gz":
                cache_key = "%s~%s~%s" % (filename, size, md5sum);
                if orig_tar_gz_cache.has_key(cache_key):
                    id = orig_tar_gz_cache[cache_key];
                else:
                    id = get_or_set_files_id (directory + filename, size, md5sum, location_id);
                    orig_tar_gz_cache[cache_key] = id;
            else:
                id = get_or_set_files_id (directory + filename, size, md5sum, location_id);
            ids.append(id);
            # If this is the .dsc itself, save the ID for later.
            if filename[-4:] == ".dsc":
                files_id = id;
        filename = directory + package + '_' + no_epoch_version + '.dsc'
        cache_key = "%s~%s" % (package, version);
        if not source_cache.has_key(cache_key):
            nasty_key = "%s~%s" % (package, version)
            source_id_serial = source_id_serial + 1;
            if not source_cache_for_binaries.has_key(nasty_key):
                source_cache_for_binaries[nasty_key] = source_id_serial;
            tmp_source_id = source_id_serial;
            source_cache[cache_key] = source_id_serial;
            source_query_cache.write("%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % (source_id_serial, package, version, maintainer_id, files_id, install_date, fingerprint_id))
            for id in ids:
                dsc_files_id_serial = dsc_files_id_serial + 1;
                dsc_files_query_cache.write("%d\t%d\t%d\n" % (dsc_files_id_serial, tmp_source_id, id));
        else:
            tmp_source_id = source_cache[cache_key];

        src_associations_id_serial = src_associations_id_serial + 1;
        src_associations_query_cache.write("%d\t%d\t%d\n" % (src_associations_id_serial, suite_id, tmp_source_id))
        # populate 'testing' with a mirror of 'stable'
        if suite == "stable":
            src_associations_id_serial = src_associations_id_serial + 1;
            src_associations_query_cache.write("%d\t%d\t%d\n" % (src_associations_id_serial, testing_id, tmp_source_id))

    file.close();

###############################################################################
def process_packages (location, filename, suite, component, archive):
    global arch_all_cache, binary_cache, binaries_id_serial, binaries_query_cache, bin_associations_id_serial, bin_associations_query_cache, reject_message;

    count_total = 0;
    count_bad = 0;
    suite = string.lower(suite);
    suite_id = db_access.get_suite_id(suite);
    if suite == "stable":
        testing_id = db_access.get_suite_id("testing");
    try:
        file = utils.open_file (filename);
    except utils.cant_open_exc:
        print "WARNING: can't open '%s'" % (filename);
        return;
    Scanner = apt_pkg.ParseTagFile(file);
    while Scanner.Step() != 0:
        package = Scanner.Section["package"]
        version = Scanner.Section["version"]
        maintainer = Scanner.Section["maintainer"]
        maintainer = string.replace(maintainer, "'", "\\'")
        maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
        architecture = Scanner.Section["architecture"]
        architecture_id = db_access.get_architecture_id (architecture);
        fingerprint = "NOSIG";
        fingerprint_id = db_access.get_or_set_fingerprint_id(fingerprint);
        if not Scanner.Section.has_key("source"):
            source = package;
        else:
            source = Scanner.Section["source"]
        source_version = "";
        if string.find(source, "(") != -1:
            m = utils.re_extract_src_version.match(source)
            source = m.group(1)
            source_version = m.group(2)
        if not source_version:
            source_version = version
        filename = Scanner.Section["filename"]
        location_id = db_access.get_location_id (location, component, archive)
        filename = poolify (filename, location)
        if architecture == "all":
            filename = re_arch_from_filename.sub("binary-all", filename);
        cache_key = "%s~%s" % (source, source_version);
        source_id = source_cache_for_binaries.get(cache_key, None);
        size = Scanner.Section["size"];
        md5sum = Scanner.Section["md5sum"];
        files_id = get_or_set_files_id (filename, size, md5sum, location_id);
        type = "deb"; # FIXME
        cache_key = "%s~%s~%s~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id);
        if not arch_all_cache.has_key(cache_key):
            arch_all_cache[cache_key] = 1;
            cache_key = "%s~%s~%s~%d" % (package, version, repr(source_id), architecture_id);
            if not binary_cache.has_key(cache_key):
                if not source_id:
                    source_id = "\N";
                    count_bad = count_bad + 1;
                else:
                    source_id = repr(source_id);
                binaries_id_serial = binaries_id_serial + 1;
                binaries_query_cache.write("%d\t%s\t%s\t%d\t%s\t%d\t%d\t%s\t%s\n" % (binaries_id_serial, package, version, maintainer_id, source_id, architecture_id, files_id, type, fingerprint_id));
                binary_cache[cache_key] = binaries_id_serial;
                tmp_binaries_id = binaries_id_serial;
            else:
                tmp_binaries_id = binary_cache[cache_key];

            bin_associations_id_serial = bin_associations_id_serial + 1;
            bin_associations_query_cache.write("%d\t%d\t%d\n" % (bin_associations_id_serial, suite_id, tmp_binaries_id));
            if suite == "stable":
                bin_associations_id_serial = bin_associations_id_serial + 1;
                bin_associations_query_cache.write("%d\t%d\t%d\n" % (bin_associations_id_serial, testing_id, tmp_binaries_id));
            count_total = count_total + 1;

    file.close();
    if count_bad != 0:
        print "%d binary packages processed; %d with no source match which is %.2f%%" % (count_total, count_bad, (float(count_bad)/count_total)*100);
    else:
        print "%d binary packages processed; 0 with no source match which is 0%%" % (count_total);
###############################################################################

def do_sources(location, prefix, suite, component, server):
    # mktemp() is racy, but the O_EXCL open below refuses to reuse an
    # existing file, so at worst we abort rather than clobber anything.
    temp_filename = tempfile.mktemp();
    fd = os.open(temp_filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, 0700);
    os.close(fd);
    sources = location + prefix + 'Sources.gz';
    (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (sources, temp_filename));
    if (result != 0):
        utils.fubar("Gunzip invocation failed!\n%s" % (output), result);
    print 'Processing '+sources+'...';
    process_sources (location, temp_filename, suite, component, server, os.path.dirname(sources));
    os.unlink(temp_filename);
###############################################################################

def main ():
    global Cnf, projectB, query_cache, files_query_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, bin_associations_query_cache, binaries_query_cache;

    Cnf = utils.get_conf();

    print "Re-Creating DB..."
    (result, output) = commands.getstatusoutput("psql -f init_pool.sql template1");
    if (result != 0):
        utils.fubar("psql invocation failed!\n", result);
    print output;

    projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]));

    db_access.init (Cnf, projectB);

    print "Adding static tables from conf file..."
    projectB.query("BEGIN WORK");
    update_architectures();
    update_components();
    update_archives();
    update_locations();
    update_suites();
    update_override_type();
    update_priority();
    projectB.query("COMMIT WORK");
    files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"files","w");
    source_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"source","w");
    src_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"src_associations","w");
    dsc_files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"dsc_files","w");
    binaries_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"binaries","w");
    bin_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"bin_associations","w");
    projectB.query("BEGIN WORK");
    # Process Sources files to populate `source' and friends
    for location in Cnf.SubTree("Location").List():
        SubSec = Cnf.SubTree("Location::%s" % (location));
        server = SubSec["Archive"];
        type = Cnf.Find("Location::%s::Type" % (location));
        if type == "legacy-mixed":
            prefix = '';
            suite = Cnf.Find("Location::%s::Suite" % (location));
            do_sources(location, prefix, suite, "", server);
        elif type == "legacy":
            for suite in Cnf.ValueList("Location::%s::Suites" % (location)):
                for component in Cnf.SubTree("Component").List():
                    prefix = Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/source/'
                    do_sources(location, prefix, suite, component, server);
        elif type == "pool":
            continue;
#            for component in Cnf.SubTree("Component").List():
#                prefix = component + '/'
#                do_sources(location, prefix);
        else:
            utils.fubar("Unknown location type ('%s')." % (type));
    # Process Packages files to populate `binaries' and friends

    for location in Cnf.SubTree("Location").List():
        SubSec = Cnf.SubTree("Location::%s" % (location));
        server = SubSec["Archive"];
        type = Cnf.Find("Location::%s::Type" % (location));
        if type == "legacy-mixed":
            packages = location + 'Packages';
            suite = Cnf.Find("Location::%s::Suite" % (location));
            print 'Processing '+location+'...';
            process_packages (location, packages, suite, "", server);
        elif type == "legacy":
            for suite in Cnf.ValueList("Location::%s::Suites" % (location)):
                for component in Cnf.SubTree("Component").List():
                    architectures = filter(utils.real_arch,
                                           Cnf.ValueList("Suite::%s::Architectures" % (suite)));
                    for architecture in architectures:
                        packages = location + Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/binary-' + architecture + '/Packages'
                        print 'Processing '+packages+'...';
                        process_packages (location, packages, suite, component, server);
    files_query_cache.close();
    source_query_cache.close();
    src_associations_query_cache.close();
    dsc_files_query_cache.close();
    binaries_query_cache.close();
    bin_associations_query_cache.close();
    print "Writing data to `files' table...";
    projectB.query("COPY files FROM '%s'" % (Cnf["Neve::ExportDir"]+"files"));
    print "Writing data to `source' table...";
    projectB.query("COPY source FROM '%s'" % (Cnf["Neve::ExportDir"]+"source"));
    print "Writing data to `src_associations' table...";
    projectB.query("COPY src_associations FROM '%s'" % (Cnf["Neve::ExportDir"]+"src_associations"));
    print "Writing data to `dsc_files' table...";
    projectB.query("COPY dsc_files FROM '%s'" % (Cnf["Neve::ExportDir"]+"dsc_files"));
    print "Writing data to `binaries' table...";
    projectB.query("COPY binaries FROM '%s'" % (Cnf["Neve::ExportDir"]+"binaries"));
    print "Writing data to `bin_associations' table...";
    projectB.query("COPY bin_associations FROM '%s'" % (Cnf["Neve::ExportDir"]+"bin_associations"));
    print "Committing...";
    projectB.query("COMMIT WORK");
    # Add the constraints and otherwise generally clean up the database.
    # See add_constraints.sql for more details...

    print "Running add_constraints.sql...";
    (result, output) = commands.getstatusoutput("psql %s < add_constraints.sql" % (Cnf["DB::Name"]));
    print output;
    if (result != 0):
        utils.fubar("psql invocation failed!\n%s" % (output), result);

    return;

###############################################################################

if __name__ == '__main__':
    main()