# Copyright (C) 2000, 2001, 2002, 2003  James Troup <james@nocrew.org>
# $Id: neve,v 1.18 2003-02-07 14:53:42 troup Exp $

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

###############################################################################

# 04:36|<aj> elmo: you're making me waste 5 seconds per architecture!!!!!! YOU BASTARD!!!!!

###############################################################################

# This code is a horrible mess for two reasons:

#   (o) For Debian's usage, it's doing something like 160k INSERTs;
#       even on auric, that makes the program unusable unless we get
#       involved in all sorts of silly optimization games (local dicts
#       to avoid redundant SELECTs, using COPY FROM rather than
#       INSERTs, etc.)

#   (o) It's very site specific, because I don't expect to use this
#       script again in a hurry, and I don't want to spend any more
#       time on it than absolutely necessary.

###############################################################################
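
# The strategy, in brief: each of the `*_query_cache' globals below is a
# flat file of tab-separated rows.  IDs are assigned locally from the
# `*_id_serial' counters, lookups are memoized in dicts, and each file
# is bulk-loaded into PostgreSQL at the end with a single COPY ... FROM.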

import commands, os, pg, re, sys, tempfile, time;
import apt_pkg;

import db_access, utils;

###############################################################################

re_arch_from_filename = re.compile(r"binary-[^/]+")

###############################################################################

Cnf = None;
projectB = None;
files_id_cache = {};
source_cache = {};
arch_all_cache = {};
binary_cache = {};
location_path_cache = {};

files_id_serial = 0;
source_id_serial = 0;
src_associations_id_serial = 0;
dsc_files_id_serial = 0;
files_query_cache = None;
source_query_cache = None;
src_associations_query_cache = None;
dsc_files_query_cache = None;
orig_tar_gz_cache = {};

binaries_id_serial = 0;
binaries_query_cache = None;
bin_associations_id_serial = 0;
bin_associations_query_cache = None;

source_cache_for_binaries = {};
reject_message = "";

################################################################################

def usage(exit_code=0):
    print """Usage: neve
Initializes a projectB database from an existing archive

  -a, --action              actually perform the initialization
  -h, --help                show this help and exit."""
    sys.exit(exit_code)

###############################################################################

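# Append a rejection reason to the global reject_message; callers check
# and report the accumulated message themselves.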
def reject (str, prefix="Rejected: "):
    global reject_message;

    reject_message += prefix + str + "\n";

###############################################################################

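# Check the signature on `filename' by running gpgv with a status fd and
# parsing the [GNUPG:] status tokens it emits.  Returns the signing
# fingerprint on success, "NOSIG" for unsigned files, or None (with the
# reasons accumulated via reject()) on failure.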
def check_signature (filename):
    if not utils.re_taint_free.match(os.path.basename(filename)):
        reject("!!WARNING!! tainted filename: '%s'." % (filename));
        return None;

    status_read, status_write = os.pipe();
    cmd = "gpgv --status-fd %s --keyring %s --keyring %s %s" \
          % (status_write, Cnf["Dinstall::PGPKeyring"], Cnf["Dinstall::GPGKeyring"], filename);
    (output, status, exit_status) = utils.gpgv_get_status_output(cmd, status_read, status_write);

    # Process the status-fd output
    keywords = {};
    bad = internal_error = "";
    for line in status.split('\n'):
        line = line.strip();
        if line == "":
            continue;
        split = line.split();
        if len(split) < 2:
            internal_error += "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line);
            continue;
        (gnupg, keyword) = split[:2];
        if gnupg != "[GNUPG:]":
            internal_error += "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg);
            continue;
        args = split[2:];
        if keywords.has_key(keyword) and keyword != "NODATA" and keyword != "SIGEXPIRED":
            internal_error += "found duplicate status token ('%s').\n" % (keyword);
        else:
            keywords[keyword] = args;

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        reject("internal error while performing signature check on %s." % (filename));
        reject(internal_error, "");
        reject("Please report the above errors to the Archive maintainers by replying to this mail.", "");
        return None;

    # Now check for obviously bad things in the processed output
    if keywords.has_key("SIGEXPIRED"):
        utils.warn("%s: signing key has expired." % (filename));
    if keywords.has_key("KEYREVOKED"):
        reject("key used to sign %s has been revoked." % (filename));
        bad = 1;
    if keywords.has_key("BADSIG"):
        reject("bad signature on %s." % (filename));
        bad = 1;
    if keywords.has_key("ERRSIG") and not keywords.has_key("NO_PUBKEY"):
        reject("failed to check signature on %s." % (filename));
        bad = 1;
    if keywords.has_key("NO_PUBKEY"):
        args = keywords["NO_PUBKEY"];
        if len(args) < 1:
            reject("internal error while checking signature on %s." % (filename));
            bad = 1;
        else:
            fingerprint = args[0];
    if keywords.has_key("BADARMOR"):
        reject("ascii armour of signature was corrupt in %s." % (filename));
        bad = 1;
    if keywords.has_key("NODATA"):
        utils.warn("no signature found for %s." % (filename));
        return "NOSIG";
        #reject("no signature found in %s." % (filename));
        #bad = 1;

    if bad:
        return None;

    # Next check gpgv exited with a zero return code
    if exit_status and not keywords.has_key("NO_PUBKEY"):
        reject("gpgv failed while checking %s." % (filename));
        if status.strip():
            reject(utils.prefix_multi_line_string(status, " [GPG status-fd output:] "), "");
        else:
            reject(utils.prefix_multi_line_string(output, " [GPG output:] "), "");
        return None;

    # Sanity check the good stuff we expect
    if not keywords.has_key("VALIDSIG"):
        if not keywords.has_key("NO_PUBKEY"):
            reject("signature on %s does not appear to be valid [No VALIDSIG]." % (filename));
            bad = 1;
    else:
        args = keywords["VALIDSIG"];
        if len(args) < 1:
            reject("internal error while checking signature on %s." % (filename));
            bad = 1;
        else:
            fingerprint = args[0];
    if not keywords.has_key("GOODSIG") and not keywords.has_key("NO_PUBKEY"):
        reject("signature on %s does not appear to be valid [No GOODSIG]." % (filename));
        bad = 1;
    if not keywords.has_key("SIG_ID") and not keywords.has_key("NO_PUBKEY"):
        reject("signature on %s does not appear to be valid [No SIG_ID]." % (filename));
        bad = 1;

    # Finally ensure there's not something we don't recognise
    known_keywords = utils.Dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
                                SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
                                NODATA="");

    for keyword in keywords.keys():
        if not known_keywords.has_key(keyword):
            reject("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], filename));
            bad = 1;

    if bad:
        return None;
    else:
        return fingerprint;

################################################################################

# Strip the location path prefix (sub) from a filename or directory (s),
# leaving the pool-relative path used for files.filename.
def poolify (s, sub):
    for i in xrange(len(sub)):
        if sub[i:] == s[0:len(sub)-i]:
            return s[len(sub)-i:];
    return s;
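
# A rough illustration with hypothetical paths (the loop strips the
# longest suffix of `sub' that prefixes `s'):
#
#   poolify("pool/main/a/apt/apt_0.5.4.dsc", "/org/ftp.debian.org/ftp/pool/")
#   => "main/a/apt/apt_0.5.4.dsc"

# The update_* functions below mirror the static parts of the
# configuration file (archives, components, locations, suites, etc.)
# into their corresponding projectB tables.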
def update_archives ():
    projectB.query("DELETE FROM archive")
    for archive in Cnf.SubTree("Archive").List():
        SubSec = Cnf.SubTree("Archive::%s" % (archive));
        projectB.query("INSERT INTO archive (name, origin_server, description) VALUES ('%s', '%s', '%s')"
                       % (archive, SubSec["OriginServer"], SubSec["Description"]));

def update_components ():
    projectB.query("DELETE FROM component")
    for component in Cnf.SubTree("Component").List():
        SubSec = Cnf.SubTree("Component::%s" % (component));
        projectB.query("INSERT INTO component (name, description, meets_dfsg) VALUES ('%s', '%s', '%s')" %
                       (component, SubSec["Description"], SubSec["MeetsDFSG"]));

def update_locations ():
    projectB.query("DELETE FROM location")
    for location in Cnf.SubTree("Location").List():
        SubSec = Cnf.SubTree("Location::%s" % (location));
        archive_id = db_access.get_archive_id(SubSec["archive"]);
        type = SubSec.Find("type");
        if type == "legacy-mixed":
            projectB.query("INSERT INTO location (path, archive, type) VALUES ('%s', %d, '%s')" % (location, archive_id, SubSec["type"]));
        else:
            for component in Cnf.SubTree("Component").List():
                component_id = db_access.get_component_id(component);
                projectB.query("INSERT INTO location (path, component, archive, type) VALUES ('%s', %d, %d, '%s')" %
                               (location, component_id, archive_id, SubSec["type"]));

def update_architectures ():
    projectB.query("DELETE FROM architecture")
    for arch in Cnf.SubTree("Architectures").List():
        projectB.query("INSERT INTO architecture (arch_string, description) VALUES ('%s', '%s')" % (arch, Cnf["Architectures::%s" % (arch)]))

def update_suites ():
    projectB.query("DELETE FROM suite")
    for suite in Cnf.SubTree("Suite").List():
        SubSec = Cnf.SubTree("Suite::%s" % (suite))
        projectB.query("INSERT INTO suite (suite_name) VALUES ('%s')" % suite.lower());
        for i in ("Version", "Origin", "Description"):
            if SubSec.has_key(i):
                projectB.query("UPDATE suite SET %s = '%s' WHERE suite_name = '%s'" % (i.lower(), SubSec[i], suite.lower()))
        for architecture in Cnf.ValueList("Suite::%s::Architectures" % (suite)):
            architecture_id = db_access.get_architecture_id (architecture);
            projectB.query("INSERT INTO suite_architectures (suite, architecture) VALUES (currval('suite_id_seq'), %d)" % (architecture_id));

def update_override_type ():
    projectB.query("DELETE FROM override_type");
    for type in Cnf.ValueList("OverrideType"):
        projectB.query("INSERT INTO override_type (type) VALUES ('%s')" % (type));

def update_priority ():
    projectB.query("DELETE FROM priority");
    for priority in Cnf.SubTree("Priority").List():
        projectB.query("INSERT INTO priority (priority, level) VALUES ('%s', %s)" % (priority, Cnf["Priority::%s" % (priority)]));

def update_section ():
    projectB.query("DELETE FROM section");
    for component in Cnf.SubTree("Component").List():
        if Cnf["Natalie::ComponentPosition"] == "prefix":
            suffix = "";
            if component != 'main':
                prefix = component + '/';
            else:
                prefix = "";
        else:
            prefix = "";
            component = component.replace("non-US/", "");
            if component != 'main':
                suffix = '/' + component;
            else:
                suffix = "";
        for section in Cnf.ValueList("Section"):
            projectB.query("INSERT INTO section (section) VALUES ('%s%s%s')" % (prefix, section, suffix));

def get_location_path (directory):
    global location_path_cache;

    if location_path_cache.has_key(directory):
        return location_path_cache[directory];

    q = projectB.query("SELECT DISTINCT path FROM location WHERE path ~ '%s'" % (directory));
    try:
        path = q.getresult()[0][0];
    except:
        utils.fubar("[neve] get_location_path(): Couldn't get path for %s" % (directory));
    location_path_cache[directory] = path;
    return path;

################################################################################

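# Return the files table ID for `filename', assigning the next local serial
# and writing a new row to the files COPY cache the first time it's seen.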
def get_or_set_files_id (filename, size, md5sum, location_id):
    global files_id_cache, files_id_serial, files_query_cache;

    cache_key = "~".join((filename, size, md5sum, repr(location_id)));
    if not files_id_cache.has_key(cache_key):
        files_id_serial += 1;
        files_query_cache.write("%d\t%s\t%s\t%s\t%d\n" % (files_id_serial, filename, size, md5sum, location_id));
        files_id_cache[cache_key] = files_id_serial

    return files_id_cache[cache_key]

###############################################################################

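# Walk a (decompressed) Sources file, registering each source package's
# files and appending rows to the source, dsc_files and src_associations
# COPY caches.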
def process_sources (filename, suite, component, archive):
    global source_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, source_id_serial, src_associations_id_serial, dsc_files_id_serial, source_cache_for_binaries, orig_tar_gz_cache, reject_message;

    suite = suite.lower();
    suite_id = db_access.get_suite_id(suite);
    try:
        file = utils.open_file (filename);
    except utils.cant_open_exc:
        utils.warn("can't open '%s'" % (filename));
        return;
    Scanner = apt_pkg.ParseTagFile(file);
    while Scanner.Step() != 0:
        package = Scanner.Section["package"];
        version = Scanner.Section["version"];
        directory = Scanner.Section["directory"];
        dsc_file = os.path.join(Cnf["Dir::Root"], directory, "%s_%s.dsc" % (package, utils.re_no_epoch.sub('', version)));
        # Sometimes the Directory path is a lie; check in the pool
        if not os.path.exists(dsc_file):
            if directory.split('/')[0] == "dists":
                directory = Cnf["Dir::PoolRoot"] + utils.poolify(package, component);
                dsc_file = os.path.join(Cnf["Dir::Root"], directory, "%s_%s.dsc" % (package, utils.re_no_epoch.sub('', version)));
        if not os.path.exists(dsc_file):
            utils.fubar("%s not found." % (dsc_file));
        install_date = time.strftime("%Y-%m-%d", time.localtime(os.path.getmtime(dsc_file)));
        fingerprint = check_signature(dsc_file);
        fingerprint_id = db_access.get_or_set_fingerprint_id(fingerprint);
        if reject_message:
            utils.fubar("%s: %s" % (dsc_file, reject_message));
        maintainer = Scanner.Section["maintainer"]
        maintainer = maintainer.replace("'", "\\'");
        maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
        location = get_location_path(directory.split('/')[0]);
        location_id = db_access.get_location_id (location, component, archive);
        if not directory.endswith("/"):
            directory += '/';
        directory = poolify (directory, location);
        if directory != "" and not directory.endswith("/"):
            directory += '/';
        no_epoch_version = utils.re_no_epoch.sub('', version);
        # Add all files referenced by the .dsc to the files table
        ids = [];
        for line in Scanner.Section["files"].split('\n'):
            id = None;
            (md5sum, size, filename) = line.strip().split();
            # Don't duplicate .orig.tar.gz's
            if filename.endswith(".orig.tar.gz"):
                cache_key = "%s~%s~%s" % (filename, size, md5sum);
                if orig_tar_gz_cache.has_key(cache_key):
                    id = orig_tar_gz_cache[cache_key];
                else:
                    id = get_or_set_files_id (directory + filename, size, md5sum, location_id);
                    orig_tar_gz_cache[cache_key] = id;
            else:
                id = get_or_set_files_id (directory + filename, size, md5sum, location_id);
            ids.append(id);
            # If this is the .dsc itself; save the ID for later.
            if filename.endswith(".dsc"):
                files_id = id;
        filename = directory + package + '_' + no_epoch_version + '.dsc'
        cache_key = "%s~%s" % (package, version);
        if not source_cache.has_key(cache_key):
            nasty_key = "%s~%s" % (package, version)
            source_id_serial += 1;
            if not source_cache_for_binaries.has_key(nasty_key):
                source_cache_for_binaries[nasty_key] = source_id_serial;
            tmp_source_id = source_id_serial;
            source_cache[cache_key] = source_id_serial;
            source_query_cache.write("%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % (source_id_serial, package, version, maintainer_id, files_id, install_date, fingerprint_id))
            for id in ids:
                dsc_files_id_serial += 1;
                dsc_files_query_cache.write("%d\t%d\t%d\n" % (dsc_files_id_serial, tmp_source_id, id));
        else:
            tmp_source_id = source_cache[cache_key];

        src_associations_id_serial += 1;
        src_associations_query_cache.write("%d\t%d\t%d\n" % (src_associations_id_serial, suite_id, tmp_source_id))

    file.close();

###############################################################################

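# Walk a Packages file, linking each binary to its source package via
# source_cache_for_binaries and appending rows to the binaries and
# bin_associations COPY caches.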
def process_packages (filename, suite, component, archive):
    global arch_all_cache, binary_cache, binaries_id_serial, binaries_query_cache, bin_associations_id_serial, bin_associations_query_cache, reject_message;

    count_total = 0;
    count_bad = 0;
    suite = suite.lower();
    suite_id = db_access.get_suite_id(suite);
    try:
        file = utils.open_file (filename);
    except utils.cant_open_exc:
        utils.warn("can't open '%s'" % (filename));
        return;
    Scanner = apt_pkg.ParseTagFile(file);
    while Scanner.Step() != 0:
        package = Scanner.Section["package"]
        version = Scanner.Section["version"]
        maintainer = Scanner.Section["maintainer"]
        maintainer = maintainer.replace("'", "\\'")
        maintainer_id = db_access.get_or_set_maintainer_id(maintainer);
        architecture = Scanner.Section["architecture"]
        architecture_id = db_access.get_architecture_id (architecture);
        fingerprint = "NOSIG";
        fingerprint_id = db_access.get_or_set_fingerprint_id(fingerprint);
        if not Scanner.Section.has_key("source"):
            source = package
        else:
            source = Scanner.Section["source"]
        source_version = ""
        if source.find("(") != -1:
            m = utils.re_extract_src_version.match(source)
            source = m.group(1)
            source_version = m.group(2)
        if not source_version:
            source_version = version
        filename = Scanner.Section["filename"]
        location = get_location_path(filename.split('/')[0]);
        location_id = db_access.get_location_id (location, component, archive)
        filename = poolify (filename, location)
        if architecture == "all":
            filename = re_arch_from_filename.sub("binary-all", filename);
        cache_key = "%s~%s" % (source, source_version);
        source_id = source_cache_for_binaries.get(cache_key, None);
        size = Scanner.Section["size"];
        md5sum = Scanner.Section["md5sum"];
        files_id = get_or_set_files_id (filename, size, md5sum, location_id);
        type = "deb"; # FIXME
        cache_key = "%s~%s~%s~%d~%d~%d~%d" % (package, version, repr(source_id), architecture_id, location_id, files_id, suite_id);
        if not arch_all_cache.has_key(cache_key):
            arch_all_cache[cache_key] = 1;
            cache_key = "%s~%s~%s~%d" % (package, version, repr(source_id), architecture_id);
            if not binary_cache.has_key(cache_key):
                if not source_id:
                    source_id = "\\N"; # COPY's representation of NULL
                    count_bad += 1;
                else:
                    source_id = repr(source_id);
                binaries_id_serial += 1;
                binaries_query_cache.write("%d\t%s\t%s\t%d\t%s\t%d\t%d\t%s\t%s\n" % (binaries_id_serial, package, version, maintainer_id, source_id, architecture_id, files_id, type, fingerprint_id));
                binary_cache[cache_key] = binaries_id_serial;
                tmp_binaries_id = binaries_id_serial;
            else:
                tmp_binaries_id = binary_cache[cache_key];

            bin_associations_id_serial += 1;
            bin_associations_query_cache.write("%d\t%d\t%d\n" % (bin_associations_id_serial, suite_id, tmp_binaries_id));
            count_total += 1;

    file.close();
    if count_bad != 0:
        print "%d binary packages processed; %d with no source match which is %.2f%%" % (count_total, count_bad, (float(count_bad)/count_total)*100);
    else:
        print "%d binary packages processed; 0 with no source match." % (count_total);

###############################################################################

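# Decompress a Sources.gz into a temporary file and hand it to
# process_sources().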
def do_sources(sources, suite, component, server):
    temp_filename = tempfile.mktemp();
    fd = os.open(temp_filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, 0700);
    os.close(fd);
    (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (sources, temp_filename));
    if (result != 0):
        utils.fubar("Gunzip invocation failed!\n%s" % (output), result);
    print 'Processing '+sources+'...';
    process_sources (temp_filename, suite, component, server);
    os.unlink(temp_filename);

###############################################################################

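# The main driver: re-create the database, load the static tables from
# the configuration, walk every location's Sources and Packages files,
# then bulk-load the accumulated flat files and add the constraints.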
def do_da_do_da ():
    global Cnf, projectB, query_cache, files_query_cache, source_query_cache, src_associations_query_cache, dsc_files_query_cache, bin_associations_query_cache, binaries_query_cache;

    Cnf = utils.get_conf();
    Arguments = [('a', "action", "Neve::Options::Action"),
                 ('h', "help", "Neve::Options::Help")];
    for i in [ "action", "help" ]:
        if not Cnf.has_key("Neve::Options::%s" % (i)):
            Cnf["Neve::Options::%s" % (i)] = "";

    apt_pkg.ParseCommandLine(Cnf, Arguments, sys.argv);

    Options = Cnf.SubTree("Neve::Options")
    if Options["Help"]:
        usage();

    if not Options["Action"]:
        utils.warn("""no -a/--action given; not doing anything.
Please read the documentation before running this script.
""");
        usage(1);

    print "Re-Creating DB..."
    (result, output) = commands.getstatusoutput("psql -f init_pool.sql template1");
    if (result != 0):
        utils.fubar("psql invocation failed!\n", result);
    print output;

    projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]));

    db_access.init (Cnf, projectB);

    print "Adding static tables from conf file..."
    projectB.query("BEGIN WORK");
    update_architectures();
    update_components();
    update_archives();
    update_locations();
    update_suites();
    update_override_type();
    update_priority();
    update_section();
    projectB.query("COMMIT WORK");

    files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"files","w");
    source_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"source","w");
    src_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"src_associations","w");
    dsc_files_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"dsc_files","w");
    binaries_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"binaries","w");
    bin_associations_query_cache = utils.open_file(Cnf["Neve::ExportDir"]+"bin_associations","w");

    projectB.query("BEGIN WORK");
    # Process Sources files to populate `source' and friends
    for location in Cnf.SubTree("Location").List():
        SubSec = Cnf.SubTree("Location::%s" % (location));
        server = SubSec["Archive"];
        type = Cnf.Find("Location::%s::Type" % (location));
        if type == "legacy-mixed":
            sources = location + 'Sources.gz';
            suite = Cnf.Find("Location::%s::Suite" % (location));
            do_sources(sources, suite, "", server);
        elif type == "legacy" or type == "pool":
            for suite in Cnf.ValueList("Location::%s::Suites" % (location)):
                for component in Cnf.SubTree("Component").List():
                    sources = Cnf["Dir::Root"] + "dists/" + Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/source/' + 'Sources.gz';
                    do_sources(sources, suite, component, server);
        else:
            utils.fubar("Unknown location type ('%s')." % (type));

    # Process Packages files to populate `binaries' and friends

    for location in Cnf.SubTree("Location").List():
        SubSec = Cnf.SubTree("Location::%s" % (location));
        server = SubSec["Archive"];
        type = Cnf.Find("Location::%s::Type" % (location));
        if type == "legacy-mixed":
            packages = location + 'Packages';
            suite = Cnf.Find("Location::%s::Suite" % (location));
            print 'Processing '+location+'...';
            process_packages (packages, suite, "", server);
        elif type == "legacy" or type == "pool":
            for suite in Cnf.ValueList("Location::%s::Suites" % (location)):
                for component in Cnf.SubTree("Component").List():
                    architectures = filter(utils.real_arch,
                                           Cnf.ValueList("Suite::%s::Architectures" % (suite)));
                    for architecture in architectures:
                        packages = Cnf["Dir::Root"] + "dists/" + Cnf["Suite::%s::CodeName" % (suite)] + '/' + component + '/binary-' + architecture + '/Packages'
                        print 'Processing '+packages+'...';
                        process_packages (packages, suite, component, server);
        else:
            utils.fubar("Unknown location type ('%s')." % (type));

    files_query_cache.close();
    source_query_cache.close();
    src_associations_query_cache.close();
    dsc_files_query_cache.close();
    binaries_query_cache.close();
    bin_associations_query_cache.close();
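
    # Bulk-load each table from its flat file.  Note that COPY ... FROM
    # reads the file server-side, so the export files must be readable
    # by the PostgreSQL backend.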
    print "Writing data to `files' table...";
    projectB.query("COPY files FROM '%s'" % (Cnf["Neve::ExportDir"]+"files"));
    print "Writing data to `source' table...";
    projectB.query("COPY source FROM '%s'" % (Cnf["Neve::ExportDir"]+"source"));
    print "Writing data to `src_associations' table...";
    projectB.query("COPY src_associations FROM '%s'" % (Cnf["Neve::ExportDir"]+"src_associations"));
    print "Writing data to `dsc_files' table...";
    projectB.query("COPY dsc_files FROM '%s'" % (Cnf["Neve::ExportDir"]+"dsc_files"));
    print "Writing data to `binaries' table...";
    projectB.query("COPY binaries FROM '%s'" % (Cnf["Neve::ExportDir"]+"binaries"));
    print "Writing data to `bin_associations' table...";
    projectB.query("COPY bin_associations FROM '%s'" % (Cnf["Neve::ExportDir"]+"bin_associations"));
    print "Committing...";
    projectB.query("COMMIT WORK");

    # Add the constraints and otherwise generally clean up the database.
    # See add_constraints.sql for more details...

    print "Running add_constraints.sql...";
    (result, output) = commands.getstatusoutput("psql %s < add_constraints.sql" % (Cnf["DB::Name"]));
    print output;
    if (result != 0):
        utils.fubar("psql invocation failed!\n%s" % (output), result);

    return;

################################################################################

def main():
    utils.try_with_debug(do_da_do_da);

################################################################################

if __name__ == '__main__':
    main();