X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=dak%2Fcheck_archive.py;h=2eb450bd434b9e6736da1222f16b967d478da4de;hb=250fb82d468dcd488cad753b7c624e5a6436a358;hp=1e434e34428c6117a6f9aab002187b79224ee226;hpb=7aaaad3135c9164390af5897925660842368660b;p=dak.git

diff --git a/dak/check_archive.py b/dak/check_archive.py
index 1e434e34..2eb450bd 100755
--- a/dak/check_archive.py
+++ b/dak/check_archive.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Various different sanity checks
+""" Various different sanity checks """
 # Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006  James Troup
 
 # This program is free software; you can redistribute it and/or modify
@@ -26,10 +26,11 @@
 
 ################################################################################
 
-import commands, os, pg, stat, string, sys, time
+import commands, os, pg, stat, sys, time
 import apt_pkg, apt_inst
-import dak.lib.database as database
-import dak.lib.utils as utils
+from daklib import database
+from daklib import utils
+from daklib.regexes import re_issource
 
 ################################################################################
 
@@ -52,7 +53,7 @@ Run various sanity checks of the archive and/or database.
 
 The following MODEs are available:
 
-  md5sums            - validate the md5sums stored in the database
+  checksums          - validate the checksums stored in the database
   files              - check files in the database against what's in the archive
   dsc-syntax         - validate the syntax of .dsc files in the archive
   missing-overrides  - check for missing overrides
@@ -80,7 +81,7 @@ def process_dir (unused, dirname, filenames):
         filename = filename.replace('potato-proposed-updates', 'proposed-updates')
         if os.path.isfile(filename) and not os.path.islink(filename) and not db_files.has_key(filename) and not excluded.has_key(filename):
             waste += os.stat(filename)[stat.ST_SIZE]
-            print filename
+            print "%s" % (filename)
 
 ################################################################################
 
@@ -88,24 +89,29 @@ def check_files():
     global db_files
 
     print "Building list of database files..."
-    q = projectB.query("SELECT l.path, f.filename FROM files f, location l WHERE f.location = l.id")
+    q = projectB.query("SELECT l.path, f.filename, f.last_used FROM files f, location l WHERE f.location = l.id ORDER BY l.path, f.filename")
     ql = q.getresult()
 
+    print "Missing files:"
     db_files.clear()
     for i in ql:
-        filename = os.path.abspath(i[0] + i[1])
+        filename = os.path.abspath(i[0] + i[1])
         db_files[filename] = ""
         if os.access(filename, os.R_OK) == 0:
-            utils.warn("'%s' doesn't exist." % (filename))
+            if i[2]:
+                print "(last used: %s) %s" % (i[2], filename)
+            else:
+                print "%s" % (filename)
+
     filename = Cnf["Dir::Override"]+'override.unreferenced'
     if os.path.exists(filename):
-        file = utils.open_file(filename)
-        for filename in file.readlines():
+        f = utils.open_file(filename)
+        for filename in f.readlines():
            filename = filename[:-1]
            excluded[filename] = ""
 
-    print "Checking against existent files..."
+    print "Existent files not in db:"
 
     os.path.walk(Cnf["Dir::Root"]+'pool/', process_dir, None)
 
@@ -124,11 +130,11 @@ def check_dscs():
         list_filename = '%s%s_%s_source.list' % (Cnf["Dir::Lists"], suite, component)
         list_file = utils.open_file(list_filename)
         for line in list_file.readlines():
-            file = line[:-1]
+            f = line[:-1]
             try:
-                utils.parse_changes(file, signing_rules=1)
-            except utils.invalid_dsc_format_exc, line:
-                utils.warn("syntax error in .dsc file '%s', line %s." % (file, line))
+                utils.parse_changes(f, signing_rules=1)
+            except InvalidDscError, line:
+                utils.warn("syntax error in .dsc file '%s', line %s." % (f, line))
             count += 1
 
     if count:
@@ -189,28 +195,42 @@ SELECT l.path, f.filename FROM files f, dsc_files df, location l WHERE df.source
 
 ################################################################################
 
-def check_md5sums():
+def check_checksums():
     print "Getting file information from database..."
-    q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.size FROM files f, location l WHERE f.location = l.id")
+    q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.sha1sum, f.sha256sum, f.size FROM files f, location l WHERE f.location = l.id")
     ql = q.getresult()
 
-    print "Checking file md5sums & sizes..."
+    print "Checking file checksums & sizes..."
     for i in ql:
-        filename = os.path.abspath(i[0] + i[1])
+        filename = os.path.abspath(i[0] + i[1])
         db_md5sum = i[2]
-        db_size = int(i[3])
+        db_sha1sum = i[3]
+        db_sha256sum = i[4]
+        db_size = int(i[5])
         try:
-            file = utils.open_file(filename)
+            f = utils.open_file(filename)
         except:
            utils.warn("can't open '%s'." % (filename))
            continue
-        md5sum = apt_pkg.md5sum(file)
+        md5sum = apt_pkg.md5sum(f)
         size = os.stat(filename)[stat.ST_SIZE]
         if md5sum != db_md5sum:
             utils.warn("**WARNING** md5sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, md5sum, db_md5sum))
         if size != db_size:
             utils.warn("**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, size, db_size))
 
+        # Check the sha1sum
+        f.seek(0)
+        sha1sum = apt_pkg.sha1sum(f)
+        if sha1sum != db_sha1sum:
+            utils.warn("**WARNING** sha1sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha1sum, db_sha1sum))
+
+        # Check the sha256sum
+        f.seek(0)
+        sha256sum = apt_pkg.sha256sum(f)
+        if sha256sum != db_sha256sum:
+            utils.warn("**WARNING** sha256sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha256sum, db_sha256sum))
+
     print "Done."
 
 ################################################################################
@@ -233,14 +253,14 @@ def check_timestamps():
     db_files.clear()
     count = 0
     for i in ql:
-        filename = os.path.abspath(i[0] + i[1])
+        filename = os.path.abspath(i[0] + i[1])
         if os.access(filename, os.R_OK):
-            file = utils.open_file(filename)
+            f = utils.open_file(filename)
             current_file = filename
             sys.stderr.write("Processing %s.\n" % (filename))
-            apt_inst.debExtract(file,Ent,"control.tar.gz")
-            file.seek(0)
-            apt_inst.debExtract(file,Ent,"data.tar.gz")
+            apt_inst.debExtract(f, Ent, "control.tar.gz")
+            f.seek(0)
+            apt_inst.debExtract(f, Ent, "data.tar.gz")
             count += 1
     print "Checked %d files (out of %d)." % (count, len(db_files.keys()))
 
@@ -265,15 +285,15 @@ def check_missing_tar_gz_in_dsc():
             utils.fubar("error parsing .dsc file '%s'." % (filename))
         dsc_files = utils.build_file_list(dsc, is_a_dsc=1)
         has_tar = 0
-        for file in dsc_files.keys():
-            m = utils.re_issource.match(file)
+        for f in dsc_files.keys():
+            m = re_issource.match(f)
             if not m:
-                utils.fubar("%s not recognised as source." % (file))
-            type = m.group(3)
-            if type == "orig.tar.gz" or type == "tar.gz":
+                utils.fubar("%s not recognised as source." % (f))
+            ftype = m.group(3)
+            if ftype == "orig.tar.gz" or ftype == "tar.gz":
                 has_tar = 1
         if not has_tar:
-            utils.warn("%s has no .tar.gz in the .dsc file." % (file))
+            utils.warn("%s has no .tar.gz in the .dsc file." % (f))
             count += 1
 
     if count:
@@ -286,7 +306,7 @@ def validate_sources(suite, component):
     filename = "%s/dists/%s/%s/source/Sources.gz" % (Cnf["Dir::Root"], suite, component)
     print "Processing %s..." % (filename)
     # apt_pkg.ParseTagFile needs a real file handle and can't handle a GzipFile instance...
-    temp_filename = utils.temp_filename()
+    (fd, temp_filename) = utils.temp_filename()
     (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_filename))
     if (result != 0):
         sys.stderr.write("Gunzip invocation failed!\n%s\n" % (output))
@@ -325,7 +345,7 @@ def validate_packages(suite, component, architecture):
                % (Cnf["Dir::Root"], suite, component, architecture)
     print "Processing %s..." % (filename)
     # apt_pkg.ParseTagFile needs a real file handle and can't handle a GzipFile instance...
-    temp_filename = utils.temp_filename()
+    (fd, temp_filename) = utils.temp_filename()
     (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_filename))
     if (result != 0):
         sys.stderr.write("Gunzip invocation failed!\n%s\n" % (output))
@@ -345,7 +365,7 @@ def check_indices_files_exist():
     for suite in [ "stable", "testing", "unstable" ]:
         for component in Cnf.ValueList("Suite::%s::Components" % (suite)):
             architectures = Cnf.ValueList("Suite::%s::Architectures" % (suite))
-            for arch in map(string.lower, architectures):
+            for arch in [ i.lower() for i in architectures ]:
                 if arch == "source":
                     validate_sources(suite, component)
                 elif arch == "all":
@@ -362,39 +382,13 @@ def check_files_not_symlinks():
     print "done. (%d seconds)" % (int(time.time()-before))
     q_files = q.getresult()
 
-#    locations = {}
-#    q = projectB.query("SELECT l.path, c.name, l.id FROM location l, component c WHERE l.component = c.id")
-#    for i in q.getresult():
-#        path = os.path.normpath(i[0] + i[1])
-#        locations[path] = (i[0], i[2])
-
-#    q = projectB.query("BEGIN WORK")
     for i in q_files:
-        filename = os.path.normpath(i[0] + i[1])
-#        file_id = i[2]
+        filename = os.path.normpath(i[0] + i[1])
         if os.access(filename, os.R_OK) == 0:
             utils.warn("%s: doesn't exist." % (filename))
         else:
             if os.path.islink(filename):
                 utils.warn("%s: is a symlink." % (filename))
-            # You probably don't want to use the rest of this...
-#                print "%s: is a symlink." % (filename)
-#                dest = os.readlink(filename)
-#                if not os.path.isabs(dest):
-#                    dest = os.path.normpath(os.path.join(os.path.dirname(filename), dest))
-#                print "--> %s" % (dest)
-#                # Determine suitable location ID
-#                # [in what must be the suckiest way possible?]
-#                location_id = None
-#                for path in locations.keys():
-#                    if dest.find(path) == 0:
-#                        (location, location_id) = locations[path]
-#                        break
-#                if not location_id:
-#                    utils.fubar("Can't find location for %s (%s)." % (dest, filename))
-#                new_filename = dest.replace(location, "")
-#                q = projectB.query("UPDATE files SET filename = '%s', location = %s WHERE id = %s" % (new_filename, location_id, file_id))
-#            q = projectB.query("COMMIT WORK")
 
 ################################################################################
 
@@ -426,14 +420,14 @@ def main ():
     Cnf = utils.get_conf()
     Arguments = [('h',"help","Check-Archive::Options::Help")]
     for i in [ "help" ]:
-        if not Cnf.has_key("Check-Archive::Options::%s" % (i)):
-            Cnf["Check-Archive::Options::%s" % (i)] = ""
+        if not Cnf.has_key("Check-Archive::Options::%s" % (i)):
+            Cnf["Check-Archive::Options::%s" % (i)] = ""
 
     args = apt_pkg.ParseCommandLine(Cnf, Arguments, sys.argv)
     Options = Cnf.SubTree("Check-Archive::Options")
 
     if Options["Help"]:
-        usage()
+        usage()
 
     if len(args) < 1:
         utils.warn("dak check-archive requires at least one argument")
@@ -446,8 +440,8 @@ def main ():
     projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
     database.init(Cnf, projectB)
 
-    if mode == "md5sums":
-        check_md5sums()
+    if mode == "checksums":
+        check_checksums()
     elif mode == "files":
         check_files()
     elif mode == "dsc-syntax":
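
For readers skimming the patch, the heart of the change is the new check_checksums() mode: for every row in the files table it re-reads the file from the pool and compares its size and its md5, sha1 and sha256 digests against the stored values, warning on any mismatch. A minimal standalone sketch of that verification loop, using hashlib in place of apt_pkg and a hypothetical iterable of (filename, md5, sha1, sha256, size) tuples standing in for the database query, looks like this:

    # Illustrative sketch only, not code from the commit above.
    # "rows" is a hypothetical stand-in for the SELECT in check_checksums().
    import hashlib
    import os

    def verify_checksums(rows):
        for filename, db_md5, db_sha1, db_sha256, db_size in rows:
            try:
                f = open(filename, "rb")
            except IOError:
                print("can't open '%s'." % filename)
                continue
            data = f.read()
            f.close()
            # Size check mirrors the os.stat() comparison in the patch.
            if os.path.getsize(filename) != db_size:
                print("size mismatch for '%s'." % filename)
            # Digest checks mirror apt_pkg.md5sum()/sha1sum()/sha256sum().
            if hashlib.md5(data).hexdigest() != db_md5:
                print("md5sum mismatch for '%s'." % filename)
            if hashlib.sha1(data).hexdigest() != db_sha1:
                print("sha1sum mismatch for '%s'." % filename)
            if hashlib.sha256(data).hexdigest() != db_sha256:
                print("sha256sum mismatch for '%s'." % filename)

The sketch reads each file once and hashes the bytes three times; the patched code instead keeps one open file object, hands it to apt_pkg and seeks back to the start between digests, so large files are never held in memory. In the script itself this runs as the checksums MODE of dak check-archive; the old md5sums name is gone from both the usage text and the dispatcher in main().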
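
The reworked check_files() is the complementary check. Files recorded in the database but missing from disk are listed first, now with their last_used timestamp, and a walk over the pool directory under Dir::Root then prints files on disk that neither the database nor the override.unreferenced list knows about, summing their size into waste. A minimal sketch of that second pass, assuming db_files and excluded are plain sets of absolute paths and pool_root is a placeholder for Cnf["Dir::Root"] + 'pool/':

    # Illustrative sketch only; pool_root, db_files and excluded are
    # placeholders for the data check_files() builds from configuration,
    # the files table and the override.unreferenced list.
    import os

    def report_unreferenced(pool_root, db_files, excluded):
        waste = 0
        for dirname, _subdirs, names in os.walk(pool_root):
            for name in names:
                filename = os.path.abspath(os.path.join(dirname, name))
                if not os.path.isfile(filename) or os.path.islink(filename):
                    continue
                if filename in db_files or filename in excluded:
                    continue
                waste += os.stat(filename).st_size
                print(filename)
        return waste

The real process_dir() is driven through the older os.path.walk() callback interface and does some special-casing of proposed-updates paths (visible in its context lines above), but the core test, membership of each on-disk file in the database listing, is the same.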