X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=dak%2Fcheck_archive.py;h=80782908864f5a72ed52ee5fbd55b76976fc3d1a;hb=fdf3c42445b4f11f4cd71634dd2b57cb7d7a4f36;hp=2d9321d68c68f1caf5e7d77799c6eece507cb739;hpb=c9e12bb14c839b31d48db95406b4497bb4cdffde;p=dak.git diff --git a/dak/check_archive.py b/dak/check_archive.py index 2d9321d6..80782908 100755 --- a/dak/check_archive.py +++ b/dak/check_archive.py @@ -1,7 +1,11 @@ #!/usr/bin/env python -# Various different sanity checks -# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup +""" Various different sanity checks + +@contact: Debian FTP Master +@copyright: (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup +@license: GNU General Public License version 2 or later +""" # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -26,21 +30,28 @@ ################################################################################ -import commands, os, pg, stat, sys, time -import apt_pkg, apt_inst +import commands +import os +import pg +import stat +import sys +import time +import apt_pkg +import apt_inst from daklib import database from daklib import utils +from daklib.regexes import re_issource ################################################################################ -Cnf = None -projectB = None -db_files = {} -waste = 0.0 -excluded = {} +Cnf = None #: Configuration, apt_pkg.Configuration +projectB = None #: database connection, pgobject +db_files = {} #: Cache of filenames as known by the database +waste = 0.0 #: How many bytes are "wasted" by files not referenced in database +excluded = {} #: List of files which are excluded from files check current_file = None future_files = {} -current_time = time.time() +current_time = time.time() #: now() ################################################################################ @@ -52,7 +63,7 @@ Run various sanity checks of the archive and/or database. The following MODEs are available: - md5sums - validate the md5sums stored in the database + checksums - validate the checksums stored in the database files - check files in the database against what's in the archive dsc-syntax - validate the syntax of .dsc files in the archive missing-overrides - check for missing overrides @@ -68,6 +79,16 @@ The following MODEs are available: ################################################################################ def process_dir (unused, dirname, filenames): + """ + Process a directory and output every files name which is not listed already + in the C{filenames} or global C{excluded} dictionaries. + + @type dirname: string + @param dirname: the directory to look at + + @type filenames: dict + @param filenames: Known filenames to ignore + """ global waste, db_files, excluded if dirname.find('/disks-') != -1 or dirname.find('upgrade-') != -1: @@ -85,6 +106,10 @@ def process_dir (unused, dirname, filenames): ################################################################################ def check_files(): + """ + Prepare the dictionary of existing filenames, then walk through the archive + pool/ directory to compare it. + """ global db_files print "Building list of database files..." @@ -120,11 +145,12 @@ def check_files(): ################################################################################ def check_dscs(): + """ + Parse every .dsc file in the archive and check for it's validity. + """ count = 0 suite = 'unstable' for component in Cnf.SubTree("Component").List(): - if component == "mixed": - continue component = component.lower() list_filename = '%s%s_%s_source.list' % (Cnf["Dir::Lists"], suite, component) list_file = utils.open_file(list_filename) @@ -142,6 +168,9 @@ def check_dscs(): ################################################################################ def check_override(): + """ + Check for missing overrides in stable and unstable. + """ for suite in [ "stable", "unstable" ]: print suite print "-"*len(suite) @@ -162,10 +191,13 @@ SELECT DISTINCT s.source FROM source s, src_associations sa ################################################################################ -# Ensure that the source files for any given package is all in one -# directory so that 'apt-get source' works... def check_source_in_one_dir(): + """ + Ensure that the source files for any given package is all in one + directory so that 'apt-get source' works... + """ + # Not the most enterprising method, but hey... broken_count = 0 q = projectB.query("SELECT id FROM source;") @@ -194,16 +226,21 @@ SELECT l.path, f.filename FROM files f, dsc_files df, location l WHERE df.source ################################################################################ -def check_md5sums(): +def check_checksums(): + """ + Validate all files + """ print "Getting file information from database..." - q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.size FROM files f, location l WHERE f.location = l.id") + q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.sha1sum, f.sha256sum, f.size FROM files f, location l WHERE f.location = l.id") ql = q.getresult() - print "Checking file md5sums & sizes..." + print "Checking file checksums & sizes..." for i in ql: filename = os.path.abspath(i[0] + i[1]) db_md5sum = i[2] - db_size = int(i[3]) + db_sha1sum = i[3] + db_sha256sum = i[4] + db_size = int(i[5]) try: f = utils.open_file(filename) except: @@ -215,13 +252,20 @@ def check_md5sums(): utils.warn("**WARNING** md5sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, md5sum, db_md5sum)) if size != db_size: utils.warn("**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, size, db_size)) + f.seek(0) + sha1sum = apt_pkg.sha1sum(f) + if sha1sum != db_sha1sum: + utils.warn("**WARNING** sha1sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha1sum, db_sha1sum)) + + f.seek(0) + sha256sum = apt_pkg.sha256sum(f) + if sha256sum != db_sha256sum: + utils.warn("**WARNING** sha256sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha256sum, db_sha256sum)) print "Done." ################################################################################ # -# Check all files for timestamps in the future; common from hardware -# (e.g. alpha) which have far-future dates as their default dates. def Ent(Kind,Name,Link,Mode,UID,GID,Size,MTime,Major,Minor): global future_files @@ -231,6 +275,11 @@ def Ent(Kind,Name,Link,Mode,UID,GID,Size,MTime,Major,Minor): print "%s: %s '%s','%s',%u,%u,%u,%u,%u,%u,%u" % (current_file, Kind,Name,Link,Mode,UID,GID,Size, MTime, Major, Minor) def check_timestamps(): + """ + Check all files for timestamps in the future; common from hardware + (e.g. alpha) which have far-future dates as their default dates. + """ + global current_file q = projectB.query("SELECT l.path, f.filename FROM files f, location l WHERE f.location = l.id AND f.filename ~ '.deb$'") @@ -252,6 +301,9 @@ def check_timestamps(): ################################################################################ def check_missing_tar_gz_in_dsc(): + """ + Ensure each .dsc lists a .tar.gz file + """ count = 0 print "Building list of database files..." @@ -271,7 +323,7 @@ def check_missing_tar_gz_in_dsc(): dsc_files = utils.build_file_list(dsc, is_a_dsc=1) has_tar = 0 for f in dsc_files.keys(): - m = utils.re_issource.match(f) + m = re_issource.match(f) if not m: utils.fubar("%s not recognised as source." % (f)) ftype = m.group(3) @@ -288,10 +340,13 @@ def check_missing_tar_gz_in_dsc(): ################################################################################ def validate_sources(suite, component): + """ + Ensure files mentioned in Sources exist + """ filename = "%s/dists/%s/%s/source/Sources.gz" % (Cnf["Dir::Root"], suite, component) print "Processing %s..." % (filename) # apt_pkg.ParseTagFile needs a real file handle and can't handle a GzipFile instance... - temp_filename = utils.temp_filename() + (fd, temp_filename) = utils.temp_filename() (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_filename)) if (result != 0): sys.stderr.write("Gunzip invocation failed!\n%s\n" % (output)) @@ -326,11 +381,14 @@ def validate_sources(suite, component): ######################################## def validate_packages(suite, component, architecture): + """ + Ensure files mentioned in Packages exist + """ filename = "%s/dists/%s/%s/binary-%s/Packages.gz" \ % (Cnf["Dir::Root"], suite, component, architecture) print "Processing %s..." % (filename) # apt_pkg.ParseTagFile needs a real file handle and can't handle a GzipFile instance... - temp_filename = utils.temp_filename() + (fd, temp_filename) = utils.temp_filename() (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_filename)) if (result != 0): sys.stderr.write("Gunzip invocation failed!\n%s\n" % (output)) @@ -347,9 +405,12 @@ def validate_packages(suite, component, architecture): ######################################## def check_indices_files_exist(): + """ + Ensure files mentioned in Packages & Sources exist + """ for suite in [ "stable", "testing", "unstable" ]: for component in Cnf.ValueList("Suite::%s::Components" % (suite)): - architectures = Cnf.ValueList("Suite::%s::Architectures" % (suite)) + architectures = database.get_suite_architectures(suite) for arch in [ i.lower() for i in architectures ]: if arch == "source": validate_sources(suite, component) @@ -361,6 +422,9 @@ def check_indices_files_exist(): ################################################################################ def check_files_not_symlinks(): + """ + Check files in the database aren't symlinks + """ print "Building list of database files... ", before = time.time() q = projectB.query("SELECT l.path, f.filename, f.id FROM files f, location l WHERE f.location = l.id") @@ -395,6 +459,7 @@ def chk_bd_process_dir (unused, dirname, filenames): ################################################################################ def check_build_depends(): + """ Validate build-dependencies of .dsc files in the archive """ os.path.walk(Cnf["Dir::Root"], chk_bd_process_dir, None) ################################################################################ @@ -425,8 +490,8 @@ def main (): projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"])) database.init(Cnf, projectB) - if mode == "md5sums": - check_md5sums() + if mode == "checksums": + check_checksums() elif mode == "files": check_files() elif mode == "dsc-syntax":