From d16e2f9f99f018d3672e77f86a3adf264844590f Mon Sep 17 00:00:00 2001 From: Philipp Kern Date: Tue, 2 Sep 2008 20:58:48 +0200 Subject: [PATCH] Revert "revert all my stupid commits, we'll try this again later when we have a test server" This reverts commit aa83ebb15882823869b109d370e24ef3efd730f8. Conflicts: ChangeLog --- ChangeLog | 19 ++++++- dak/check_archive.py | 28 +++++++--- dak/process_accepted.py | 8 ++- dak/process_unchecked.py | 114 +++----------------------------------- daklib/database.py | 12 ++-- daklib/utils.py | 116 ++++++++++++++++++++++++++++++++++++++- setup/init_pool.sql | 2 + 7 files changed, 175 insertions(+), 124 deletions(-) diff --git a/ChangeLog b/ChangeLog index 36294a1b..a7f4a3af 100644 --- a/ChangeLog +++ b/ChangeLog @@ -36,6 +36,24 @@ * config/debian/cron.dinstall: We dont want i18n to ever fail dinstall, add a || true +2008-08-15 Mark Hymers + + * daklib/utils.py: Actually import a module before using it. + + * daklib/utils.py: Actually check we have basedict before trying to + use it. + + * dak/process_accepted.py, dak/process_unchecked.py, + daklib/database.py: Don't change get_files_id to use sha1sum and + sha256sum. + + * setup/init_pool.sql, dak/check_archive.py, dak/decode_dot_dak.py, + dak/process_accepted.py, dak/process_unchecked.py, daklib/database.py, + daklib/queue.py, daklib/utils.py: Attempt to add sha1sum and + sha256sums into the database. The complication is that we have to + keep backwards compatibility with the .dak files already in existence. + Note that import_archive hasn't been hacked to deal with this yet. 
+ 2008-08-14 Joerg Jaspert * config/debian/cron.dinstall: Added the i18n retrieval of package @@ -99,7 +117,6 @@ 2008-08-07 Stephen Gran * Drop use of exec to eval variable interpolation - 2008-08-07 Joerg Jaspert * dak/process_accepted.py (install): Error out with the new diff --git a/dak/check_archive.py b/dak/check_archive.py index 2d9321d6..93cc832c 100755 --- a/dak/check_archive.py +++ b/dak/check_archive.py @@ -52,7 +52,7 @@ Run various sanity checks of the archive and/or database. The following MODEs are available: - md5sums - validate the md5sums stored in the database + checksums - validate the checksums stored in the database files - check files in the database against what's in the archive dsc-syntax - validate the syntax of .dsc files in the archive missing-overrides - check for missing overrides @@ -194,16 +194,18 @@ SELECT l.path, f.filename FROM files f, dsc_files df, location l WHERE df.source ################################################################################ -def check_md5sums(): +def check_checksums(): print "Getting file information from database..." - q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.size FROM files f, location l WHERE f.location = l.id") + q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.sha1sum, f.sha256sum, f.size FROM files f, location l WHERE f.location = l.id") ql = q.getresult() - print "Checking file md5sums & sizes..." + print "Checking file checksums & sizes..." for i in ql: filename = os.path.abspath(i[0] + i[1]) db_md5sum = i[2] - db_size = int(i[3]) + db_sha1sum = i[3] + db_sha256sum = i[4] + db_size = int(i[5]) try: f = utils.open_file(filename) except: @@ -215,6 +217,18 @@ def check_md5sums(): utils.warn("**WARNING** md5sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, md5sum, db_md5sum)) if size != db_size: utils.warn("**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])." 
% (filename, size, db_size)) + # Until the main database is filled, we need to not spit 500,000 warnings + # every time we scan the archive. Yet another hack (TM) which can go away + # once this is all working + if db_sha1sum is not None and db_sha1sum != '': + sha1sum = apt_pkg.sha1sum(f) + if sha1sum != db_sha1sum: + utils.warn("**WARNING** sha1sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha1sum, db_sha1sum)) + + if db_sha256sum is not None and db_sha256sum != '': + sha256sum = apt_pkg.sha256sum(f) + if sha256sum != db_sha256sum: + utils.warn("**WARNING** sha256sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha256sum, db_sha256sum)) print "Done." @@ -425,8 +439,8 @@ def main (): projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"])) database.init(Cnf, projectB) - if mode == "md5sums": - check_md5sums() + if mode == "checksums": + check_checksums() elif mode == "files": check_files() elif mode == "dsc-syntax": diff --git a/dak/process_accepted.py b/dak/process_accepted.py index 0db17bad..5e09243f 100755 --- a/dak/process_accepted.py +++ b/dak/process_accepted.py @@ -274,6 +274,10 @@ def install (): # Begin a transaction; if we bomb out anywhere between here and the COMMIT WORK below, the DB will not be changed. projectB.query("BEGIN WORK") + # Check the hashes are all present: HACK: Can go away once all dak files + # are known to be newer than the shasum changes + utils.ensure_hashes(changes, dsc, files, dsc_files) + # Add the .dsc file to the DB for file in files.keys(): if files[file]["type"] == "dsc": @@ -307,7 +311,7 @@ def install (): # files id is stored in dsc_files by check_dsc(). 
files_id = dsc_files[dsc_file].get("files id", None) if files_id == None: - files_id = database.get_files_id(filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id) + files_id = database.get_files_id(filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], dsc_location_id) # FIXME: needs to check for -1/-2 and or handle exception if files_id == None: files_id = database.set_files_id (filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id) @@ -349,7 +353,7 @@ def install (): if not files[file].has_key("location id") or not files[file]["location id"]: files[file]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[file]["component"],utils.where_am_i()) if not files[file].has_key("files id") or not files[file]["files id"]: - files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["location id"]) + files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], files[file]["location id"]) source_id = database.get_source_id (source, source_version) if source_id: projectB.query("INSERT INTO binaries (package, version, maintainer, source, architecture, file, type, sig_fpr) VALUES ('%s', '%s', %d, %d, %d, %d, '%s', %d)" diff --git a/dak/process_unchecked.py b/dak/process_unchecked.py index 04afb7b3..123fd9f3 100755 --- a/dak/process_unchecked.py +++ b/dak/process_unchecked.py @@ -630,11 +630,11 @@ def check_files(): # Check the md5sum & size against existing files (if any) files[f]["pool name"] = utils.poolify (changes["source"], files[f]["component"]) - files_id = database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["sha1sum"], files[f]["sha256sum"], files[f]["location id"]) + files_id = 
database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["location id"]) if files_id == -1: reject("INTERNAL ERROR, get_files_id() returned multiple matches for %s." % (f)) elif files_id == -2: - reject("md5sum, sha1sum, sha256sum and/or size mismatch on existing copy of %s." % (f)) + reject("md5sum and/or size mismatch on existing copy of %s." % (f)) files[f]["files id"] = files_id # Check for packages that have moved from one component to another @@ -919,111 +919,13 @@ def check_urgency (): ################################################################################ def check_hashes (): - # Make sure we recognise the format of the Files: field - format = changes.get("format", "0.0").split(".",1) - if len(format) == 2: - format = int(format[0]), int(format[1]) - else: - format = int(float(format[0])), 0 - - check_hash(".changes", files, "md5sum", apt_pkg.md5sum) - check_hash(".dsc", dsc_files, "md5sum", apt_pkg.md5sum) - - # (hashname, function, originate) - # If originate is true, we have to calculate it because - # the changes file version is too early for it to be - # included - hashes = [("sha1", apt_pkg.sha1sum, False), - ("sha256", apt_pkg.sha256sum, False)] - - if format <= (1,8): - hashes["sha1"] = True - hashes["sha256"] = True - - for x in changes: - if x.startswith("checksum-"): - h = x.split("-",1)[1] - if h not in dict(hashes): - reject("Unsupported checksum field in .changes" % (h)) - - for x in dsc: - if x.startswith("checksum-"): - h = x.split("-",1)[1] - if h not in dict(hashes): - reject("Unsupported checksum field in .dsc" % (h)) - - for h,f,o in hashes: - try: - fs = utils.build_file_list(changes, 0, "checksums-%s" % h, h) - if o: - create_hash(fs, h, f, files) - else: - check_hash(".changes %s" % (h), fs, h, f, files) - except NoFilesFieldError: - reject("No Checksums-%s: field in .changes" % (h)) - except UnknownFormatError, format: - reject("%s: unknown format of .changes" % (format)) - except 
ParseChangesError, line: - reject("parse error for Checksums-%s in .changes, can't grok: %s." % (h, line)) - - if "source" not in changes["architecture"]: continue - - try: - fs = utils.build_file_list(dsc, 1, "checksums-%s" % h, h) - if o: - create_hash(fs, h, f, dsc_files) - else: - check_hash(".dsc %s" % (h), fs, h, f, dsc_files) - except UnknownFormatError, format: - reject("%s: unknown format of .dsc" % (format)) - except NoFilesFieldError: - reject("No Checksums-%s: field in .dsc" % (h)) - except ParseChangesError, line: - reject("parse error for Checksums-%s in .dsc, can't grok: %s." % (h, line)) - -################################################################################ - -def create_hash (lfiles, key, testfn, basedict = None): - for f in lfiles.keys(): - try: - file_handle = utils.open_file(f) - except CantOpenError: - continue - - # Check hash - basedict[f]['%ssum' % key] = testfn(file_handle) - file_handle.close() - - -################################################################################ - -def check_hash (where, lfiles, key, testfn, basedict = None): - if basedict: - for f in basedict.keys(): - if f not in lfiles: - reject("%s: no %s checksum" % (f, key)) - - for f in lfiles.keys(): - if basedict and f not in basedict: - reject("%s: extraneous entry in %s checksums" % (f, key)) - - try: - file_handle = utils.open_file(f) - except CantOpenError: - continue + utils.check_hash(".changes", files, "md5sum", apt_pkg.md5sum) + utils.check_hash(".dsc", dsc_files, "md5sum", apt_pkg.md5sum) - # Check hash - if testfn(file_handle) != lfiles[f][key]: - reject("%s: %s check failed." 
% (f, key)) - file_handle.close() - # Store the hashes for later use - basedict[f]['%ssum' % key] = lfiles[f][key] - # Check size - actual_size = os.stat(f)[stat.ST_SIZE] - size = int(lfiles[f]["size"]) - if size != actual_size: - reject("%s: actual file size (%s) does not match size (%s) in %s" - % (f, actual_size, size, where)) + # This is stupid API, but it'll have to do for now until + # we actually have proper abstraction + for m in utils.ensure_hashes(changes, dsc, files, dsc_files): + reject(m) ################################################################################ diff --git a/daklib/database.py b/daklib/database.py index cad427ac..9185d0a3 100755 --- a/daklib/database.py +++ b/daklib/database.py @@ -317,7 +317,7 @@ def get_or_set_fingerprint_id (fingerprint): ################################################################################ -def get_files_id (filename, size, md5sum, sha1sum, sha256sum location_id): +def get_files_id (filename, size, md5sum, location_id): global files_id_cache cache_key = "%s_%d" % (filename, location_id) @@ -326,7 +326,7 @@ def get_files_id (filename, size, md5sum, sha1sum, sha256sum location_id): return files_id_cache[cache_key] size = int(size) - q = projectB.query("SELECT id, size, md5sum, sha1sum, sha256sum FROM files WHERE filename = '%s' AND location = %d" % (filename, location_id)) + q = projectB.query("SELECT id, size, md5sum FROM files WHERE filename = '%s' AND location = %d" % (filename, location_id)) ql = q.getresult() if ql: if len(ql) != 1: @@ -334,9 +334,7 @@ def get_files_id (filename, size, md5sum, sha1sum, sha256sum location_id): ql = ql[0] orig_size = int(ql[1]) orig_md5sum = ql[2] - orig_sha1sum = ql[3] - orig_sha256sum = ql[4] - if orig_size != size or orig_md5sum != md5sum or orig_sha1sum != sha1sum or orig_sha256sum != sha256sum: + if orig_size != size or orig_md5sum != md5sum: return -2 files_id_cache[cache_key] = ql[0] return files_id_cache[cache_key] @@ -365,9 +363,9 @@ def 
get_or_set_queue_id (queue): def set_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id): global files_id_cache - projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum location_id)) + projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', '%s', '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum, location_id)) - return get_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id) + return get_files_id (filename, size, md5sum, location_id) ### currval has issues with postgresql 7.1.3 when the table is big ### it was taking ~3 seconds to return on auric which is very Not diff --git a/daklib/utils.py b/daklib/utils.py index ec82782f..75845244 100755 --- a/daklib/utils.py +++ b/daklib/utils.py @@ -22,7 +22,7 @@ ################################################################################ import codecs, commands, email.Header, os, pwd, re, select, socket, shutil, \ - sys, tempfile, traceback + sys, tempfile, traceback, stat import apt_pkg import database from dak_exceptions import * @@ -55,6 +55,10 @@ default_apt_config = "/etc/dak/apt.conf" alias_cache = None key_uid_email_cache = {} +# (hashname, function, earliest_changes_version) +known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)), + ("sha256", apt_pkg.sha256sum, (1, 8))] + ################################################################################ def open_file(filename, mode='r'): @@ -207,6 +211,116 @@ The rules for (signing_rules == 1)-mode are: ################################################################################ +def create_hash (lfiles, key, testfn, basedict = None): + rejmsg = [] + for f in lfiles.keys(): + try: + file_handle = open_file(f) + except CantOpenError: + rejmsg.append("Could not open file %s for checksumming" % (f)) + + # Check hash + if basedict and basedict.has_key(f): + 
basedict[f]['%ssum' % key] = testfn(file_handle) + file_handle.close() + + return rejmsg + +################################################################################ + +def check_hash (where, lfiles, key, testfn, basedict = None): + rejmsg = [] + if basedict: + for f in basedict.keys(): + if f not in lfiles: + rejmsg.append("%s: no %s checksum" % (f, key)) + + for f in lfiles.keys(): + if basedict and f not in basedict: + rejmsg.append("%s: extraneous entry in %s checksums" % (f, key)) + + try: + file_handle = open_file(f) + except CantOpenError: + continue + + # Check hash + if testfn(file_handle) != lfiles[f][key]: + rejmsg.append("%s: %s check failed." % (f, key)) + file_handle.close() + # Store the hashes for later use + if basedict: + basedict[f]['%ssum' % key] = lfiles[f][key] + # Check size + actual_size = os.stat(f)[stat.ST_SIZE] + size = int(lfiles[f]["size"]) + if size != actual_size: + rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s" + % (f, actual_size, size, where)) + + return rejmsg + +################################################################################ + +def ensure_hashes(changes, dsc, files, dsc_files): + # Make sure we recognise the format of the Files: field + format = changes.get("format", "0.0").split(".",1) + if len(format) == 2: + format = int(format[0]), int(format[1]) + else: + format = int(float(format[0])), 0 + + rejmsg = [] + for x in changes: + if x.startswith("checksum-"): + h = x.split("-",1)[1] + if h not in dict(known_hashes): + rejmsg.append("Unsupported checksum field in .changes" % (h)) + + for x in dsc: + if x.startswith("checksum-"): + h = x.split("-",1)[1] + if h not in dict(known_hashes): + rejmsg.append("Unsupported checksum field in .dsc" % (h)) + + # We have to calculate the hash if we have an earlier changes version than + # the hash appears in rather than require it exist in the changes file + # I hate backwards compatibility + for h,f,v in known_hashes: + try: + if format < v: 
+ for m in create_hash(files, h, f, files): + rejmsg.append(m) + else: + for m in check_hash(".changes %s" % (h), files, h, f, files): + rejmsg.append(m) + except NoFilesFieldError: + rejmsg.append("No Checksums-%s: field in .changes" % (h)) + except UnknownFormatError, format: + rejmsg.append("%s: unknown format of .changes" % (format)) + except ParseChangesError, line: + rejmsg.append("parse error for Checksums-%s in .changes, can't grok: %s." % (h, line)) + + if "source" not in changes["architecture"]: continue + + try: + if format < v: + for m in create_hash(dsc_files, h, f, dsc_files): + rejmsg.append(m) + else: + for m in check_hash(".dsc %s" % (h), dsc_files, h, f, dsc_files): + rejmsg.append(m) + except UnknownFormatError, format: + rejmsg.append("%s: unknown format of .dsc" % (format)) + except NoFilesFieldError: + rejmsg.append("No Checksums-%s: field in .dsc" % (h)) + except ParseChangesError, line: + rejmsg.append("parse error for Checksums-%s in .dsc, can't grok: %s." % (h, line)) + + return rejmsg + +################################################################################ + # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"): diff --git a/setup/init_pool.sql b/setup/init_pool.sql index 7a6e2a49..1e363940 100644 --- a/setup/init_pool.sql +++ b/setup/init_pool.sql @@ -70,6 +70,8 @@ CREATE TABLE files ( md5sum TEXT NOT NULL, location INT4 NOT NULL, -- REFERENCES location last_used TIMESTAMP, + sha1sum TEXT NOT NULL, + sha256sum TEXT NOT NULL, unique (filename, location) ); -- 2.39.5