From 1e0cd14af2395605fb890648daae2ac9d7208ba0 Mon Sep 17 00:00:00 2001 From: Mark Hymers Date: Thu, 7 Aug 2008 13:38:38 +0000 Subject: [PATCH] first attempt at bodging in support for sha1sum and sha256sum Signed-off-by: Mark Hymers --- dak/decode_dot_dak.py | 6 ++--- dak/process_accepted.py | 12 ++++++---- dak/process_unchecked.py | 49 ++++++++++++++++++++++++++++++++-------- daklib/database.py | 14 +++++++----- daklib/queue.py | 7 +++--- 5 files changed, 61 insertions(+), 27 deletions(-) diff --git a/dak/decode_dot_dak.py b/dak/decode_dot_dak.py index 00bf8e7d..7ea342bd 100644 --- a/dak/decode_dot_dak.py +++ b/dak/decode_dot_dak.py @@ -101,9 +101,9 @@ def main(): for f in files.keys(): print " %s:" % (f) for i in [ "package", "version", "architecture", "type", "size", - "md5sum", "component", "location id", "source package", - "source version", "maintainer", "dbtype", "files id", - "new", "section", "priority", "pool name" ]: + "md5sum", "sha1sum", "sha256sum", "component", "location id", + "source package", "source version", "maintainer", "dbtype", + "files id", "new", "section", "priority", "pool name" ]: if files[f].has_key(i): print " %s: %s" % (i.capitalize(), files[f][i]) del files[f][i] diff --git a/dak/process_accepted.py b/dak/process_accepted.py index 94c2ea30..0db17bad 100755 --- a/dak/process_accepted.py +++ b/dak/process_accepted.py @@ -291,7 +291,7 @@ def install (): dsc_component = files[file]["component"] dsc_location_id = files[file]["location id"] if not files[file].has_key("files id") or not files[file]["files id"]: - files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], dsc_location_id) + files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id) projectB.query("INSERT INTO source (source, version, maintainer, changedby, file, install_date, sig_fpr) VALUES ('%s', '%s', %d, %d, %d, '%s', %s)" % (package, version, maintainer_id, changedby_id, files[file]["files id"], install_date, fingerprint_id)) @@ -307,10 +307,10 @@ def install (): # files id is stored in dsc_files by check_dsc(). files_id = dsc_files[dsc_file].get("files id", None) if files_id == None: - files_id = database.get_files_id(filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], dsc_location_id) + files_id = database.get_files_id(filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id) # FIXME: needs to check for -1/-2 and or handle exception if files_id == None: - files_id = database.set_files_id (filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], dsc_location_id) + files_id = database.set_files_id (filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id) projectB.query("INSERT INTO dsc_files (source, file) VALUES (currval('source_id_seq'), %d)" % (files_id)) # Add the src_uploaders to the DB @@ -388,16 +388,18 @@ def install (): # if changes["architecture"].has_key("source") and orig_tar_id and \ orig_tar_location != "legacy" and orig_tar_location != dsc_location_id: - q = projectB.query("SELECT l.path, f.filename, f.size, f.md5sum FROM files f, location l WHERE f.id = %s AND f.location = l.id" % (orig_tar_id)) + q = projectB.query("SELECT l.path, f.filename, f.size, f.md5sum, f.sha1sum, f.sha256sum FROM files f, location l WHERE f.id = %s AND f.location = l.id" % (orig_tar_id)) ql = q.getresult()[0] old_filename = ql[0] + ql[1] file_size = ql[2] file_md5sum = ql[3] + file_sha1sum = ql[4] + file_sha256sum = ql[5] new_filename = utils.poolify(changes["source"], dsc_component) + os.path.basename(old_filename) new_files_id = database.get_files_id(new_filename, file_size, file_md5sum, dsc_location_id) if new_files_id == None: utils.copy(old_filename, Cnf["Dir::Pool"] + new_filename) - new_files_id = database.set_files_id(new_filename, file_size, file_md5sum, dsc_location_id) + new_files_id = database.set_files_id(new_filename, file_size, file_md5sum, file_sha1sum, file_sha256sum, dsc_location_id) projectB.query("UPDATE dsc_files SET file = %s WHERE source = %s AND file = %s" % (new_files_id, source_id, orig_tar_id)) # Install the files into the pool diff --git a/dak/process_unchecked.py b/dak/process_unchecked.py index f2efe8c0..04afb7b3 100755 --- a/dak/process_unchecked.py +++ b/dak/process_unchecked.py @@ -630,11 +630,11 @@ def check_files(): # Check the md5sum & size against existing files (if any) files[f]["pool name"] = utils.poolify (changes["source"], files[f]["component"]) - files_id = database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["location id"]) + files_id = database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["sha1sum"], files[f]["sha256sum"], files[f]["location id"]) if files_id == -1: reject("INTERNAL ERROR, get_files_id() returned multiple matches for %s." % (f)) elif files_id == -2: - reject("md5sum and/or size mismatch on existing copy of %s." % (f)) + reject("md5sum, sha1sum, sha256sum and/or size mismatch on existing copy of %s." % (f)) files[f]["files id"] = files_id # Check for packages that have moved from one component to another @@ -777,6 +777,8 @@ def check_dsc(): files[orig_tar_gz] = {} files[orig_tar_gz]["size"] = os.stat(orig_tar_gz)[stat.ST_SIZE] files[orig_tar_gz]["md5sum"] = dsc_files[orig_tar_gz]["md5sum"] + files[orig_tar_gz]["sha1sum"] = dsc_files[orig_tar_gz]["sha1sum"] + files[orig_tar_gz]["sha256sum"] = dsc_files[orig_tar_gz]["sha256sum"] files[orig_tar_gz]["section"] = files[dsc_filename]["section"] files[orig_tar_gz]["priority"] = files[dsc_filename]["priority"] files[orig_tar_gz]["component"] = files[dsc_filename]["component"] @@ -927,11 +929,16 @@ def check_hashes (): check_hash(".changes", files, "md5sum", apt_pkg.md5sum) check_hash(".dsc", dsc_files, "md5sum", apt_pkg.md5sum) - if format >= (1,8): - hashes = [("sha1", apt_pkg.sha1sum), - ("sha256", apt_pkg.sha256sum)] - else: - hashes = [] + # (hashname, function, originate) + # If originate is true, we have to calculate it because + # the changes file version is too early for it to be + # included + hashes = [("sha1", apt_pkg.sha1sum, False), + ("sha256", apt_pkg.sha256sum, False)] + + if format <= (1,8): + hashes["sha1"] = True + hashes["sha256"] = True for x in changes: if x.startswith("checksum-"): @@ -945,10 +952,13 @@ def check_hashes (): if h not in dict(hashes): reject("Unsupported checksum field in .dsc" % (h)) - for h,f in hashes: + for h,f,o in hashes: try: fs = utils.build_file_list(changes, 0, "checksums-%s" % h, h) - check_hash(".changes %s" % (h), fs, h, f, files) + if o: + create_hash(fs, h, f, files) + else: + check_hash(".changes %s" % (h), fs, h, f, files) except NoFilesFieldError: reject("No Checksums-%s: field in .changes" % (h)) except UnknownFormatError, format: @@ -960,7 +970,10 @@ def check_hashes (): try: fs = utils.build_file_list(dsc, 1, "checksums-%s" % h, h) - check_hash(".dsc %s" % (h), fs, h, f, dsc_files) + if o: + create_hash(fs, h, f, dsc_files) + else: + check_hash(".dsc %s" % (h), fs, h, f, dsc_files) except UnknownFormatError, format: reject("%s: unknown format of .dsc" % (format)) except NoFilesFieldError: @@ -970,6 +983,20 @@ def check_hashes (): ################################################################################ +def create_hash (lfiles, key, testfn, basedict = None): + for f in lfiles.keys(): + try: + file_handle = utils.open_file(f) + except CantOpenError: + continue + + # Check hash + basedict[f]['%ssum' % key] = testfn(file_handle) + file_handle.close() + + +################################################################################ + def check_hash (where, lfiles, key, testfn, basedict = None): if basedict: for f in basedict.keys(): @@ -989,6 +1016,8 @@ def check_hash (where, lfiles, key, testfn, basedict = None): if testfn(file_handle) != lfiles[f][key]: reject("%s: %s check failed." % (f, key)) file_handle.close() + # Store the hashes for later use + basedict[f]['%ssum' % key] = lfiles[f][key] # Check size actual_size = os.stat(f)[stat.ST_SIZE] size = int(lfiles[f]["size"]) diff --git a/daklib/database.py b/daklib/database.py index 5c362604..cad427ac 100755 --- a/daklib/database.py +++ b/daklib/database.py @@ -317,7 +317,7 @@ def get_or_set_fingerprint_id (fingerprint): ################################################################################ -def get_files_id (filename, size, md5sum, location_id): +def get_files_id (filename, size, md5sum, sha1sum, sha256sum location_id): global files_id_cache cache_key = "%s_%d" % (filename, location_id) @@ -326,7 +326,7 @@ def get_files_id (filename, size, md5sum, location_id): return files_id_cache[cache_key] size = int(size) - q = projectB.query("SELECT id, size, md5sum FROM files WHERE filename = '%s' AND location = %d" % (filename, location_id)) + q = projectB.query("SELECT id, size, md5sum, sha1sum, sha256sum FROM files WHERE filename = '%s' AND location = %d" % (filename, location_id)) ql = q.getresult() if ql: if len(ql) != 1: @@ -334,7 +334,9 @@ def get_files_id (filename, size, md5sum, location_id): ql = ql[0] orig_size = int(ql[1]) orig_md5sum = ql[2] - if orig_size != size or orig_md5sum != md5sum: + orig_sha1sum = ql[3] + orig_sha256sum = ql[4] + if orig_size != size or orig_md5sum != md5sum or orig_sha1sum != sha1sum or orig_sha256sum != sha256sum: return -2 files_id_cache[cache_key] = ql[0] return files_id_cache[cache_key] @@ -360,12 +362,12 @@ def get_or_set_queue_id (queue): ################################################################################ -def set_files_id (filename, size, md5sum, location_id): +def set_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id): global files_id_cache - projectB.query("INSERT INTO files (filename, size, md5sum, location) VALUES ('%s', %d, '%s', %d)" % (filename, long(size), md5sum, location_id)) + projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum location_id)) - return get_files_id (filename, size, md5sum, location_id) + return get_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id) ### currval has issues with postgresql 7.1.3 when the table is big ### it was taking ~3 seconds to return on auric which is very Not diff --git a/daklib/queue.py b/daklib/queue.py index 08b8b5c6..40960b90 100755 --- a/daklib/queue.py +++ b/daklib/queue.py @@ -236,9 +236,10 @@ class Upload: for file_entry in files.keys(): d_files[file_entry] = {} for i in [ "package", "version", "architecture", "type", "size", - "md5sum", "component", "location id", "source package", - "source version", "maintainer", "dbtype", "files id", - "new", "section", "priority", "othercomponents", + "md5sum", "sha1sum", "sha256sum", "component", + "location id", "source package", "source version", + "maintainer", "dbtype", "files id", "new", + "section", "priority", "othercomponents", "pool name", "original component" ]: if files[file_entry].has_key(i): d_files[file_entry][i] = files[file_entry][i] -- 2.39.2