+2008-08-15 Mark Hymers <mhy@debian.org>
+
+ * setup/init_pool.sql, dak/check_archive.py, dak/decode_dot_dak.py,
+ dak/process_accepted.py, dak/process_unchecked.py, daklib/database.py,
+ daklib/queue.py, daklib/utils.py: Attempt to add sha1sum and
+ sha256sums into the database. The complication is that we have to
+ keep backwards compatibility with the .dak files already in existence.
+ Note that import_archive hasn't been hacked to deal with this yet.
+
2008-08-14 Joerg Jaspert <joerg@debian.org>
* config/debian/cron.dinstall: Added the i18n retrieval of package
2008-08-07 Stephen Gran <sgran@debian.org>
* Drop use of exec to eval variable interpolation
-
2008-08-07 Joerg Jaspert <joerg@debian.org>
* dak/process_accepted.py (install): Error out with the new
The following MODEs are available:
- md5sums - validate the md5sums stored in the database
+ checksums - validate the checksums stored in the database
files - check files in the database against what's in the archive
dsc-syntax - validate the syntax of .dsc files in the archive
missing-overrides - check for missing overrides
################################################################################
-def check_md5sums():
+def check_checksums():
print "Getting file information from database..."
- q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.size FROM files f, location l WHERE f.location = l.id")
+ q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.sha1sum, f.sha256sum, f.size FROM files f, location l WHERE f.location = l.id")
ql = q.getresult()
- print "Checking file md5sums & sizes..."
+ print "Checking file checksums & sizes..."
for i in ql:
filename = os.path.abspath(i[0] + i[1])
db_md5sum = i[2]
- db_size = int(i[3])
+ db_sha1sum = i[3]
+ db_sha256sum = i[4]
+ db_size = int(i[5])
try:
f = utils.open_file(filename)
except:
utils.warn("**WARNING** md5sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, md5sum, db_md5sum))
if size != db_size:
utils.warn("**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, size, db_size))
+        # Until the main database is filled, we need to not spit 500,000 warnings
+        # every time we scan the archive.  Yet another hack (TM) which can go away
+        # once this is all working
+        if db_sha1sum is not None and db_sha1sum != '':
+            # NOTE(review): f appears to have been read already for the md5sum
+            # comparison; rewind so the digest covers the whole file rather
+            # than whatever is left after the previous read.
+            f.seek(0)
+            sha1sum = apt_pkg.sha1sum(f)
+            if sha1sum != db_sha1sum:
+                utils.warn("**WARNING** sha1sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha1sum, db_sha1sum))
+
+        if db_sha256sum is not None and db_sha256sum != '':
+            # Rewind again: an earlier hash over the same handle leaves it
+            # positioned at end-of-file.
+            f.seek(0)
+            sha256sum = apt_pkg.sha256sum(f)
+            if sha256sum != db_sha256sum:
+                utils.warn("**WARNING** sha256sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha256sum, db_sha256sum))
print "Done."
projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
database.init(Cnf, projectB)
- if mode == "md5sums":
- check_md5sums()
+ if mode == "checksums":
+ check_checksums()
elif mode == "files":
check_files()
elif mode == "dsc-syntax":
for f in files.keys():
print " %s:" % (f)
for i in [ "package", "version", "architecture", "type", "size",
- "md5sum", "component", "location id", "source package",
- "source version", "maintainer", "dbtype", "files id",
- "new", "section", "priority", "pool name" ]:
+ "md5sum", "sha1sum", "sha256sum", "component", "location id",
+ "source package", "source version", "maintainer", "dbtype",
+ "files id", "new", "section", "priority", "pool name" ]:
if files[f].has_key(i):
print " %s: %s" % (i.capitalize(), files[f][i])
del files[f][i]
# Begin a transaction; if we bomb out anywhere between here and the COMMIT WORK below, the DB will not be changed.
projectB.query("BEGIN WORK")
+ # Check the hashes are all present: HACK: Can go away once all dak files
+ # are known to be newer than the shasum changes
+ utils.ensure_hashes(Upload)
+
# Add the .dsc file to the DB
for file in files.keys():
if files[file]["type"] == "dsc":
dsc_component = files[file]["component"]
dsc_location_id = files[file]["location id"]
if not files[file].has_key("files id") or not files[file]["files id"]:
- files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], dsc_location_id)
+ files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id)
projectB.query("INSERT INTO source (source, version, maintainer, changedby, file, install_date, sig_fpr) VALUES ('%s', '%s', %d, %d, %d, '%s', %s)"
% (package, version, maintainer_id, changedby_id, files[file]["files id"], install_date, fingerprint_id))
# files id is stored in dsc_files by check_dsc().
files_id = dsc_files[dsc_file].get("files id", None)
if files_id == None:
- files_id = database.get_files_id(filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], dsc_location_id)
+ files_id = database.get_files_id(filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id)
# FIXME: needs to check for -1/-2 and or handle exception
if files_id == None:
- files_id = database.set_files_id (filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], dsc_location_id)
+ files_id = database.set_files_id (filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id)
projectB.query("INSERT INTO dsc_files (source, file) VALUES (currval('source_id_seq'), %d)" % (files_id))
# Add the src_uploaders to the DB
#
if changes["architecture"].has_key("source") and orig_tar_id and \
orig_tar_location != "legacy" and orig_tar_location != dsc_location_id:
- q = projectB.query("SELECT l.path, f.filename, f.size, f.md5sum FROM files f, location l WHERE f.id = %s AND f.location = l.id" % (orig_tar_id))
+ q = projectB.query("SELECT l.path, f.filename, f.size, f.md5sum, f.sha1sum, f.sha256sum FROM files f, location l WHERE f.id = %s AND f.location = l.id" % (orig_tar_id))
ql = q.getresult()[0]
old_filename = ql[0] + ql[1]
file_size = ql[2]
file_md5sum = ql[3]
+ file_sha1sum = ql[4]
+ file_sha256sum = ql[5]
new_filename = utils.poolify(changes["source"], dsc_component) + os.path.basename(old_filename)
new_files_id = database.get_files_id(new_filename, file_size, file_md5sum, dsc_location_id)
if new_files_id == None:
utils.copy(old_filename, Cnf["Dir::Pool"] + new_filename)
- new_files_id = database.set_files_id(new_filename, file_size, file_md5sum, dsc_location_id)
+ new_files_id = database.set_files_id(new_filename, file_size, file_md5sum, file_sha1sum, file_sha256sum, dsc_location_id)
projectB.query("UPDATE dsc_files SET file = %s WHERE source = %s AND file = %s" % (new_files_id, source_id, orig_tar_id))
# Install the files into the pool
# Check the md5sum & size against existing files (if any)
files[f]["pool name"] = utils.poolify (changes["source"], files[f]["component"])
- files_id = database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["location id"])
+ files_id = database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["sha1sum"], files[f]["sha256sum"], files[f]["location id"])
if files_id == -1:
reject("INTERNAL ERROR, get_files_id() returned multiple matches for %s." % (f))
elif files_id == -2:
- reject("md5sum and/or size mismatch on existing copy of %s." % (f))
+ reject("md5sum, sha1sum, sha256sum and/or size mismatch on existing copy of %s." % (f))
files[f]["files id"] = files_id
# Check for packages that have moved from one component to another
files[orig_tar_gz] = {}
files[orig_tar_gz]["size"] = os.stat(orig_tar_gz)[stat.ST_SIZE]
files[orig_tar_gz]["md5sum"] = dsc_files[orig_tar_gz]["md5sum"]
+ files[orig_tar_gz]["sha1sum"] = dsc_files[orig_tar_gz]["sha1sum"]
+ files[orig_tar_gz]["sha256sum"] = dsc_files[orig_tar_gz]["sha256sum"]
files[orig_tar_gz]["section"] = files[dsc_filename]["section"]
files[orig_tar_gz]["priority"] = files[dsc_filename]["priority"]
files[orig_tar_gz]["component"] = files[dsc_filename]["component"]
else:
format = int(float(format[0])), 0
- check_hash(".changes", files, "md5sum", apt_pkg.md5sum)
- check_hash(".dsc", dsc_files, "md5sum", apt_pkg.md5sum)
+    # utils.check_hash() reports problems through its return value instead of
+    # calling reject() itself, so every returned message has to be forwarded;
+    # dropping the result would let md5sum/size mismatches through unnoticed.
+    for m in utils.check_hash(".changes", files, "md5sum", apt_pkg.md5sum):
+        reject(m)
+    for m in utils.check_hash(".dsc", dsc_files, "md5sum", apt_pkg.md5sum):
+        reject(m)
- if format >= (1,8):
- hashes = [("sha1", apt_pkg.sha1sum),
- ("sha256", apt_pkg.sha256sum)]
- else:
- hashes = []
-
- for x in changes:
- if x.startswith("checksum-"):
- h = x.split("-",1)[1]
- if h not in dict(hashes):
- reject("Unsupported checksum field in .changes" % (h))
-
- for x in dsc:
- if x.startswith("checksum-"):
- h = x.split("-",1)[1]
- if h not in dict(hashes):
- reject("Unsupported checksum field in .dsc" % (h))
-
- for h,f in hashes:
- try:
- fs = utils.build_file_list(changes, 0, "checksums-%s" % h, h)
- check_hash(".changes %s" % (h), fs, h, f, files)
- except NoFilesFieldError:
- reject("No Checksums-%s: field in .changes" % (h))
- except UnknownFormatError, format:
- reject("%s: unknown format of .changes" % (format))
- except ParseChangesError, line:
- reject("parse error for Checksums-%s in .changes, can't grok: %s." % (h, line))
-
- if "source" not in changes["architecture"]: continue
-
- try:
- fs = utils.build_file_list(dsc, 1, "checksums-%s" % h, h)
- check_hash(".dsc %s" % (h), fs, h, f, dsc_files)
- except UnknownFormatError, format:
- reject("%s: unknown format of .dsc" % (format))
- except NoFilesFieldError:
- reject("No Checksums-%s: field in .dsc" % (h))
- except ParseChangesError, line:
- reject("parse error for Checksums-%s in .dsc, can't grok: %s." % (h, line))
-
-################################################################################
-
-def check_hash (where, lfiles, key, testfn, basedict = None):
- if basedict:
- for f in basedict.keys():
- if f not in lfiles:
- reject("%s: no %s checksum" % (f, key))
-
- for f in lfiles.keys():
- if basedict and f not in basedict:
- reject("%s: extraneous entry in %s checksums" % (f, key))
-
- try:
- file_handle = utils.open_file(f)
- except CantOpenError:
- continue
-
- # Check hash
- if testfn(file_handle) != lfiles[f][key]:
- reject("%s: %s check failed." % (f, key))
- file_handle.close()
- # Check size
- actual_size = os.stat(f)[stat.ST_SIZE]
- size = int(lfiles[f]["size"])
- if size != actual_size:
- reject("%s: actual file size (%s) does not match size (%s) in %s"
- % (f, actual_size, size, where))
+ # This is stupid API, but it'll have to do for now until
+ # we actually have proper abstraction
+ for m in utils.ensure_hashes(Upload):
+ reject(m)
################################################################################
################################################################################
-def get_files_id (filename, size, md5sum, location_id):
+def get_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id):
global files_id_cache
cache_key = "%s_%d" % (filename, location_id)
return files_id_cache[cache_key]
size = int(size)
- q = projectB.query("SELECT id, size, md5sum FROM files WHERE filename = '%s' AND location = %d" % (filename, location_id))
+ q = projectB.query("SELECT id, size, md5sum, sha1sum, sha256sum FROM files WHERE filename = '%s' AND location = %d" % (filename, location_id))
ql = q.getresult()
if ql:
if len(ql) != 1:
ql = ql[0]
orig_size = int(ql[1])
orig_md5sum = ql[2]
- if orig_size != size or orig_md5sum != md5sum:
+ orig_sha1sum = ql[3]
+ orig_sha256sum = ql[4]
+ if orig_size != size or orig_md5sum != md5sum or orig_sha1sum != sha1sum or orig_sha256sum != sha256sum:
return -2
files_id_cache[cache_key] = ql[0]
return files_id_cache[cache_key]
################################################################################
-def set_files_id (filename, size, md5sum, location_id):
+def set_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id):
global files_id_cache
- projectB.query("INSERT INTO files (filename, size, md5sum, location) VALUES ('%s', %d, '%s', %d)" % (filename, long(size), md5sum, location_id))
+    # One placeholder per inserted column: filename, size, md5sum, sha1sum,
+    # sha256sum, location.
+    projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', '%s', '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum, location_id))
- return get_files_id (filename, size, md5sum, location_id)
+ return get_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id)
### currval has issues with postgresql 7.1.3 when the table is big
### it was taking ~3 seconds to return on auric which is very Not
for file_entry in files.keys():
d_files[file_entry] = {}
for i in [ "package", "version", "architecture", "type", "size",
- "md5sum", "component", "location id", "source package",
- "source version", "maintainer", "dbtype", "files id",
- "new", "section", "priority", "othercomponents",
+ "md5sum", "sha1sum", "sha256sum", "component",
+ "location id", "source package", "source version",
+ "maintainer", "dbtype", "files id", "new",
+ "section", "priority", "othercomponents",
"pool name", "original component" ]:
if files[file_entry].has_key(i):
d_files[file_entry][i] = files[file_entry][i]
alias_cache = None
key_uid_email_cache = {}
+# Checksums handled in addition to md5sum, one tuple per hash:
+#   (hashname, function, earliest_changes_version)
+# hashname is the suffix of the "checksums-<hashname>" field, function
+# computes the digest of an open file, and earliest_changes_version is the
+# (major, minor) .changes format from which the field is expected to exist.
+known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
+ ("sha256", apt_pkg.sha256sum, (1, 8))]
+
################################################################################
def open_file(filename, mode='r'):
################################################################################
+def create_hash (lfiles, key, testfn, basedict = None):
+    """
+    Compute a checksum for every file named in lfiles and record it.
+
+    lfiles   -- dictionary keyed by filename; only the keys are used here
+    key      -- hash name (e.g. "sha1"); the digest is stored under '<key>sum'
+    testfn   -- callable that takes an open file object and returns its digest
+    basedict -- dictionary of per-file dictionaries that receives the digest;
+                when omitted the digests are stored in lfiles itself
+
+    Returns a list of rejection messages, empty if every file was hashed.
+    """
+    rejmsg = []
+    if basedict is None:
+        basedict = lfiles
+
+    for f in lfiles.keys():
+        # Nowhere to store the digest for this file: report it instead of
+        # dying with a KeyError further down.
+        if f not in basedict:
+            rejmsg.append("%s: extraneous entry in %s checksums" % (f, key))
+            continue
+
+        try:
+            file_handle = open_file(f)
+        except CantOpenError:
+            rejmsg.append("Could not open file %s for checksumming" % (f))
+            # Skip the file; otherwise we would hash an undefined handle or
+            # the already closed handle of the previous file.
+            continue
+
+        # Calculate and store the hash, releasing the handle even on error
+        try:
+            basedict[f]['%ssum' % key] = testfn(file_handle)
+        finally:
+            file_handle.close()
+
+    return rejmsg
+
+################################################################################
+
+def check_hash (where, lfiles, key, testfn, basedict = None):
+    """
+    Verify the checksum and size recorded for every file in lfiles.
+
+    where    -- description of the control file, used in the size message
+    lfiles   -- dictionary keyed by filename whose values hold the expected
+                digest under `key' and the expected size under "size"
+    key      -- name of the digest entry inside each lfiles value
+    testfn   -- callable that takes an open file object and returns its digest
+    basedict -- optional dictionary of per-file dictionaries; when given, its
+                file list is cross-checked against lfiles and the expected
+                digest is copied into it under '<key>sum'
+
+    Files that cannot be opened are skipped silently.  Returns a list of
+    rejection messages, empty if everything matched.
+    """
+    rejmsg = []
+    if basedict:
+        for f in basedict.keys():
+            if f not in lfiles:
+                rejmsg.append("%s: no %s checksum" % (f, key))
+
+    for f in lfiles.keys():
+        if basedict and f not in basedict:
+            rejmsg.append("%s: extraneous entry in %s checksums" % (f, key))
+
+        try:
+            file_handle = open_file(f)
+        except CantOpenError:
+            continue
+
+        # Check hash, making sure the handle is released even on error
+        try:
+            actual_hash = testfn(file_handle)
+        finally:
+            file_handle.close()
+        if actual_hash != lfiles[f][key]:
+            rejmsg.append("%s: %s check failed." % (f, key))
+        # Store the hashes for later use; only possible when the caller
+        # supplied a dictionary that actually knows about this file
+        if basedict and f in basedict:
+            basedict[f]['%ssum' % key] = lfiles[f][key]
+        # Check size
+        actual_size = os.stat(f)[stat.ST_SIZE]
+        size = int(lfiles[f]["size"])
+        if size != actual_size:
+            rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
+                   % (f, actual_size, size, where))
+
+    return rejmsg
+
+################################################################################
+
+def ensure_hashes(Upload):
+    """
+    Make sure every hash listed in known_hashes is available for an upload.
+
+    Checksums-* fields naming a hash that is not in known_hashes are
+    reported.  For each known hash, uploads whose .changes format is older
+    than the version that introduced the hash get the digest calculated from
+    the files themselves; newer uploads must carry the Checksums-<hash>
+    field, which is then verified with check_hash().
+
+    Upload is expected to provide the changes, dsc, files and dsc_files
+    dictionaries.  Returns a list of rejection messages, empty on success.
+    """
+    rejmsg = []
+    supported = [ name for (name, hashfn, version) in known_hashes ]
+
+    for x in Upload.changes:
+        if x.startswith("checksums-"):
+            h = x.split("-",1)[1]
+            if h not in supported:
+                rejmsg.append("Unsupported checksum field %s in .changes" % (h))
+
+    for x in Upload.dsc:
+        if x.startswith("checksums-"):
+            h = x.split("-",1)[1]
+            if h not in supported:
+                rejmsg.append("Unsupported checksum field %s in .dsc" % (h))
+
+    # Turn the Format: value into a (major, minor) tuple comparable with the
+    # versions stored in known_hashes.
+    # NOTE(review): assumes the value is kept under the "format" key of
+    # Upload.changes as a dotted number - confirm against the parser.
+    changes_format = Upload.changes.get("format", "0.0").split(".", 1)
+    if len(changes_format) == 2:
+        changes_format = int(changes_format[0]), int(changes_format[1])
+    else:
+        changes_format = int(float(changes_format[0])), 0
+
+    # We have to calculate the hash if we have an earlier changes version than
+    # the hash appears in rather than require it exist in the changes file
+    # I hate backwards compatibility
+    for h,f,v in known_hashes:
+        if changes_format < v:
+            # The Checksums-<hash> field cannot be relied upon here, so hash
+            # the files already known from the Files: field directly.
+            rejmsg.extend(create_hash(Upload.files, h, f, Upload.files))
+            if "source" in Upload.changes["architecture"]:
+                rejmsg.extend(create_hash(Upload.dsc_files, h, f, Upload.dsc_files))
+            continue
+
+        try:
+            fs = build_file_list(Upload.changes, 0, "checksums-%s" % h, h)
+            rejmsg.extend(check_hash(".changes %s" % (h), fs, h, f, Upload.files))
+        except NoFilesFieldError:
+            rejmsg.append("No Checksums-%s: field in .changes" % (h))
+        except UnknownFormatError, fmt:
+            rejmsg.append("%s: unknown format of .changes" % (fmt))
+        except ParseChangesError, line:
+            rejmsg.append("parse error for Checksums-%s in .changes, can't grok: %s." % (h, line))
+
+        if "source" not in Upload.changes["architecture"]: continue
+
+        try:
+            fs = build_file_list(Upload.dsc, 1, "checksums-%s" % h, h)
+            rejmsg.extend(check_hash(".dsc %s" % (h), fs, h, f, Upload.dsc_files))
+        except UnknownFormatError, fmt:
+            rejmsg.append("%s: unknown format of .dsc" % (fmt))
+        except NoFilesFieldError:
+            rejmsg.append("No Checksums-%s: field in .dsc" % (h))
+        except ParseChangesError, line:
+            rejmsg.append("parse error for Checksums-%s in .dsc, can't grok: %s." % (h, line))
+
+    return rejmsg
+
+################################################################################
+
# Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
md5sum TEXT NOT NULL,
location INT4 NOT NULL, -- REFERENCES location
last_used TIMESTAMP,
+ sha1sum TEXT NOT NULL,
+ sha256sum TEXT NOT NULL,
unique (filename, location)
);