* config/debian/cron.dinstall: We don't want i18n to ever fail
dinstall, so add a || true
+2008-08-15 Mark Hymers <mhy@debian.org>
+
+ * daklib/utils.py: Actually import a module before using it.
+
+ * daklib/utils.py: Actually check we have basedict before trying to
+ use it.
+
+ * dak/process_accepted.py, dak/process_unchecked.py,
+ daklib/database.py: Don't change get_files_id to use sha1sum and
+ sha256sum.
+
+ * setup/init_pool.sql, dak/check_archive.py, dak/decode_dot_dak.py,
+ dak/process_accepted.py, dak/process_unchecked.py, daklib/database.py,
+ daklib/queue.py, daklib/utils.py: Attempt to add sha1sum and
+ sha256sums into the database. The complication is that we have to
+ keep backwards compatibility with the .dak files already in existence.
+ Note that import_archive hasn't been hacked to deal with this yet.
+
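In outline, the backwards-compatibility handling works as follows (a
simplified sketch of the ensure_hashes() call added below, assuming the
usual changes/dsc/files/dsc_files structures from daklib; the entry and
digest shown are illustrative only):

    # A pre-sha .dak or .changes file carries only an md5sum per file,
    # so ensure_hashes() computes the missing digests in place:
    files = {"hello_1.0-1_i386.deb":
                 {"md5sum": "0f343b0931126a20f133d67c2b018a3b",
                  "size": "2048"}}
    for m in utils.ensure_hashes(changes, dsc, files, dsc_files):
        utils.warn(m)
    # afterwards files[f]["sha1sum"] and files[f]["sha256sum"] exist,
    # so all three digests can be stored via database.set_files_id()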
2008-08-14 Joerg Jaspert <joerg@debian.org>
* config/debian/cron.dinstall: Added the i18n retrieval of package
2008-08-07 Stephen Gran <sgran@debian.org>
* Drop use of exec to eval variable interpolation
-
2008-08-07 Joerg Jaspert <joerg@debian.org>
* dak/process_accepted.py (install): Error out with the new
The following MODEs are available:
- md5sums - validate the md5sums stored in the database
+ checksums - validate the checksums stored in the database
files - check files in the database against what's in the archive
dsc-syntax - validate the syntax of .dsc files in the archive
missing-overrides - check for missing overrides
################################################################################
-def check_md5sums():
+def check_checksums():
print "Getting file information from database..."
- q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.size FROM files f, location l WHERE f.location = l.id")
+ q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.sha1sum, f.sha256sum, f.size FROM files f, location l WHERE f.location = l.id")
ql = q.getresult()
- print "Checking file md5sums & sizes..."
+ print "Checking file checksums & sizes..."
for i in ql:
filename = os.path.abspath(i[0] + i[1])
db_md5sum = i[2]
- db_size = int(i[3])
+ db_sha1sum = i[3]
+ db_sha256sum = i[4]
+ db_size = int(i[5])
try:
    f = utils.open_file(filename)
except:
    utils.warn("can't open '%s'." % (filename))
    continue

md5sum = apt_pkg.md5sum(f)
size = os.stat(filename)[stat.ST_SIZE]

if md5sum != db_md5sum:
    utils.warn("**WARNING** md5sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, md5sum, db_md5sum))
if size != db_size:
utils.warn("**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, size, db_size))
+ # Until the main database is filled, we need to avoid spitting out 500,000
+ # warnings every time we scan the archive. Yet another hack (TM) which can
+ # go away once this is all working.
+ # rewind before each additional digest so we hash the whole file again
+ if db_sha1sum is not None and db_sha1sum != '':
+     f.seek(0)
+     sha1sum = apt_pkg.sha1sum(f)
+     if sha1sum != db_sha1sum:
+         utils.warn("**WARNING** sha1sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha1sum, db_sha1sum))
+
+ if db_sha256sum is not None and db_sha256sum != '':
+     f.seek(0)
+     sha256sum = apt_pkg.sha256sum(f)
+     if sha256sum != db_sha256sum:
+         utils.warn("**WARNING** sha256sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha256sum, db_sha256sum))
print "Done."
projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
database.init(Cnf, projectB)
- if mode == "md5sums":
- check_md5sums()
+ if mode == "checksums":
+ check_checksums()
elif mode == "files":
check_files()
elif mode == "dsc-syntax":
# Begin a transaction; if we bomb out anywhere between here and the COMMIT WORK below, the DB will not be changed.
projectB.query("BEGIN WORK")
+ # Check the hashes are all present. HACK: this can go away once all
+ # .dak files are known to be newer than the shasum changes.
+ utils.ensure_hashes(changes, dsc, files, dsc_files)
+
# Add the .dsc file to the DB
for file in files.keys():
if files[file]["type"] == "dsc":
# files id is stored in dsc_files by check_dsc().
files_id = dsc_files[dsc_file].get("files id", None)
if files_id == None:
- files_id = database.get_files_id(filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id)
+ files_id = database.get_files_id(filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], dsc_location_id)
# FIXME: needs to check for -1/-2 and or handle exception
if files_id == None:
files_id = database.set_files_id (filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id)
if not files[file].has_key("location id") or not files[file]["location id"]:
files[file]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[file]["component"],utils.where_am_i())
if not files[file].has_key("files id") or not files[file]["files id"]:
- files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["location id"])
+ files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], files[file]["location id"])
source_id = database.get_source_id (source, source_version)
if source_id:
projectB.query("INSERT INTO binaries (package, version, maintainer, source, architecture, file, type, sig_fpr) VALUES ('%s', '%s', %d, %d, %d, %d, '%s', %d)"
# Check the md5sum & size against existing files (if any)
files[f]["pool name"] = utils.poolify (changes["source"], files[f]["component"])
- files_id = database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["sha1sum"], files[f]["sha256sum"], files[f]["location id"])
+ files_id = database.get_files_id(files[f]["pool name"] + f, files[f]["size"], files[f]["md5sum"], files[f]["location id"])
if files_id == -1:
reject("INTERNAL ERROR, get_files_id() returned multiple matches for %s." % (f))
elif files_id == -2:
- reject("md5sum, sha1sum, sha256sum and/or size mismatch on existing copy of %s." % (f))
+ reject("md5sum and/or size mismatch on existing copy of %s." % (f))
files[f]["files id"] = files_id
# Check for packages that have moved from one component to another
################################################################################
def check_hashes ():
- # Make sure we recognise the format of the Files: field
- format = changes.get("format", "0.0").split(".",1)
- if len(format) == 2:
- format = int(format[0]), int(format[1])
- else:
- format = int(float(format[0])), 0
-
- check_hash(".changes", files, "md5sum", apt_pkg.md5sum)
- check_hash(".dsc", dsc_files, "md5sum", apt_pkg.md5sum)
-
- # (hashname, function, originate)
- # If originate is true, we have to calculate it because
- # the changes file version is too early for it to be
- # included
- hashes = [("sha1", apt_pkg.sha1sum, False),
- ("sha256", apt_pkg.sha256sum, False)]
-
- if format <= (1,8):
- hashes["sha1"] = True
- hashes["sha256"] = True
-
- for x in changes:
- if x.startswith("checksum-"):
- h = x.split("-",1)[1]
- if h not in dict(hashes):
- reject("Unsupported checksum field in .changes" % (h))
-
- for x in dsc:
- if x.startswith("checksum-"):
- h = x.split("-",1)[1]
- if h not in dict(hashes):
- reject("Unsupported checksum field in .dsc" % (h))
-
- for h,f,o in hashes:
- try:
- fs = utils.build_file_list(changes, 0, "checksums-%s" % h, h)
- if o:
- create_hash(fs, h, f, files)
- else:
- check_hash(".changes %s" % (h), fs, h, f, files)
- except NoFilesFieldError:
- reject("No Checksums-%s: field in .changes" % (h))
- except UnknownFormatError, format:
- reject("%s: unknown format of .changes" % (format))
- except ParseChangesError, line:
- reject("parse error for Checksums-%s in .changes, can't grok: %s." % (h, line))
-
- if "source" not in changes["architecture"]: continue
-
- try:
- fs = utils.build_file_list(dsc, 1, "checksums-%s" % h, h)
- if o:
- create_hash(fs, h, f, dsc_files)
- else:
- check_hash(".dsc %s" % (h), fs, h, f, dsc_files)
- except UnknownFormatError, format:
- reject("%s: unknown format of .dsc" % (format))
- except NoFilesFieldError:
- reject("No Checksums-%s: field in .dsc" % (h))
- except ParseChangesError, line:
- reject("parse error for Checksums-%s in .dsc, can't grok: %s." % (h, line))
-
-################################################################################
-
-def create_hash (lfiles, key, testfn, basedict = None):
- for f in lfiles.keys():
- try:
- file_handle = utils.open_file(f)
- except CantOpenError:
- continue
-
- # Check hash
- basedict[f]['%ssum' % key] = testfn(file_handle)
- file_handle.close()
-
-
-################################################################################
-
-def check_hash (where, lfiles, key, testfn, basedict = None):
- if basedict:
- for f in basedict.keys():
- if f not in lfiles:
- reject("%s: no %s checksum" % (f, key))
-
- for f in lfiles.keys():
- if basedict and f not in basedict:
- reject("%s: extraneous entry in %s checksums" % (f, key))
-
- try:
- file_handle = utils.open_file(f)
- except CantOpenError:
- continue
+ utils.check_hash(".changes", files, "md5sum", apt_pkg.md5sum)
+ utils.check_hash(".dsc", dsc_files, "md5sum", apt_pkg.md5sum)
- # Check hash
- if testfn(file_handle) != lfiles[f][key]:
- reject("%s: %s check failed." % (f, key))
- file_handle.close()
- # Store the hashes for later use
- basedict[f]['%ssum' % key] = lfiles[f][key]
- # Check size
- actual_size = os.stat(f)[stat.ST_SIZE]
- size = int(lfiles[f]["size"])
- if size != actual_size:
- reject("%s: actual file size (%s) does not match size (%s) in %s"
- % (f, actual_size, size, where))
+ # This is a stupid API, but it'll have to do for now until
+ # we actually have proper abstraction
+ for m in utils.ensure_hashes(changes, dsc, files, dsc_files):
+ reject(m)
################################################################################
################################################################################
-def get_files_id (filename, size, md5sum, sha1sum, sha256sum location_id):
+def get_files_id (filename, size, md5sum, location_id):
global files_id_cache
cache_key = "%s_%d" % (filename, location_id)
if files_id_cache.has_key(cache_key):
    return files_id_cache[cache_key]
size = int(size)
- q = projectB.query("SELECT id, size, md5sum, sha1sum, sha256sum FROM files WHERE filename = '%s' AND location = %d" % (filename, location_id))
+ q = projectB.query("SELECT id, size, md5sum FROM files WHERE filename = '%s' AND location = %d" % (filename, location_id))
ql = q.getresult()
if ql:
if len(ql) != 1:
    return -1
ql = ql[0]
orig_size = int(ql[1])
orig_md5sum = ql[2]
- orig_sha1sum = ql[3]
- orig_sha256sum = ql[4]
- if orig_size != size or orig_md5sum != md5sum or orig_sha1sum != sha1sum or orig_sha256sum != sha256sum:
+ if orig_size != size or orig_md5sum != md5sum:
return -2
files_id_cache[cache_key] = ql[0]
return files_id_cache[cache_key]
def set_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id):
global files_id_cache
- projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum location_id))
+ projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', '%s', '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum, location_id))
- return get_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id)
+ return get_files_id (filename, size, md5sum, location_id)
### currval has issues with postgresql 7.1.3 when the table is big
### it was taking ~3 seconds to return on auric which is very Not
################################################################################
import codecs, commands, email.Header, os, pwd, re, select, socket, shutil, \
- sys, tempfile, traceback
+ sys, tempfile, traceback, stat
import apt_pkg
import database
from dak_exceptions import *
alias_cache = None
key_uid_email_cache = {}
+# (hashname, function, earliest_changes_version)
+known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
+ ("sha256", apt_pkg.sha256sum, (1, 8))]
+
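known_hashes is the registry the rest of this module consults; each entry
pairs a hash name with a function that digests an open file object and the
earliest changes format that carries a Checksums-<name> field. Purely as a
hypothetical illustration (sha512 is not part of this change, the helper is
not an apt_pkg function, and the (1, 9) cutoff is made up), a further digest
would be registered like so:

    import hashlib

    def sha512sum(f):
        # follow the apt_pkg.*sum convention: open file object in,
        # hex digest of its remaining contents out
        return hashlib.sha512(f.read()).hexdigest()

    known_hashes.append(("sha512", sha512sum, (1, 9)))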
################################################################################
def open_file(filename, mode='r'):
################################################################################
+def create_hash (lfiles, key, testfn, basedict = None):
+ rejmsg = []
+ for f in lfiles.keys():
+ try:
+ file_handle = open_file(f)
+ except CantOpenError:
+     rejmsg.append("Could not open file %s for checksumming" % (f))
+     continue
+
+ # compute the hash and store it for the caller
+ if basedict and basedict.has_key(f):
+     basedict[f]['%ssum' % key] = testfn(file_handle)
+ file_handle.close()
+
+ return rejmsg
+
+################################################################################
+
+def check_hash (where, lfiles, key, testfn, basedict = None):
+ rejmsg = []
+ if basedict:
+ for f in basedict.keys():
+ if f not in lfiles:
+ rejmsg.append("%s: no %s checksum" % (f, key))
+
+ for f in lfiles.keys():
+ if basedict and f not in basedict:
+ rejmsg.append("%s: extraneous entry in %s checksums" % (f, key))
+
+ try:
+ file_handle = open_file(f)
+ except CantOpenError:
+ continue
+
+ # Check hash
+ if testfn(file_handle) != lfiles[f][key]:
+ rejmsg.append("%s: %s check failed." % (f, key))
+ file_handle.close()
+ # Store the hashes for later use
+ if basedict:
+ basedict[f]['%ssum' % key] = lfiles[f][key]
+ # Check size
+ actual_size = os.stat(f)[stat.ST_SIZE]
+ size = int(lfiles[f]["size"])
+ if size != actual_size:
+ rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
+ % (f, actual_size, size, where))
+
+ return rejmsg
+
+################################################################################
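A minimal usage sketch of the new list-returning API (the filename and
digest below are made up; within daklib, open_file and CantOpenError are
already in scope):

    import apt_pkg

    lfiles = {"pool/main/h/hello/hello_1.0-1.dsc":
                  {"md5sum": "0f343b0931126a20f133d67c2b018a3b",
                   "size": "1024"}}

    # Each caller decides what a failure means: process_unchecked feeds
    # every message to reject(), another caller could merely warn.
    for msg in check_hash(".changes md5sum", lfiles, "md5sum",
                          apt_pkg.md5sum, lfiles):
        print msg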
+
+def ensure_hashes(changes, dsc, files, dsc_files):
+ # Make sure we recognise the format of the Files: field
+ format = changes.get("format", "0.0").split(".",1)
+ if len(format) == 2:
+ format = int(format[0]), int(format[1])
+ else:
+ format = int(float(format[0])), 0
+
+ rejmsg = []
+ for x in changes:
+ if x.startswith("checksum-"):
+ h = x.split("-",1)[1]
+ if h not in [x[0] for x in known_hashes]:
+     rejmsg.append("Unsupported checksum field %s in .changes" % (h))
+
+ for x in dsc:
+ if x.startswith("checksum-"):
+ h = x.split("-",1)[1]
+ if h not in [x[0] for x in known_hashes]:
+     rejmsg.append("Unsupported checksum field %s in .dsc" % (h))
+
+ # We have to calculate the hash ourselves if the changes format predates
+ # the version in which the hash appeared, rather than requiring it to be
+ # present in the changes file. I hate backwards compatibility.
+ for h,f,v in known_hashes:
+ try:
+     if format < v:
+         for m in create_hash(files, h, f, files):
+             rejmsg.append(m)
+     else:
+         fs = build_file_list(changes, 0, "checksums-%s" % h, h)
+         for m in check_hash(".changes %s" % (h), fs, h, f, files):
+             rejmsg.append(m)
+ except NoFilesFieldError:
+ rejmsg.append("No Checksums-%s: field in .changes" % (h))
+ except UnknownFormatError, err:
+     rejmsg.append("%s: unknown format of .changes" % (err))
+ except ParseChangesError, line:
+ rejmsg.append("parse error for Checksums-%s in .changes, can't grok: %s." % (h, line))
+
+ if "source" not in changes["architecture"]: continue
+
+ try:
+     if format < v:
+         for m in create_hash(dsc_files, h, f, dsc_files):
+             rejmsg.append(m)
+     else:
+         fs = build_file_list(dsc, 1, "checksums-%s" % h, h)
+         for m in check_hash(".dsc %s" % (h), fs, h, f, dsc_files):
+             rejmsg.append(m)
+ except UnknownFormatError, err:
+     rejmsg.append("%s: unknown format of .dsc" % (err))
+ except NoFilesFieldError:
+ rejmsg.append("No Checksums-%s: field in .dsc" % (h))
+ except ParseChangesError, line:
+ rejmsg.append("parse error for Checksums-%s in .dsc, can't grok: %s." % (h, line))
+
+ return rejmsg
+
+################################################################################
+
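The format < v gate relies on Python comparing tuples lexicographically:
a 1.7 changes file predates the (1, 8) cutoff, so its hashes are created
rather than checked, while a 1.8 file must carry them. For instance:

    assert (1, 7) < (1, 8)        # pre-1.8: create_hash() fills digests in
    assert not ((1, 8) < (1, 8))  # 1.8 itself: check_hash() verifies them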
# Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
md5sum TEXT NOT NULL,
location INT4 NOT NULL, -- REFERENCES location
last_used TIMESTAMP,
+ sha1sum TEXT NOT NULL,
+ sha256sum TEXT NOT NULL,
unique (filename, location)
);