undoing changes I shouldn't have made

[dak.git] / daklib / utils.py
diff --git a/daklib/utils.py b/daklib/utils.py

index bdb573af2210330f78f8279ef429cb4e77095093..b0b71c0169d413e32d5a6c0b1aa009dd76f4211d 100755 (executable)
--- a/daklib/utils.py
+++ b/daklib/utils.py
@@ -22,7 +22,6 @@
  # along with this program; if not, write to the Free Software
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  
-import codecs
  import commands
  import email.Header
  import os
@@ -39,14 +38,19 @@ import time
  import re
  import string
  import email as modemail
+import subprocess
  
  from dbconn import DBConn, get_architecture, get_component, get_suite
  from dak_exceptions import *
  from textutils import fix_maintainer
  from regexes import re_html_escaping, html_escaping, re_single_line_field, \
-                    re_multi_line_field, re_srchasver, re_verwithext, \
-                    re_parse_maintainer, re_taint_free, re_gpg_uid, re_re_mark, \
-                    re_whitespace_comment
+                    re_multi_line_field, re_srchasver, re_taint_free, \
+                    re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
+                    re_is_orig_source
+
+from formats import parse_format, validate_changes_format
+from srcformats import get_format_from_string
+from collections import defaultdict
  
  ################################################################################
  
@@ -60,6 +64,22 @@ key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids
  known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                  ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  
+# Monkeypatch commands.getstatusoutput as it returns a "0" exit code in
+# all situations under lenny's Python.
+import commands
+def dak_getstatusoutput(cmd):
+    """
+    Run C{cmd} through the shell and return its exit status and output.
+
+    stderr is redirected into stdout, so the returned output holds both
+    streams combined.
+
+    @type cmd: string
+    @param cmd: shell command line to execute
+
+    @rtype: tuple
+    @return: 2-tuple C{(status, output)} where status is the value of
+             C{Popen.wait()} (0 if that is None) and output is everything
+             the command wrote
+    """
+    proc = subprocess.Popen(cmd, shell=True, universal_newlines=True,
+                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+    # Read everything up to EOF before reaping the child
+    captured = proc.stdout.read()
+
+    # A missing (None) status is reported as success, i.e. 0
+    status = proc.wait() or 0
+
+    return status, captured
+commands.getstatusoutput = dak_getstatusoutput
+
  ################################################################################
  
  def html_escape(s):
@@ -332,6 +352,86 @@ def check_size(where, files):
  
  ################################################################################
  
+def check_dsc_files(dsc_filename, dsc=None, dsc_files=None):
+    """
+    Verify that the files listed in the Files field of the .dsc are
+    those expected given the announced Format.
+
+    @type dsc_filename: string
+    @param dsc_filename: path of .dsc file
+
+    @type dsc: dict
+    @param dsc: the content of the .dsc parsed by C{parse_changes()};
+                parsed from C{dsc_filename} when omitted
+
+    @type dsc_files: dict
+    @param dsc_files: the file list returned by C{build_file_list()};
+                      built from C{dsc} when omitted
+
+    @rtype: list
+    @return: all errors detected (empty if none were found)
+    """
+    rejmsg = []
+
+    # Parse the file if needed
+    if dsc is None:
+        dsc = parse_changes(dsc_filename, signing_rules=1)
+
+    if dsc_files is None:
+        dsc_files = build_file_list(dsc, is_a_dsc=1)
+
+    # Ensure .dsc lists proper set of source files according to the format
+    # announced
+    has = defaultdict(int)
+
+    # Order matters: only the first matching entry is counted for a file.
+    ftype_lookup = (
+        (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
+        (r'diff.gz',                   ('debian_diff',)),
+        (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
+        (r'debian\.tar\.(gz|bz2)',     ('debian_tar',)),
+        (r'orig\.tar\.(gz|bz2)',       ('orig_tar',)),
+        (r'tar\.(gz|bz2)',             ('native_tar',)),
+        (r'orig-.+\.tar\.(gz|bz2)',    ('more_orig_tar',)),
+    )
+
+    for f in dsc_files:
+        m = re_issource.match(f)
+        if not m:
+            rejmsg.append("%s: %s in Files field not recognised as source."
+                          % (dsc_filename, f))
+            continue
+
+        # Populate 'has' dictionary by resolving keys in lookup table
+        # (group 3 of re_issource is presumably the file type suffix --
+        # TODO confirm against regexes.py)
+        matched = False
+        for regex, keys in ftype_lookup:
+            if re.match(regex, m.group(3)):
+                matched = True
+                for key in keys:
+                    has[key] += 1
+                break
+
+        # File does not match anything in lookup table; record a rejection.
+        # There is no reject() callback in this function, so the message
+        # goes into the returned list like every other error.
+        if not matched:
+            rejmsg.append("%s: unexpected source file '%s'" % (dsc_filename, f))
+
+    # Check for multiple files
+    for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
+        if has[file_type] > 1:
+            rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
+
+    # Source format specific tests
+    try:
+        src_format = get_format_from_string(dsc['format'])
+        rejmsg.extend([
+            '%s: %s' % (dsc_filename, x) for x in src_format.reject_msgs(has)
+        ])
+
+    except UnknownFormatError:
+        # Not an error here for now
+        pass
+
+    return rejmsg
+
+################################################################################
+
  def check_hash_fields(what, manifest):
      """
      check_hash_fields ensures that there are no checksum fields in the
@@ -386,41 +486,6 @@ def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
  
  ################################################################################
  
-def ensure_hashes(changes, dsc, files, dsc_files):
-    rejmsg = []
-
-    # Make sure we recognise the format of the Files: field in the .changes
-    format = changes.get("format", "0.0").split(".", 1)
-    if len(format) == 2:
-        format = int(format[0]), int(format[1])
-    else:
-        format = int(float(format[0])), 0
-
-    # We need to deal with the original changes blob, as the fields we need
-    # might not be in the changes dict serialised into the .dak anymore.
-    orig_changes = parse_deb822(changes['filecontents'])
-
-    # Copy the checksums over to the current changes dict.  This will keep
-    # the existing modifications to it intact.
-    for field in orig_changes:
-        if field.startswith('checksums-'):
-            changes[field] = orig_changes[field]
-
-    # Check for unsupported hashes
-    rejmsg.extend(check_hash_fields(".changes", changes))
-    rejmsg.extend(check_hash_fields(".dsc", dsc))
-
-    # We have to calculate the hash if we have an earlier changes version than
-    # the hash appears in rather than require it exist in the changes file
-    for hashname, hashfunc, version in known_hashes:
-        rejmsg.extend(_ensure_changes_hash(changes, format, version, files,
-            hashname, hashfunc))
-        if "source" in changes["architecture"]:
-            rejmsg.extend(_ensure_dsc_hash(dsc, dsc_files, hashname,
-                hashfunc))
-
-    return rejmsg
-
  def parse_checksums(where, files, manifest, hashname):
      rejmsg = []
      field = 'checksums-%s' % hashname
@@ -429,7 +494,12 @@ def parse_checksums(where, files, manifest, hashname):
      for line in manifest[field].split('\n'):
          if not line:
              break
-        checksum, size, checkfile = line.strip().split(' ')
+        clist = line.strip().split(' ')
+        if len(clist) == 3:
+            checksum, size, checkfile = clist
+        else:
+            rejmsg.append("Cannot parse checksum line [%s]" % (line))
+            continue
          if not files.has_key(checkfile):
          # TODO: check for the file's entry in the original files dict, not
          # the one modified by (auto)byhand and other weird stuff
@@ -458,30 +528,8 @@ def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
      if not changes.has_key(field):
          raise NoFilesFieldError
  
-    # Make sure we recognise the format of the Files: field
-    format = re_verwithext.search(changes.get("format", "0.0"))
-    if not format:
-        raise UnknownFormatError, "%s" % (changes.get("format","0.0"))
-
-    format = format.groups()
-    if format[1] == None:
-        format = int(float(format[0])), 0, format[2]
-    else:
-        format = int(format[0]), int(format[1]), format[2]
-    if format[2] == None:
-        format = format[:2]
-
-    if is_a_dsc:
-        # format = (1,0) are the only formats we currently accept,
-        # format = (0,0) are missing format headers of which we still
-        # have some in the archive.
-        if format != (1,0) and format != (0,0):
-            raise UnknownFormatError, "%s" % (changes.get("format","0.0"))
-    else:
-        if (format < (1,5) or format > (1,8)):
-            raise UnknownFormatError, "%s" % (changes.get("format","0.0"))
-        if field != "files" and format < (1,8):
-            raise UnknownFormatError, "%s" % (changes.get("format","0.0"))
+    # Validate .changes Format: field
+    validate_changes_format(parse_format(changes['format']), field)
  
      includes_section = (not is_a_dsc) and field == "files"
  
@@ -701,22 +749,12 @@ def which_alias_file():
  
  ################################################################################
  
-# Escape characters which have meaning to SQL's regex comparison operator ('~')
-# (woefully incomplete)
-
-def regex_safe (s):
-    s = s.replace('+', '\\\\+')
-    s = s.replace('.', '\\\\.')
-    return s
-
-################################################################################
-
  def TemplateSubst(map, filename):
      """
      Perform a substitution of template.

      Reads the file C{filename} and returns its contents with every
      occurrence of each key of C{map} replaced by the corresponding
      value (converted with C{str()} before substitution).
      """
      templatefile = open_file(filename)
      template = templatefile.read()
      for x in map.keys():
-        template = template.replace(x,map[x])
+        template = template.replace(x, str(map[x]))
      templatefile.close()
      return template
  
@@ -736,6 +774,9 @@ def warn(msg):
  def whoami ():
      return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
  
+def getusername ():
+    """ Return the login name of the user running the current process """
+    entry = pwd.getpwuid(os.getuid())
+    return entry.pw_name
+
  ################################################################################
  
  def size_type (c):
@@ -920,8 +961,8 @@ def parse_args(Options):
          suite_ids_list = []
          for suitename in split_args(Options["Suite"]):
              suite = get_suite(suitename, session=session)
-            if suite_id is None:
-                warn("suite '%s' not recognised." % (suitename))
+            if suite.suite_id is None:
+                warn("suite '%s' not recognised." % (suite.suite_name))
              else:
                  suite_ids_list.append(suite.suite_id)
          if suite_ids_list:
@@ -1198,7 +1239,7 @@ def gpg_keyring_args(keyrings=None):
  
  ################################################################################
  
-def check_signature (sig_filename, reject, data_filename="", keyrings=None, autofetch=None):
+def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None):
      """
      Check the signature of a file and return the fingerprint if the
      signature is valid or 'None' if it's not.  The first argument is the
@@ -1214,14 +1255,16 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
      used.
      """
  
+    rejects = []
+
      # Ensure the filename contains no shell meta-characters or other badness
      if not re_taint_free.match(sig_filename):
-        reject("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
-        return None
+        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
+        return (None, rejects)
  
      if data_filename and not re_taint_free.match(data_filename):
-        reject("!!WARNING!! tainted data filename: '%s'." % (data_filename))
-        return None
+        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
+        return (None, rejects)
  
      if not keyrings:
          keyrings = Cnf.ValueList("Dinstall::GPGKeyring")
@@ -1232,8 +1275,8 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
      if autofetch:
          error_msg = retrieve_key(sig_filename)
          if error_msg:
-            reject(error_msg)
-            return None
+            rejects.append(error_msg)
+            return (None, rejects)
  
      # Build the command line
      status_read, status_write = os.pipe()
@@ -1248,40 +1291,32 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
  
      # If we failed to parse the status-fd output, let's just whine and bail now
      if internal_error:
-        reject("internal error while performing signature check on %s." % (sig_filename))
-        reject(internal_error, "")
-        reject("Please report the above errors to the Archive maintainers by replying to this mail.", "")
-        return None
+        rejects.append("internal error while performing signature check on %s." % (sig_filename))
+        # list.append() takes exactly one argument; the empty prefix that
+        # the old reject() callback accepted has no meaning for a list.
+        rejects.append(internal_error)
+        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
+        return (None, rejects)
  
-    bad = ""
      # Now check for obviously bad things in the processed output
      if keywords.has_key("KEYREVOKED"):
-        reject("The key used to sign %s has been revoked." % (sig_filename))
-        bad = 1
+        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
      if keywords.has_key("BADSIG"):
-        reject("bad signature on %s." % (sig_filename))
-        bad = 1
+        rejects.append("bad signature on %s." % (sig_filename))
      if keywords.has_key("ERRSIG") and not keywords.has_key("NO_PUBKEY"):
-        reject("failed to check signature on %s." % (sig_filename))
-        bad = 1
+        rejects.append("failed to check signature on %s." % (sig_filename))
      if keywords.has_key("NO_PUBKEY"):
          args = keywords["NO_PUBKEY"]
          if len(args) >= 1:
              key = args[0]
-        reject("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
-        bad = 1
+        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
      if keywords.has_key("BADARMOR"):
-        reject("ASCII armour of signature was corrupt in %s." % (sig_filename))
-        bad = 1
+        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
      if keywords.has_key("NODATA"):
-        reject("no signature found in %s." % (sig_filename))
-        bad = 1
+        rejects.append("no signature found in %s." % (sig_filename))
      if keywords.has_key("EXPKEYSIG"):
          args = keywords["EXPKEYSIG"]
          if len(args) >= 1:
              key = args[0]
-        reject("Signature made by expired key 0x%s" % (key))
-        bad = 1
+        rejects.append("Signature made by expired key 0x%s" % (key))
      if keywords.has_key("KEYEXPIRED") and not keywords.has_key("GOODSIG"):
          args = keywords["KEYEXPIRED"]
          expiredate=""
@@ -1294,38 +1329,33 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
                      expiredate = "unknown (%s)" % (timestamp)
              else:
                  expiredate = timestamp
-        reject("The key used to sign %s has expired on %s" % (sig_filename, expiredate))
-        bad = 1
+        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))
  
-    if bad:
-        return None
+    if len(rejects) > 0:
+        return (None, rejects)
  
      # Next check gpgv exited with a zero return code
      if exit_status:
-        reject("gpgv failed while checking %s." % (sig_filename))
+        rejects.append("gpgv failed while checking %s." % (sig_filename))
          if status.strip():
-            reject(prefix_multi_line_string(status, " [GPG status-fd output:] "), "")
+            # list.append() takes exactly one argument (no prefix argument)
+            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
          else:
-            reject(prefix_multi_line_string(output, " [GPG output:] "), "")
+            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
+            rejects.append(prefix_multi_line_string(output, " [GPG output:] "), "")
+        return (None, rejects)
  
      # Sanity check the good stuff we expect
      if not keywords.has_key("VALIDSIG"):
-        reject("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
-        bad = 1
+        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
      else:
          args = keywords["VALIDSIG"]
          if len(args) < 1:
-            reject("internal error while checking signature on %s." % (sig_filename))
-            bad = 1
+            rejects.append("internal error while checking signature on %s." % (sig_filename))
          else:
              fingerprint = args[0]
      if not keywords.has_key("GOODSIG"):
-        reject("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
-        bad = 1
+        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
      if not keywords.has_key("SIG_ID"):
-        reject("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))
-        bad = 1
+        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))
  
      # Finally ensure there's not something we don't recognise
      known_keywords = Dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
@@ -1334,13 +1364,12 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
  
      for keyword in keywords.keys():
          if not known_keywords.has_key(keyword):
-            reject("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))
-            bad = 1
+            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))
  
-    if bad:
-        return None
+    if len(rejects) > 0:
+        return (None, rejects)
      else:
-        return fingerprint
+        return (fingerprint, [])
  
  ################################################################################
  
@@ -1479,3 +1508,48 @@ if which_conf_file() != default_config:
      apt_pkg.ReadConfigFileISC(Cnf,which_conf_file())
  
  ###############################################################################
+
+def ensure_orig_files(changes, dest_dir, session):
+    """
+    Make sure every orig tarball belonging to the given changes is
+    available in dest_dir, symlinking any that are missing.
+
+    Only files in C{changes.dsc_files} that match C{re_is_orig_source} and
+    have an C{'id'} recorded in C{changes.orig_files} are considered.
+
+    Returns a 2-tuple (already_exists, symlinked): the destination paths
+    that were already present and the destination paths that were newly
+    symlinked.
+
+    NOTE(review): if the file id cannot be found in the files table, an
+    error string is returned instead of the 2-tuple; callers need to
+    handle that case.
+    """
+
+    already_present = []
+    newly_linked = []
+
+    for name in changes.dsc_files:
+
+        # Only orig tarballs are of interest here
+        if re_is_orig_source.match(name) is None:
+            continue
+
+        # Ignore orig tarballs without a known pool file id
+        if name not in changes.orig_files:
+            continue
+        if 'id' not in changes.orig_files[name]:
+            continue
+
+        target = os.path.join(dest_dir, name)
+
+        # Nothing to do when something is already at the destination
+        if os.path.exists(target):
+            already_present.append(target)
+            continue
+
+        file_id = changes.orig_files[name]['id']
+
+        # Look up where the pool keeps the file
+        row = session.execute(
+            'SELECT l.path, f.filename FROM location l, files f WHERE f.id = :id and f.location = l.id',
+            {'id': file_id}
+        ).fetchone()
+
+        if not row:
+            return "[INTERNAL ERROR] Couldn't find id %s in files table." % file_id
+
+        pool_path = os.path.join(row[0], row[1])
+        os.symlink(pool_path, target)
+        newly_linked.append(target)
+
+    return (already_present, newly_linked)