X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=daklib%2Futils.py;h=52b902f9ffe84e298fce9fb4c127b935b5376f6a;hb=df1bf169c5b89fa9764a326bbd7a6883a8789f6b;hp=ec82782fdaa593bf9a4c89e9ac2e44236bf15ce8;hpb=aa83ebb15882823869b109d370e24ef3efd730f8;p=dak.git diff --git a/daklib/utils.py b/daklib/utils.py index ec82782f..52b902f9 100755 --- a/daklib/utils.py +++ b/daklib/utils.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# vim:set et ts=4 sw=4: # Utility functions # Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup @@ -22,9 +23,10 @@ ################################################################################ import codecs, commands, email.Header, os, pwd, re, select, socket, shutil, \ - sys, tempfile, traceback + sys, tempfile, traceback, stat import apt_pkg import database +import time from dak_exceptions import * ################################################################################ @@ -49,12 +51,24 @@ re_verwithext = re.compile(r"^(\d+)(?:\.(\d+))(?:\s+\((\S+)\))?$") re_srchasver = re.compile(r"^(\S+)\s+\((\S+)\)$") +html_escaping = {'"':'"', '&':'&', '<':'<', '>':'>'} +re_html_escaping = re.compile('|'.join(map(re.escape, html_escaping.keys()))) + default_config = "/etc/dak/dak.conf" default_apt_config = "/etc/dak/apt.conf" alias_cache = None key_uid_email_cache = {} +# (hashname, function, earliest_changes_version) +known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)), + ("sha256", apt_pkg.sha256sum, (1, 8))] + +################################################################################ + +def html_escape(s): + return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s) + ################################################################################ def open_file(filename, mode='r'): @@ -96,34 +110,14 @@ def extract_component_from_section(section): ################################################################################ -def parse_changes(filename, signing_rules=0): - """Parses a changes file and returns a dictionary where each field is a -key. The mandatory first argument is the filename of the .changes -file. - -signing_rules is an optional argument: - - o If signing_rules == -1, no signature is required. - o If signing_rules == 0 (the default), a signature is required. - o If signing_rules == 1, it turns on the same strict format checking - as dpkg-source. - -The rules for (signing_rules == 1)-mode are: - - o The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----" - followed by any PGP header data and must end with a blank line. - - o The data section must end with a blank line and must be followed by - "-----BEGIN PGP SIGNATURE-----". -""" - +def parse_deb822(contents, signing_rules=0): error = "" changes = {} - changes_in = open_file(filename) - lines = changes_in.readlines() + # Split the lines in the input, keeping the linebreaks. + lines = contents.splitlines(True) - if not lines: + if len(lines) == 0: raise ParseChangesError, "[Empty changes file]" # Reindex by line number so we can easily verify the format of @@ -189,7 +183,6 @@ The rules for (signing_rules == 1)-mode are: if signing_rules == 1 and inside_signature: raise InvalidDscError, index - changes_in.close() changes["filecontents"] = "".join(lines) if changes.has_key("source"): @@ -207,6 +200,228 @@ The rules for (signing_rules == 1)-mode are: ################################################################################ +def parse_changes(filename, signing_rules=0): + """Parses a changes file and returns a dictionary where each field is a +key. The mandatory first argument is the filename of the .changes +file. + +signing_rules is an optional argument: + + o If signing_rules == -1, no signature is required. + o If signing_rules == 0 (the default), a signature is required. + o If signing_rules == 1, it turns on the same strict format checking + as dpkg-source. + +The rules for (signing_rules == 1)-mode are: + + o The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----" + followed by any PGP header data and must end with a blank line. + + o The data section must end with a blank line and must be followed by + "-----BEGIN PGP SIGNATURE-----". +""" + + changes_in = open_file(filename) + content = changes_in.read() + changes_in.close() + return parse_deb822(content, signing_rules) + +################################################################################ + +def hash_key(hashname): + return '%ssum' % hashname + +################################################################################ + +def create_hash(where, files, hashname, hashfunc): + """create_hash extends the passed files dict with the given hash by + iterating over all files on disk and passing them to the hashing + function given.""" + + rejmsg = [] + for f in files.keys(): + try: + file_handle = open_file(f) + except CantOpenError: + rejmsg.append("Could not open file %s for checksumming" % (f)) + + files[f][hash_key(hashname)] = hashfunc(file_handle) + + file_handle.close() + return rejmsg + +################################################################################ + +def check_hash(where, files, hashname, hashfunc): + """check_hash checks the given hash in the files dict against the actual + files on disk. The hash values need to be present consistently in + all file entries. It does not modify its input in any way.""" + + rejmsg = [] + for f in files.keys(): + file_handle = None + try: + try: + file_handle = open_file(f) + + # Check for the hash entry, to not trigger a KeyError. + if not files[f].has_key(hash_key(hashname)): + rejmsg.append("%s: misses %s checksum in %s" % (f, hashname, + where)) + continue + + # Actually check the hash for correctness. + if hashfunc(file_handle) != files[f][hash_key(hashname)]: + rejmsg.append("%s: %s check failed in %s" % (f, hashname, + where)) + except CantOpenError: + # TODO: This happens when the file is in the pool. + # warn("Cannot open file %s" % f) + continue + finally: + if file_handle: + file_handle.close() + return rejmsg + +################################################################################ + +def check_size(where, files): + """check_size checks the file sizes in the passed files dict against the + files on disk.""" + + rejmsg = [] + for f in files.keys(): + try: + entry = os.stat(f) + except OSError, exc: + if exc.errno == 2: + # TODO: This happens when the file is in the pool. + continue + raise + + actual_size = entry[stat.ST_SIZE] + size = int(files[f]["size"]) + if size != actual_size: + rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s" + % (f, actual_size, size, where)) + return rejmsg + +################################################################################ + +def check_hash_fields(what, manifest): + """check_hash_fields ensures that there are no checksum fields in the + given dict that we do not know about.""" + + rejmsg = [] + hashes = map(lambda x: x[0], known_hashes) + for field in manifest: + if field.startswith("checksums-"): + hashname = field.split("-",1)[1] + if hashname not in hashes: + rejmsg.append("Unsupported checksum field for %s "\ + "in %s" % (hashname, what)) + return rejmsg + +################################################################################ + +def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc): + if format >= version: + # The version should contain the specified hash. + func = check_hash + + # Import hashes from the changes + rejmsg = parse_checksums(".changes", files, changes, hashname) + if len(rejmsg) > 0: + return rejmsg + else: + # We need to calculate the hash because it can't possibly + # be in the file. + func = create_hash + return func(".changes", files, hashname, hashfunc) + +# We could add the orig which might be in the pool to the files dict to +# access the checksums easily. + +def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc): + """ensure_dsc_hashes' task is to ensure that each and every *present* hash + in the dsc is correct, i.e. identical to the changes file and if necessary + the pool. The latter task is delegated to check_hash.""" + + rejmsg = [] + if not dsc.has_key('Checksums-%s' % (hashname,)): + return rejmsg + # Import hashes from the dsc + parse_checksums(".dsc", dsc_files, dsc, hashname) + # And check it... + rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc)) + return rejmsg + +################################################################################ + +def ensure_hashes(changes, dsc, files, dsc_files): + rejmsg = [] + + # Make sure we recognise the format of the Files: field in the .changes + format = changes.get("format", "0.0").split(".", 1) + if len(format) == 2: + format = int(format[0]), int(format[1]) + else: + format = int(float(format[0])), 0 + + # We need to deal with the original changes blob, as the fields we need + # might not be in the changes dict serialised into the .dak anymore. + orig_changes = parse_deb822(changes['filecontents']) + + # Copy the checksums over to the current changes dict. This will keep + # the existing modifications to it intact. + for field in orig_changes: + if field.startswith('checksums-'): + changes[field] = orig_changes[field] + + # Check for unsupported hashes + rejmsg.extend(check_hash_fields(".changes", changes)) + rejmsg.extend(check_hash_fields(".dsc", dsc)) + + # We have to calculate the hash if we have an earlier changes version than + # the hash appears in rather than require it exist in the changes file + for hashname, hashfunc, version in known_hashes: + rejmsg.extend(_ensure_changes_hash(changes, format, version, files, + hashname, hashfunc)) + if "source" in changes["architecture"]: + rejmsg.extend(_ensure_dsc_hash(dsc, dsc_files, hashname, + hashfunc)) + + return rejmsg + +def parse_checksums(where, files, manifest, hashname): + rejmsg = [] + field = 'checksums-%s' % hashname + if not field in manifest: + return rejmsg + input = manifest[field] + for line in input.split('\n'): + if not line: + break + hash, size, file = line.strip().split(' ') + if not files.has_key(file): + # TODO: check for the file's entry in the original files dict, not + # the one modified by (auto)byhand and other weird stuff + # rejmsg.append("%s: not present in files but in checksums-%s in %s" % + # (file, hashname, where)) + continue + if not files[file]["size"] == size: + rejmsg.append("%s: size differs for files and checksums-%s entry "\ + "in %s" % (file, hashname, where)) + continue + files[file][hash_key(hashname)] = hash + for f in files.keys(): + if not files[f].has_key(hash_key(hashname)): + rejmsg.append("%s: no entry in checksums-%s in %s" % (file, + hashname, where)) + return rejmsg + +################################################################################ + # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"): @@ -230,7 +445,10 @@ def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"): format = format[:2] if is_a_dsc: - if format != (1,0): + # format = (1,0) are the only formats we currently accept, + # format = (0,0) are missing format headers of which we still + # have some in the archive. + if format != (1,0) and format != (0,0): raise UnknownFormatError, "%s" % (changes.get("format","0.0")) else: if (format < (1,5) or format > (1,8)): @@ -1014,11 +1232,22 @@ used.""" if keywords.has_key("NODATA"): reject("no signature found in %s." % (sig_filename)) bad = 1 + if keywords.has_key("EXPKEYSIG"): + args = keywords["EXPKEYSIG"] + if len(args) >= 1: + key = args[0] + reject("Signature made by expired key 0x%s" % (key)) + bad = 1 if keywords.has_key("KEYEXPIRED") and not keywords.has_key("GOODSIG"): args = keywords["KEYEXPIRED"] + expiredate="" if len(args) >= 1: - key = args[0] - reject("The key (0x%s) used to sign %s has expired." % (key, sig_filename)) + timestamp = args[0] + if timestamp.count("T") == 0: + expiredate = time.strftime("%Y-%m-%d", time.gmtime(timestamp)) + else: + expiredate = timestamp + reject("The key used to sign %s has expired on %s" % (sig_filename, expiredate)) bad = 1 if bad: @@ -1178,3 +1407,52 @@ if which_conf_file() != default_config: apt_pkg.ReadConfigFileISC(Cnf,which_conf_file()) ################################################################################ + +def generate_contents_information(filename): + """ + Generate a list of flies contained in a .deb + + @type filename: string + @param filename: the path to a .deb + + @rtype: list + @return: a list of files in the data.tar.* portion of the .deb + """ + cmd = "ar t %s" % (filename) + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + reject("%s: 'ar t' invocation failed." % (filename)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + + # Ugh ... this is ugly ... Code ripped from process_unchecked.py + chunks = output.split('\n') + + contents = [] + try: + cmd = "ar x %s %s" % (filename, chunks[2]) + (result, output) = commands.getstatusoutput(cmd) + if result != 0: + reject("%s: '%s' invocation failed." % (filename, cmd)) + reject(utils.prefix_multi_line_string(output, " [ar output:] "), "") + + # Got deb tarballs, now lets go through and determine what bits + # and pieces the deb had ... + if chunks[2] == "data.tar.gz": + data = tarfile.open("data.tar.gz", "r:gz") + elif data_tar == "data.tar.bz2": + data = tarfile.open("data.tar.bz2", "r:bz2") + else: + os.remove(chunks[2]) + reject("couldn't find data.tar.*") + + for tarinfo in data: + if not tarinfo.isdir(): + contents.append(tarinfo.name[2:]) + + finally: + if os.path.exists( chunks[2] ): + os.remove( chunks[2] ) + + return contents + +###############################################################################