X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=daklib%2Futils.py;h=3305f695ac4faab639dad148b95f8493b83da8b7;hb=1ef1804c272cf73da57a3e2d1cbf6c7610207ee9;hp=bdb573af2210330f78f8279ef429cb4e77095093;hpb=b9ebd1d66eca9bcfefbbea1e008f694e3cf95b2c;p=dak.git

diff --git a/daklib/utils.py b/daklib/utils.py
index bdb573af..3305f695 100755
--- a/daklib/utils.py
+++ b/daklib/utils.py
@@ -22,7 +22,6 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
-import codecs
 import commands
 import email.Header
 import os
@@ -39,14 +38,18 @@ import time
 import re
 import string
 import email as modemail
+import subprocess
 
 from dbconn import DBConn, get_architecture, get_component, get_suite
 from dak_exceptions import *
 from textutils import fix_maintainer
 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
-                    re_multi_line_field, re_srchasver, re_verwithext, \
-                    re_parse_maintainer, re_taint_free, re_gpg_uid, re_re_mark, \
-                    re_whitespace_comment
+                    re_multi_line_field, re_srchasver, re_taint_free, \
+                    re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
+                    re_is_orig_source
+
+from srcformats import get_format_from_string
+from collections import defaultdict
 
 ################################################################################
 
@@ -60,6 +63,22 @@ key_uid_email_cache = {} #: Cache for email addresses from gpg key uids
 known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                 ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
 
+# Monkeypatch commands.getstatusoutput as it returns a "0" exit code in
+# all situations under lenny's Python.
+import commands
+def dak_getstatusoutput(cmd):
+    pipe = subprocess.Popen(cmd, shell=True, universal_newlines=True,
+        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+    output = "".join(pipe.stdout.readlines())
+
+    ret = pipe.wait()
+    if ret is None:
+        ret = 0
+
+    return ret, output
+commands.getstatusoutput = dak_getstatusoutput
+
 ################################################################################
 
 def html_escape(s):
@@ -332,6 +351,86 @@ def check_size(where, files):
 
 ################################################################################
 
+def check_dsc_files(dsc_filename, dsc=None, dsc_files=None):
+    """
+    Verify that the files listed in the Files field of the .dsc are
+    those expected given the announced Format.
+
+    @type dsc_filename: string
+    @param dsc_filename: path of .dsc file
+
+    @type dsc: dict
+    @param dsc: the content of the .dsc parsed by C{parse_changes()}
+
+    @type dsc_files: dict
+    @param dsc_files: the file list returned by C{build_file_list()}
+
+    @rtype: list
+    @return: all errors detected
+    """
+    rejmsg = []
+
+    # Parse the file if needed
+    if dsc is None:
+        dsc = parse_changes(dsc_filename, signing_rules=1)
+
+    if dsc_files is None:
+        dsc_files = build_file_list(dsc, is_a_dsc=1)
+
+    # Ensure .dsc lists proper set of source files according to the format
+    # announced
+    has = defaultdict(lambda: 0)
+
+    ftype_lookup = (
+        (r'orig.tar.gz', ('orig_tar_gz', 'orig_tar')),
+        (r'diff.gz', ('debian_diff',)),
+        (r'tar.gz', ('native_tar_gz', 'native_tar')),
+        (r'debian\.tar\.(gz|bz2)', ('debian_tar',)),
+        (r'orig\.tar\.(gz|bz2)', ('orig_tar',)),
+        (r'tar\.(gz|bz2)', ('native_tar',)),
+        (r'orig-.+\.tar\.(gz|bz2)', ('more_orig_tar',)),
+    )
+
+    for f in dsc_files.keys():
+        m = re_issource.match(f)
+        if not m:
+            rejmsg.append("%s: %s in Files field not recognised as source."
+                          % (dsc_filename, f))
+            continue
+
+        # Populate 'has' dictionary by resolving keys in lookup table
+        matched = False
+        for regex, keys in ftype_lookup:
+            if re.match(regex, m.group(3)):
+                matched = True
+                for key in keys:
+                    has[key] += 1
+                break
+
+        # File does not match anything in lookup table; reject
+        if not matched:
+            rejmsg.append("%s: unexpected source file '%s'" % (dsc_filename, f))
+
+    # Check for multiple files
+    for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
+        if has[file_type] > 1:
+            rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
+
+    # Source format specific tests
+    try:
+        format = get_format_from_string(dsc['format'])
+        rejmsg.extend([
+            '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
+        ])
+
+    except UnknownFormatError:
+        # Not an error here for now
+        pass
+
+    return rejmsg
+
+################################################################################
+
 def check_hash_fields(what, manifest):
     """
     check_hash_fields ensures that there are no checksum fields in the
@@ -386,41 +485,6 @@ def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 
 ################################################################################
 
-def ensure_hashes(changes, dsc, files, dsc_files):
-    rejmsg = []
-
-    # Make sure we recognise the format of the Files: field in the .changes
-    format = changes.get("format", "0.0").split(".", 1)
-    if len(format) == 2:
-        format = int(format[0]), int(format[1])
-    else:
-        format = int(float(format[0])), 0
-
-    # We need to deal with the original changes blob, as the fields we need
-    # might not be in the changes dict serialised into the .dak anymore.
-    orig_changes = parse_deb822(changes['filecontents'])
-
-    # Copy the checksums over to the current changes dict. This will keep
-    # the existing modifications to it intact.
-    for field in orig_changes:
-        if field.startswith('checksums-'):
-            changes[field] = orig_changes[field]
-
-    # Check for unsupported hashes
-    rejmsg.extend(check_hash_fields(".changes", changes))
-    rejmsg.extend(check_hash_fields(".dsc", dsc))
-
-    # We have to calculate the hash if we have an earlier changes version than
-    # the hash appears in rather than require it exist in the changes file
-    for hashname, hashfunc, version in known_hashes:
-        rejmsg.extend(_ensure_changes_hash(changes, format, version, files,
-            hashname, hashfunc))
-        if "source" in changes["architecture"]:
-            rejmsg.extend(_ensure_dsc_hash(dsc, dsc_files, hashname,
-                hashfunc))
-
-    return rejmsg
-
 def parse_checksums(where, files, manifest, hashname):
     rejmsg = []
     field = 'checksums-%s' % hashname
@@ -429,7 +493,12 @@ def parse_checksums(where, files, manifest, hashname):
     for line in manifest[field].split('\n'):
         if not line:
             break
-        checksum, size, checkfile = line.strip().split(' ')
+        clist = line.strip().split(' ')
+        if len(clist) == 3:
+            checksum, size, checkfile = clist
+        else:
+            rejmsg.append("Cannot parse checksum line [%s]" % (line))
+            continue
         if not files.has_key(checkfile):
         # TODO: check for the file's entry in the original files dict, not
         # the one modified by (auto)byhand and other weird stuff
@@ -458,30 +527,9 @@ def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
     if not changes.has_key(field):
         raise NoFilesFieldError
 
-    # Make sure we recognise the format of the Files: field
-    format = re_verwithext.search(changes.get("format", "0.0"))
-    if not format:
-        raise UnknownFormatError, "%s" % (changes.get("format","0.0"))
-
-    format = format.groups()
-    if format[1] == None:
-        format = int(float(format[0])), 0, format[2]
-    else:
-        format = int(format[0]), int(format[1]), format[2]
-    if format[2] == None:
-        format = format[:2]
-
-    if is_a_dsc:
-        # format = (1,0) are the only formats we currently accept,
-        # format = (0,0) are missing format headers of which we still
-        # have some in the archive.
-        if format != (1,0) and format != (0,0):
-            raise UnknownFormatError, "%s" % (changes.get("format","0.0"))
-    else:
-        if (format < (1,5) or format > (1,8)):
-            raise UnknownFormatError, "%s" % (changes.get("format","0.0"))
-        if field != "files" and format < (1,8):
-            raise UnknownFormatError, "%s" % (changes.get("format","0.0"))
+    # Get SourceFormat object for this Format and validate it
+    format = get_format_from_string(changes['format'])
+    format.validate_format(is_a_dsc=is_a_dsc, field=field)
 
     includes_section = (not is_a_dsc) and field == "files"
 
@@ -701,22 +749,12 @@ def which_alias_file():
 
 ################################################################################
 
-# Escape characters which have meaning to SQL's regex comparison operator ('~')
-# (woefully incomplete)
-
-def regex_safe (s):
-    s = s.replace('+', '\\\\+')
-    s = s.replace('.', '\\\\.')
-    return s
-
-################################################################################
-
 def TemplateSubst(map, filename):
     """ Perform a substition of template """
     templatefile = open_file(filename)
     template = templatefile.read()
     for x in map.keys():
-        template = template.replace(x,map[x])
+        template = template.replace(x, str(map[x]))
     templatefile.close()
     return template
 
@@ -736,6 +774,9 @@ def warn(msg):
 def whoami ():
     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 
+def getusername ():
+    return pwd.getpwuid(os.getuid())[0]
+
 ################################################################################
 
 def size_type (c):
@@ -920,8 +961,8 @@ def parse_args(Options):
         suite_ids_list = []
         for suitename in split_args(Options["Suite"]):
             suite = get_suite(suitename, session=session)
-            if suite_id is None:
-                warn("suite '%s' not recognised." % (suitename))
+            if suite is None or suite.suite_id is None:
+                warn("suite '%s' not recognised." % (suitename))
             else:
                 suite_ids_list.append(suite.suite_id)
         if suite_ids_list:
@@ -1198,7 +1239,7 @@ def gpg_keyring_args(keyrings=None):
 
 ################################################################################
 
-def check_signature (sig_filename, reject, data_filename="", keyrings=None, autofetch=None):
+def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None):
     """
     Check the signature of a file and return the fingerprint if the
     signature is valid or 'None' if it's not. The first argument is the
@@ -1214,14 +1255,16 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
     used.
     """
 
+    rejects = []
+
     # Ensure the filename contains no shell meta-characters or other badness
     if not re_taint_free.match(sig_filename):
-        reject("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
-        return None
+        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
+        return (None, rejects)
 
     if data_filename and not re_taint_free.match(data_filename):
-        reject("!!WARNING!! tainted data filename: '%s'." % (data_filename))
-        return None
+        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
+        return (None, rejects)
 
     if not keyrings:
         keyrings = Cnf.ValueList("Dinstall::GPGKeyring")
@@ -1232,8 +1275,8 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
     if autofetch:
         error_msg = retrieve_key(sig_filename)
         if error_msg:
-            reject(error_msg)
-            return None
+            rejects.append(error_msg)
+            return (None, rejects)
 
     # Build the command line
     status_read, status_write = os.pipe()
@@ -1248,40 +1291,32 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
 
     # If we failed to parse the status-fd output, let's just whine and bail now
     if internal_error:
-        reject("internal error while performing signature check on %s." % (sig_filename))
-        reject(internal_error, "")
-        reject("Please report the above errors to the Archive maintainers by replying to this mail.", "")
-        return None
+        rejects.append("internal error while performing signature check on %s." % (sig_filename))
+        rejects.append(internal_error)
+        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
+        return (None, rejects)
 
-    bad = ""
     # Now check for obviously bad things in the processed output
     if keywords.has_key("KEYREVOKED"):
-        reject("The key used to sign %s has been revoked." % (sig_filename))
-        bad = 1
+        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
     if keywords.has_key("BADSIG"):
-        reject("bad signature on %s." % (sig_filename))
-        bad = 1
+        rejects.append("bad signature on %s." % (sig_filename))
     if keywords.has_key("ERRSIG") and not keywords.has_key("NO_PUBKEY"):
-        reject("failed to check signature on %s." % (sig_filename))
-        bad = 1
+        rejects.append("failed to check signature on %s." % (sig_filename))
     if keywords.has_key("NO_PUBKEY"):
         args = keywords["NO_PUBKEY"]
         if len(args) >= 1:
             key = args[0]
-        reject("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
-        bad = 1
+        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
     if keywords.has_key("BADARMOR"):
-        reject("ASCII armour of signature was corrupt in %s." % (sig_filename))
-        bad = 1
+        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
     if keywords.has_key("NODATA"):
-        reject("no signature found in %s." % (sig_filename))
-        bad = 1
+        rejects.append("no signature found in %s." % (sig_filename))
     if keywords.has_key("EXPKEYSIG"):
         args = keywords["EXPKEYSIG"]
         if len(args) >= 1:
             key = args[0]
-        reject("Signature made by expired key 0x%s" % (key))
-        bad = 1
+        rejects.append("Signature made by expired key 0x%s" % (key))
     if keywords.has_key("KEYEXPIRED") and not keywords.has_key("GOODSIG"):
         args = keywords["KEYEXPIRED"]
         expiredate=""
@@ -1294,38 +1329,33 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
                     expiredate = "unknown (%s)" % (timestamp)
             else:
                 expiredate = timestamp
-        reject("The key used to sign %s has expired on %s" % (sig_filename, expiredate))
-        bad = 1
+        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))
 
-    if bad:
-        return None
+    if len(rejects) > 0:
+        return (None, rejects)
 
     # Next check gpgv exited with a zero return code
     if exit_status:
-        reject("gpgv failed while checking %s." % (sig_filename))
+        rejects.append("gpgv failed while checking %s." % (sig_filename))
         if status.strip():
-            reject(prefix_multi_line_string(status, " [GPG status-fd output:] "), "")
+            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
         else:
-            reject(prefix_multi_line_string(output, " [GPG output:] "), "")
-        return None
+            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
+        return (None, rejects)
 
     # Sanity check the good stuff we expect
     if not keywords.has_key("VALIDSIG"):
-        reject("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
-        bad = 1
+        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
     else:
         args = keywords["VALIDSIG"]
         if len(args) < 1:
-            reject("internal error while checking signature on %s." % (sig_filename))
-            bad = 1
+            rejects.append("internal error while checking signature on %s." % (sig_filename))
         else:
             fingerprint = args[0]
     if not keywords.has_key("GOODSIG"):
-        reject("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
-        bad = 1
+        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
     if not keywords.has_key("SIG_ID"):
-        reject("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))
-        bad = 1
+        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))
 
     # Finally ensure there's not something we don't recognise
     known_keywords = Dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
@@ -1334,13 +1364,12 @@ def check_signature (sig_filename, reject, data_filename="", keyrings=None, auto
 
     for keyword in keywords.keys():
         if not known_keywords.has_key(keyword):
-            reject("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))
-            bad = 1
+            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))
 
-    if bad:
-        return None
+    if len(rejects) > 0:
+        return (None, rejects)
     else:
-        return fingerprint
+        return (fingerprint, [])
 
 ################################################################################
 
@@ -1479,3 +1508,48 @@ if which_conf_file() != default_config:
     apt_pkg.ReadConfigFileISC(Cnf,which_conf_file())
 
 ###############################################################################
+
+def ensure_orig_files(changes, dest_dir, session):
+    """
+    Ensure that dest_dir contains all the orig tarballs for the specified
+    changes. If it does not, symlink them into place.
+
+    Returns a 2-tuple (already_exists, symlinked) containing a list of files
+    that were already there and a list of files that were symlinked into place.
+    """
+
+    exists, symlinked = [], []
+
+    for dsc_file in changes.dsc_files:
+
+        # Skip all files that are not orig tarballs
+        if not re_is_orig_source.match(dsc_file):
+            continue
+
+        # Skip orig files not identified in the pool
+        if not (dsc_file in changes.orig_files and
+                'id' in changes.orig_files[dsc_file]):
+            continue
+
+        dest = os.path.join(dest_dir, dsc_file)
+
+        if os.path.exists(dest):
+            exists.append(dest)
+            continue
+
+        orig_file_id = changes.orig_files[dsc_file]['id']
+
+        c = session.execute(
+            'SELECT l.path, f.filename FROM location l, files f WHERE f.id = :id and f.location = l.id',
+            {'id': orig_file_id}
+        )
+
+        res = c.fetchone()
+        if not res:
+            return "[INTERNAL ERROR] Couldn't find id %s in files table." % orig_file_id
+
+        src = os.path.join(res[0], res[1])
+        os.symlink(src, dest)
+        symlinked.append(dest)
+
+    return (exists, symlinked)