X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=utils.py;fp=utils.py;h=1ad6a4e7762dcc6888b5679a732dae8eca2f566c;hb=f2f237c936b5d5727c2b70be85cdacd2903c05b2;hp=8445fd4d664289f1f6f9ad91335626546a27d2b5;hpb=3686a00f1001f2d5692fa5e706b898053e39191a;p=dak.git diff --git a/utils.py b/utils.py index 8445fd4d..1ad6a4e7 100644 --- a/utils.py +++ b/utils.py @@ -2,7 +2,7 @@ # Utility functions # Copyright (C) 2000, 2001, 2002, 2003, 2004 James Troup -# $Id: utils.py,v 1.65 2004-04-01 17:13:10 troup Exp $ +# $Id: utils.py,v 1.66 2004-04-03 02:49:46 troup Exp $ ################################################################################ @@ -22,7 +22,9 @@ ################################################################################ -import commands, os, pwd, re, select, socket, shutil, string, sys, tempfile, traceback; +import commands, encodings.ascii, encodings.utf_8, encodings.latin_1, \ + email.Header, os, pwd, re, select, socket, shutil, string, sys, \ + tempfile, traceback; import apt_pkg; import db_access; @@ -40,7 +42,7 @@ re_single_line_field = re.compile(r"^(\S*)\s*:\s*(.*)"); re_multi_line_field = re.compile(r"^\s(.*)"); re_taint_free = re.compile(r"^[-+~\.\w]+$"); -re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\> \t]+)\>"); +re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\>]+)\>"); changes_parse_error_exc = "Can't parse line in .changes file"; invalid_dsc_format_exc = "Invalid .dsc file"; @@ -58,6 +60,23 @@ default_apt_config = "/etc/katie/apt.conf"; ################################################################################ +class Error(Exception): + """Base class for exceptions in this module.""" + pass; + +class ParseMaintError(Error): + """Exception raised for errors in parsing a maintainer field. + + Attributes: + message -- explanation of the error + """ + + def __init__(self, message): + self.args = message,; + self.message = message; + +################################################################################ + def open_file(filename, mode='r'): try: f = open(filename, mode); @@ -262,24 +281,80 @@ def build_file_list(changes, is_a_dsc=0): ################################################################################ -# Fix the `Maintainer:' field to be an RFC822 compatible address. -# cf. Debian Policy Manual (D.2.4) -# -# 06:28| 'The standard sucks, but my tool is supposed to -# interoperate with it. I know - I'll fix the suckage -# and make things incompatible!' +def force_to_utf8(s): + """Forces a string to UTF-8. If the string isn't already UTF-8, +it's assumed to be ISO-8859-1.""" + try: + unicode(s, 'utf-8'); + return s; + except UnicodeError: + latin1_s = unicode(s,'iso8859-1'); + return latin1_s.encode('utf-8'); + +def rfc2047_encode(s): + """Encodes a (header) string per RFC2047 if necessary. If the +string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1.""" + try: + encodings.ascii.Codec().decode(s); + return s; + except UnicodeError: + pass; + try: + encodings.utf_8.Codec().decode(s); + h = email.Header.Header(s, 'utf-8', 998); + return str(h); + except UnicodeError: + h = email.Header.Header(s, 'iso-8859-1', 998); + return str(h); + +################################################################################ + +# 'The standard sucks, but my tool is supposed to interoperate +# with it. I know - I'll fix the suckage and make things +# incompatible!' def fix_maintainer (maintainer): - m = re_parse_maintainer.match(maintainer); - rfc822 = maintainer; - name = ""; - email = ""; - if m and len(m.groups()) == 2: + """Parses a Maintainer or Changed-By field and returns: + (1) an RFC822 compatible version, + (2) an RFC2047 compatible version, + (3) the name + (4) the email + +The name is forced to UTF-8 for both (1) and (3). If the name field +contains '.' or ',' (as allowed by Debian policy), (1) and (2) are +switched to 'email (name)' format.""" + maintainer = maintainer.strip() + if not maintainer: + return ('', '', '', ''); + + if maintainer.find("<") == -1 or (maintainer[0] == "<" and \ + maintainer[-1:] == ">"): + email = maintainer; + name = ""; + else: + m = re_parse_maintainer.match(maintainer); + if not m: + raise ParseMaintError, "Doesn't parse as a valid Maintainer field." name = m.group(1); email = m.group(2); - if name.find(',') != -1 or name.find('.') != -1: - rfc822 = "%s (%s)" % (email, name); - return (rfc822, name, email) + + # Get an RFC2047 compliant version of the name + rfc2047_name = rfc2047_encode(name); + + # Force the name to be UTF-8 + name = force_to_utf8(name); + + if name.find(',') != -1 or name.find('.') != -1: + rfc822_maint = "%s (%s)" % (email, name); + rfc2047_maint = "%s (%s)" % (email, rfc2047_name); + else: + rfc822_maint = "%s <%s>" % (name, email); + rfc2047_maint = "%s <%s>" % (rfc2047_name, email); + + if email.find("@") == -1 and email.find("buildd_") != 0: + raise ParseMaintError, "No @ found in email address part." + + return (rfc822_maint, rfc2047_maint, name, email); ################################################################################