# Utility functions
# Copyright (C) 2000, 2001, 2002, 2003, 2004 James Troup <james@nocrew.org>
-# $Id: utils.py,v 1.65 2004-04-01 17:13:10 troup Exp $
+# $Id: utils.py,v 1.66 2004-04-03 02:49:46 troup Exp $
################################################################################
################################################################################
-import commands, os, pwd, re, select, socket, shutil, string, sys, tempfile, traceback;
+import commands, encodings.ascii, encodings.utf_8, encodings.latin_1, \
+ email.Header, os, pwd, re, select, socket, shutil, string, sys, \
+ tempfile, traceback;
import apt_pkg;
import db_access;
re_multi_line_field = re.compile(r"^\s(.*)");
re_taint_free = re.compile(r"^[-+~\.\w]+$");
-re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\> \t]+)\>");
+re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\>]+)\>");
changes_parse_error_exc = "Can't parse line in .changes file";
invalid_dsc_format_exc = "Invalid .dsc file";
################################################################################
+class Error(Exception):
+ """Base class for exceptions in this module."""
+ pass;
+
+class ParseMaintError(Error):
+ """Exception raised for errors in parsing a maintainer field.
+
+ Attributes:
+ message -- explanation of the error
+ """
+
+ def __init__(self, message):
+ self.args = message,;
+ self.message = message;
+
+################################################################################
+
def open_file(filename, mode='r'):
try:
f = open(filename, mode);
################################################################################
-# Fix the `Maintainer:' field to be an RFC822 compatible address.
-# cf. Debian Policy Manual (D.2.4)
-#
-# 06:28|<Culus> 'The standard sucks, but my tool is supposed to
-# interoperate with it. I know - I'll fix the suckage
-# and make things incompatible!'
+def force_to_utf8(s):
+ """Forces a string to UTF-8. If the string isn't already UTF-8,
+it's assumed to be ISO-8859-1."""
+ try:
+ unicode(s, 'utf-8');
+ return s;
+ except UnicodeError:
+ latin1_s = unicode(s,'iso8859-1');
+ return latin1_s.encode('utf-8');
+
+def rfc2047_encode(s):
+ """Encodes a (header) string per RFC2047 if necessary. If the
+string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1."""
+ try:
+ encodings.ascii.Codec().decode(s);
+ return s;
+ except UnicodeError:
+ pass;
+ try:
+ encodings.utf_8.Codec().decode(s);
+ h = email.Header.Header(s, 'utf-8', 998);
+ return str(h);
+ except UnicodeError:
+ h = email.Header.Header(s, 'iso-8859-1', 998);
+ return str(h);
+
+################################################################################
+
+# <Culus> 'The standard sucks, but my tool is supposed to interoperate
+# with it. I know - I'll fix the suckage and make things
+# incompatible!'
def fix_maintainer (maintainer):
- m = re_parse_maintainer.match(maintainer);
- rfc822 = maintainer;
- name = "";
- email = "";
- if m and len(m.groups()) == 2:
+ """Parses a Maintainer or Changed-By field and returns:
+ (1) an RFC822 compatible version,
+ (2) an RFC2047 compatible version,
+ (3) the name
+ (4) the email
+
+The name is forced to UTF-8 for both (1) and (3). If the name field
+contains '.' or ',' (as allowed by Debian policy), (1) and (2) are
+switched to 'email (name)' format."""
+ maintainer = maintainer.strip()
+ if not maintainer:
+ return ('', '', '', '');
+
+ if maintainer.find("<") == -1 or (maintainer[0] == "<" and \
+ maintainer[-1:] == ">"):
+ email = maintainer;
+ name = "";
+ else:
+ m = re_parse_maintainer.match(maintainer);
+ if not m:
+ raise ParseMaintError, "Doesn't parse as a valid Maintainer field."
name = m.group(1);
email = m.group(2);
- if name.find(',') != -1 or name.find('.') != -1:
- rfc822 = "%s (%s)" % (email, name);
- return (rfc822, name, email)
+
+ # Get an RFC2047 compliant version of the name
+ rfc2047_name = rfc2047_encode(name);
+
+ # Force the name to be UTF-8
+ name = force_to_utf8(name);
+
+ if name.find(',') != -1 or name.find('.') != -1:
+ rfc822_maint = "%s (%s)" % (email, name);
+ rfc2047_maint = "%s (%s)" % (email, rfc2047_name);
+ else:
+ rfc822_maint = "%s <%s>" % (name, email);
+ rfc2047_maint = "%s <%s>" % (rfc2047_name, email);
+
+ if email.find("@") == -1 and email.find("buildd_") != 0:
+ raise ParseMaintError, "No @ found in email address part."
+
+ return (rfc822_maint, rfc2047_maint, name, email);
################################################################################