From: Mark Hymers Date: Sat, 23 May 2009 23:03:13 +0000 (+0100) Subject: move fix_maintainer support routines to textutils X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=10fc3b5f4ce1f302ec86e8a2c1584fc9f7bb5f04;p=dak.git move fix_maintainer support routines to textutils Signed-off-by: Mark Hymers --- diff --git a/dak/make_maintainers.py b/dak/make_maintainers.py index 4e2fe244..b74a1144 100755 --- a/dak/make_maintainers.py +++ b/dak/make_maintainers.py @@ -35,6 +35,7 @@ import sys import apt_pkg from daklib import database from daklib import utils +from daklib import textutils from daklib.regexes import re_comments ################################################################################ @@ -43,7 +44,7 @@ Cnf = None #: Configuration, apt_pkg.Configuration projectB = None #: database connection, pgobject maintainer_from_source_cache = {} #: caches the maintainer name per source_id packages = {} #: packages data to write out -fixed_maintainer_cache = {} #: caches fixed ( L{daklib.utils.fix_maintainer} ) maintainer data +fixed_maintainer_cache = {} #: caches fixed ( L{daklib.textutils.fix_maintainer} ) maintainer data ################################################################################ @@ -62,7 +63,7 @@ def fix_maintainer (maintainer): Fixup maintainer entry, cache the result. @type maintainer: string - @param maintainer: A maintainer entry as passed to L{daklib.utils.fix_maintainer} + @param maintainer: A maintainer entry as passed to L{daklib.textutils.fix_maintainer} @rtype: tuple @returns: fixed maintainer tuple @@ -70,7 +71,7 @@ def fix_maintainer (maintainer): global fixed_maintainer_cache if not fixed_maintainer_cache.has_key(maintainer): - fixed_maintainer_cache[maintainer] = utils.fix_maintainer(maintainer)[0] + fixed_maintainer_cache[maintainer] = textutils.fix_maintainer(maintainer)[0] return fixed_maintainer_cache[maintainer] diff --git a/dak/queue_report.py b/dak/queue_report.py index a4bcea0f..a9f027ce 100755 --- a/dak/queue_report.py +++ b/dak/queue_report.py @@ -40,6 +40,7 @@ import cgi from daklib import queue from daklib import database from daklib import utils +from daklib.textutils import fix_maintainer from daklib.dak_exceptions import * Cnf = None @@ -365,7 +366,7 @@ def process_changes_files(changes_files, type, log): try: (maintainer["maintainer822"], maintainer["maintainer2047"], maintainer["maintainername"], maintainer["maintaineremail"]) = \ - utils.fix_maintainer (j["maintainer"]) + fix_maintainer (j["maintainer"]) except ParseMaintError, msg: print "Problems while parsing maintainer address\n" maintainer["maintainername"] = "Unknown" @@ -375,7 +376,7 @@ def process_changes_files(changes_files, type, log): try: (changeby["changedby822"], changeby["changedby2047"], changeby["changedbyname"], changeby["changedbyemail"]) = \ - utils.fix_maintainer (j["changed-by"]) + fix_maintainer (j["changed-by"]) except ParseMaintError, msg: (changeby["changedby822"], changeby["changedby2047"], changeby["changedbyname"], changeby["changedbyemail"]) = \ diff --git a/dak/test/006/test.py b/dak/test/006/test.py index b7594bcf..51a33170 100755 --- a/dak/test/006/test.py +++ b/dak/test/006/test.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Test utils.fix_maintainer() +# Test textutils.fix_maintainer() # Copyright (C) 2004, 2006 James Troup # This program is free software; you can redistribute it and/or modify @@ -24,7 +24,7 @@ import os, sys sys.path.append(os.path.abspath('../../')) -import utils +import textutils ################################################################################ @@ -35,7 +35,7 @@ def fail(message): ################################################################################ def check_valid(s, xa, xb, xc, xd): - (a, b, c, d) = utils.fix_maintainer(s) + (a, b, c, d) = textutils.fix_maintainer(s) if a != xa: fail("rfc822_maint: %s (returned) != %s (expected [From: '%s']" % (a, xa, s)) if b != xb: @@ -47,7 +47,7 @@ def check_valid(s, xa, xb, xc, xd): def check_invalid(s): try: - utils.fix_maintainer(s) + textutils.fix_maintainer(s) fail("%s was parsed successfully but is expected to be invalid." % (s)) except utils.ParseMaintError, unused: pass diff --git a/daklib/textutils.py b/daklib/textutils.py new file mode 100755 index 00000000..97d09c41 --- /dev/null +++ b/daklib/textutils.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +# vim:set et ts=4 sw=4: + +"""Text utility functions + +@contact: Debian FTP Master +@copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import codecs +import email.Header + +from dak_exceptions import * +from regexes import re_parse_maintainer + +################################################################################ + +def force_to_utf8(s): + """ + Forces a string to UTF-8. If the string isn't already UTF-8, + it's assumed to be ISO-8859-1. + """ + try: + unicode(s, 'utf-8') + return s + except UnicodeError: + latin1_s = unicode(s,'iso8859-1') + return latin1_s.encode('utf-8') + +def rfc2047_encode(s): + """ + Encodes a (header) string per RFC2047 if necessary. If the + string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1. + """ + try: + codecs.lookup('ascii')[1](s) + return s + except UnicodeError: + pass + try: + codecs.lookup('utf-8')[1](s) + h = email.Header.Header(s, 'utf-8', 998) + return str(h) + except UnicodeError: + h = email.Header.Header(s, 'iso-8859-1', 998) + return str(h) + +################################################################################ + +# 'The standard sucks, but my tool is supposed to interoperate +# with it. I know - I'll fix the suckage and make things +# incompatible!' + +def fix_maintainer(maintainer): + """ + Parses a Maintainer or Changed-By field and returns: + 1. an RFC822 compatible version, + 2. an RFC2047 compatible version, + 3. the name + 4. the email + + The name is forced to UTF-8 for both 1. and 3.. If the name field + contains '.' or ',' (as allowed by Debian policy), 1. and 2. are + switched to 'email (name)' format. + + """ + maintainer = maintainer.strip() + if not maintainer: + return ('', '', '', '') + + if maintainer.find("<") == -1: + email = maintainer + name = "" + elif (maintainer[0] == "<" and maintainer[-1:] == ">"): + email = maintainer[1:-1] + name = "" + else: + m = re_parse_maintainer.match(maintainer) + if not m: + raise ParseMaintError, "Doesn't parse as a valid Maintainer field." + name = m.group(1) + email = m.group(2) + + # Get an RFC2047 compliant version of the name + rfc2047_name = rfc2047_encode(name) + + # Force the name to be UTF-8 + name = force_to_utf8(name) + + if name.find(',') != -1 or name.find('.') != -1: + rfc822_maint = "%s (%s)" % (email, name) + rfc2047_maint = "%s (%s)" % (email, rfc2047_name) + else: + rfc822_maint = "%s <%s>" % (name, email) + rfc2047_maint = "%s <%s>" % (rfc2047_name, email) + + if email.find("@") == -1 and email.find("buildd_") != 0: + raise ParseMaintError, "No @ found in email address part." + + return (rfc822_maint, rfc2047_maint, name, email) + +################################################################################ diff --git a/daklib/utils.py b/daklib/utils.py index 951270b5..548e59b4 100755 --- a/daklib/utils.py +++ b/daklib/utils.py @@ -41,6 +41,7 @@ import re import string import email as modemail from dak_exceptions import * +from textutils import fix_maintainer from regexes import re_html_escaping, html_escaping, re_single_line_field, \ re_multi_line_field, re_srchasver, re_verwithext, \ re_parse_maintainer, re_taint_free, re_gpg_uid, re_re_mark, \ @@ -512,92 +513,6 @@ def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"): ################################################################################ -def force_to_utf8(s): - """ - Forces a string to UTF-8. If the string isn't already UTF-8, - it's assumed to be ISO-8859-1. - """ - try: - unicode(s, 'utf-8') - return s - except UnicodeError: - latin1_s = unicode(s,'iso8859-1') - return latin1_s.encode('utf-8') - -def rfc2047_encode(s): - """ - Encodes a (header) string per RFC2047 if necessary. If the - string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1. - """ - try: - codecs.lookup('ascii')[1](s) - return s - except UnicodeError: - pass - try: - codecs.lookup('utf-8')[1](s) - h = email.Header.Header(s, 'utf-8', 998) - return str(h) - except UnicodeError: - h = email.Header.Header(s, 'iso-8859-1', 998) - return str(h) - -################################################################################ - -# 'The standard sucks, but my tool is supposed to interoperate -# with it. I know - I'll fix the suckage and make things -# incompatible!' - -def fix_maintainer (maintainer): - """ - Parses a Maintainer or Changed-By field and returns: - 1. an RFC822 compatible version, - 2. an RFC2047 compatible version, - 3. the name - 4. the email - - The name is forced to UTF-8 for both 1. and 3.. If the name field - contains '.' or ',' (as allowed by Debian policy), 1. and 2. are - switched to 'email (name)' format. - - """ - maintainer = maintainer.strip() - if not maintainer: - return ('', '', '', '') - - if maintainer.find("<") == -1: - email = maintainer - name = "" - elif (maintainer[0] == "<" and maintainer[-1:] == ">"): - email = maintainer[1:-1] - name = "" - else: - m = re_parse_maintainer.match(maintainer) - if not m: - raise ParseMaintError, "Doesn't parse as a valid Maintainer field." - name = m.group(1) - email = m.group(2) - - # Get an RFC2047 compliant version of the name - rfc2047_name = rfc2047_encode(name) - - # Force the name to be UTF-8 - name = force_to_utf8(name) - - if name.find(',') != -1 or name.find('.') != -1: - rfc822_maint = "%s (%s)" % (email, name) - rfc2047_maint = "%s (%s)" % (email, rfc2047_name) - else: - rfc822_maint = "%s <%s>" % (name, email) - rfc2047_maint = "%s <%s>" % (rfc2047_name, email) - - if email.find("@") == -1 and email.find("buildd_") != 0: - raise ParseMaintError, "No @ found in email address part." - - return (rfc822_maint, rfc2047_maint, name, email) - -################################################################################ - def send_mail (message, filename=""): """sendmail wrapper, takes _either_ a message string or a file as arguments"""