daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import datetime
  27 import email.Header
  28 import os
  29 import pwd
  30 import grp
  31 import select
  32 import socket
  33 import shutil
  34 import sys
  35 import tempfile
  36 import traceback
  37 import stat
  38 import apt_inst
  39 import apt_pkg
  40 import time
  41 import re
  42 import email as modemail
  43 import subprocess
  44 import ldap
  45
  46 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  47                    get_override_type, Keyring, session_wrapper, \
  48                    get_active_keyring_paths, get_primary_keyring_path, \
  49                    get_suite_architectures, get_or_set_metadatakey, DBSource, \
  50                    Component, Override, OverrideType
  51 from sqlalchemy import desc
  52 from dak_exceptions import *
  53 from gpg import SignedFile
  54 from textutils import fix_maintainer
  55 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  56                     re_multi_line_field, re_srchasver, re_taint_free, \
  57                     re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
  58                     re_is_orig_source, re_build_dep_arch
  59
  60 from formats import parse_format, validate_changes_format
  61 from srcformats import get_format_from_string
  62 from collections import defaultdict
  63
  64 ################################################################################
  65
default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties
default_apt_config = "/etc/dak/apt.conf" #: default apt config, not normally used

alias_cache = None        #: Cache for email alias checks
key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids

# Checksum algorithms accepted in Checksums-* fields.  Each entry is a tuple
# (hashname, function, earliest_changes_version): the name used in the field
# ("checksums-<hashname>"), the apt_pkg function computing the digest, and
# the first .changes format version expected to carry that field.
known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  75
  76 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  77 # code in lenny's Python. This also affects commands.getoutput and
  78 # commands.getstatus.
  79 def dak_getstatusoutput(cmd):
  80     pipe = subprocess.Popen(cmd, shell=True, universal_newlines=True,
  81         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  82
  83     output = pipe.stdout.read()
  84
  85     pipe.wait()
  86
  87     if output[-1:] == '\n':
  88         output = output[:-1]
  89
  90     ret = pipe.wait()
  91     if ret is None:
  92         ret = 0
  93
  94     return ret, output
# Install the replacement so every caller going through
# commands.getstatusoutput() gets dak_getstatusoutput() instead.
commands.getstatusoutput = dak_getstatusoutput
  96
  97 ################################################################################
  98
  99 def html_escape(s):
 100     """ Escape html chars """
 101     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 102
 103 ################################################################################
 104
 105 def open_file(filename, mode='r'):
 106     """
 107     Open C{file}, return fileobject.
 108
 109     @type filename: string
 110     @param filename: path/filename to open
 111
 112     @type mode: string
 113     @param mode: open mode
 114
 115     @rtype: fileobject
 116     @return: open fileobject
 117
 118     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 119
 120     """
 121     try:
 122         f = open(filename, mode)
 123     except IOError:
 124         raise CantOpenError(filename)
 125     return f
 126
 127 ################################################################################
 128
 129 def our_raw_input(prompt=""):
 130     if prompt:
 131         while 1:
 132             try:
 133                 sys.stdout.write(prompt)
 134                 break
 135             except IOError:
 136                 pass
 137     sys.stdout.flush()
 138     try:
 139         ret = raw_input()
 140         return ret
 141     except EOFError:
 142         sys.stderr.write("\nUser interrupt (^D).\n")
 143         raise SystemExit
 144
 145 ################################################################################
 146
 147 def extract_component_from_section(section, session=None):
 148     component = ""
 149
 150     if section.find('/') != -1:
 151         component = section.split('/')[0]
 152
 153     # Expand default component
 154     if component == "":
 155         comp = get_component(section, session)
 156         if comp is None:
 157             component = "main"
 158         else:
 159             component = comp.component_name
 160
 161     return (section, component)
 162
 163 ################################################################################
 164
 165 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 166     require_signature = True
 167     if keyrings == None:
 168         keyrings = []
 169         require_signature = False
 170
 171     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 172     contents = signed_file.contents
 173
 174     error = ""
 175     changes = {}
 176
 177     # Split the lines in the input, keeping the linebreaks.
 178     lines = contents.splitlines(True)
 179
 180     if len(lines) == 0:
 181         raise ParseChangesError("[Empty changes file]")
 182
 183     # Reindex by line number so we can easily verify the format of
 184     # .dsc files...
 185     index = 0
 186     indexed_lines = {}
 187     for line in lines:
 188         index += 1
 189         indexed_lines[index] = line[:-1]
 190
 191     num_of_lines = len(indexed_lines.keys())
 192     index = 0
 193     first = -1
 194     while index < num_of_lines:
 195         index += 1
 196         line = indexed_lines[index]
 197         if line == "" and signing_rules == 1:
 198             if index != num_of_lines:
 199                 raise InvalidDscError(index)
 200             break
 201         slf = re_single_line_field.match(line)
 202         if slf:
 203             field = slf.groups()[0].lower()
 204             changes[field] = slf.groups()[1]
 205             first = 1
 206             continue
 207         if line == " .":
 208             changes[field] += '\n'
 209             continue
 210         mlf = re_multi_line_field.match(line)
 211         if mlf:
 212             if first == -1:
 213                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 214             if first == 1 and changes[field] != "":
 215                 changes[field] += '\n'
 216             first = 0
 217             changes[field] += mlf.groups()[0] + '\n'
 218             continue
 219         error += line
 220
 221     changes["filecontents"] = armored_contents
 222
 223     if changes.has_key("source"):
 224         # Strip the source version in brackets from the source field,
 225         # put it in the "source-version" field instead.
 226         srcver = re_srchasver.search(changes["source"])
 227         if srcver:
 228             changes["source"] = srcver.group(1)
 229             changes["source-version"] = srcver.group(2)
 230
 231     if error:
 232         raise ParseChangesError(error)
 233
 234     return changes
 235
 236 ################################################################################
 237
 238 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 239     """
 240     Parses a changes file and returns a dictionary where each field is a
 241     key.  The mandatory first argument is the filename of the .changes
 242     file.
 243
 244     signing_rules is an optional argument:
 245
 246       - If signing_rules == -1, no signature is required.
 247       - If signing_rules == 0 (the default), a signature is required.
 248       - If signing_rules == 1, it turns on the same strict format checking
 249         as dpkg-source.
 250
 251     The rules for (signing_rules == 1)-mode are:
 252
 253       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 254         followed by any PGP header data and must end with a blank line.
 255
 256       - The data section must end with a blank line and must be followed by
 257         "-----BEGIN PGP SIGNATURE-----".
 258     """
 259
 260     changes_in = open_file(filename)
 261     content = changes_in.read()
 262     changes_in.close()
 263     try:
 264         unicode(content, 'utf-8')
 265     except UnicodeError:
 266         raise ChangesUnicodeError("Changes file not proper utf-8")
 267     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 268
 269
 270     if not dsc_file:
 271         # Finally ensure that everything needed for .changes is there
 272         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 273                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 274
 275         missingfields=[]
 276         for keyword in must_keywords:
 277             if not changes.has_key(keyword.lower()):
 278                 missingfields.append(keyword)
 279
 280                 if len(missingfields):
 281                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 282
 283     return changes
 284
 285 ################################################################################
 286
 287 def hash_key(hashname):
 288     return '%ssum' % hashname
 289
 290 ################################################################################
 291
 292 def create_hash(where, files, hashname, hashfunc):
 293     """
 294     create_hash extends the passed files dict with the given hash by
 295     iterating over all files on disk and passing them to the hashing
 296     function given.
 297     """
 298
 299     rejmsg = []
 300     for f in files.keys():
 301         try:
 302             file_handle = open_file(f)
 303         except CantOpenError:
 304             rejmsg.append("Could not open file %s for checksumming" % (f))
 305             continue
 306
 307         files[f][hash_key(hashname)] = hashfunc(file_handle)
 308
 309         file_handle.close()
 310     return rejmsg
 311
 312 ################################################################################
 313
 314 def check_hash(where, files, hashname, hashfunc):
 315     """
 316     check_hash checks the given hash in the files dict against the actual
 317     files on disk.  The hash values need to be present consistently in
 318     all file entries.  It does not modify its input in any way.
 319     """
 320
 321     rejmsg = []
 322     for f in files.keys():
 323         file_handle = None
 324         try:
 325             try:
 326                 file_handle = open_file(f)
 327
 328                 # Check for the hash entry, to not trigger a KeyError.
 329                 if not files[f].has_key(hash_key(hashname)):
 330                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 331                         where))
 332                     continue
 333
 334                 # Actually check the hash for correctness.
 335                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 336                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 337                         where))
 338             except CantOpenError:
 339                 # TODO: This happens when the file is in the pool.
 340                 # warn("Cannot open file %s" % f)
 341                 continue
 342         finally:
 343             if file_handle:
 344                 file_handle.close()
 345     return rejmsg
 346
 347 ################################################################################
 348
 349 def check_size(where, files):
 350     """
 351     check_size checks the file sizes in the passed files dict against the
 352     files on disk.
 353     """
 354
 355     rejmsg = []
 356     for f in files.keys():
 357         try:
 358             entry = os.stat(f)
 359         except OSError as exc:
 360             if exc.errno == 2:
 361                 # TODO: This happens when the file is in the pool.
 362                 continue
 363             raise
 364
 365         actual_size = entry[stat.ST_SIZE]
 366         size = int(files[f]["size"])
 367         if size != actual_size:
 368             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 369                    % (f, actual_size, size, where))
 370     return rejmsg
 371
 372 ################################################################################
 373
 374 def check_dsc_files(dsc_filename, dsc, dsc_files):
 375     """
 376     Verify that the files listed in the Files field of the .dsc are
 377     those expected given the announced Format.
 378
 379     @type dsc_filename: string
 380     @param dsc_filename: path of .dsc file
 381
 382     @type dsc: dict
 383     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 384
 385     @type dsc_files: dict
 386     @param dsc_files: the file list returned by C{build_file_list()}
 387
 388     @rtype: list
 389     @return: all errors detected
 390     """
 391     rejmsg = []
 392
 393     # Ensure .dsc lists proper set of source files according to the format
 394     # announced
 395     has = defaultdict(lambda: 0)
 396
 397     ftype_lookup = (
 398         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 399         (r'diff.gz',                   ('debian_diff',)),
 400         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 401         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 402         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 403         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 404         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 405     )
 406
 407     for f in dsc_files:
 408         m = re_issource.match(f)
 409         if not m:
 410             rejmsg.append("%s: %s in Files field not recognised as source."
 411                           % (dsc_filename, f))
 412             continue
 413
 414         # Populate 'has' dictionary by resolving keys in lookup table
 415         matched = False
 416         for regex, keys in ftype_lookup:
 417             if re.match(regex, m.group(3)):
 418                 matched = True
 419                 for key in keys:
 420                     has[key] += 1
 421                 break
 422
 423         # File does not match anything in lookup table; reject
 424         if not matched:
 425             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 426
 427     # Check for multiple files
 428     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 429         if has[file_type] > 1:
 430             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 431
 432     # Source format specific tests
 433     try:
 434         format = get_format_from_string(dsc['format'])
 435         rejmsg.extend([
 436             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 437         ])
 438
 439     except UnknownFormatError:
 440         # Not an error here for now
 441         pass
 442
 443     return rejmsg
 444
 445 ################################################################################
 446
 447 def check_hash_fields(what, manifest):
 448     """
 449     check_hash_fields ensures that there are no checksum fields in the
 450     given dict that we do not know about.
 451     """
 452
 453     rejmsg = []
 454     hashes = map(lambda x: x[0], known_hashes)
 455     for field in manifest:
 456         if field.startswith("checksums-"):
 457             hashname = field.split("-",1)[1]
 458             if hashname not in hashes:
 459                 rejmsg.append("Unsupported checksum field for %s "\
 460                     "in %s" % (hashname, what))
 461     return rejmsg
 462
 463 ################################################################################
 464
 465 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 466     if format >= version:
 467         # The version should contain the specified hash.
 468         func = check_hash
 469
 470         # Import hashes from the changes
 471         rejmsg = parse_checksums(".changes", files, changes, hashname)
 472         if len(rejmsg) > 0:
 473             return rejmsg
 474     else:
 475         # We need to calculate the hash because it can't possibly
 476         # be in the file.
 477         func = create_hash
 478     return func(".changes", files, hashname, hashfunc)
 479
 480 # We could add the orig which might be in the pool to the files dict to
 481 # access the checksums easily.
 482
 483 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 484     """
 485     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 486     in the dsc is correct, i.e. identical to the changes file and if necessary
 487     the pool.  The latter task is delegated to check_hash.
 488     """
 489
 490     rejmsg = []
 491     if not dsc.has_key('Checksums-%s' % (hashname,)):
 492         return rejmsg
 493     # Import hashes from the dsc
 494     parse_checksums(".dsc", dsc_files, dsc, hashname)
 495     # And check it...
 496     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 497     return rejmsg
 498
 499 ################################################################################
 500
 501 def parse_checksums(where, files, manifest, hashname):
 502     rejmsg = []
 503     field = 'checksums-%s' % hashname
 504     if not field in manifest:
 505         return rejmsg
 506     for line in manifest[field].split('\n'):
 507         if not line:
 508             break
 509         clist = line.strip().split(' ')
 510         if len(clist) == 3:
 511             checksum, size, checkfile = clist
 512         else:
 513             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 514             continue
 515         if not files.has_key(checkfile):
 516         # TODO: check for the file's entry in the original files dict, not
 517         # the one modified by (auto)byhand and other weird stuff
 518         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 519         #        (file, hashname, where))
 520             continue
 521         if not files[checkfile]["size"] == size:
 522             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 523                 "in %s" % (checkfile, hashname, where))
 524             continue
 525         files[checkfile][hash_key(hashname)] = checksum
 526     for f in files.keys():
 527         if not files[f].has_key(hash_key(hashname)):
 528             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 529     return rejmsg
 530
 531 ################################################################################
 532
 533 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 534
 535 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 536     files = {}
 537
 538     # Make sure we have a Files: field to parse...
 539     if not changes.has_key(field):
 540         raise NoFilesFieldError
 541
 542     # Validate .changes Format: field
 543     if not is_a_dsc:
 544         validate_changes_format(parse_format(changes['format']), field)
 545
 546     includes_section = (not is_a_dsc) and field == "files"
 547
 548     # Parse each entry/line:
 549     for i in changes[field].split('\n'):
 550         if not i:
 551             break
 552         s = i.split()
 553         section = priority = ""
 554         try:
 555             if includes_section:
 556                 (md5, size, section, priority, name) = s
 557             else:
 558                 (md5, size, name) = s
 559         except ValueError:
 560             raise ParseChangesError(i)
 561
 562         if section == "":
 563             section = "-"
 564         if priority == "":
 565             priority = "-"
 566
 567         (section, component) = extract_component_from_section(section)
 568
 569         files[name] = dict(size=size, section=section,
 570                            priority=priority, component=component)
 571         files[name][hashname] = md5
 572
 573     return files
 574
 575 ################################################################################
 576
 577 # see http://bugs.debian.org/619131
 578 def build_package_list(dsc, session = None):
 579     if not dsc.has_key("package-list"):
 580         return {}
 581
 582     packages = {}
 583
 584     for line in dsc["package-list"].split("\n"):
 585         if not line:
 586             break
 587
 588         fields = line.split()
 589         name = fields[0]
 590         package_type = fields[1]
 591         (section, component) = extract_component_from_section(fields[2])
 592         priority = fields[3]
 593
 594         # Validate type if we have a session
 595         if session and get_override_type(package_type, session) is None:
 596             # Maybe just warn and ignore? exit(1) might be a bit hard...
 597             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 598
 599         if name not in packages or packages[name]["type"] == "dsc":
 600             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 601
 602     return packages
 603
 604 ################################################################################
 605
 606 def send_mail (message, filename="", whitelists=None):
 607     """sendmail wrapper, takes _either_ a message string or a file as arguments
 608
 609     @type  whitelists: list of (str or None)
 610     @param whitelists: path to whitelists. C{None} or an empty list whitelists
 611                        everything, otherwise an address is whitelisted if it is
 612                        included in any of the lists.
 613                        In addition a global whitelist can be specified in
 614                        Dinstall::MailWhiteList.
 615     """
 616
 617     maildir = Cnf.get('Dir::Mail')
 618     if maildir:
 619         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 620         path = find_next_free(path)
 621         fh = open(path, 'w')
 622         print >>fh, message,
 623         fh.close()
 624
 625     # Check whether we're supposed to be sending mail
 626     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 627         return
 628
 629     # If we've been passed a string dump it into a temporary file
 630     if message:
 631         (fd, filename) = tempfile.mkstemp()
 632         os.write (fd, message)
 633         os.close (fd)
 634
 635     if whitelists is None or None in whitelists:
 636         whitelists = []
 637     if Cnf.get('Dinstall::MailWhiteList', ''):
 638         whitelists.append(Cnf['Dinstall::MailWhiteList'])
 639     if len(whitelists) != 0:
 640         message_in = open_file(filename)
 641         message_raw = modemail.message_from_file(message_in)
 642         message_in.close();
 643
 644         whitelist = [];
 645         for path in whitelists:
 646           with open_file(path, 'r') as whitelist_in:
 647             for line in whitelist_in:
 648                 if not re_whitespace_comment.match(line):
 649                     if re_re_mark.match(line):
 650                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 651                     else:
 652                         whitelist.append(re.compile(re.escape(line.strip())))
 653
 654         # Fields to check.
 655         fields = ["To", "Bcc", "Cc"]
 656         for field in fields:
 657             # Check each field
 658             value = message_raw.get(field, None)
 659             if value != None:
 660                 match = [];
 661                 for item in value.split(","):
 662                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 663                     mail_whitelisted = 0
 664                     for wr in whitelist:
 665                         if wr.match(email):
 666                             mail_whitelisted = 1
 667                             break
 668                     if not mail_whitelisted:
 669                         print "Skipping {0} since it's not whitelisted".format(item)
 670                         continue
 671                     match.append(item)
 672
 673                 # Doesn't have any mail in whitelist so remove the header
 674                 if len(match) == 0:
 675                     del message_raw[field]
 676                 else:
 677                     message_raw.replace_header(field, ', '.join(match))
 678
 679         # Change message fields in order if we don't have a To header
 680         if not message_raw.has_key("To"):
 681             fields.reverse()
 682             for field in fields:
 683                 if message_raw.has_key(field):
 684                     message_raw[fields[-1]] = message_raw[field]
 685                     del message_raw[field]
 686                     break
 687             else:
 688                 # Clean up any temporary files
 689                 # and return, as we removed all recipients.
 690                 if message:
 691                     os.unlink (filename);
 692                 return;
 693
 694         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 695         os.write (fd, message_raw.as_string(True));
 696         os.close (fd);
 697
 698     # Invoke sendmail
 699     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 700     if (result != 0):
 701         raise SendmailFailedError(output)
 702
 703     # Clean up any temporary files
 704     if message:
 705         os.unlink (filename)
 706
 707 ################################################################################
 708
 709 def poolify (source, component=None):
 710     if source[:3] == "lib":
 711         return source[:4] + '/' + source + '/'
 712     else:
 713         return source[:1] + '/' + source + '/'
 714
 715 ################################################################################
 716
 717 def move (src, dest, overwrite = 0, perms = 0o664):
 718     if os.path.exists(dest) and os.path.isdir(dest):
 719         dest_dir = dest
 720     else:
 721         dest_dir = os.path.dirname(dest)
 722     if not os.path.exists(dest_dir):
 723         umask = os.umask(00000)
 724         os.makedirs(dest_dir, 0o2775)
 725         os.umask(umask)
 726     #print "Moving %s to %s..." % (src, dest)
 727     if os.path.exists(dest) and os.path.isdir(dest):
 728         dest += '/' + os.path.basename(src)
 729     # Don't overwrite unless forced to
 730     if os.path.exists(dest):
 731         if not overwrite:
 732             fubar("Can't move %s to %s - file already exists." % (src, dest))
 733         else:
 734             if not os.access(dest, os.W_OK):
 735                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 736     shutil.copy2(src, dest)
 737     os.chmod(dest, perms)
 738     os.unlink(src)
 739
 740 def copy (src, dest, overwrite = 0, perms = 0o664):
 741     if os.path.exists(dest) and os.path.isdir(dest):
 742         dest_dir = dest
 743     else:
 744         dest_dir = os.path.dirname(dest)
 745     if not os.path.exists(dest_dir):
 746         umask = os.umask(00000)
 747         os.makedirs(dest_dir, 0o2775)
 748         os.umask(umask)
 749     #print "Copying %s to %s..." % (src, dest)
 750     if os.path.exists(dest) and os.path.isdir(dest):
 751         dest += '/' + os.path.basename(src)
 752     # Don't overwrite unless forced to
 753     if os.path.exists(dest):
 754         if not overwrite:
 755             raise FileExistsError
 756         else:
 757             if not os.access(dest, os.W_OK):
 758                 raise CantOverwriteError
 759     shutil.copy2(src, dest)
 760     os.chmod(dest, perms)
 761
 762 ################################################################################
 763
 764 def where_am_i ():
 765     res = socket.getfqdn()
 766     database_hostname = Cnf.get("Config::" + res + "::DatabaseHostname")
 767     if database_hostname:
 768         return database_hostname
 769     else:
 770         return res
 771
 772 def which_conf_file ():
 773     if os.getenv('DAK_CONFIG'):
 774         return os.getenv('DAK_CONFIG')
 775
 776     res = socket.getfqdn()
 777     # In case we allow local config files per user, try if one exists
 778     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 779         homedir = os.getenv("HOME")
 780         confpath = os.path.join(homedir, "/etc/dak.conf")
 781         if os.path.exists(confpath):
 782             apt_pkg.ReadConfigFileISC(Cnf,confpath)
 783
 784     # We are still in here, so there is no local config file or we do
 785     # not allow local files. Do the normal stuff.
 786     if Cnf.get("Config::" + res + "::DakConfig"):
 787         return Cnf["Config::" + res + "::DakConfig"]
 788
 789     return default_config
 790
 791 def which_apt_conf_file ():
 792     res = socket.getfqdn()
 793     # In case we allow local config files per user, try if one exists
 794     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 795         homedir = os.getenv("HOME")
 796         confpath = os.path.join(homedir, "/etc/dak.conf")
 797         if os.path.exists(confpath):
 798             apt_pkg.ReadConfigFileISC(Cnf,default_config)
 799
 800     if Cnf.get("Config::" + res + "::AptConfig"):
 801         return Cnf["Config::" + res + "::AptConfig"]
 802     else:
 803         return default_apt_config
 804
 805 def which_alias_file():
 806     hostname = socket.getfqdn()
 807     aliasfn = '/var/lib/misc/'+hostname+'/forward-alias'
 808     if os.path.exists(aliasfn):
 809         return aliasfn
 810     else:
 811         return None
 812
 813 ################################################################################
 814
 815 def TemplateSubst(subst_map, filename):
 816     """ Perform a substition of template """
 817     templatefile = open_file(filename)
 818     template = templatefile.read()
 819     for k, v in subst_map.iteritems():
 820         template = template.replace(k, str(v))
 821     templatefile.close()
 822     return template
 823
 824 ################################################################################
 825
 826 def fubar(msg, exit_code=1):
 827     sys.stderr.write("E: %s\n" % (msg))
 828     sys.exit(exit_code)
 829
 830 def warn(msg):
 831     sys.stderr.write("W: %s\n" % (msg))
 832
 833 ################################################################################
 834
 835 # Returns the user name with a laughable attempt at rfc822 conformancy
 836 # (read: removing stray periods).
 837 def whoami ():
 838     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 839
 840 def getusername ():
 841     return pwd.getpwuid(os.getuid())[0]
 842
 843 ################################################################################
 844
 845 def size_type (c):
 846     t  = " B"
 847     if c > 10240:
 848         c = c / 1024
 849         t = " KB"
 850     if c > 10240:
 851         c = c / 1024
 852         t = " MB"
 853     return ("%d%s" % (c, t))
 854
 855 ################################################################################
 856
 857 def cc_fix_changes (changes):
 858     o = changes.get("architecture", "")
 859     if o:
 860         del changes["architecture"]
 861     changes["architecture"] = {}
 862     for j in o.split():
 863         changes["architecture"][j] = 1
 864
 865 def changes_compare (a, b):
 866     """ Sort by source name, source version, 'have source', and then by filename """
 867     try:
 868         a_changes = parse_changes(a)
 869     except:
 870         return -1
 871
 872     try:
 873         b_changes = parse_changes(b)
 874     except:
 875         return 1
 876
 877     cc_fix_changes (a_changes)
 878     cc_fix_changes (b_changes)
 879
 880     # Sort by source name
 881     a_source = a_changes.get("source")
 882     b_source = b_changes.get("source")
 883     q = cmp (a_source, b_source)
 884     if q:
 885         return q
 886
 887     # Sort by source version
 888     a_version = a_changes.get("version", "0")
 889     b_version = b_changes.get("version", "0")
 890     q = apt_pkg.version_compare(a_version, b_version)
 891     if q:
 892         return q
 893
 894     # Sort by 'have source'
 895     a_has_source = a_changes["architecture"].get("source")
 896     b_has_source = b_changes["architecture"].get("source")
 897     if a_has_source and not b_has_source:
 898         return -1
 899     elif b_has_source and not a_has_source:
 900         return 1
 901
 902     # Fall back to sort by filename
 903     return cmp(a, b)
 904
 905 ################################################################################
 906
 907 def find_next_free (dest, too_many=100):
 908     extra = 0
 909     orig_dest = dest
 910     while os.path.exists(dest) and extra < too_many:
 911         dest = orig_dest + '.' + repr(extra)
 912         extra += 1
 913     if extra >= too_many:
 914         raise NoFreeFilenameError
 915     return dest
 916
 917 ################################################################################
 918
 919 def result_join (original, sep = '\t'):
 920     resultlist = []
 921     for i in xrange(len(original)):
 922         if original[i] == None:
 923             resultlist.append("")
 924         else:
 925             resultlist.append(original[i])
 926     return sep.join(resultlist)
 927
 928 ################################################################################
 929
 930 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 931     out = ""
 932     for line in str.split('\n'):
 933         line = line.strip()
 934         if line or include_blank_lines:
 935             out += "%s%s\n" % (prefix, line)
 936     # Strip trailing new line
 937     if out:
 938         out = out[:-1]
 939     return out
 940
 941 ################################################################################
 942
 943 def validate_changes_file_arg(filename, require_changes=1):
 944     """
 945     'filename' is either a .changes or .dak file.  If 'filename' is a
 946     .dak file, it's changed to be the corresponding .changes file.  The
 947     function then checks if the .changes file a) exists and b) is
 948     readable and returns the .changes filename if so.  If there's a
 949     problem, the next action depends on the option 'require_changes'
 950     argument:
 951
 952       - If 'require_changes' == -1, errors are ignored and the .changes
 953         filename is returned.
 954       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 955       - If 'require_changes' == 1, a fatal error is raised.
 956
 957     """
 958     error = None
 959
 960     orig_filename = filename
 961     if filename.endswith(".dak"):
 962         filename = filename[:-4]+".changes"
 963
 964     if not filename.endswith(".changes"):
 965         error = "invalid file type; not a changes file"
 966     else:
 967         if not os.access(filename,os.R_OK):
 968             if os.path.exists(filename):
 969                 error = "permission denied"
 970             else:
 971                 error = "file not found"
 972
 973     if error:
 974         if require_changes == 1:
 975             fubar("%s: %s." % (orig_filename, error))
 976         elif require_changes == 0:
 977             warn("Skipping %s - %s" % (orig_filename, error))
 978             return None
 979         else: # We only care about the .dak file
 980             return filename
 981     else:
 982         return filename
 983
 984 ################################################################################
 985
 986 def real_arch(arch):
 987     return (arch != "source" and arch != "all")
 988
 989 ################################################################################
 990
 991 def join_with_commas_and(list):
 992     if len(list) == 0: return "nothing"
 993     if len(list) == 1: return list[0]
 994     return ", ".join(list[:-1]) + " and " + list[-1]
 995
 996 ################################################################################
 997
 998 def pp_deps (deps):
 999     pp_deps = []
1000     for atom in deps:
1001         (pkg, version, constraint) = atom
1002         if constraint:
1003             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
1004         else:
1005             pp_dep = pkg
1006         pp_deps.append(pp_dep)
1007     return " |".join(pp_deps)
1008
1009 ################################################################################
1010
def get_conf():
    """Return the module-wide apt_pkg configuration object (Cnf)."""
    configuration = Cnf
    return configuration
1013
1014 ################################################################################
1015
def parse_args(Options):
    """
    Handle -a, -c and -s arguments; returns them as SQL constraints

    Unknown names produce a warning.  If an option was given but none of
    its names is valid, the program is terminated via fubar().

    @type Options: mapping
    @param Options: must provide the keys "Suite", "Component" and
                    "Architecture"; each value is an argument string as
                    accepted by split_args (or empty)

    @rtype: tuple
    @return: (con_suites, con_architectures, con_components, check_source)
             where the first three are SQL fragments ("AND ... IN (...)" or
             the empty string) and check_source is 1 if "source" was
             requested or no architecture was given, else 0
    """
    # XXX: This should go away and everything which calls it be converted
    #      to use SQLA properly.  For now, we'll just fix it not to use
    #      the old Pg interface though
    session = DBConn().session()
    try:
        # Process suite
        if Options["Suite"]:
            suite_ids_list = []
            for suitename in split_args(Options["Suite"]):
                suite = get_suite(suitename, session=session)
                if not suite or suite.suite_id is None:
                    warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
                else:
                    suite_ids_list.append(suite.suite_id)
            if suite_ids_list:
                con_suites = "AND su.id IN (%s)" % ", ".join([ str(i) for i in suite_ids_list ])
            else:
                fubar("No valid suite given.")
        else:
            con_suites = ""

        # Process component
        if Options["Component"]:
            component_ids_list = []
            for componentname in split_args(Options["Component"]):
                component = get_component(componentname, session=session)
                if component is None:
                    warn("component '%s' not recognised." % (componentname))
                else:
                    component_ids_list.append(component.component_id)
            if component_ids_list:
                con_components = "AND c.id IN (%s)" % ", ".join([ str(i) for i in component_ids_list ])
            else:
                fubar("No valid component given.")
        else:
            con_components = ""

        # Process architecture
        con_architectures = ""
        check_source = 0
        if Options["Architecture"]:
            arch_ids_list = []
            for archname in split_args(Options["Architecture"]):
                if archname == "source":
                    check_source = 1
                else:
                    arch = get_architecture(archname, session=session)
                    if arch is None:
                        warn("architecture '%s' not recognised." % (archname))
                    else:
                        arch_ids_list.append(arch.arch_id)
            if arch_ids_list:
                con_architectures = "AND a.id IN (%s)" % ", ".join([ str(i) for i in arch_ids_list ])
            else:
                if not check_source:
                    fubar("No valid architecture given.")
        else:
            check_source = 1
    finally:
        # Only plain ids were copied out of the session's objects, so it
        # can be released on every path (including the fubar() exits).
        session.close()

    return (con_suites, con_architectures, con_components, check_source)
1077
1078 ################################################################################
1079
def arch_compare_sw (a, b):
    """
    cmp-style function for sorting lists of architectures.

    'source' sorts before every other architecture; everything else is
    ordered by the normal comparison.
    """
    a_is_source = (a == "source")
    b_is_source = (b == "source")
    if a_is_source or b_is_source:
        # 0 if both are 'source', -1 if only a is, 1 if only b is
        return int(b_is_source) - int(a_is_source)

    return cmp (a, b)
1095
1096 ################################################################################
1097
def split_args (s, dwim=1):
    """
    Split a command line argument string into a list.

    A string without commas is split on whitespace; a string containing a
    comma is split on commas only.  With dwim set, a trailing comma is a
    fatal error, since it usually means someone did 'dak ls -a i386, m68k
    foo' and the following word would silently be taken as a separate
    argument.
    """
    if "," not in s:
        return s.split()

    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1113
1114 ################################################################################
1115
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    Runs 'cmd' through '/bin/sh -c' in a forked child and collects what it
    writes to stdout/stderr and to the status pipe.

    @type cmd: str
    @param cmd: shell command line to run

    @type status_read: int
    @param status_read: read end of the status pipe; read by the parent

    @type status_write: int
    @param status_write: write end of the status pipe; the only descriptor
                         above 2 that is left open in the child

    @rtype: tuple
    @return: (output, status, exit_status): the combined stdout and stderr
             data, the data read from status_read, and the status value
             returned by os.waitpid()
    """

    cmd = ['/bin/sh', '-c', cmd]
    # Pipes for the child's stdin, stdout and stderr respectively
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # Descriptors 0 and 1 are freed first; os.dup() picks the lowest
        # free number, so the two dups re-create them (in that order) on
        # top of the pipes.  The same is then done for descriptor 2.
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        os.close(2)
        os.dup(errin)
        # Close everything else except the status pipe's write end;
        # errors from closing descriptors that are not open are ignored.
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        try:
            os.execvp(cmd[0], cmd)
        finally:
            # Only reached if the exec failed
            os._exit(1)

    # Parent
    os.close(p2cread)
    # Point the descriptor numbers of the write ends at the read ends.
    # dup2() closes the target first, so the parent no longer holds the
    # write ends; from here on c2pwrite and errin are read descriptors.
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                if fd == c2pwrite or fd == errin:
                    # stdout and stderr are merged into one string
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        # Stop on the first round in which no ready descriptor gave data
        if not more_data:
            pid, exit_status = os.waitpid(pid, 0)
            # Best-effort cleanup: the first close() that fails skips the
            # remaining ones.
            try:
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                pass
            break

    return output, status, exit_status
1180
1181 ################################################################################
1182
def process_gpgv_output(status):
    """
    Parse the status-fd output of gpgv.

    Every non-empty line must look like "[GNUPG:] KEYWORD [args ...]".
    A keyword may only occur once, except for NODATA, SIGEXPIRED and
    KEYEXPIRED, where the last occurrence wins.

    @type status: str
    @param status: the raw status-fd output

    @rtype: tuple
    @return: (keywords, internal_error) where keywords maps each keyword
             to the list of its arguments and internal_error is the
             concatenation of all problems found (empty if none)
    """
    repeatable = [ "NODATA", "SIGEXPIRED", "KEYEXPIRED" ]
    keywords = {}
    problems = []
    for line in status.split('\n'):
        line = line.strip()
        if line == "":
            continue
        split = line.split()
        if len(split) < 2:
            problems.append("gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line))
            continue
        (gnupg, keyword) = split[:2]
        if gnupg != "[GNUPG:]":
            problems.append("gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg))
            continue
        if keyword in keywords and keyword not in repeatable:
            problems.append("found duplicate status token ('%s').\n" % (keyword))
            continue
        keywords[keyword] = split[2:]

    return (keywords, "".join(problems))
1207
1208 ################################################################################
1209
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Fetch the key that signed 'filename' from 'keyserver' and add it to
    'keyring'.

    @type filename: str
    @param filename: the signed file

    @type keyserver: str
    @param keyserver: defaults to the Dinstall::KeyServer config value

    @type keyring: str
    @param keyring: defaults to the primary keyring path

    @rtype: str
    @return: the empty string on success, otherwise an error message
    """

    # Defaults for keyserver and keyring
    keyserver = keyserver or Cnf["Dinstall::KeyServer"]
    keyring = keyring or get_primary_keyring_path()

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Run gpgv against an empty keyring, just to learn which key is missing
    status_read, status_write = os.pipe()
    gpgv_cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    (_, status, _) = gpgv_get_status_output(gpgv_cmd, status_read, status_write)

    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"
    fingerprint = keywords["NO_PUBKEY"][0]

    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    recv_cmd = "gpg --no-default-keyring --secret-keyring=%s --no-options" \
               % (Cnf["Dinstall::SigningKeyring"])
    recv_cmd += " --keyring %s --keyserver %s --recv-key %s" \
                % (keyring, keyserver, fingerprint)
    (result, output) = commands.getstatusoutput(recv_cmd)
    if result == 0:
        return ""

    return "'%s' failed with exit code %s" % (recv_cmd, result)
1253
1254 ################################################################################
1255
def gpg_keyring_args(keyrings=None):
    """
    Build the "--keyring X --keyring Y ..." part of a gpg command line.

    @type keyrings: list of str
    @param keyrings: keyring paths; defaults to the active keyring paths

    @rtype: str
    @return: one "--keyring <path>" option per keyring, space separated
    """
    if not keyrings:
        keyrings = get_active_keyring_paths()

    options = []
    for path in keyrings:
        options.append("--keyring %s" % path)
    return " ".join(options)
1261
1262 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type sig_filename: str
    @param sig_filename: the file whose signature should be checked

    @type data_filename: str
    @param data_filename: optional; the file a detached signature applies to

    @type keyrings: list
    @param keyrings: optional *list* of keyrings to use; defaults to the
                     names of all active keyrings in the database

    @type autofetch: bool or None
    @param autofetch: whether to try to fetch the signing key first.  If
                      None, the Dinstall::KeyAutoFetch config value is used.

    @param session: database session used to look up the active keyrings
                    (presumably supplied by session_wrapper when omitted;
                    confirm against dbconn)

    @rtype: tuple
    @return: (fingerprint, []) if the signature is valid, otherwise
             (None, rejects) where rejects is a list of error messages
    """

    rejects = []

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        # list.append() takes a single argument; the former second ""
        # argument (a prefix for an old reject() callback) raised TypeError.
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # gpgv may report the token without a key id
        key = args[0] if args else "unknown"
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        key = args[0] if args else "unknown"
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate=""
        if len(args) >= 1:
            timestamp = args[0]
            # A value without "T" is taken as seconds since the epoch,
            # anything else is reported verbatim
            if timestamp.count("T") == 0:
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    fingerprint = None
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
                          SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
                          NODATA="",NOTATION_DATA="",NOTATION_NAME="",KEYEXPIRED="",POLICY_URL="")

    for keyword in keywords.keys():
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        return (fingerprint, [])
1395
1396 ################################################################################
1397
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve email addresses from gpg key uids for a given fingerprint.

    Results are cached per fingerprint in key_uid_email_cache.  Addresses
    ending in '@debian.org' are put at the front of the list.  If the gpg
    call fails, an empty list is returned (and cached).

    @type fingerprint: str
    @param fingerprint: fingerprint of the key to look up

    @rtype: list of str
    @return: the email addresses found
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    found = []
    cmd = "gpg --no-default-keyring %s --fingerprint %s" \
                % (gpg_keyring_args(), fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if result == 0:
        for uid_line in output.split('\n'):
            m = re_gpg_uid.match(uid_line)
            if m:
                address = m.group(1)
                if address.endswith('@debian.org'):
                    # prefer @debian.org addresses
                    # TODO: maybe not hardcode the domain
                    found.insert(0, address)
                else:
                    found.append(address)
    key_uid_email_cache[fingerprint] = found
    return found
1421
1422 ################################################################################
1423
def get_logins_from_ldap(fingerprint='*'):
    """
    Retrieve login from LDAP linked to a given fingerprint.

    The server and base DN are taken from the
    Import-LDAP-Fingerprints::LDAPServer and ::LDAPDn config values; the
    bind is anonymous.

    @type fingerprint: str
    @param fingerprint: fingerprint to search for; it is inserted into the
                        search filter as is, so the default '*' matches
                        every entry that has a fingerprint

    @rtype: dict
    @return: maps the first keyFingerPrint value of each matching entry to
             the first uid value of that entry
    """

    LDAPDn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    LDAPServer = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    l = ldap.open(LDAPServer)
    try:
        l.simple_bind_s('','')
        Attrs = l.search_s(LDAPDn, ldap.SCOPE_ONELEVEL,
                           '(keyfingerprint=%s)' % fingerprint,
                           ['uid', 'keyfingerprint'])
    finally:
        # search_s() has already returned the full result list, so the
        # connection can be released here instead of being leaked.
        l.unbind_s()
    login = {}
    for elem in Attrs:
        login[elem[1]['keyFingerPrint'][0]] = elem[1]['uid'][0]
    return login
1438
1439 ################################################################################
1440
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of 'root' is removed from both paths; 'src' is
    then prefixed with one '../' for every component of the directory
    part of 'dest'.

    @rtype: str
    @return: the fixed 'src'
    """
    rel_src = src.replace(root, '', 1)
    dest_dir = os.path.dirname(dest.replace(root, '', 1))
    depth = dest_dir.count('/') + 1
    return ('../' * depth) + rel_src
1451
1452 ################################################################################
1453
def temp_filename(directory=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Pre-create a temporary file and return it, which makes the name both
    secure and unique.

    @type directory: str
    @param directory: If non-null it will be the directory the file is pre-created in.

    @type prefix: str
    @param prefix: The filename will be prefixed with this string

    @type suffix: str
    @param suffix: The filename will end with this string

    @type mode: int
    @param mode: If set the file will get chmodded to those permissions

    @type group: str
    @param group: If set the file will get chgrped to the specified group
                  (looked up by name).

    @rtype: tuple
    @return: Returns a pair (fd, name): the open file descriptor and the
             path of the file
    """

    handle = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
    path = handle[1]
    if mode:
        os.chmod(path, mode)
    if group:
        os.chown(path, -1, grp.getgrnam(group).gr_gid)
    return handle
1484
1485 ################################################################################
1486
def temp_dirname(parent=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique directory by pre-creating it.

    @type parent: str
    @param parent: If non-null it will be the directory the directory is pre-created in.

    @type prefix: str
    @param prefix: The directory name will be prefixed with this string

    @type suffix: str
    @param suffix: The directory name will end with this string

    @type mode: int
    @param mode: If set the directory will get chmodded to those permissions

    @type group: str or int
    @param group: If set the directory will get chgrped to the specified
                  group, given by name (as for temp_filename) or as a
                  numeric gid.

    @rtype: str
    @return: Returns the name of the created directory

    """

    tfname = tempfile.mkdtemp(suffix, prefix, parent)
    if mode:
        os.chmod(tfname, mode)
    if group:
        # os.chown() needs a numeric gid.  Group names are resolved like in
        # temp_filename(); getgrnam() rejects non-strings with TypeError,
        # in which case the value is taken to be a gid already.
        try:
            gid = grp.getgrnam(group).gr_gid
        except TypeError:
            gid = group
        os.chown(tfname, -1, gid)
    return tfname
1517
1518 ################################################################################
1519
def is_email_alias(email):
    """
    checks if the user part of the email is listed in the alias file

    The alias file (see which_alias_file) is read once; the part of each
    line before the first ':' is kept in the module-level alias_cache.  If
    there is no alias file the cache is empty.

    @type email: str
    @param email: the email address to check

    @rtype: bool
    @return: True if the part before the first '@' is a known alias
    """
    global alias_cache
    if alias_cache is None:
        aliases = set()
        aliasfn = which_alias_file()
        if aliasfn:
            aliasfile = open(aliasfn)
            try:
                for l in aliasfile:
                    aliases.add(l.split(':')[0])
            finally:
                aliasfile.close()
        # Publish the cache only once it is complete, so that a failed
        # read is retried on the next call instead of leaving a partial set.
        alias_cache = aliases
    uid = email.split('@')[0]
    return uid in alias_cache
1531
1532 ################################################################################
1533
def get_changes_files(from_dir):
    """
    Change into 'from_dir' and list the .changes files found there.

    The chdir is deliberate: much of p-u/p-a misbehaves when it is not run
    from the right place.  If the directory cannot be entered or read, the
    program is terminated via fubar().

    @type from_dir: str
    @param from_dir: the directory to scan

    @rtype: list of str
    @return: the names (without directory) of all .changes files
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        entries = os.listdir(from_dir)
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return [name for name in entries if name.endswith('.changes')]
1550
1551 ################################################################################
1552
# Module-level configuration set-up; runs once when this module is imported.
apt_pkg.init()

# Global configuration object, shared with the functions of this module
# and handed out by get_conf().
Cnf = apt_pkg.Configuration()
# The default configuration file is not loaded when the DAK_TEST
# environment variable is set.
if not os.getenv("DAK_TEST"):
    apt_pkg.read_config_file_isc(Cnf,default_config)

# Load a different configuration file on top if which_conf_file() selects
# one.  Note that which_conf_file() is called twice here.
if which_conf_file() != default_config:
    apt_pkg.read_config_file_isc(Cnf,which_conf_file())
1561
1562 ################################################################################
1563
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at http://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parsed a local copy, but let's document the source
    somewhere ;)

    Each line is expected to look like "<source>: <entry>|<entry>|...";
    lines without ": " are ignored.  The bug number of an entry is its
    first run of digits; entries without any digits are skipped.

    @type file: str
    @param file: path of the local copy of the list

    @rtype: dict
    @return: a dict associating source package name with a list of open
             wnpp bugs, as strings (Yes, there might be more than one).
             Empty if the file cannot be read, in which case a warning is
             printed.
    """

    lines = []
    try:
        f = open(file)
        try:
            lines = f.readlines()
        finally:
            f.close()
    except IOError:
        print("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any." % file)
        lines = []

    wnpp = {}
    for line in lines:
        splited_line = line.split(": ", 1)
        if len(splited_line) > 1:
            bugs = []
            for wnpp_bug in splited_line[1].split("|"):
                # re.search() returns None for an entry without digits
                match = re.search(r"\d+", wnpp_bug)
                if match:
                    bugs.append(match.group())
            wnpp[splited_line[0]] = bugs
    return wnpp
1596
1597 ################################################################################
1598
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    aggregating the Packages.gz file of the given architecture with the
    matching debian-installer Packages.gz (if that one exists).

    The program is terminated via fubar() if gunzip fails.

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
    (fd, temp_file) = temp_filename()
    # Only the name is needed (the shell redirections below write to it),
    # so do not leak the descriptor that came with it.
    os.close(fd)
    try:
        (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
        if (result != 0):
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
        if os.path.exists(filename):
            (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
            if (result != 0):
                fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        packages = open_file(temp_file)
        Packages = apt_pkg.ParseTagFile(packages)
    finally:
        # Remove the temporary file on the error paths as well; the
        # already opened 'packages' handle stays readable after the unlink.
        os.unlink(temp_file)
    return Packages
1633
1634 ################################################################################
1635
def deb_extract_control(fh):
    """extract DEBIAN/control from a binary package

    @param fh: the binary package, handed unchanged to apt_inst.DebFile

    @return: data of the "control" member of the package's control archive
    """
    deb = apt_inst.DebFile(fh)
    control_archive = deb.control
    return control_archive.extractdata("control")
1639
1640 ################################################################################
1641
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """mail addresses to contact for an upload

    The maintainer is always contacted, the uploader (Changed-By) as well
    if different.  The first address of the signing key is added when
    neither of the two is among the key's addresses.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if changed_by != maintainer:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    # The checks are nested in this order so that fix_maintainer() is only
    # called as far as needed to reach a decision.
    # NOTE(review): element 3 of fix_maintainer()'s result is presumably the
    # bare mail address -- confirm against textutils.
    if len(key_addresses) > 0:
        uploader_mail = fix_maintainer(changed_by)[3]
        if uploader_mail not in key_addresses:
            maintainer_mail = fix_maintainer(maintainer)[3]
            if maintainer_mail not in key_addresses:
                recipients.append(key_addresses[0])

    return [fix_maintainer(recipient)[1] for recipient in recipients]
1668
1669 ################################################################################
1670
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The editor is taken from $VISUAL, then $EDITOR, and defaults to vi;
    empty values are skipped.  The value may carry arguments (for example
    "emacs -nw"); it is split like a shell command line unless it names an
    existing file as a whole.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text

    @raise subprocess.CalledProcessError: if the editor exits with a
           non-zero status
    """
    import shlex

    editor = os.environ.get('VISUAL') or os.environ.get('EDITOR') or 'vi'
    if os.path.exists(editor):
        # A path to an existing program is used verbatim, even if it
        # contains whitespace.
        command = [editor]
    else:
        command = shlex.split(editor) or ['vi']

    # Text mode, so that a str can be written to the file directly.
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False)
    try:
        # The file has to be closed (and thereby flushed) before the editor
        # opens it by name.
        with tmp:
            tmp.write(text)
        subprocess.check_call(command + [tmp.name])
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        # delete=False above: the file is removed here, whatever happened.
        os.unlink(tmp.name)
1692
1693 ################################################################################
1694
def _print_broken_dependencies(kind, entries, cruft):
    """print one "broken dependencies" section of the reverse-depends report

    @type  kind: str
    @param kind: dependency field named in the heading, e.g. "Depends"

    @type  entries: list
    @param entries: (source, lines) pairs in output order; every lines
                    list must contain at least one string

    @type  cruft: bool
    @param cruft: if true, print an indented "  - broken ..." list;
                  otherwise print a "# Broken ..." section followed by a
                  blank line
    """
    if cruft:
        print("  - broken %s:" % kind)
        prefix = '    '
    else:
        print("# Broken %s:" % kind)
        prefix = ''
    for source, lines in entries:
        print('%s%s: %s' % (prefix, source, lines[0]))
        # Further lines are aligned below the first one, i.e. shifted by
        # the width of "<source>: ".
        padding = prefix + ' ' * (len(source) + 2)
        for line in lines[1:]:
            print(padding + line)
    if not cruft:
        print("")


def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
    """report dependencies that would break if packages were removed

    Checks the Depends of all binaries in the suite and the Build-Depends
    and Build-Depends-Indep of all sources in the suite against the names
    in removals.  A dependency counts as broken if every alternative of an
    ORed group is to be removed.  Virtual packages that are only provided
    by to-be-removed packages are treated as to-be-removed, too.  The
    findings are printed to stdout; removals itself is not modified.

    @type  removals: list of str
    @param removals: names of the packages to be removed

    @type  suite: str
    @param suite: name of the suite, as accepted by get_suite()

    @type  arches: list of str
    @param arches: architectures to check; if empty or None, all
                   architectures of the suite are checked

    @param session: database session used for the queries

    @type  cruft: bool
    @param cruft: selects the layout of the printed report

    @rtype:  int
    @return: 1 if at least one broken dependency was found, 0 otherwise
    """
    dbsuite = get_suite(suite, session)
    overridesuite = dbsuite
    if dbsuite.overridesuite is not None:
        overridesuite = get_suite(dbsuite.overridesuite, session)
    dep_problem = 0
    p2c = {}
    broken_depends = {}
    if arches:
        all_arches = set(arches)
    else:
        all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
    all_arches -= set(["source", "all"])
    # Work on a private set: virtual packages get added below, which must
    # not leak into the caller's list, and set membership tests are cheap.
    # NOTE(review): as before, a virtual package added while handling one
    # architecture stays in the set for the architectures handled later.
    removal_set = set(removals)
    metakey_d = get_or_set_metadatakey("Depends", session)
    metakey_p = get_or_set_metadatakey("Provides", session)
    params = {
        'suite_id':     dbsuite.suite_id,
        'metakey_d_id': metakey_d.key_id,
        'metakey_p_id': metakey_p.key_id,
    }
    for architecture in all_arches | set(['all']):
        deps = {}
        sources = {}
        virtual_packages = {}
        params['arch_id'] = get_architecture(architecture, session).arch_id

        statement = '''
            SELECT b.id, b.package, s.source, c.name as component,
                (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
                (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
                FROM binaries b
                JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
                JOIN source s ON b.source = s.id
                JOIN files_archive_map af ON b.file = af.file_id
                JOIN component c ON af.component_id = c.id
                WHERE b.architecture = :arch_id'''
        query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
            from_statement(statement).params(params)
        for binary_id, package, source, component, depends, provides in query:
            sources[package] = source
            p2c[package] = component
            if depends is not None:
                deps[package] = depends
            # Maintain a counter for each virtual package.  If a
            # Provides: exists, set the counter to 0 and count all
            # provides by a package not in the list for removal.
            # If the counter stays 0 at the end, we know that only
            # the to-be-removed packages provided this virtual
            # package.
            if provides is not None:
                for virtual_pkg in provides.split(","):
                    virtual_pkg = virtual_pkg.strip()
                    if virtual_pkg == package:
                        continue
                    if virtual_pkg not in virtual_packages:
                        virtual_packages[virtual_pkg] = 0
                    if package not in removal_set:
                        virtual_packages[virtual_pkg] += 1

        # If a virtual package is only provided by the to-be-removed
        # packages, treat the virtual package as to-be-removed too.
        for virtual_pkg in virtual_packages:
            if virtual_packages[virtual_pkg] == 0:
                removal_set.add(virtual_pkg)

        # Check binary dependencies (Depends)
        for package in deps:
            if package in removal_set:
                continue
            parsed_dep = []
            try:
                parsed_dep += apt_pkg.ParseDepends(deps[package])
            except ValueError as e:
                print("Error for package %s: %s" % (package, e))
            for dep in parsed_dep:
                # Check for partial breakage.  If a package has a ORed
                # dependency, there is only a dependency problem if all
                # packages in the ORed depends will be removed.
                if not all(dep_package in removal_set for dep_package, _, _ in dep):
                    continue
                component = p2c[package]
                source = sources[package]
                if component != "main":
                    source = "%s/%s" % (source, component)
                broken_depends.setdefault(source, {}).setdefault(package, set()).add(architecture)
                dep_problem = 1

    if broken_depends:
        entries = []
        for source in sorted(broken_depends):
            bindict = broken_depends[source]
            lines = []
            for binary in sorted(bindict):
                broken_arches = bindict[binary]
                # The architecture list is only spelled out if the binary
                # is not broken everywhere.
                if broken_arches == all_arches or 'all' in broken_arches:
                    lines.append(binary)
                else:
                    lines.append('%s [%s]' % (binary, ' '.join(sorted(broken_arches))))
            entries.append((source, lines))
        _print_broken_dependencies("Depends", entries, cruft)

    # Check source dependencies (Build-Depends and Build-Depends-Indep)
    broken_build_depends = {}
    metakey_bd = get_or_set_metadatakey("Build-Depends", session)
    metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
    params = {
        'suite_id':    dbsuite.suite_id,
        'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
    }
    statement = '''
        SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
           FROM source s
           JOIN source_metadata sm ON s.id = sm.src_id
           WHERE s.id in
               (SELECT source FROM src_associations
                   WHERE suite = :suite_id)
               AND sm.key_id in :metakey_ids
           GROUP BY s.id, s.source'''
    query = session.query('id', 'source', 'build_dep').from_statement(statement). \
        params(params)
    for source_id, source, build_dep in query:
        if source in removal_set:
            continue
        parsed_dep = []
        if build_dep is not None:
            # Remove [arch] information since we want to see breakage on all arches
            build_dep = re_build_dep_arch.sub("", build_dep)
            try:
                parsed_dep += apt_pkg.ParseDepends(build_dep)
            except ValueError as e:
                print("Error for source %s: %s" % (source, e))
        # Name under which this source is reported ("source" or
        # "source/component").  It is kept separate from `source` so that
        # the override lookup always uses the plain source name, and it is
        # only looked up once, when the first broken dependency is found.
        report_name = None
        for dep in parsed_dep:
            if not all(dep_package in removal_set for dep_package, _, _ in dep):
                continue
            if report_name is None:
                # NOTE(review): .first() yields None if the source has no
                # 'dsc' override in the override suite; the unpacking then
                # raises TypeError, as it always did.
                component, = session.query(Component.component_name) \
                    .join(Component.overrides) \
                    .filter(Override.suite == overridesuite) \
                    .filter(Override.package == source) \
                    .join(Override.overridetype).filter(OverrideType.overridetype == 'dsc') \
                    .first()
                if component != "main":
                    report_name = "%s/%s" % (source, component)
                else:
                    report_name = source
            broken_build_depends.setdefault(report_name, set()).add(pp_deps(dep))
            dep_problem = 1

    if broken_build_depends:
        entries = [(source, sorted(broken_build_depends[source]))
                   for source in sorted(broken_build_depends)]
        _print_broken_dependencies("Build-Depends", entries, cruft)

    return dep_problem