daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import datetime
  27 import email.Header
  28 import os
  29 import pwd
  30 import grp
  31 import select
  32 import socket
  33 import shutil
  34 import sys
  35 import tempfile
  36 import traceback
  37 import stat
  38 import apt_inst
  39 import apt_pkg
  40 import time
  41 import re
  42 import email as modemail
  43 import subprocess
  44 import ldap
  45
  46 import daklib.config as config
  47 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  48                    get_override_type, Keyring, session_wrapper, \
  49                    get_active_keyring_paths, get_primary_keyring_path, \
  50                    get_suite_architectures, get_or_set_metadatakey, DBSource, \
  51                    Component, Override, OverrideType
  52 from sqlalchemy import desc
  53 from dak_exceptions import *
  54 from gpg import SignedFile
  55 from textutils import fix_maintainer
  56 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  57                     re_multi_line_field, re_srchasver, re_taint_free, \
  58                     re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
  59                     re_is_orig_source, re_build_dep_arch
  60
  61 from formats import parse_format, validate_changes_format
  62 from srcformats import get_format_from_string
  63 from collections import defaultdict
  64
  65 ################################################################################
  66
  67 default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties
  68
  69 alias_cache = None        #: Cache for email alias checks
  70 key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids
  71
  72 # (hashname, function, earliest_changes_version)
  73 known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
  74                 ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  75
  76 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  77 # code in lenny's Python. This also affects commands.getoutput and
  78 # commands.getstatus.
  79 def dak_getstatusoutput(cmd):
  80     pipe = subprocess.Popen(cmd, shell=True, universal_newlines=True,
  81         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  82
  83     output = pipe.stdout.read()
  84
  85     pipe.wait()
  86
  87     if output[-1:] == '\n':
  88         output = output[:-1]
  89
  90     ret = pipe.wait()
  91     if ret is None:
  92         ret = 0
  93
  94     return ret, output
# Rebind the stdlib entry point so that callers of
# commands.getstatusoutput() get dak_getstatusoutput() instead.
commands.getstatusoutput = dak_getstatusoutput
  96
  97 ################################################################################
  98
  99 def html_escape(s):
 100     """ Escape html chars """
 101     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 102
 103 ################################################################################
 104
 105 def open_file(filename, mode='r'):
 106     """
 107     Open C{file}, return fileobject.
 108
 109     @type filename: string
 110     @param filename: path/filename to open
 111
 112     @type mode: string
 113     @param mode: open mode
 114
 115     @rtype: fileobject
 116     @return: open fileobject
 117
 118     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 119
 120     """
 121     try:
 122         f = open(filename, mode)
 123     except IOError:
 124         raise CantOpenError(filename)
 125     return f
 126
 127 ################################################################################
 128
 129 def our_raw_input(prompt=""):
 130     if prompt:
 131         while 1:
 132             try:
 133                 sys.stdout.write(prompt)
 134                 break
 135             except IOError:
 136                 pass
 137     sys.stdout.flush()
 138     try:
 139         ret = raw_input()
 140         return ret
 141     except EOFError:
 142         sys.stderr.write("\nUser interrupt (^D).\n")
 143         raise SystemExit
 144
 145 ################################################################################
 146
 147 def extract_component_from_section(section, session=None):
 148     component = ""
 149
 150     if section.find('/') != -1:
 151         component = section.split('/')[0]
 152
 153     # Expand default component
 154     if component == "":
 155         comp = get_component(section, session)
 156         if comp is None:
 157             component = "main"
 158         else:
 159             component = comp.component_name
 160
 161     return (section, component)
 162
 163 ################################################################################
 164
 165 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 166     require_signature = True
 167     if keyrings == None:
 168         keyrings = []
 169         require_signature = False
 170
 171     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 172     contents = signed_file.contents
 173
 174     error = ""
 175     changes = {}
 176
 177     # Split the lines in the input, keeping the linebreaks.
 178     lines = contents.splitlines(True)
 179
 180     if len(lines) == 0:
 181         raise ParseChangesError("[Empty changes file]")
 182
 183     # Reindex by line number so we can easily verify the format of
 184     # .dsc files...
 185     index = 0
 186     indexed_lines = {}
 187     for line in lines:
 188         index += 1
 189         indexed_lines[index] = line[:-1]
 190
 191     num_of_lines = len(indexed_lines.keys())
 192     index = 0
 193     first = -1
 194     while index < num_of_lines:
 195         index += 1
 196         line = indexed_lines[index]
 197         if line == "" and signing_rules == 1:
 198             if index != num_of_lines:
 199                 raise InvalidDscError(index)
 200             break
 201         slf = re_single_line_field.match(line)
 202         if slf:
 203             field = slf.groups()[0].lower()
 204             changes[field] = slf.groups()[1]
 205             first = 1
 206             continue
 207         if line == " .":
 208             changes[field] += '\n'
 209             continue
 210         mlf = re_multi_line_field.match(line)
 211         if mlf:
 212             if first == -1:
 213                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 214             if first == 1 and changes[field] != "":
 215                 changes[field] += '\n'
 216             first = 0
 217             changes[field] += mlf.groups()[0] + '\n'
 218             continue
 219         error += line
 220
 221     changes["filecontents"] = armored_contents
 222
 223     if changes.has_key("source"):
 224         # Strip the source version in brackets from the source field,
 225         # put it in the "source-version" field instead.
 226         srcver = re_srchasver.search(changes["source"])
 227         if srcver:
 228             changes["source"] = srcver.group(1)
 229             changes["source-version"] = srcver.group(2)
 230
 231     if error:
 232         raise ParseChangesError(error)
 233
 234     return changes
 235
 236 ################################################################################
 237
 238 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 239     """
 240     Parses a changes file and returns a dictionary where each field is a
 241     key.  The mandatory first argument is the filename of the .changes
 242     file.
 243
 244     signing_rules is an optional argument:
 245
 246       - If signing_rules == -1, no signature is required.
 247       - If signing_rules == 0 (the default), a signature is required.
 248       - If signing_rules == 1, it turns on the same strict format checking
 249         as dpkg-source.
 250
 251     The rules for (signing_rules == 1)-mode are:
 252
 253       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 254         followed by any PGP header data and must end with a blank line.
 255
 256       - The data section must end with a blank line and must be followed by
 257         "-----BEGIN PGP SIGNATURE-----".
 258     """
 259
 260     changes_in = open_file(filename)
 261     content = changes_in.read()
 262     changes_in.close()
 263     try:
 264         unicode(content, 'utf-8')
 265     except UnicodeError:
 266         raise ChangesUnicodeError("Changes file not proper utf-8")
 267     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 268
 269
 270     if not dsc_file:
 271         # Finally ensure that everything needed for .changes is there
 272         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 273                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 274
 275         missingfields=[]
 276         for keyword in must_keywords:
 277             if not changes.has_key(keyword.lower()):
 278                 missingfields.append(keyword)
 279
 280                 if len(missingfields):
 281                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 282
 283     return changes
 284
 285 ################################################################################
 286
 287 def hash_key(hashname):
 288     return '%ssum' % hashname
 289
 290 ################################################################################
 291
 292 def create_hash(where, files, hashname, hashfunc):
 293     """
 294     create_hash extends the passed files dict with the given hash by
 295     iterating over all files on disk and passing them to the hashing
 296     function given.
 297     """
 298
 299     rejmsg = []
 300     for f in files.keys():
 301         try:
 302             file_handle = open_file(f)
 303         except CantOpenError:
 304             rejmsg.append("Could not open file %s for checksumming" % (f))
 305             continue
 306
 307         files[f][hash_key(hashname)] = hashfunc(file_handle)
 308
 309         file_handle.close()
 310     return rejmsg
 311
 312 ################################################################################
 313
 314 def check_hash(where, files, hashname, hashfunc):
 315     """
 316     check_hash checks the given hash in the files dict against the actual
 317     files on disk.  The hash values need to be present consistently in
 318     all file entries.  It does not modify its input in any way.
 319     """
 320
 321     rejmsg = []
 322     for f in files.keys():
 323         file_handle = None
 324         try:
 325             try:
 326                 file_handle = open_file(f)
 327
 328                 # Check for the hash entry, to not trigger a KeyError.
 329                 if not files[f].has_key(hash_key(hashname)):
 330                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 331                         where))
 332                     continue
 333
 334                 # Actually check the hash for correctness.
 335                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 336                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 337                         where))
 338             except CantOpenError:
 339                 # TODO: This happens when the file is in the pool.
 340                 # warn("Cannot open file %s" % f)
 341                 continue
 342         finally:
 343             if file_handle:
 344                 file_handle.close()
 345     return rejmsg
 346
 347 ################################################################################
 348
 349 def check_size(where, files):
 350     """
 351     check_size checks the file sizes in the passed files dict against the
 352     files on disk.
 353     """
 354
 355     rejmsg = []
 356     for f in files.keys():
 357         try:
 358             entry = os.stat(f)
 359         except OSError as exc:
 360             if exc.errno == 2:
 361                 # TODO: This happens when the file is in the pool.
 362                 continue
 363             raise
 364
 365         actual_size = entry[stat.ST_SIZE]
 366         size = int(files[f]["size"])
 367         if size != actual_size:
 368             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 369                    % (f, actual_size, size, where))
 370     return rejmsg
 371
 372 ################################################################################
 373
 374 def check_dsc_files(dsc_filename, dsc, dsc_files):
 375     """
 376     Verify that the files listed in the Files field of the .dsc are
 377     those expected given the announced Format.
 378
 379     @type dsc_filename: string
 380     @param dsc_filename: path of .dsc file
 381
 382     @type dsc: dict
 383     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 384
 385     @type dsc_files: dict
 386     @param dsc_files: the file list returned by C{build_file_list()}
 387
 388     @rtype: list
 389     @return: all errors detected
 390     """
 391     rejmsg = []
 392
 393     # Ensure .dsc lists proper set of source files according to the format
 394     # announced
 395     has = defaultdict(lambda: 0)
 396
 397     ftype_lookup = (
 398         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 399         (r'diff.gz',                   ('debian_diff',)),
 400         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 401         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 402         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 403         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 404         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 405     )
 406
 407     for f in dsc_files:
 408         m = re_issource.match(f)
 409         if not m:
 410             rejmsg.append("%s: %s in Files field not recognised as source."
 411                           % (dsc_filename, f))
 412             continue
 413
 414         # Populate 'has' dictionary by resolving keys in lookup table
 415         matched = False
 416         for regex, keys in ftype_lookup:
 417             if re.match(regex, m.group(3)):
 418                 matched = True
 419                 for key in keys:
 420                     has[key] += 1
 421                 break
 422
 423         # File does not match anything in lookup table; reject
 424         if not matched:
 425             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 426
 427     # Check for multiple files
 428     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 429         if has[file_type] > 1:
 430             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 431
 432     # Source format specific tests
 433     try:
 434         format = get_format_from_string(dsc['format'])
 435         rejmsg.extend([
 436             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 437         ])
 438
 439     except UnknownFormatError:
 440         # Not an error here for now
 441         pass
 442
 443     return rejmsg
 444
 445 ################################################################################
 446
 447 def check_hash_fields(what, manifest):
 448     """
 449     check_hash_fields ensures that there are no checksum fields in the
 450     given dict that we do not know about.
 451     """
 452
 453     rejmsg = []
 454     hashes = map(lambda x: x[0], known_hashes)
 455     for field in manifest:
 456         if field.startswith("checksums-"):
 457             hashname = field.split("-",1)[1]
 458             if hashname not in hashes:
 459                 rejmsg.append("Unsupported checksum field for %s "\
 460                     "in %s" % (hashname, what))
 461     return rejmsg
 462
 463 ################################################################################
 464
 465 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 466     if format >= version:
 467         # The version should contain the specified hash.
 468         func = check_hash
 469
 470         # Import hashes from the changes
 471         rejmsg = parse_checksums(".changes", files, changes, hashname)
 472         if len(rejmsg) > 0:
 473             return rejmsg
 474     else:
 475         # We need to calculate the hash because it can't possibly
 476         # be in the file.
 477         func = create_hash
 478     return func(".changes", files, hashname, hashfunc)
 479
 480 # We could add the orig which might be in the pool to the files dict to
 481 # access the checksums easily.
 482
 483 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 484     """
 485     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 486     in the dsc is correct, i.e. identical to the changes file and if necessary
 487     the pool.  The latter task is delegated to check_hash.
 488     """
 489
 490     rejmsg = []
 491     if not dsc.has_key('Checksums-%s' % (hashname,)):
 492         return rejmsg
 493     # Import hashes from the dsc
 494     parse_checksums(".dsc", dsc_files, dsc, hashname)
 495     # And check it...
 496     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 497     return rejmsg
 498
 499 ################################################################################
 500
 501 def parse_checksums(where, files, manifest, hashname):
 502     rejmsg = []
 503     field = 'checksums-%s' % hashname
 504     if not field in manifest:
 505         return rejmsg
 506     for line in manifest[field].split('\n'):
 507         if not line:
 508             break
 509         clist = line.strip().split(' ')
 510         if len(clist) == 3:
 511             checksum, size, checkfile = clist
 512         else:
 513             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 514             continue
 515         if not files.has_key(checkfile):
 516         # TODO: check for the file's entry in the original files dict, not
 517         # the one modified by (auto)byhand and other weird stuff
 518         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 519         #        (file, hashname, where))
 520             continue
 521         if not files[checkfile]["size"] == size:
 522             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 523                 "in %s" % (checkfile, hashname, where))
 524             continue
 525         files[checkfile][hash_key(hashname)] = checksum
 526     for f in files.keys():
 527         if not files[f].has_key(hash_key(hashname)):
 528             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 529     return rejmsg
 530
 531 ################################################################################
 532
 533 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 534
 535 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 536     files = {}
 537
 538     # Make sure we have a Files: field to parse...
 539     if not changes.has_key(field):
 540         raise NoFilesFieldError
 541
 542     # Validate .changes Format: field
 543     if not is_a_dsc:
 544         validate_changes_format(parse_format(changes['format']), field)
 545
 546     includes_section = (not is_a_dsc) and field == "files"
 547
 548     # Parse each entry/line:
 549     for i in changes[field].split('\n'):
 550         if not i:
 551             break
 552         s = i.split()
 553         section = priority = ""
 554         try:
 555             if includes_section:
 556                 (md5, size, section, priority, name) = s
 557             else:
 558                 (md5, size, name) = s
 559         except ValueError:
 560             raise ParseChangesError(i)
 561
 562         if section == "":
 563             section = "-"
 564         if priority == "":
 565             priority = "-"
 566
 567         (section, component) = extract_component_from_section(section)
 568
 569         files[name] = dict(size=size, section=section,
 570                            priority=priority, component=component)
 571         files[name][hashname] = md5
 572
 573     return files
 574
 575 ################################################################################
 576
 577 # see http://bugs.debian.org/619131
 578 def build_package_list(dsc, session = None):
 579     if not dsc.has_key("package-list"):
 580         return {}
 581
 582     packages = {}
 583
 584     for line in dsc["package-list"].split("\n"):
 585         if not line:
 586             break
 587
 588         fields = line.split()
 589         name = fields[0]
 590         package_type = fields[1]
 591         (section, component) = extract_component_from_section(fields[2])
 592         priority = fields[3]
 593
 594         # Validate type if we have a session
 595         if session and get_override_type(package_type, session) is None:
 596             # Maybe just warn and ignore? exit(1) might be a bit hard...
 597             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 598
 599         if name not in packages or packages[name]["type"] == "dsc":
 600             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 601
 602     return packages
 603
 604 ################################################################################
 605
 606 def send_mail (message, filename="", whitelists=None):
 607     """sendmail wrapper, takes _either_ a message string or a file as arguments
 608
 609     @type  whitelists: list of (str or None)
 610     @param whitelists: path to whitelists. C{None} or an empty list whitelists
 611                        everything, otherwise an address is whitelisted if it is
 612                        included in any of the lists.
 613                        In addition a global whitelist can be specified in
 614                        Dinstall::MailWhiteList.
 615     """
 616
 617     maildir = Cnf.get('Dir::Mail')
 618     if maildir:
 619         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 620         path = find_next_free(path)
 621         fh = open(path, 'w')
 622         print >>fh, message,
 623         fh.close()
 624
 625     # Check whether we're supposed to be sending mail
 626     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 627         return
 628
 629     # If we've been passed a string dump it into a temporary file
 630     if message:
 631         (fd, filename) = tempfile.mkstemp()
 632         os.write (fd, message)
 633         os.close (fd)
 634
 635     if whitelists is None or None in whitelists:
 636         whitelists = []
 637     if Cnf.get('Dinstall::MailWhiteList', ''):
 638         whitelists.append(Cnf['Dinstall::MailWhiteList'])
 639     if len(whitelists) != 0:
 640         message_in = open_file(filename)
 641         message_raw = modemail.message_from_file(message_in)
 642         message_in.close();
 643
 644         whitelist = [];
 645         for path in whitelists:
 646           with open_file(path, 'r') as whitelist_in:
 647             for line in whitelist_in:
 648                 if not re_whitespace_comment.match(line):
 649                     if re_re_mark.match(line):
 650                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 651                     else:
 652                         whitelist.append(re.compile(re.escape(line.strip())))
 653
 654         # Fields to check.
 655         fields = ["To", "Bcc", "Cc"]
 656         for field in fields:
 657             # Check each field
 658             value = message_raw.get(field, None)
 659             if value != None:
 660                 match = [];
 661                 for item in value.split(","):
 662                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 663                     mail_whitelisted = 0
 664                     for wr in whitelist:
 665                         if wr.match(email):
 666                             mail_whitelisted = 1
 667                             break
 668                     if not mail_whitelisted:
 669                         print "Skipping {0} since it's not whitelisted".format(item)
 670                         continue
 671                     match.append(item)
 672
 673                 # Doesn't have any mail in whitelist so remove the header
 674                 if len(match) == 0:
 675                     del message_raw[field]
 676                 else:
 677                     message_raw.replace_header(field, ', '.join(match))
 678
 679         # Change message fields in order if we don't have a To header
 680         if not message_raw.has_key("To"):
 681             fields.reverse()
 682             for field in fields:
 683                 if message_raw.has_key(field):
 684                     message_raw[fields[-1]] = message_raw[field]
 685                     del message_raw[field]
 686                     break
 687             else:
 688                 # Clean up any temporary files
 689                 # and return, as we removed all recipients.
 690                 if message:
 691                     os.unlink (filename);
 692                 return;
 693
 694         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 695         os.write (fd, message_raw.as_string(True));
 696         os.close (fd);
 697
 698     # Invoke sendmail
 699     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 700     if (result != 0):
 701         raise SendmailFailedError(output)
 702
 703     # Clean up any temporary files
 704     if message:
 705         os.unlink (filename)
 706
 707 ################################################################################
 708
 709 def poolify (source, component=None):
 710     if source[:3] == "lib":
 711         return source[:4] + '/' + source + '/'
 712     else:
 713         return source[:1] + '/' + source + '/'
 714
 715 ################################################################################
 716
 717 def move (src, dest, overwrite = 0, perms = 0o664):
 718     if os.path.exists(dest) and os.path.isdir(dest):
 719         dest_dir = dest
 720     else:
 721         dest_dir = os.path.dirname(dest)
 722     if not os.path.exists(dest_dir):
 723         umask = os.umask(00000)
 724         os.makedirs(dest_dir, 0o2775)
 725         os.umask(umask)
 726     #print "Moving %s to %s..." % (src, dest)
 727     if os.path.exists(dest) and os.path.isdir(dest):
 728         dest += '/' + os.path.basename(src)
 729     # Don't overwrite unless forced to
 730     if os.path.exists(dest):
 731         if not overwrite:
 732             fubar("Can't move %s to %s - file already exists." % (src, dest))
 733         else:
 734             if not os.access(dest, os.W_OK):
 735                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 736     shutil.copy2(src, dest)
 737     os.chmod(dest, perms)
 738     os.unlink(src)
 739
 740 def copy (src, dest, overwrite = 0, perms = 0o664):
 741     if os.path.exists(dest) and os.path.isdir(dest):
 742         dest_dir = dest
 743     else:
 744         dest_dir = os.path.dirname(dest)
 745     if not os.path.exists(dest_dir):
 746         umask = os.umask(00000)
 747         os.makedirs(dest_dir, 0o2775)
 748         os.umask(umask)
 749     #print "Copying %s to %s..." % (src, dest)
 750     if os.path.exists(dest) and os.path.isdir(dest):
 751         dest += '/' + os.path.basename(src)
 752     # Don't overwrite unless forced to
 753     if os.path.exists(dest):
 754         if not overwrite:
 755             raise FileExistsError
 756         else:
 757             if not os.access(dest, os.W_OK):
 758                 raise CantOverwriteError
 759     shutil.copy2(src, dest)
 760     os.chmod(dest, perms)
 761
 762 ################################################################################
 763
 764 def where_am_i ():
 765     res = socket.getfqdn()
 766     database_hostname = Cnf.get("Config::" + res + "::DatabaseHostname")
 767     if database_hostname:
 768         return database_hostname
 769     else:
 770         return res
 771
 772 def which_conf_file ():
 773     if os.getenv('DAK_CONFIG'):
 774         return os.getenv('DAK_CONFIG')
 775
 776     res = socket.getfqdn()
 777     # In case we allow local config files per user, try if one exists
 778     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 779         homedir = os.getenv("HOME")
 780         confpath = os.path.join(homedir, "/etc/dak.conf")
 781         if os.path.exists(confpath):
 782             apt_pkg.read_config_file_isc(Cnf,confpath)
 783
 784     # We are still in here, so there is no local config file or we do
 785     # not allow local files. Do the normal stuff.
 786     if Cnf.get("Config::" + res + "::DakConfig"):
 787         return Cnf["Config::" + res + "::DakConfig"]
 788
 789     return default_config
 790
 791 def which_alias_file():
 792     hostname = socket.getfqdn()
 793     aliasfn = '/var/lib/misc/'+hostname+'/forward-alias'
 794     if os.path.exists(aliasfn):
 795         return aliasfn
 796     else:
 797         return None
 798
 799 ################################################################################
 800
 801 def TemplateSubst(subst_map, filename):
 802     """ Perform a substition of template """
 803     templatefile = open_file(filename)
 804     template = templatefile.read()
 805     for k, v in subst_map.iteritems():
 806         template = template.replace(k, str(v))
 807     templatefile.close()
 808     return template
 809
 810 ################################################################################
 811
 812 def fubar(msg, exit_code=1):
 813     sys.stderr.write("E: %s\n" % (msg))
 814     sys.exit(exit_code)
 815
 816 def warn(msg):
 817     sys.stderr.write("W: %s\n" % (msg))
 818
 819 ################################################################################
 820
 821 # Returns the user name with a laughable attempt at rfc822 conformancy
 822 # (read: removing stray periods).
 823 def whoami ():
 824     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 825
 826 def getusername ():
 827     return pwd.getpwuid(os.getuid())[0]
 828
 829 ################################################################################
 830
 831 def size_type (c):
 832     t  = " B"
 833     if c > 10240:
 834         c = c / 1024
 835         t = " KB"
 836     if c > 10240:
 837         c = c / 1024
 838         t = " MB"
 839     return ("%d%s" % (c, t))
 840
 841 ################################################################################
 842
 843 def cc_fix_changes (changes):
 844     o = changes.get("architecture", "")
 845     if o:
 846         del changes["architecture"]
 847     changes["architecture"] = {}
 848     for j in o.split():
 849         changes["architecture"][j] = 1
 850
 851 def changes_compare (a, b):
 852     """ Sort by source name, source version, 'have source', and then by filename """
 853     try:
 854         a_changes = parse_changes(a)
 855     except:
 856         return -1
 857
 858     try:
 859         b_changes = parse_changes(b)
 860     except:
 861         return 1
 862
 863     cc_fix_changes (a_changes)
 864     cc_fix_changes (b_changes)
 865
 866     # Sort by source name
 867     a_source = a_changes.get("source")
 868     b_source = b_changes.get("source")
 869     q = cmp (a_source, b_source)
 870     if q:
 871         return q
 872
 873     # Sort by source version
 874     a_version = a_changes.get("version", "0")
 875     b_version = b_changes.get("version", "0")
 876     q = apt_pkg.version_compare(a_version, b_version)
 877     if q:
 878         return q
 879
 880     # Sort by 'have source'
 881     a_has_source = a_changes["architecture"].get("source")
 882     b_has_source = b_changes["architecture"].get("source")
 883     if a_has_source and not b_has_source:
 884         return -1
 885     elif b_has_source and not a_has_source:
 886         return 1
 887
 888     # Fall back to sort by filename
 889     return cmp(a, b)
 890
 891 ################################################################################
 892
 893 def find_next_free (dest, too_many=100):
 894     extra = 0
 895     orig_dest = dest
 896     while os.path.exists(dest) and extra < too_many:
 897         dest = orig_dest + '.' + repr(extra)
 898         extra += 1
 899     if extra >= too_many:
 900         raise NoFreeFilenameError
 901     return dest
 902
 903 ################################################################################
 904
 905 def result_join (original, sep = '\t'):
 906     resultlist = []
 907     for i in xrange(len(original)):
 908         if original[i] == None:
 909             resultlist.append("")
 910         else:
 911             resultlist.append(original[i])
 912     return sep.join(resultlist)
 913
 914 ################################################################################
 915
 916 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 917     out = ""
 918     for line in str.split('\n'):
 919         line = line.strip()
 920         if line or include_blank_lines:
 921             out += "%s%s\n" % (prefix, line)
 922     # Strip trailing new line
 923     if out:
 924         out = out[:-1]
 925     return out
 926
 927 ################################################################################
 928
 929 def validate_changes_file_arg(filename, require_changes=1):
 930     """
 931     'filename' is either a .changes or .dak file.  If 'filename' is a
 932     .dak file, it's changed to be the corresponding .changes file.  The
 933     function then checks if the .changes file a) exists and b) is
 934     readable and returns the .changes filename if so.  If there's a
 935     problem, the next action depends on the option 'require_changes'
 936     argument:
 937
 938       - If 'require_changes' == -1, errors are ignored and the .changes
 939         filename is returned.
 940       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 941       - If 'require_changes' == 1, a fatal error is raised.
 942
 943     """
 944     error = None
 945
 946     orig_filename = filename
 947     if filename.endswith(".dak"):
 948         filename = filename[:-4]+".changes"
 949
 950     if not filename.endswith(".changes"):
 951         error = "invalid file type; not a changes file"
 952     else:
 953         if not os.access(filename,os.R_OK):
 954             if os.path.exists(filename):
 955                 error = "permission denied"
 956             else:
 957                 error = "file not found"
 958
 959     if error:
 960         if require_changes == 1:
 961             fubar("%s: %s." % (orig_filename, error))
 962         elif require_changes == 0:
 963             warn("Skipping %s - %s" % (orig_filename, error))
 964             return None
 965         else: # We only care about the .dak file
 966             return filename
 967     else:
 968         return filename
 969
 970 ################################################################################
 971
 972 def real_arch(arch):
 973     return (arch != "source" and arch != "all")
 974
 975 ################################################################################
 976
 977 def join_with_commas_and(list):
 978     if len(list) == 0: return "nothing"
 979     if len(list) == 1: return list[0]
 980     return ", ".join(list[:-1]) + " and " + list[-1]
 981
 982 ################################################################################
 983
 984 def pp_deps (deps):
 985     pp_deps = []
 986     for atom in deps:
 987         (pkg, version, constraint) = atom
 988         if constraint:
 989             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
 990         else:
 991             pp_dep = pkg
 992         pp_deps.append(pp_dep)
 993     return " |".join(pp_deps)
 994
 995 ################################################################################
 996
def get_conf():
    """Return the module-level configuration object (Cnf)."""
    return Cnf
 999
1000 ################################################################################
1001
def parse_args(Options):
    """
    Handle -a, -c and -s arguments; returns them as SQL constraints.

    Unknown suites, components and architectures produce a warning.  If an
    option was given but none of its values is valid the program is
    aborted through fubar().

    @param Options: mapping with the keys "Suite", "Component" and
                    "Architecture"; each value is split with split_args()

    @rtype: tuple
    @return: (con_suites, con_architectures, con_components, check_source).
             The three constraints are SQL fragments of the form
             "AND <alias>.id IN (...)", or "" when the option was not given.
             check_source is 1 when no architecture was given or "source"
             was among the requested architectures, otherwise 0.
    """
    # XXX: This should go away and everything which calls it be converted
    #      to use SQLA properly.  For now, we'll just fix it not to use
    #      the old Pg interface though
    session = DBConn().session()

    # Suites
    con_suites = ""
    if Options["Suite"]:
        suite_ids = []
        for suitename in split_args(Options["Suite"]):
            suite = get_suite(suitename, session=session)
            if suite and suite.suite_id is not None:
                suite_ids.append(suite.suite_id)
            else:
                warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
        if not suite_ids:
            fubar("No valid suite given.")
        else:
            con_suites = "AND su.id IN (%s)" % ", ".join(map(str, suite_ids))

    # Components
    con_components = ""
    if Options["Component"]:
        component_ids = []
        for componentname in split_args(Options["Component"]):
            component = get_component(componentname, session=session)
            if component is not None:
                component_ids.append(component.component_id)
            else:
                warn("component '%s' not recognised." % (componentname))
        if not component_ids:
            fubar("No valid component given.")
        else:
            con_components = "AND c.id IN (%s)" % ", ".join(map(str, component_ids))

    # Architectures; "source" is not looked up but reported via check_source
    con_architectures = ""
    check_source = 0
    if not Options["Architecture"]:
        check_source = 1
    else:
        arch_ids = []
        for archname in split_args(Options["Architecture"]):
            if archname == "source":
                check_source = 1
                continue
            arch = get_architecture(archname, session=session)
            if arch is not None:
                arch_ids.append(arch.arch_id)
            else:
                warn("architecture '%s' not recognised." % (archname))
        if arch_ids:
            con_architectures = "AND a.id IN (%s)" % ", ".join(map(str, arch_ids))
        elif not check_source:
            fubar("No valid architecture given.")

    return (con_suites, con_architectures, con_components, check_source)
1063
1064 ################################################################################
1065
def arch_compare_sw (a, b):
    """
    Comparison function for sorting lists of architectures.

    'source' sorts before every other architecture; everything else is
    compared normally.

    @type a: str
    @param a: first architecture name

    @type b: str
    @param b: second architecture name

    @rtype: int
    @return: negative, zero or positive, as for cmp()
    """
    a_is_source = (a == "source")
    b_is_source = (b == "source")
    if a_is_source or b_is_source:
        # 0 if both are 'source', -1 if only a is, 1 if only b is
        return int(b_is_source) - int(a_is_source)

    return cmp (a, b)
1081
1082 ################################################################################
1083
def split_args (s, dwim=1):
    """
    Split command line arguments separated by either commas or whitespace.

    A string containing a comma is split on commas only, anything else on
    whitespace.  If dwim is set, a string ending in a comma is a fatal
    error: it usually means someone did 'dak ls -a i386, m68k foo' or
    something, and having 'm68k' silently treated as a separate argument
    only leads to confusion.

    @type s: str
    @param s: the argument string

    @param dwim: if true, abort on a trailing comma

    @rtype: list of str
    @return: the individual arguments
    """
    if "," not in s:
        return s.split()
    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1099
1100 ################################################################################
1101
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    Runs 'cmd' through '/bin/sh -c' in a forked child and collects what the
    child writes until no more data arrives.  Both status descriptors are
    closed (best effort) before returning.

    @type cmd: str
    @param cmd: shell command line to run

    @type status_read: int
    @param status_read: read end of the status pipe; data read from it is
                        returned as 'status'

    @type status_write: int
    @param status_write: write end of the status pipe; the only descriptor
                         in the range 3..255 that is left open in the child

    @rtype: tuple
    @return: (output, status, exit_status): the child's stdout and stderr
             mixed together, the data read from the status pipe, and the
             status value returned by os.waitpid()
    """

    cmd = ['/bin/sh', '-c', cmd]
    # Three pipes: parent->child stdin, child->parent stdout, child stderr
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # os.dup() hands out the lowest free descriptor, so after closing
        # 0 and 1 the two dups below become stdin and stdout ...
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        # ... and the same trick makes errin the new stderr.
        os.close(2)
        os.dup(errin)
        # Close everything else except the status fd the command writes to
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        # Never return into the parent's code, even if the exec fails
        try:
            os.execvp(cmd[0], cmd)
        finally:
            os._exit(1)

    # Parent
    os.close(p2cread)
    # Re-point the write-end descriptor numbers at the read ends.  This
    # drops the parent's copies of the write ends; from here on c2pwrite
    # and errin are what the parent reads the child's output from.
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                if fd == c2pwrite or fd == errin:
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        # A round in which no descriptor produced data ends the loop:
        # reap the child and clean up.
        if not more_data:
            pid, exit_status = os.waitpid(pid, 0)
            # Best effort only: the first close that fails skips the rest
            try:
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                pass
            break

    return output, status, exit_status
1166
1167 ################################################################################
1168
def process_gpgv_output(status):
    """
    Parse the status-fd output of gpgv.

    Every non-empty line must look like "[GNUPG:] KEYWORD [args...]".
    Malformed lines and duplicate keywords are reported in the returned
    error text and otherwise skipped.  The keywords NODATA, SIGEXPIRED and
    KEYEXPIRED may occur more than once; the last occurrence wins.

    @type status: str
    @param status: text read from gpgv's status fd

    @rtype: tuple
    @return: (keywords, internal_error) where keywords maps each status
             keyword to the list of its arguments and internal_error is a
             string with one line per problem found ("" if there was none)
    """
    repeatable = ("NODATA", "SIGEXPIRED", "KEYEXPIRED")
    keywords = {}
    errors = []
    for line in status.split('\n'):
        line = line.strip()
        if not line:
            continue
        atoms = line.split()
        if len(atoms) < 2:
            errors.append("gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line))
            continue
        (gnupg, keyword) = atoms[:2]
        if gnupg != "[GNUPG:]":
            errors.append("gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg))
            continue
        if keyword in keywords and keyword not in repeatable:
            errors.append("found duplicate status token ('%s').\n" % (keyword))
            continue
        keywords[keyword] = atoms[2:]

    return (keywords, "".join(errors))
1193
1194 ################################################################################
1195
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Fetch the key that signed 'filename' from a keyserver.

    gpgv is run on the file with an empty keyring to learn which key is
    missing; that key is then requested from 'keyserver' with gpg and
    added to 'keyring'.

    @type filename: str
    @param filename: signed file whose key should be fetched

    @type keyserver: str
    @param keyserver: keyserver to ask; defaults to the configured
                      "Dinstall::KeyServer"

    @type keyring: str
    @param keyring: keyring to add the key to; defaults to the result of
                    get_primary_keyring_path()

    @rtype: str
    @return: an empty string on success, otherwise an error message
    """

    # Fall back to the defaults for keyserver and keyring
    keyserver = keyserver or Cnf["Dinstall::KeyServer"]
    keyring = keyring or get_primary_keyring_path()

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Run gpgv on the file; only the status-fd text is of interest
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    status = gpgv_get_status_output(cmd, status_read, status_write)[1]

    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"

    fingerprint = keywords["NO_PUBKEY"][0]
    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    cmd = ("gpg --no-default-keyring --secret-keyring=%s --no-options"
           % (Cnf["Dinstall::SigningKeyring"])
           + " --keyring %s --keyserver %s --recv-key %s"
           % (keyring, keyserver, fingerprint))
    result = commands.getstatusoutput(cmd)[0]
    if result != 0:
        return "'%s' failed with exit code %s" % (cmd, result)

    return ""
1239
1240 ################################################################################
1241
def gpg_keyring_args(keyrings=None):
    """
    Build the '--keyring' command line options for gpg/gpgv.

    @type keyrings: list of str
    @param keyrings: keyring paths; if empty or None the result of
                     get_active_keyring_paths() is used

    @rtype: str
    @return: one "--keyring <path>" option per keyring, space separated
    """
    paths = keyrings or get_active_keyring_paths()
    options = []
    for path in paths:
        options.append("--keyring %s" % path)
    return " ".join(options)
1247
1248 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type sig_filename: str
    @param sig_filename: name of the file whose signature should be checked

    @type data_filename: str
    @param data_filename: optional; name of the file a detached signature
                          in 'sig_filename' applies to

    @type keyrings: list
    @param keyrings: optional *list* of keyrings to use; defaults to the
                     names of all keyrings marked active in the database

    @type autofetch: bool or None
    @param autofetch: whether to try to fetch the signing key with
                      retrieve_key() first.  If None, the configured
                      "Dinstall::KeyAutoFetch" value is used.

    @param session: database session used to look up the active keyrings
                    (presumably supplied by session_wrapper when omitted --
                    confirm against dbconn)

    @rtype: tuple
    @return: (fingerprint, []) if the signature is valid, otherwise
             (None, rejects) where rejects is a list of messages explaining
             why the signature was not accepted
    """

    rejects = []

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        # list.append() takes exactly one argument; each message is added
        # on its own.
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # gpgv normally reports the key id; don't crash if it is missing
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate = ""
        if len(args) >= 1:
            timestamp = args[0]
            # A timestamp without a "T" is taken as seconds since the
            # epoch; anything else is shown as gpgv reported it.
            if timestamp.count("T") == 0:
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    fingerprint = None
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = frozenset(("VALIDSIG", "SIG_ID", "GOODSIG", "BADSIG", "ERRSIG",
                                "SIGEXPIRED", "KEYREVOKED", "NO_PUBKEY", "BADARMOR",
                                "NODATA", "NOTATION_DATA", "NOTATION_NAME", "KEYEXPIRED", "POLICY_URL"))

    for keyword in keywords.keys():
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        return (fingerprint, [])
1381
1382 ################################################################################
1383
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve the email addresses found in the uids of a gpg key.

    Results are remembered in key_uid_email_cache, so gpg is run at most
    once per fingerprint.  If gpg fails the result is an empty list.

    @type fingerprint: str
    @param fingerprint: fingerprint of the key to look up

    @rtype: list of str
    @return: the addresses, with @debian.org addresses placed first
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    addresses = []
    cmd = "gpg --no-default-keyring %s --fingerprint %s" \
                % (gpg_keyring_args(), fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if result == 0:
        uid_matches = [re_gpg_uid.match(line) for line in output.split('\n')]
        for address in [m.group(1) for m in uid_matches if m]:
            if address.endswith('@debian.org'):
                # prefer @debian.org addresses
                # TODO: maybe not hardcode the domain
                addresses.insert(0, address)
            else:
                addresses.append(address)

    key_uid_email_cache[fingerprint] = addresses
    return addresses
1407
1408 ################################################################################
1409
def get_logins_from_ldap(fingerprint='*'):
    """
    Retrieve the logins linked to a key fingerprint from LDAP.

    Server and base DN are taken from the "Import-LDAP-Fingerprints"
    section of the configuration; the bind is anonymous.

    @type fingerprint: str
    @param fingerprint: fingerprint to search for; the default '*' matches
                        every entry that has a fingerprint

    @rtype: dict
    @return: maps the first key fingerprint of each entry found to the
             entry's first uid
    """

    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    connection.simple_bind_s('','')
    search_filter = '(keyfingerprint=%s)' % fingerprint
    entries = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  search_filter,
                                  ['uid', 'keyfingerprint'])
    return dict([(entry[1]['keyFingerPrint'][0], entry[1]['uid'][0])
                 for entry in entries])
1424
1425 ################################################################################
1426
def get_users_from_ldap():
    """
    Retrieve login and user names from LDAP.

    Server and base DN are taken from the "Import-LDAP-Fingerprints"
    section of the configuration; the bind is anonymous.

    @rtype: dict
    @return: maps each user's full name to the login (uid).  The name is
             built from the first 'cn', 'mn' and 'sn' values, joined by
             spaces; missing attributes and values equal to '-' are left
             out.
    """

    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    connection.simple_bind_s('','')
    entries = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  '(uid=*)', ['uid', 'cn', 'mn', 'sn'])
    users = {}
    for entry in entries:
        attrs = entry[1]
        name_parts = [attrs[k][0]
                      for k in ('cn', 'mn', 'sn')
                      if k in attrs and attrs[k][0] != '-']
        users[' '.join(name_parts)] = attrs['uid'][0]
    return users
1448
1449 ################################################################################
1450
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of 'root' is removed from both paths; the result
    is 'src' prefixed with one '../' for every component of the directory
    that contains 'dest'.

    @type src: str
    @param src: absolute path the link points to

    @type dest: str
    @param dest: absolute path of the link itself

    @type root: str
    @param root: common root to strip from both paths

    @rtype: str
    @return: the fixed, relative 'src'
    """
    relative_src = src.replace(root, '', 1)
    link_dir = os.path.dirname(dest.replace(root, '', 1))
    depth = len(link_dir.split('/'))
    return '../' * depth + relative_src
1461
1462 ################################################################################
1463
def temp_filename(directory=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Create a new, uniquely named temporary file and return it.

    @type directory: str
    @param directory: If non-null it will be the directory the file is created in.

    @type prefix: str
    @param prefix: The filename will start with this string

    @type suffix: str
    @param suffix: The filename will end with this string

    @type mode: int
    @param mode: If set the file will get chmodded to those permissions

    @type group: str
    @param group: If set the file will get chgrped to the specified group.

    @rtype: tuple
    @return: Returns a pair (fd, name): the open file descriptor and the
             name of the created file
    """

    (tfd, tfname) = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
    if mode:
        os.chmod(tfname, mode)
    if group:
        # -1 leaves the owner of the file unchanged
        os.chown(tfname, -1, grp.getgrnam(group).gr_gid)
    return (tfd, tfname)
1494
1495 ################################################################################
1496
def temp_dirname(parent=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Create a new, uniquely named temporary directory and return its name.

    @type parent: str
    @param parent: If non-null it will be the directory the new directory is created in.

    @type prefix: str
    @param prefix: The directory name will start with this string

    @type suffix: str
    @param suffix: The directory name will end with this string

    @type mode: int
    @param mode: If set the directory will get chmodded to those permissions

    @type group: str
    @param group: If set the directory will get chgrped to the specified group.

    @rtype: str
    @return: Returns the name of the created directory
    """

    dirname = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=parent)
    if mode:
        os.chmod(dirname, mode)
    if group:
        # -1 leaves the owner of the directory unchanged
        os.chown(dirname, -1, grp.getgrnam(group).gr_gid)
    return dirname
1528
1529 ################################################################################
1530
def is_email_alias(email):
    """
    Check if the user part of the email is listed in the alias file.

    The alias file (see which_alias_file()) is read on first use and its
    entries are kept in the module-level alias_cache.  If there is no alias
    file the cache is empty and nothing is considered an alias.

    @type email: str
    @param email: address to check; only the part before the first '@' is
                  looked up

    @rtype: bool
    @return: True if the user part is listed in the alias file
    """
    global alias_cache
    if alias_cache is None:
        aliases = set()
        aliasfn = which_alias_file()
        if aliasfn:
            # Each line starts with the alias name, followed by a colon
            with open(aliasfn) as alias_file:
                for l in alias_file:
                    aliases.add(l.split(':')[0])
        # Publish the cache only once it is complete, so that a failed
        # read does not leave a partial list behind.
        alias_cache = aliases
    uid = email.split('@')[0]
    return uid in alias_cache
1542
1543 ################################################################################
1544
def get_changes_files(from_dir):
    """
    List all .changes files in a directory.

    As a side effect the current working directory is changed to
    'from_dir'; this is due to broken behaviour on the part of p-u/p-a when
    you're not in the right place.  If the directory cannot be entered or
    read the program is aborted through fubar().

    @type from_dir: str
    @param from_dir: directory to look in

    @rtype: list of str
    @return: the names (without directory) of the .changes files
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        entries = os.listdir(from_dir)
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return [name for name in entries if name.endswith('.changes')]
1561
1562 ################################################################################
1563
# Module-level configuration object; the helpers in this file read their
# settings from it and get_conf() hands it out to callers.
Cnf = config.Config().Cnf
1565
1566 ################################################################################
1567
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at http://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parses a local copy, but let's document the source
    somewhere ;)

    Each line is expected to look like "<source>: <bug>|<bug>|..."; lines
    without ": " are ignored.  If the file cannot be opened a warning is
    printed and an empty result is returned.

    @type file: str
    @param file: path of the local copy of the bug list

    @rtype: dict
    @return: associates each source package name with a list of open wnpp
             bug numbers, as strings (Yes, there might be more than one).
             Entries that contain no number are left out of the list.
    """

    try:
        with open(file) as f:
            lines = f.readlines()
    except IOError:
        sys.stdout.write("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any.\n" % file)
        lines = []

    bug_number = re.compile(r"\d+")
    wnpp = {}
    for line in lines:
        splited_line = line.split(": ", 1)
        if len(splited_line) > 1:
            # Take the first number of every '|' separated entry; an entry
            # without any digits is skipped rather than crashing on it.
            matches = [bug_number.search(entry) for entry in splited_line[1].split("|")]
            wnpp[splited_line[0]] = [m.group() for m in matches if m]
    return wnpp
1600
1601 ################################################################################
1602
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    unpacking the Packages.gz file for the given architecture, followed by
    the matching debian-installer Packages.gz if that exists.

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
    (fd, temp_file) = temp_filename()
    # The shell redirections below open the file by name, so the descriptor
    # returned by temp_filename() is not needed; close it instead of
    # leaking it.
    os.close(fd)
    (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
    if (result != 0):
        # fubar() exits, so remove the temporary file first
        os.unlink(temp_file)
        fubar("Gunzip invocation failed!\n%s\n" % (output), result)
    filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
    if os.path.exists(filename):
        (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
        if (result != 0):
            os.unlink(temp_file)
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
    packages = open_file(temp_file)
    Packages = apt_pkg.TagFile(packages)
    # The file is already open, so its name can be removed right away
    os.unlink(temp_file)
    return Packages
1637
1638 ################################################################################
1639
def deb_extract_control(fh):
    """extract DEBIAN/control from a binary package

    @param fh: the binary package, in whatever form apt_inst.DebFile accepts

    @return: data of the "control" member of the package's control archive
    """
    deb = apt_inst.DebFile(fh)
    control_archive = deb.control
    return control_archive.extractdata("control")
1643
1644 ################################################################################
1645
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """mail addresses to contact for an upload

    The maintainer is always contacted, the changer as well if it differs
    from the maintainer.  The first address of the signing key is added
    when the key has addresses and neither the changer nor the maintainer
    is among them.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if changed_by != maintainer:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    if len(key_addresses) > 0:
        # Element 3 of fix_maintainer()'s result is compared against the
        # key's addresses (presumably the bare e-mail address).
        covered_by_key = (fix_maintainer(changed_by)[3] in key_addresses
                          or fix_maintainer(maintainer)[3] in key_addresses)
        if not covered_by_key:
            recipients.append(key_addresses[0])

    encoded = []
    for recipient in recipients:
        encoded.append(fix_maintainer(recipient)[1])
    return encoded
1672
1673 ################################################################################
1674
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The editor is taken from $VISUAL, then $EDITOR, falling back to "vi".
    It is started with a temporary file holding C{text} as its only
    argument; the temporary file is removed again in every case.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text

    @raise subprocess.CalledProcessError: the editor exited with a
                                          non-zero status
    """
    editor = os.environ.get('VISUAL', os.environ.get('EDITOR', 'vi'))
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False)
    try:
        # Close our handle even if writing fails, and always before the
        # editor opens the file.
        try:
            tmp.write(text)
        finally:
            tmp.close()
        subprocess.check_call([editor, tmp.name])
        # Read the result back through a handle that is closed explicitly.
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        os.unlink(tmp.name)
1696
1697 ################################################################################
1698
def _print_broken_dependencies(kind, broken, cruft):
    """print one section of the reverse dependency report

    @type  kind: str
    @param kind: name of the dependency field, used in the section header

    @type  broken: dict
    @param broken: maps a source label to the non-empty list of lines to
                   print for it

    @type  cruft: bool
    @param cruft: use the indented "  - broken ..." layout instead of the
                  "# Broken ..." layout that ends with a blank line
    """
    if cruft:
        indent = '    '
        print("  - broken %s:" % (kind))
    else:
        indent = ''
        print("# Broken %s:" % (kind))
    for source in sorted(broken):
        lines = broken[source]
        print('%s%s: %s' % (indent, source, lines[0]))
        # Further entries are aligned below the first one.
        padding = indent + ' ' * (len(source) + 2)
        for line in lines[1:]:
            print(padding + line)
    if not cruft:
        print("")

def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
    """check which dependencies would break if packages were removed

    Looks at the Depends of all binaries and at the Build-Depends and
    Build-Depends-Indep of all sources in the suite.  A dependency counts
    as broken when every alternative of an ORed group is to be removed.
    Virtual packages that are provided only by packages to be removed are
    treated as removed too.  Findings are printed to stdout.

    The list passed as C{removals} is not modified.

    @type  removals: list of str
    @param removals: names of the packages to be removed

    @type  suite: str
    @param suite: name of the suite to check

    @type  arches: list of str
    @param arches: architectures to check; all architectures of the suite
                   if empty or None.  "source" and "all" are ignored here,
                   "all" is always checked in addition.

    @param session: database session.  It is used directly for queries,
                    so despite the default a session has to be passed.

    @type  cruft: bool
    @param cruft: print the report as indented list items instead of
                  "# Broken ..." sections

    @rtype:  int
    @return: 1 if at least one broken dependency was found, 0 otherwise
    """
    dbsuite = get_suite(suite, session)
    overridesuite = dbsuite
    if dbsuite.overridesuite is not None:
        overridesuite = get_suite(dbsuite.overridesuite, session)
    dep_problem = 0
    p2c = {}
    broken_depends = {}
    if arches:
        all_arches = set(arches)
    else:
        all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
    all_arches -= set(["source", "all"])
    # Private copy: virtual packages get added below and must not leak into
    # the caller's list.  A set also keeps the many lookups cheap.
    removal_set = set(removals)
    metakey_d = get_or_set_metadatakey("Depends", session)
    metakey_p = get_or_set_metadatakey("Provides", session)
    params = {
        'suite_id':     dbsuite.suite_id,
        'metakey_d_id': metakey_d.key_id,
        'metakey_p_id': metakey_p.key_id,
    }
    for architecture in all_arches | set(['all']):
        deps = {}
        sources = {}
        virtual_packages = {}
        params['arch_id'] = get_architecture(architecture, session).arch_id

        statement = '''
            SELECT b.id, b.package, s.source, c.name as component,
                (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
                (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
                FROM binaries b
                JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
                JOIN source s ON b.source = s.id
                JOIN files_archive_map af ON b.file = af.file_id
                JOIN component c ON af.component_id = c.id
                WHERE b.architecture = :arch_id'''
        query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
            from_statement(statement).params(params)
        for binary_id, package, source, component, depends, provides in query:
            sources[package] = source
            p2c[package] = component
            if depends is not None:
                deps[package] = depends
            # Maintain a counter for each virtual package.  If a
            # Provides: exists, set the counter to 0 and count all
            # provides by a package not in the list for removal.
            # If the counter stays 0 at the end, we know that only
            # the to-be-removed packages provided this virtual
            # package.
            if provides is not None:
                for virtual_pkg in provides.split(","):
                    virtual_pkg = virtual_pkg.strip()
                    if virtual_pkg == package:
                        continue
                    if virtual_pkg not in virtual_packages:
                        virtual_packages[virtual_pkg] = 0
                    if package not in removal_set:
                        virtual_packages[virtual_pkg] += 1

        # If a virtual package is only provided by the to-be-removed
        # packages, treat the virtual package as to-be-removed too.
        # (This carries over to the architectures checked afterwards.)
        for virtual_pkg, providers in virtual_packages.items():
            if providers == 0:
                removal_set.add(virtual_pkg)

        # Check binary dependencies (Depends)
        for package in deps:
            if package in removal_set:
                continue
            parsed_dep = []
            try:
                parsed_dep += apt_pkg.parse_depends(deps[package])
            except ValueError as e:
                print("Error for package %s: %s" % (package, e))
            for dep in parsed_dep:
                # Check for partial breakage.  If a package has a ORed
                # dependency, there is only a dependency problem if all
                # packages in the ORed depends will be removed.
                if all(dep_package in removal_set for dep_package, _, _ in dep):
                    component = p2c[package]
                    source = sources[package]
                    if component != "main":
                        source = "%s/%s" % (source, component)
                    broken_depends.setdefault(source, {}).setdefault(package, set()).add(architecture)
                    dep_problem = 1

    if broken_depends:
        report = {}
        for source, bindict in broken_depends.items():
            lines = []
            for binary in sorted(bindict):
                broken_arches = bindict[binary]
                # No architecture list if the binary is broken everywhere.
                if broken_arches == all_arches or 'all' in broken_arches:
                    lines.append(binary)
                else:
                    lines.append('%s [%s]' % (binary, ' '.join(sorted(broken_arches))))
            report[source] = lines
        _print_broken_dependencies("Depends", report, cruft)

    # Check source dependencies (Build-Depends and Build-Depends-Indep)
    broken_build_depends = {}
    metakey_bd = get_or_set_metadatakey("Build-Depends", session)
    metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
    params = {
        'suite_id':    dbsuite.suite_id,
        'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
    }
    statement = '''
        SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
           FROM source s
           JOIN source_metadata sm ON s.id = sm.src_id
           WHERE s.id in
               (SELECT source FROM src_associations
                   WHERE suite = :suite_id)
               AND sm.key_id in :metakey_ids
           GROUP BY s.id, s.source'''
    query = session.query('id', 'source', 'build_dep').from_statement(statement). \
        params(params)
    for source_id, source, build_dep in query:
        if source in removal_set:
            continue
        parsed_dep = []
        if build_dep is not None:
            # Remove [arch] information since we want to see breakage on all arches
            build_dep = re_build_dep_arch.sub("", build_dep)
            try:
                parsed_dep += apt_pkg.parse_depends(build_dep)
            except ValueError as e:
                print("Error for source %s: %s" % (source, e))
        for dep in parsed_dep:
            if all(dep_package in removal_set for dep_package, _, _ in dep):
                # NOTE(review): .first() yields None when the source has no
                # 'dsc' override in the override suite, which makes the
                # unpacking fail -- confirm whether that can happen.
                component, = session.query(Component.component_name) \
                    .join(Component.overrides) \
                    .filter(Override.suite == overridesuite) \
                    .filter(Override.package == re.sub('/(contrib|non-free)$', '', source)) \
                    .join(Override.overridetype).filter(OverrideType.overridetype == 'dsc') \
                    .first()
                # Use a separate label: overwriting 'source' would stack the
                # component suffix once more for every further broken
                # dependency of the same source.
                key = source
                if component != "main":
                    key = "%s/%s" % (source, component)
                broken_build_depends.setdefault(key, set()).add(pp_deps(dep))
                dep_problem = 1

    if broken_build_depends:
        report = dict((source, sorted(bdeps))
                      for source, bdeps in broken_build_depends.items())
        _print_broken_dependencies("Build-Depends", report, cruft)

    return dep_problem