daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import codecs
  27 import datetime
  28 import email.Header
  29 import os
  30 import pwd
  31 import grp
  32 import select
  33 import socket
  34 import shutil
  35 import sys
  36 import tempfile
  37 import traceback
  38 import stat
  39 import apt_inst
  40 import apt_pkg
  41 import time
  42 import re
  43 import email as modemail
  44 import subprocess
  45 import ldap
  46 import errno
  47
  48 import daklib.config as config
  49 import daklib.daksubprocess
  50 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  51                    get_override_type, Keyring, session_wrapper, \
  52                    get_active_keyring_paths, get_primary_keyring_path, \
  53                    get_suite_architectures, get_or_set_metadatakey, DBSource, \
  54                    Component, Override, OverrideType
  55 from sqlalchemy import desc
  56 from dak_exceptions import *
  57 from gpg import SignedFile
  58 from textutils import fix_maintainer
  59 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  60                     re_multi_line_field, re_srchasver, re_taint_free, \
  61                     re_re_mark, re_whitespace_comment, re_issource, \
  62                     re_is_orig_source, re_build_dep_arch, re_parse_maintainer
  63
  64 from formats import parse_format, validate_changes_format
  65 from srcformats import get_format_from_string
  66 from collections import defaultdict
  67
  68 ################################################################################
  69
default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties

alias_cache = None        #: Cache for email alias checks
key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids

# Each entry is a tuple of (hashname, function, earliest_changes_version).
known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  78
  79 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  80 # code in lenny's Python. This also affects commands.getoutput and
  81 # commands.getstatus.
  82 def dak_getstatusoutput(cmd):
  83     pipe = daklib.daksubprocess.Popen(cmd, shell=True, universal_newlines=True,
  84         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  85
  86     output = pipe.stdout.read()
  87
  88     pipe.wait()
  89
  90     if output[-1:] == '\n':
  91         output = output[:-1]
  92
  93     ret = pipe.wait()
  94     if ret is None:
  95         ret = 0
  96
  97     return ret, output
  98 commands.getstatusoutput = dak_getstatusoutput
  99
 100 ################################################################################
 101
 102 def html_escape(s):
 103     """ Escape html chars """
 104     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 105
 106 ################################################################################
 107
 108 def open_file(filename, mode='r'):
 109     """
 110     Open C{file}, return fileobject.
 111
 112     @type filename: string
 113     @param filename: path/filename to open
 114
 115     @type mode: string
 116     @param mode: open mode
 117
 118     @rtype: fileobject
 119     @return: open fileobject
 120
 121     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 122
 123     """
 124     try:
 125         f = open(filename, mode)
 126     except IOError:
 127         raise CantOpenError(filename)
 128     return f
 129
 130 ################################################################################
 131
 132 def our_raw_input(prompt=""):
 133     if prompt:
 134         while 1:
 135             try:
 136                 sys.stdout.write(prompt)
 137                 break
 138             except IOError:
 139                 pass
 140     sys.stdout.flush()
 141     try:
 142         ret = raw_input()
 143         return ret
 144     except EOFError:
 145         sys.stderr.write("\nUser interrupt (^D).\n")
 146         raise SystemExit
 147
 148 ################################################################################
 149
 150 def extract_component_from_section(section, session=None):
 151     component = ""
 152
 153     if section.find('/') != -1:
 154         component = section.split('/')[0]
 155
 156     # Expand default component
 157     if component == "":
 158         comp = get_component(section, session)
 159         if comp is None:
 160             component = "main"
 161         else:
 162             component = comp.component_name
 163
 164     return (section, component)
 165
 166 ################################################################################
 167
 168 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 169     require_signature = True
 170     if keyrings == None:
 171         keyrings = []
 172         require_signature = False
 173
 174     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 175     contents = signed_file.contents
 176
 177     error = ""
 178     changes = {}
 179
 180     # Split the lines in the input, keeping the linebreaks.
 181     lines = contents.splitlines(True)
 182
 183     if len(lines) == 0:
 184         raise ParseChangesError("[Empty changes file]")
 185
 186     # Reindex by line number so we can easily verify the format of
 187     # .dsc files...
 188     index = 0
 189     indexed_lines = {}
 190     for line in lines:
 191         index += 1
 192         indexed_lines[index] = line[:-1]
 193
 194     num_of_lines = len(indexed_lines.keys())
 195     index = 0
 196     first = -1
 197     while index < num_of_lines:
 198         index += 1
 199         line = indexed_lines[index]
 200         if line == "" and signing_rules == 1:
 201             if index != num_of_lines:
 202                 raise InvalidDscError(index)
 203             break
 204         slf = re_single_line_field.match(line)
 205         if slf:
 206             field = slf.groups()[0].lower()
 207             changes[field] = slf.groups()[1]
 208             first = 1
 209             continue
 210         if line == " .":
 211             changes[field] += '\n'
 212             continue
 213         mlf = re_multi_line_field.match(line)
 214         if mlf:
 215             if first == -1:
 216                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 217             if first == 1 and changes[field] != "":
 218                 changes[field] += '\n'
 219             first = 0
 220             changes[field] += mlf.groups()[0] + '\n'
 221             continue
 222         error += line
 223
 224     changes["filecontents"] = armored_contents
 225
 226     if changes.has_key("source"):
 227         # Strip the source version in brackets from the source field,
 228         # put it in the "source-version" field instead.
 229         srcver = re_srchasver.search(changes["source"])
 230         if srcver:
 231             changes["source"] = srcver.group(1)
 232             changes["source-version"] = srcver.group(2)
 233
 234     if error:
 235         raise ParseChangesError(error)
 236
 237     return changes
 238
 239 ################################################################################
 240
 241 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 242     """
 243     Parses a changes file and returns a dictionary where each field is a
 244     key.  The mandatory first argument is the filename of the .changes
 245     file.
 246
 247     signing_rules is an optional argument:
 248
 249       - If signing_rules == -1, no signature is required.
 250       - If signing_rules == 0 (the default), a signature is required.
 251       - If signing_rules == 1, it turns on the same strict format checking
 252         as dpkg-source.
 253
 254     The rules for (signing_rules == 1)-mode are:
 255
 256       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 257         followed by any PGP header data and must end with a blank line.
 258
 259       - The data section must end with a blank line and must be followed by
 260         "-----BEGIN PGP SIGNATURE-----".
 261     """
 262
 263     with open_file(filename) as changes_in:
 264         content = changes_in.read()
 265     try:
 266         unicode(content, 'utf-8')
 267     except UnicodeError:
 268         raise ChangesUnicodeError("Changes file not proper utf-8")
 269     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 270
 271
 272     if not dsc_file:
 273         # Finally ensure that everything needed for .changes is there
 274         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 275                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 276
 277         missingfields=[]
 278         for keyword in must_keywords:
 279             if not changes.has_key(keyword.lower()):
 280                 missingfields.append(keyword)
 281
 282                 if len(missingfields):
 283                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 284
 285     return changes
 286
 287 ################################################################################
 288
 289 def hash_key(hashname):
 290     return '%ssum' % hashname
 291
 292 ################################################################################
 293
 294 def create_hash(where, files, hashname, hashfunc):
 295     """
 296     create_hash extends the passed files dict with the given hash by
 297     iterating over all files on disk and passing them to the hashing
 298     function given.
 299     """
 300
 301     rejmsg = []
 302     for f in files.keys():
 303         try:
 304             file_handle = open_file(f)
 305         except CantOpenError:
 306             rejmsg.append("Could not open file %s for checksumming" % (f))
 307             continue
 308
 309         files[f][hash_key(hashname)] = hashfunc(file_handle)
 310
 311         file_handle.close()
 312     return rejmsg
 313
 314 ################################################################################
 315
 316 def check_hash(where, files, hashname, hashfunc):
 317     """
 318     check_hash checks the given hash in the files dict against the actual
 319     files on disk.  The hash values need to be present consistently in
 320     all file entries.  It does not modify its input in any way.
 321     """
 322
 323     rejmsg = []
 324     for f in files.keys():
 325         try:
 326             with open_file(f) as file_handle:
 327                 # Check for the hash entry, to not trigger a KeyError.
 328                 if not files[f].has_key(hash_key(hashname)):
 329                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 330                         where))
 331                     continue
 332
 333                 # Actually check the hash for correctness.
 334                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 335                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 336                         where))
 337         except CantOpenError:
 338             # TODO: This happens when the file is in the pool.
 339             # warn("Cannot open file %s" % f)
 340             continue
 341     return rejmsg
 342
 343 ################################################################################
 344
 345 def check_size(where, files):
 346     """
 347     check_size checks the file sizes in the passed files dict against the
 348     files on disk.
 349     """
 350
 351     rejmsg = []
 352     for f in files.keys():
 353         try:
 354             entry = os.stat(f)
 355         except OSError as exc:
 356             if exc.errno == errno.ENOENT:
 357                 # TODO: This happens when the file is in the pool.
 358                 continue
 359             raise
 360
 361         actual_size = entry[stat.ST_SIZE]
 362         size = int(files[f]["size"])
 363         if size != actual_size:
 364             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 365                    % (f, actual_size, size, where))
 366     return rejmsg
 367
 368 ################################################################################
 369
 370 def check_dsc_files(dsc_filename, dsc, dsc_files):
 371     """
 372     Verify that the files listed in the Files field of the .dsc are
 373     those expected given the announced Format.
 374
 375     @type dsc_filename: string
 376     @param dsc_filename: path of .dsc file
 377
 378     @type dsc: dict
 379     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 380
 381     @type dsc_files: dict
 382     @param dsc_files: the file list returned by C{build_file_list()}
 383
 384     @rtype: list
 385     @return: all errors detected
 386     """
 387     rejmsg = []
 388
 389     # Ensure .dsc lists proper set of source files according to the format
 390     # announced
 391     has = defaultdict(lambda: 0)
 392
 393     ftype_lookup = (
 394         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 395         (r'diff.gz',                   ('debian_diff',)),
 396         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 397         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 398         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 399         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 400         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 401     )
 402
 403     for f in dsc_files:
 404         m = re_issource.match(f)
 405         if not m:
 406             rejmsg.append("%s: %s in Files field not recognised as source."
 407                           % (dsc_filename, f))
 408             continue
 409
 410         # Populate 'has' dictionary by resolving keys in lookup table
 411         matched = False
 412         for regex, keys in ftype_lookup:
 413             if re.match(regex, m.group(3)):
 414                 matched = True
 415                 for key in keys:
 416                     has[key] += 1
 417                 break
 418
 419         # File does not match anything in lookup table; reject
 420         if not matched:
 421             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 422
 423     # Check for multiple files
 424     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 425         if has[file_type] > 1:
 426             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 427
 428     # Source format specific tests
 429     try:
 430         format = get_format_from_string(dsc['format'])
 431         rejmsg.extend([
 432             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 433         ])
 434
 435     except UnknownFormatError:
 436         # Not an error here for now
 437         pass
 438
 439     return rejmsg
 440
 441 ################################################################################
 442
 443 def check_hash_fields(what, manifest):
 444     """
 445     check_hash_fields ensures that there are no checksum fields in the
 446     given dict that we do not know about.
 447     """
 448
 449     rejmsg = []
 450     hashes = map(lambda x: x[0], known_hashes)
 451     for field in manifest:
 452         if field.startswith("checksums-"):
 453             hashname = field.split("-",1)[1]
 454             if hashname not in hashes:
 455                 rejmsg.append("Unsupported checksum field for %s "\
 456                     "in %s" % (hashname, what))
 457     return rejmsg
 458
 459 ################################################################################
 460
 461 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 462     if format >= version:
 463         # The version should contain the specified hash.
 464         func = check_hash
 465
 466         # Import hashes from the changes
 467         rejmsg = parse_checksums(".changes", files, changes, hashname)
 468         if len(rejmsg) > 0:
 469             return rejmsg
 470     else:
 471         # We need to calculate the hash because it can't possibly
 472         # be in the file.
 473         func = create_hash
 474     return func(".changes", files, hashname, hashfunc)
 475
 476 # We could add the orig which might be in the pool to the files dict to
 477 # access the checksums easily.
 478
 479 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 480     """
 481     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 482     in the dsc is correct, i.e. identical to the changes file and if necessary
 483     the pool.  The latter task is delegated to check_hash.
 484     """
 485
 486     rejmsg = []
 487     if not dsc.has_key('Checksums-%s' % (hashname,)):
 488         return rejmsg
 489     # Import hashes from the dsc
 490     parse_checksums(".dsc", dsc_files, dsc, hashname)
 491     # And check it...
 492     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 493     return rejmsg
 494
 495 ################################################################################
 496
 497 def parse_checksums(where, files, manifest, hashname):
 498     rejmsg = []
 499     field = 'checksums-%s' % hashname
 500     if not field in manifest:
 501         return rejmsg
 502     for line in manifest[field].split('\n'):
 503         if not line:
 504             break
 505         clist = line.strip().split(' ')
 506         if len(clist) == 3:
 507             checksum, size, checkfile = clist
 508         else:
 509             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 510             continue
 511         if not files.has_key(checkfile):
 512         # TODO: check for the file's entry in the original files dict, not
 513         # the one modified by (auto)byhand and other weird stuff
 514         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 515         #        (file, hashname, where))
 516             continue
 517         if not files[checkfile]["size"] == size:
 518             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 519                 "in %s" % (checkfile, hashname, where))
 520             continue
 521         files[checkfile][hash_key(hashname)] = checksum
 522     for f in files.keys():
 523         if not files[f].has_key(hash_key(hashname)):
 524             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 525     return rejmsg
 526
 527 ################################################################################
 528
 529 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 530
 531 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 532     files = {}
 533
 534     # Make sure we have a Files: field to parse...
 535     if not changes.has_key(field):
 536         raise NoFilesFieldError
 537
 538     # Validate .changes Format: field
 539     if not is_a_dsc:
 540         validate_changes_format(parse_format(changes['format']), field)
 541
 542     includes_section = (not is_a_dsc) and field == "files"
 543
 544     # Parse each entry/line:
 545     for i in changes[field].split('\n'):
 546         if not i:
 547             break
 548         s = i.split()
 549         section = priority = ""
 550         try:
 551             if includes_section:
 552                 (md5, size, section, priority, name) = s
 553             else:
 554                 (md5, size, name) = s
 555         except ValueError:
 556             raise ParseChangesError(i)
 557
 558         if section == "":
 559             section = "-"
 560         if priority == "":
 561             priority = "-"
 562
 563         (section, component) = extract_component_from_section(section)
 564
 565         files[name] = dict(size=size, section=section,
 566                            priority=priority, component=component)
 567         files[name][hashname] = md5
 568
 569     return files
 570
 571 ################################################################################
 572
 573 # see http://bugs.debian.org/619131
 574 def build_package_list(dsc, session = None):
 575     if not dsc.has_key("package-list"):
 576         return {}
 577
 578     packages = {}
 579
 580     for line in dsc["package-list"].split("\n"):
 581         if not line:
 582             break
 583
 584         fields = line.split()
 585         name = fields[0]
 586         package_type = fields[1]
 587         (section, component) = extract_component_from_section(fields[2])
 588         priority = fields[3]
 589
 590         # Validate type if we have a session
 591         if session and get_override_type(package_type, session) is None:
 592             # Maybe just warn and ignore? exit(1) might be a bit hard...
 593             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 594
 595         if name not in packages or packages[name]["type"] == "dsc":
 596             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 597
 598     return packages
 599
 600 ################################################################################
 601
 602 def send_mail (message, filename="", whitelists=None):
 603     """sendmail wrapper, takes _either_ a message string or a file as arguments
 604
 605     @type  whitelists: list of (str or None)
 606     @param whitelists: path to whitelists. C{None} or an empty list whitelists
 607                        everything, otherwise an address is whitelisted if it is
 608                        included in any of the lists.
 609                        In addition a global whitelist can be specified in
 610                        Dinstall::MailWhiteList.
 611     """
 612
 613     maildir = Cnf.get('Dir::Mail')
 614     if maildir:
 615         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 616         path = find_next_free(path)
 617         with open(path, 'w') as fh:
 618             print >>fh, message,
 619
 620     # Check whether we're supposed to be sending mail
 621     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 622         return
 623
 624     # If we've been passed a string dump it into a temporary file
 625     if message:
 626         (fd, filename) = tempfile.mkstemp()
 627         os.write (fd, message)
 628         os.close (fd)
 629
 630     if whitelists is None or None in whitelists:
 631         whitelists = []
 632     if Cnf.get('Dinstall::MailWhiteList', ''):
 633         whitelists.append(Cnf['Dinstall::MailWhiteList'])
 634     if len(whitelists) != 0:
 635         with open_file(filename) as message_in:
 636             message_raw = modemail.message_from_file(message_in)
 637
 638         whitelist = [];
 639         for path in whitelists:
 640           with open_file(path, 'r') as whitelist_in:
 641             for line in whitelist_in:
 642                 if not re_whitespace_comment.match(line):
 643                     if re_re_mark.match(line):
 644                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 645                     else:
 646                         whitelist.append(re.compile(re.escape(line.strip())))
 647
 648         # Fields to check.
 649         fields = ["To", "Bcc", "Cc"]
 650         for field in fields:
 651             # Check each field
 652             value = message_raw.get(field, None)
 653             if value != None:
 654                 match = [];
 655                 for item in value.split(","):
 656                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 657                     mail_whitelisted = 0
 658                     for wr in whitelist:
 659                         if wr.match(email):
 660                             mail_whitelisted = 1
 661                             break
 662                     if not mail_whitelisted:
 663                         print "Skipping {0} since it's not whitelisted".format(item)
 664                         continue
 665                     match.append(item)
 666
 667                 # Doesn't have any mail in whitelist so remove the header
 668                 if len(match) == 0:
 669                     del message_raw[field]
 670                 else:
 671                     message_raw.replace_header(field, ', '.join(match))
 672
 673         # Change message fields in order if we don't have a To header
 674         if not message_raw.has_key("To"):
 675             fields.reverse()
 676             for field in fields:
 677                 if message_raw.has_key(field):
 678                     message_raw[fields[-1]] = message_raw[field]
 679                     del message_raw[field]
 680                     break
 681             else:
 682                 # Clean up any temporary files
 683                 # and return, as we removed all recipients.
 684                 if message:
 685                     os.unlink (filename);
 686                 return;
 687
 688         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 689         os.write (fd, message_raw.as_string(True));
 690         os.close (fd);
 691
 692     # Invoke sendmail
 693     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 694     if (result != 0):
 695         raise SendmailFailedError(output)
 696
 697     # Clean up any temporary files
 698     if message:
 699         os.unlink (filename)
 700
 701 ################################################################################
 702
 703 def poolify (source, component=None):
 704     if source[:3] == "lib":
 705         return source[:4] + '/' + source + '/'
 706     else:
 707         return source[:1] + '/' + source + '/'
 708
 709 ################################################################################
 710
 711 def move (src, dest, overwrite = 0, perms = 0o664):
 712     if os.path.exists(dest) and os.path.isdir(dest):
 713         dest_dir = dest
 714     else:
 715         dest_dir = os.path.dirname(dest)
 716     if not os.path.lexists(dest_dir):
 717         umask = os.umask(00000)
 718         os.makedirs(dest_dir, 0o2775)
 719         os.umask(umask)
 720     #print "Moving %s to %s..." % (src, dest)
 721     if os.path.exists(dest) and os.path.isdir(dest):
 722         dest += '/' + os.path.basename(src)
 723     # Don't overwrite unless forced to
 724     if os.path.lexists(dest):
 725         if not overwrite:
 726             fubar("Can't move %s to %s - file already exists." % (src, dest))
 727         else:
 728             if not os.access(dest, os.W_OK):
 729                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 730     shutil.copy2(src, dest)
 731     os.chmod(dest, perms)
 732     os.unlink(src)
 733
 734 def copy (src, dest, overwrite = 0, perms = 0o664):
 735     if os.path.exists(dest) and os.path.isdir(dest):
 736         dest_dir = dest
 737     else:
 738         dest_dir = os.path.dirname(dest)
 739     if not os.path.exists(dest_dir):
 740         umask = os.umask(00000)
 741         os.makedirs(dest_dir, 0o2775)
 742         os.umask(umask)
 743     #print "Copying %s to %s..." % (src, dest)
 744     if os.path.exists(dest) and os.path.isdir(dest):
 745         dest += '/' + os.path.basename(src)
 746     # Don't overwrite unless forced to
 747     if os.path.lexists(dest):
 748         if not overwrite:
 749             raise FileExistsError
 750         else:
 751             if not os.access(dest, os.W_OK):
 752                 raise CantOverwriteError
 753     shutil.copy2(src, dest)
 754     os.chmod(dest, perms)
 755
 756 ################################################################################
 757
 758 def which_conf_file ():
 759     if os.getenv('DAK_CONFIG'):
 760         return os.getenv('DAK_CONFIG')
 761
 762     res = socket.getfqdn()
 763     # In case we allow local config files per user, try if one exists
 764     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 765         homedir = os.getenv("HOME")
 766         confpath = os.path.join(homedir, "/etc/dak.conf")
 767         if os.path.exists(confpath):
 768             apt_pkg.read_config_file_isc(Cnf,confpath)
 769
 770     # We are still in here, so there is no local config file or we do
 771     # not allow local files. Do the normal stuff.
 772     if Cnf.get("Config::" + res + "::DakConfig"):
 773         return Cnf["Config::" + res + "::DakConfig"]
 774
 775     return default_config
 776
 777 ################################################################################
 778
 779 def TemplateSubst(subst_map, filename):
 780     """ Perform a substition of template """
 781     with open_file(filename) as templatefile:
 782         template = templatefile.read()
 783     for k, v in subst_map.iteritems():
 784         template = template.replace(k, str(v))
 785     return template
 786
 787 ################################################################################
 788
 789 def fubar(msg, exit_code=1):
 790     sys.stderr.write("E: %s\n" % (msg))
 791     sys.exit(exit_code)
 792
 793 def warn(msg):
 794     sys.stderr.write("W: %s\n" % (msg))
 795
 796 ################################################################################
 797
 798 # Returns the user name with a laughable attempt at rfc822 conformancy
 799 # (read: removing stray periods).
 800 def whoami ():
 801     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 802
 803 def getusername ():
 804     return pwd.getpwuid(os.getuid())[0]
 805
 806 ################################################################################
 807
 808 def size_type (c):
 809     t  = " B"
 810     if c > 10240:
 811         c = c / 1024
 812         t = " KB"
 813     if c > 10240:
 814         c = c / 1024
 815         t = " MB"
 816     return ("%d%s" % (c, t))
 817
 818 ################################################################################
 819
 820 def cc_fix_changes (changes):
 821     o = changes.get("architecture", "")
 822     if o:
 823         del changes["architecture"]
 824     changes["architecture"] = {}
 825     for j in o.split():
 826         changes["architecture"][j] = 1
 827
 828 def changes_compare (a, b):
 829     """ Sort by source name, source version, 'have source', and then by filename """
 830     try:
 831         a_changes = parse_changes(a)
 832     except:
 833         return -1
 834
 835     try:
 836         b_changes = parse_changes(b)
 837     except:
 838         return 1
 839
 840     cc_fix_changes (a_changes)
 841     cc_fix_changes (b_changes)
 842
 843     # Sort by source name
 844     a_source = a_changes.get("source")
 845     b_source = b_changes.get("source")
 846     q = cmp (a_source, b_source)
 847     if q:
 848         return q
 849
 850     # Sort by source version
 851     a_version = a_changes.get("version", "0")
 852     b_version = b_changes.get("version", "0")
 853     q = apt_pkg.version_compare(a_version, b_version)
 854     if q:
 855         return q
 856
 857     # Sort by 'have source'
 858     a_has_source = a_changes["architecture"].get("source")
 859     b_has_source = b_changes["architecture"].get("source")
 860     if a_has_source and not b_has_source:
 861         return -1
 862     elif b_has_source and not a_has_source:
 863         return 1
 864
 865     # Fall back to sort by filename
 866     return cmp(a, b)
 867
 868 ################################################################################
 869
 870 def find_next_free (dest, too_many=100):
 871     extra = 0
 872     orig_dest = dest
 873     while os.path.lexists(dest) and extra < too_many:
 874         dest = orig_dest + '.' + repr(extra)
 875         extra += 1
 876     if extra >= too_many:
 877         raise NoFreeFilenameError
 878     return dest
 879
 880 ################################################################################
 881
 882 def result_join (original, sep = '\t'):
 883     resultlist = []
 884     for i in xrange(len(original)):
 885         if original[i] == None:
 886             resultlist.append("")
 887         else:
 888             resultlist.append(original[i])
 889     return sep.join(resultlist)
 890
 891 ################################################################################
 892
 893 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 894     out = ""
 895     for line in str.split('\n'):
 896         line = line.strip()
 897         if line or include_blank_lines:
 898             out += "%s%s\n" % (prefix, line)
 899     # Strip trailing new line
 900     if out:
 901         out = out[:-1]
 902     return out
 903
 904 ################################################################################
 905
 906 def validate_changes_file_arg(filename, require_changes=1):
 907     """
 908     'filename' is either a .changes or .dak file.  If 'filename' is a
 909     .dak file, it's changed to be the corresponding .changes file.  The
 910     function then checks if the .changes file a) exists and b) is
 911     readable and returns the .changes filename if so.  If there's a
 912     problem, the next action depends on the option 'require_changes'
 913     argument:
 914
 915       - If 'require_changes' == -1, errors are ignored and the .changes
 916         filename is returned.
 917       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 918       - If 'require_changes' == 1, a fatal error is raised.
 919
 920     """
 921     error = None
 922
 923     orig_filename = filename
 924     if filename.endswith(".dak"):
 925         filename = filename[:-4]+".changes"
 926
 927     if not filename.endswith(".changes"):
 928         error = "invalid file type; not a changes file"
 929     else:
 930         if not os.access(filename,os.R_OK):
 931             if os.path.exists(filename):
 932                 error = "permission denied"
 933             else:
 934                 error = "file not found"
 935
 936     if error:
 937         if require_changes == 1:
 938             fubar("%s: %s." % (orig_filename, error))
 939         elif require_changes == 0:
 940             warn("Skipping %s - %s" % (orig_filename, error))
 941             return None
 942         else: # We only care about the .dak file
 943             return filename
 944     else:
 945         return filename
 946
 947 ################################################################################
 948
 949 def real_arch(arch):
 950     return (arch != "source" and arch != "all")
 951
 952 ################################################################################
 953
 954 def join_with_commas_and(list):
 955     if len(list) == 0: return "nothing"
 956     if len(list) == 1: return list[0]
 957     return ", ".join(list[:-1]) + " and " + list[-1]
 958
 959 ################################################################################
 960
 961 def pp_deps (deps):
 962     pp_deps = []
 963     for atom in deps:
 964         (pkg, version, constraint) = atom
 965         if constraint:
 966             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
 967         else:
 968             pp_dep = pkg
 969         pp_deps.append(pp_dep)
 970     return " |".join(pp_deps)
 971
 972 ################################################################################
 973
 974 def get_conf():
 975     return Cnf
 976
 977 ################################################################################
 978
 979 def parse_args(Options):
 980     """ Handle -a, -c and -s arguments; returns them as SQL constraints """
 981     # XXX: This should go away and everything which calls it be converted
 982     #      to use SQLA properly.  For now, we'll just fix it not to use
 983     #      the old Pg interface though
 984     session = DBConn().session()
 985     # Process suite
 986     if Options["Suite"]:
 987         suite_ids_list = []
 988         for suitename in split_args(Options["Suite"]):
 989             suite = get_suite(suitename, session=session)
 990             if not suite or suite.suite_id is None:
 991                 warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
 992             else:
 993                 suite_ids_list.append(suite.suite_id)
 994         if suite_ids_list:
 995             con_suites = "AND su.id IN (%s)" % ", ".join([ str(i) for i in suite_ids_list ])
 996         else:
 997             fubar("No valid suite given.")
 998     else:
 999         con_suites = ""
1000
1001     # Process component
1002     if Options["Component"]:
1003         component_ids_list = []
1004         for componentname in split_args(Options["Component"]):
1005             component = get_component(componentname, session=session)
1006             if component is None:
1007                 warn("component '%s' not recognised." % (componentname))
1008             else:
1009                 component_ids_list.append(component.component_id)
1010         if component_ids_list:
1011             con_components = "AND c.id IN (%s)" % ", ".join([ str(i) for i in component_ids_list ])
1012         else:
1013             fubar("No valid component given.")
1014     else:
1015         con_components = ""
1016
1017     # Process architecture
1018     con_architectures = ""
1019     check_source = 0
1020     if Options["Architecture"]:
1021         arch_ids_list = []
1022         for archname in split_args(Options["Architecture"]):
1023             if archname == "source":
1024                 check_source = 1
1025             else:
1026                 arch = get_architecture(archname, session=session)
1027                 if arch is None:
1028                     warn("architecture '%s' not recognised." % (archname))
1029                 else:
1030                     arch_ids_list.append(arch.arch_id)
1031         if arch_ids_list:
1032             con_architectures = "AND a.id IN (%s)" % ", ".join([ str(i) for i in arch_ids_list ])
1033         else:
1034             if not check_source:
1035                 fubar("No valid architecture given.")
1036     else:
1037         check_source = 1
1038
1039     return (con_suites, con_architectures, con_components, check_source)
1040
1041 ################################################################################
1042
def arch_compare_sw (a, b):
    """
    cmp-style function for sorting lists of architectures.

    'source' sorts before every other architecture; everything else is
    ordered by cmp().
    """
    if a == "source":
        if b == "source":
            return 0
        return -1
    if b == "source":
        return 1
    return cmp (a, b)
1058
1059 ################################################################################
1060
def split_args (s, dwim=1):
    """
    Split command line arguments which can be separated by either commas
    or whitespace.

    A string without any comma is split on whitespace, otherwise it is
    split on commas only.  If dwim is set, a string ending in a comma is a
    fatal error, since this usually means someone did 'dak ls -a i386, m68k
    foo' or something and the inevitable confusion resulting from 'm68k'
    being treated as an argument is undesirable.
    """
    if "," not in s:
        return s.split()
    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1076
1077 ################################################################################
1078
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    Runs 'cmd' through '/bin/sh -c' in a forked child and collects what
    the child writes until none of the watched descriptors yields data.

    @type cmd: str
    @param cmd: shell command line to execute

    @type status_read: int
    @param status_read: descriptor the status data is read from (callers
                        in this file pass the read end of an os.pipe())

    @type status_write: int
    @param status_write: the matching write end; it is the only descriptor
                         in the range 3-255 left open in the child

    @rtype: tuple
    @return: (output, status, exit_status): the data read from the child's
             stdout and stderr (in arrival order), the data read from
             'status_read', and the status value returned by os.waitpid()
    """

    cmd = ['/bin/sh', '-c', cmd]
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # Rewire stdin/stdout/stderr onto the pipes; this relies on dup()
        # handing out the lowest free descriptor (0, then 1, then 2).
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        os.close(2)
        os.dup(errin)
        # Drop everything else we inherited, except the status fd; errors
        # (e.g. descriptor not open) are deliberately ignored.
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        try:
            os.execvp(cmd[0], cmd)
        finally:
            # Only reached when the exec itself failed
            os._exit(1)

    # Parent
    os.close(p2cread)
    # dup2(a, b) makes b a copy of a: from here on c2pwrite and errin refer
    # to the *read* ends of their pipes in this process (the parent's
    # copies of the write ends are closed as a side effect).
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        # Block until at least one of the three descriptors is readable
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            # NOTE(review): 8196 is presumably meant to be 8192; it is only
            # an upper bound for a single read.
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                if fd == c2pwrite or fd == errin:
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        if not more_data:
            # Every readable descriptor returned no data: reap the child
            # and clean up.
            pid, exit_status = os.waitpid(pid, 0)
            try:
                # Best effort only: the first close() that fails skips the
                # remaining ones.
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                pass
            break

    return output, status, exit_status
1143
1144 ################################################################################
1145
def process_gpgv_output(status):
    """
    Parse the text gpgv wrote to its status fd.

    Every non-blank line has to look like "[GNUPG:] KEYWORD [args...]".
    Malformed lines and unexpected duplicate keywords are skipped and
    reported in the returned error text.

    @type status: str
    @param status: the raw status-fd output

    @rtype: tuple
    @return: (keywords, internal_error): 'keywords' maps each status keyword
             to the list of its arguments; 'internal_error' is "" or one
             newline-terminated message per problem found
    """
    # Keywords that may legitimately show up more than once; the arguments
    # of the last occurrence win.
    repeatable = ("NODATA", "SIGEXPIRED", "KEYEXPIRED")

    keywords = {}
    internal_error = ""
    for line in status.split('\n'):
        line = line.strip()
        if line == "":
            continue
        split = line.split()
        if len(split) < 2:
            internal_error += "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line)
            continue
        (gnupg, keyword) = split[:2]
        if gnupg != "[GNUPG:]":
            internal_error += "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg)
            continue
        if keyword in keywords and keyword not in repeatable:
            internal_error += "found duplicate status token ('%s').\n" % (keyword)
            continue
        keywords[keyword] = split[2:]

    return (keywords, internal_error)
1170
1171 ################################################################################
1172
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Retrieve the key that signed 'filename' from 'keyserver' and
    add it to 'keyring'.

    'keyserver' defaults to the Dinstall::KeyServer setting and 'keyring'
    to get_primary_keyring_path().

    @rtype: str
    @return: "" on success, or an error message on error
    """

    # Fall back to the configured keyserver and the primary keyring
    keyserver = keyserver or Cnf["Dinstall::KeyServer"]
    keyring = keyring or get_primary_keyring_path()

    # The filename ends up on a shell command line: refuse anything with
    # shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Let gpgv look at the file without any usable keyring, so that it
    # reports the missing key on its status fd
    status_read, status_write = os.pipe()
    gpgv_cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    status = gpgv_get_status_output(gpgv_cmd, status_read, status_write)[1]

    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"

    fingerprint = keywords["NO_PUBKEY"][0]
    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    cmd = ("gpg --no-default-keyring --secret-keyring=%s --no-options"
           " --keyring %s --keyserver %s --recv-key %s"
           % (Cnf["Dinstall::SigningKeyring"], keyring, keyserver, fingerprint))
    result = commands.getstatusoutput(cmd)[0]
    if result != 0:
        return "'%s' failed with exit code %s" % (cmd, result)

    return ""
1216
1217 ################################################################################
1218
def gpg_keyring_args(keyrings=None):
    """
    Build the "--keyring X --keyring Y ..." command line fragment.

    @param keyrings: keyring paths to use; when empty or None the result
                     of get_active_keyring_paths() is used instead

    @rtype: str
    @return: one "--keyring <path>" option per keyring, space separated
    """
    selected = keyrings or get_active_keyring_paths()
    options = []
    for path in selected:
        options.append("--keyring %s" % path)
    return " ".join(options)
1224
1225 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type sig_filename: str
    @param sig_filename: name of the file whose signature should be checked

    @type data_filename: str
    @param data_filename: optional; name of the file a detached signature
                          applies to

    @type keyrings: list
    @param keyrings: optional *list* of keyrings to use; by default the
                     names of all Keyring rows flagged active are used

    @type autofetch: bool or None
    @param autofetch: whether to try to fetch the signing key with
                      retrieve_key() first.  If None, the
                      Dinstall::KeyAutoFetch setting is used.

    @param session: database session used to look up the default keyrings
                    (presumably supplied by the session_wrapper decorator
                    when omitted - verify against dbconn)

    @rtype: tuple
    @return: (fingerprint, rejects).  For a valid signature 'fingerprint'
             is the first VALIDSIG argument and 'rejects' is empty;
             otherwise 'fingerprint' is None and 'rejects' lists the
             reasons (there can be several).
    """

    rejects = []
    fingerprint = None

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        # '== True' is on purpose: this builds an SQLAlchemy filter expression
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        # list.append() takes exactly one argument; the extra "" arguments
        # of the former reject() callback made these two calls raise
        # TypeError.
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # Don't trip over a status line that carries no key id
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate = ""
        if len(args) >= 1:
            timestamp = args[0]
            if timestamp.count("T") == 0:
                # No "T": treat it as seconds since the epoch
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                # Contains a "T": pass the timestamp through unchanged
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = frozenset([
        "VALIDSIG", "SIG_ID", "GOODSIG", "BADSIG", "ERRSIG",
        "SIGEXPIRED", "KEYREVOKED", "NO_PUBKEY", "BADARMOR",
        "NODATA", "NOTATION_DATA", "NOTATION_NAME", "KEYEXPIRED", "POLICY_URL"])

    for keyword in keywords:
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        return (fingerprint, [])
1358
1359 ################################################################################
1360
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve the email addresses from the gpg key uids of a fingerprint.

    The result is kept in key_uid_email_cache, so gpg is only run once per
    fingerprint.  If gpg fails an empty list is returned (and cached).

    @rtype: list
    @return: addresses as UTF-8 encoded byte strings, those ending in
             @debian.org first
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    found = []
    try:
        with open(os.devnull, "wb") as devnull:
            gpg_cmd = ["gpg", "--no-default-keyring"]
            gpg_cmd += gpg_keyring_args().split()
            gpg_cmd += ["--with-colons", "--list-keys", fingerprint]
            output = daklib.daksubprocess.check_output(gpg_cmd, stderr=devnull)
    except subprocess.CalledProcessError:
        pass
    else:
        for record in output.split('\n'):
            fields = record.split(':')
            # Only "uid" and "pub" records are used, and the user id is
            # taken from the tenth field
            if fields[0] not in ("uid", "pub") or len(fields) < 10:
                continue
            raw_uid = fields[9]
            try:
                # Do not use unicode_escape, because it is locale-specific
                uid = codecs.decode(raw_uid, "string_escape").decode("utf-8")
            except UnicodeDecodeError:
                uid = raw_uid.decode("latin1") # does not fail
            parsed = re_parse_maintainer.match(uid)
            if not parsed:
                continue
            address = parsed.group(2).encode("utf8") # dak still uses bytes
            if address.endswith('@debian.org'):
                # prefer @debian.org addresses
                # TODO: maybe not hardcode the domain
                found.insert(0, address)
            else:
                found.append(address)

    key_uid_email_cache[fingerprint] = found
    return found
1401
1402 ################################################################################
1403
def get_logins_from_ldap(fingerprint='*'):
    """
    Retrieve the logins linked to a given fingerprint from LDAP.

    Server and base DN are taken from the Import-LDAP-Fingerprints
    configuration section; the bind is anonymous.

    @type fingerprint: str
    @param fingerprint: value for the keyfingerprint search filter; the
                        default '*' matches every entry that has one

    @rtype: dict
    @return: maps the first keyFingerPrint value of each entry found to
             its first uid value
    """
    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    connection.simple_bind_s('','')
    search_filter = '(keyfingerprint=%s)' % fingerprint
    results = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  search_filter,
                                  ['uid', 'keyfingerprint'])
    logins = {}
    for entry in results:
        attributes = entry[1]
        uid = attributes['uid'][0]
        logins[attributes['keyFingerPrint'][0]] = uid
    return logins
1418
1419 ################################################################################
1420
def get_users_from_ldap():
    """
    Retrieve login and user names from LDAP.

    Server and base DN are taken from the Import-LDAP-Fingerprints
    configuration section; the bind is anonymous.

    @rtype: dict
    @return: maps the user's name to the first uid value.  The name is
             built from the first cn, mn and sn values, joined by spaces;
             attributes that are missing or set to '-' are left out.
    """
    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    connection.simple_bind_s('','')
    results = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  '(uid=*)', ['uid', 'cn', 'mn', 'sn'])
    users = {}
    for entry in results:
        attributes = entry[1]
        name_parts = []
        for attribute in ('cn', 'mn', 'sn'):
            try:
                value = attributes[attribute][0]
            except KeyError:
                continue
            if value != '-':
                name_parts.append(value)
        users[' '.join(name_parts)] = attributes['uid'][0]
    return users
1442
1443 ################################################################################
1444
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of 'root' is removed from both paths; the result
    is 'src' prefixed with one '../' for every '/'-separated component of
    the directory part of 'dest'.

    @rtype: str
    @return: the fixed 'src'
    """
    relative_src = src.replace(root, '', 1)
    dest_dir = os.path.dirname(dest.replace(root, '', 1))
    depth = dest_dir.count('/') + 1
    return ('../' * depth) + relative_src
1455
1456 ################################################################################
1457
def temp_filename(directory=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique filename by pre-creating it
    (tempfile.mkstemp).

    @type directory: str
    @param directory: If non-null it will be the directory the file is pre-created in.

    @type prefix: str
    @param prefix: The filename will be prefixed with this string

    @type suffix: str
    @param suffix: The filename will end with this string

    @type mode: int
    @param mode: If set the file will get chmodded to those permissions

    @type group: str
    @param group: If set the file will get chgrped to the specified group.

    @rtype: tuple
    @return: Returns a pair (fd, name); the descriptor is open and has to
             be closed by the caller
    """

    (fd, path) = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
    if mode:
        os.chmod(path, mode)
    if group:
        os.chown(path, -1, grp.getgrnam(group).gr_gid)
    return (fd, path)
1488
1489 ################################################################################
1490
def temp_dirname(parent=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique directory by pre-creating it
    (tempfile.mkdtemp).

    @type parent: str
    @param parent: If non-null it will be the directory the directory is pre-created in.

    @type prefix: str
    @param prefix: The directory name will be prefixed with this string

    @type suffix: str
    @param suffix: The directory name will end with this string

    @type mode: int
    @param mode: If set the directory will get chmodded to those permissions

    @type group: str
    @param group: If set the directory will get chgrped to the specified group.

    @rtype: str
    @return: Returns the name of the new directory

    """

    path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=parent)
    if mode:
        os.chmod(path, mode)
    if group:
        os.chown(path, -1, grp.getgrnam(group).gr_gid)
    return path
1522
1523 ################################################################################
1524
def is_email_alias(email):
    """
    Check if the user part of the email is listed in the alias file.

    The alias file (as named by which_alias_file()) is read on the first
    call only; the names in front of the first ':' of every line are kept
    in the module-level alias_cache.

    @type email: str
    @param email: the address to check; everything before the first '@' is
                  looked up

    @rtype: bool
    @return: True if the user part is a known alias
    """
    global alias_cache
    if alias_cache is None:
        aliases = set()
        aliasfn = which_alias_file()
        if aliasfn:
            # Close the file again instead of leaking the handle
            with open(aliasfn) as aliasfile:
                for l in aliasfile:
                    aliases.add(l.split(':')[0])
        # Only publish the cache once it is complete, so that a failed
        # read is retried on the next call instead of leaving an empty
        # set behind.
        alias_cache = aliases
    uid = email.split('@')[0]
    return uid in alias_cache
1536
1537 ################################################################################
1538
def get_changes_files(from_dir):
    """
    Takes a directory and lists all .changes files in it (as well as chdir'ing
    to the directory; this is due to broken behaviour on the part of p-u/p-a
    when you're not in the right place)

    @type from_dir: str
    @param from_dir: the directory to change to and to list; may be
                     relative to the current directory

    @rtype: list
    @return: Returns a list of filenames (without directory part).  If the
             directory cannot be entered or read, fubar() is called.
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        # We are inside the directory now, so list "." - listing 'from_dir'
        # again would resolve a relative path a second time.
        changes_files = [x for x in os.listdir(os.curdir) if x.endswith('.changes')]
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return changes_files
1555
1556 ################################################################################
1557
# Module-level configuration object; read by helpers in this module such
# as get_conf(), retrieve_key() and check_signature().
Cnf = config.Config().Cnf
1559
1560 ################################################################################
1561
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at http://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parses a local copy, but let's document the source
    somewhere ;)

    Every line is expected to look like "source: entry|entry|..."; lines
    without ": " are ignored.  The first run of digits of each entry is
    taken as the bug number, entries without any digit are skipped.

    @type file: str
    @param file: path of the local copy of the list

    @rtype: dict
    @return: associates each source package name with a list of open wnpp
             bugs, as strings (Yes, there might be more than one).  If the
             file can't be opened a warning is printed and the dict is
             empty.
    """

    try:
        with open(file) as f:
            lines = f.readlines()
    except IOError:
        # Single parenthesised argument: prints the same with and without
        # print_function
        print("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any." % file)
        lines = []

    wnpp = {}
    for line in lines:
        splited_line = line.split(": ", 1)
        if len(splited_line) < 2:
            continue
        (source, entries) = splited_line
        bugs = []
        for wnpp_bug in entries.split("|"):
            found = re.search(r"\d+", wnpp_bug)
            # re.search() returns None for an entry without any digit
            if found:
                bugs.append(found.group())
        wnpp[source] = bugs
    return wnpp
1594
1595 ################################################################################
1596
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    aggregating the Packages.gz of an architecture with the matching
    debian-installer Packages.gz (if that one exists).

    The data is unpacked with gunzip into a temporary file, which is
    removed again before returning; a failing gunzip is fatal (fubar()).

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    (fd, temp_file) = temp_filename()
    # The file is only written through shell redirection and re-opened by
    # name below, so our own descriptor is not needed - don't leak it.
    os.close(fd)
    try:
        filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
        (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
        if (result != 0):
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        # The debian-installer packages are optional and get appended
        filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
        if os.path.exists(filename):
            (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
            if (result != 0):
                fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        packages = open_file(temp_file)
        Packages = apt_pkg.TagFile(packages)
    finally:
        # Also runs when fubar() exits, so no temporary file is left behind
        os.unlink(temp_file)
    return Packages
1631
1632 ################################################################################
1633
def deb_extract_control(fh):
    """extract DEBIAN/control from a binary package

    @param fh: binary package, handed unchanged to apt_inst.DebFile

    @return: data of the "control" member of the package's control archive
    """
    deb = apt_inst.DebFile(fh)
    control_archive = deb.control
    return control_archive.extractdata("control")
1637
1638 ################################################################################
1639
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """collect the mail addresses that should be contacted about an upload

    The maintainer is always included, the person named in Changed-By is
    included when different from the maintainer.  The first address found
    for the signing key is added as well, unless one of the key's
    addresses already belongs to the maintainer or to Changed-By.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if maintainer != changed_by:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    if len(key_addresses) >= 1:
        # Element 3 of fix_maintainer()'s result is compared with the key's
        # addresses (presumably the bare e-mail address - verify against
        # fix_maintainer).  Changed-By is looked at first; the maintainer
        # is only parsed here when Changed-By did not match.
        changed_by_has_key = fix_maintainer(changed_by)[3] in key_addresses
        if not changed_by_has_key:
            if fix_maintainer(maintainer)[3] not in key_addresses:
                recipients.append(key_addresses[0])

    encoded = []
    for address in recipients:
        encoded.append(fix_maintainer(address)[1])
    return encoded
1666
1667 ################################################################################
1668
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The text is written to a temporary file, the editor named by
    $VISUAL (or $EDITOR, or "vi" if neither is set) is run on that file
    and the file's contents are read back afterwards.  The temporary
    file is removed in every case.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text
    """
    editor = os.environ.get('VISUAL', os.environ.get('EDITOR', 'vi'))
    # delete=False: the file has to survive being closed so that the
    # editor can open it by name; it is removed in the finally clause.
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        # Close the file before the editor starts, even if writing fails.
        with tmp:
            tmp.write(text)
        daklib.daksubprocess.check_call([editor, tmp.name])
        # Read the result through a handle that is closed deterministically
        # instead of relying on garbage collection.
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        os.unlink(tmp.name)
1690
1691 ################################################################################
1692
def _print_broken_dependencies(title, broken, cruft):
    """print one section of the broken dependencies report

    @type  title: str
    @param title: kind of dependency used in the heading, e.g. "Depends"

    @type  broken: list of (str, list of str)
    @param broken: pairs of source name and the non-empty list of lines
                   to print for it, in output order

    @type  cruft: bool
    @param cruft: if true, print an indented "  - broken ...:" section
                  without a trailing blank line; otherwise print a
                  "# Broken ...:" section followed by a blank line
    """
    if cruft:
        indent = "    "
        print("  - broken %s:" % title)
    else:
        indent = ""
        print("# Broken %s:" % title)
    for source, lines in broken:
        print("%s%s: %s" % (indent, source, lines[0]))
        # Further lines are aligned below the first entry after "source: ".
        continuation = indent + " " * (len(source) + 2)
        for line in lines[1:]:
            print(continuation + line)
    if not cruft:
        print("")

def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
    """check which dependencies would break if packages were removed

    Looks at the Depends of all binaries and at the Build-Depends and
    Build-Depends-Indep of all sources associated with the suite.  A
    dependency counts as broken when every alternative of an ORed
    dependency is going to be removed.  Virtual packages that are
    provided only by to-be-removed packages are treated as removed too.
    The broken dependencies are printed to stdout.

    @type  removals: iterable of str
    @param removals: names of the packages to be removed; the argument
                     itself is not modified

    @type  suite: str
    @param suite: name of the suite to check

    @type  arches: iterable of str
    @param arches: architectures to check; if empty or None, the
                   architectures returned by get_suite_architectures()
                   for the suite are used.  "source" and "all" are
                   dropped from the list, "all" is always checked.

    @type  session: SQLAlchemy session
    @param session: database session used for all queries.  The default
                    is None, but session.query() is called on it
                    directly, so callers have to pass a session.

    @type  cruft: bool
    @param cruft: if true, print the report as an indented list
                  ("  - broken Depends:") without blank lines after the
                  sections instead of using "# Broken Depends:" headings

    @rtype:  int
    @return: 1 if a broken dependency was found, 0 otherwise
    """
    dbsuite = get_suite(suite, session)
    overridesuite = dbsuite
    if dbsuite.overridesuite is not None:
        overridesuite = get_suite(dbsuite.overridesuite, session)
    dep_problem = 0
    p2c = {}
    if arches:
        all_arches = set(arches)
    else:
        all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
    all_arches -= set(["source", "all"])
    # Work on a private set: virtual packages get added below, which must
    # not leak into the caller's list, and membership is tested in the
    # innermost loops.
    removal_set = set(removals)

    # Check binary dependencies (Depends)
    broken_depends = {}
    metakey_d = get_or_set_metadatakey("Depends", session)
    metakey_p = get_or_set_metadatakey("Provides", session)
    params = {
        'suite_id':     dbsuite.suite_id,
        'metakey_d_id': metakey_d.key_id,
        'metakey_p_id': metakey_p.key_id,
    }
    for architecture in all_arches | set(['all']):
        deps = {}
        sources = {}
        virtual_packages = {}
        params['arch_id'] = get_architecture(architecture, session).arch_id

        statement = '''
            SELECT b.id, b.package, s.source, c.name as component,
                (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
                (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
                FROM binaries b
                JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
                JOIN source s ON b.source = s.id
                JOIN files_archive_map af ON b.file = af.file_id
                JOIN component c ON af.component_id = c.id
                WHERE b.architecture = :arch_id'''
        query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
            from_statement(statement).params(params)
        for _, package, source, component, depends, provides in query:
            sources[package] = source
            p2c[package] = component
            if depends is not None:
                deps[package] = depends
            # Maintain a counter for each virtual package.  If a
            # Provides: exists, set the counter to 0 and count all
            # provides by a package not in the list for removal.
            # If the counter stays 0 at the end, we know that only
            # the to-be-removed packages provided this virtual
            # package.
            if provides is not None:
                for virtual_pkg in provides.split(","):
                    virtual_pkg = virtual_pkg.strip()
                    if virtual_pkg == package:
                        continue
                    virtual_packages.setdefault(virtual_pkg, 0)
                    if package not in removal_set:
                        virtual_packages[virtual_pkg] += 1

        # If a virtual package is only provided by the to-be-removed
        # packages, treat the virtual package as to-be-removed too.
        # (It stays in the set for the architectures checked afterwards.)
        for virtual_pkg, providers in virtual_packages.items():
            if providers == 0:
                removal_set.add(virtual_pkg)

        for package, depends in deps.items():
            if package in removal_set:
                continue
            try:
                parsed_dep = apt_pkg.parse_depends(depends)
            except ValueError as e:
                print("Error for package %s: %s" % (package, e))
                continue
            for dep in parsed_dep:
                # Check for partial breakage.  If a package has a ORed
                # dependency, there is only a dependency problem if all
                # packages in the ORed depends will be removed.
                if not all(dep_package in removal_set for dep_package, _, _ in dep):
                    continue
                component = p2c[package]
                source = sources[package]
                if component != "main":
                    source = "%s/%s" % (source, component)
                broken_depends.setdefault(source, {}).setdefault(package, set()).add(architecture)
                dep_problem = 1

    if broken_depends:
        report = []
        for source, bindict in sorted(broken_depends.items()):
            lines = []
            for binary, broken_arches in sorted(bindict.items()):
                # The architecture list is only shown when the package is
                # not broken everywhere.
                if broken_arches == all_arches or 'all' in broken_arches:
                    lines.append(binary)
                else:
                    lines.append('%s [%s]' % (binary, ' '.join(sorted(broken_arches))))
            report.append((source, lines))
        _print_broken_dependencies("Depends", report, cruft)

    # Check source dependencies (Build-Depends and Build-Depends-Indep)
    broken_build_depends = {}
    metakey_bd = get_or_set_metadatakey("Build-Depends", session)
    metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
    params = {
        'suite_id':    dbsuite.suite_id,
        'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
    }
    statement = '''
        SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
           FROM source s
           JOIN source_metadata sm ON s.id = sm.src_id
           WHERE s.id in
               (SELECT source FROM src_associations
                   WHERE suite = :suite_id)
               AND sm.key_id in :metakey_ids
           GROUP BY s.id, s.source'''
    query = session.query('id', 'source', 'build_dep').from_statement(statement). \
        params(params)
    for _, source, build_dep in query:
        if source in removal_set:
            continue
        parsed_dep = []
        if build_dep is not None:
            # Remove [arch] information since we want to see breakage on all arches
            build_dep = re_build_dep_arch.sub("", build_dep)
            try:
                parsed_dep = apt_pkg.parse_depends(build_dep)
            except ValueError as e:
                print("Error for source %s: %s" % (source, e))
        # Name the source is reported under; looked up only once, when the
        # first broken build-dependency of this source is found.
        key = None
        for dep in parsed_dep:
            if not all(dep_package in removal_set for dep_package, _, _ in dep):
                continue
            if key is None:
                # NOTE(review): .first() yields None when the source has no
                # 'dsc' override in the override suite, which makes the
                # unpacking below fail - confirm that cannot happen.
                component, = session.query(Component.component_name) \
                    .join(Component.overrides) \
                    .filter(Override.suite == overridesuite) \
                    .filter(Override.package == re.sub('/(contrib|non-free)$', '', source)) \
                    .join(Override.overridetype).filter(OverrideType.overridetype == 'dsc') \
                    .first()
                key = source
                if component != "main":
                    key = "%s/%s" % (source, component)
            broken_build_depends.setdefault(key, set()).add(pp_deps(dep))
            dep_problem = 1

    if broken_build_depends:
        report = [(source, sorted(bdeps))
                  for source, bdeps in sorted(broken_build_depends.items())]
        _print_broken_dependencies("Build-Depends", report, cruft)

    return dep_problem