daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import datetime
  27 import email.Header
  28 import os
  29 import pwd
  30 import grp
  31 import select
  32 import socket
  33 import shutil
  34 import sys
  35 import tempfile
  36 import traceback
  37 import stat
  38 import apt_inst
  39 import apt_pkg
  40 import time
  41 import re
  42 import email as modemail
  43 import subprocess
  44 import ldap
  45
  46 import daklib.config as config
  47 import daklib.daksubprocess
  48 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  49                    get_override_type, Keyring, session_wrapper, \
  50                    get_active_keyring_paths, get_primary_keyring_path, \
  51                    get_suite_architectures, get_or_set_metadatakey, DBSource, \
  52                    Component, Override, OverrideType
  53 from sqlalchemy import desc
  54 from dak_exceptions import *
  55 from gpg import SignedFile
  56 from textutils import fix_maintainer
  57 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  58                     re_multi_line_field, re_srchasver, re_taint_free, \
  59                     re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
  60                     re_is_orig_source, re_build_dep_arch
  61
  62 from formats import parse_format, validate_changes_format
  63 from srcformats import get_format_from_string
  64 from collections import defaultdict
  65
  66 ################################################################################
  67
  68 default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties
  69
  70 alias_cache = None        #: Cache for email alias checks
  71 key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids
  72
  73 # (hashname, function, earliest_changes_version)
  74 known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
  75                 ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  76
  77 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  78 # code in lenny's Python. This also affects commands.getoutput and
  79 # commands.getstatus.
  80 def dak_getstatusoutput(cmd):
  81     pipe = daklib.daksubprocess.Popen(cmd, shell=True, universal_newlines=True,
  82         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  83
  84     output = pipe.stdout.read()
  85
  86     pipe.wait()
  87
  88     if output[-1:] == '\n':
  89         output = output[:-1]
  90
  91     ret = pipe.wait()
  92     if ret is None:
  93         ret = 0
  94
  95     return ret, output
  96 commands.getstatusoutput = dak_getstatusoutput
  97
  98 ################################################################################
  99
 100 def html_escape(s):
 101     """ Escape html chars """
 102     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 103
 104 ################################################################################
 105
 106 def open_file(filename, mode='r'):
 107     """
 108     Open C{file}, return fileobject.
 109
 110     @type filename: string
 111     @param filename: path/filename to open
 112
 113     @type mode: string
 114     @param mode: open mode
 115
 116     @rtype: fileobject
 117     @return: open fileobject
 118
 119     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 120
 121     """
 122     try:
 123         f = open(filename, mode)
 124     except IOError:
 125         raise CantOpenError(filename)
 126     return f
 127
 128 ################################################################################
 129
 130 def our_raw_input(prompt=""):
 131     if prompt:
 132         while 1:
 133             try:
 134                 sys.stdout.write(prompt)
 135                 break
 136             except IOError:
 137                 pass
 138     sys.stdout.flush()
 139     try:
 140         ret = raw_input()
 141         return ret
 142     except EOFError:
 143         sys.stderr.write("\nUser interrupt (^D).\n")
 144         raise SystemExit
 145
 146 ################################################################################
 147
 148 def extract_component_from_section(section, session=None):
 149     component = ""
 150
 151     if section.find('/') != -1:
 152         component = section.split('/')[0]
 153
 154     # Expand default component
 155     if component == "":
 156         comp = get_component(section, session)
 157         if comp is None:
 158             component = "main"
 159         else:
 160             component = comp.component_name
 161
 162     return (section, component)
 163
 164 ################################################################################
 165
 166 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 167     require_signature = True
 168     if keyrings == None:
 169         keyrings = []
 170         require_signature = False
 171
 172     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 173     contents = signed_file.contents
 174
 175     error = ""
 176     changes = {}
 177
 178     # Split the lines in the input, keeping the linebreaks.
 179     lines = contents.splitlines(True)
 180
 181     if len(lines) == 0:
 182         raise ParseChangesError("[Empty changes file]")
 183
 184     # Reindex by line number so we can easily verify the format of
 185     # .dsc files...
 186     index = 0
 187     indexed_lines = {}
 188     for line in lines:
 189         index += 1
 190         indexed_lines[index] = line[:-1]
 191
 192     num_of_lines = len(indexed_lines.keys())
 193     index = 0
 194     first = -1
 195     while index < num_of_lines:
 196         index += 1
 197         line = indexed_lines[index]
 198         if line == "" and signing_rules == 1:
 199             if index != num_of_lines:
 200                 raise InvalidDscError(index)
 201             break
 202         slf = re_single_line_field.match(line)
 203         if slf:
 204             field = slf.groups()[0].lower()
 205             changes[field] = slf.groups()[1]
 206             first = 1
 207             continue
 208         if line == " .":
 209             changes[field] += '\n'
 210             continue
 211         mlf = re_multi_line_field.match(line)
 212         if mlf:
 213             if first == -1:
 214                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 215             if first == 1 and changes[field] != "":
 216                 changes[field] += '\n'
 217             first = 0
 218             changes[field] += mlf.groups()[0] + '\n'
 219             continue
 220         error += line
 221
 222     changes["filecontents"] = armored_contents
 223
 224     if changes.has_key("source"):
 225         # Strip the source version in brackets from the source field,
 226         # put it in the "source-version" field instead.
 227         srcver = re_srchasver.search(changes["source"])
 228         if srcver:
 229             changes["source"] = srcver.group(1)
 230             changes["source-version"] = srcver.group(2)
 231
 232     if error:
 233         raise ParseChangesError(error)
 234
 235     return changes
 236
 237 ################################################################################
 238
 239 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 240     """
 241     Parses a changes file and returns a dictionary where each field is a
 242     key.  The mandatory first argument is the filename of the .changes
 243     file.
 244
 245     signing_rules is an optional argument:
 246
 247       - If signing_rules == -1, no signature is required.
 248       - If signing_rules == 0 (the default), a signature is required.
 249       - If signing_rules == 1, it turns on the same strict format checking
 250         as dpkg-source.
 251
 252     The rules for (signing_rules == 1)-mode are:
 253
 254       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 255         followed by any PGP header data and must end with a blank line.
 256
 257       - The data section must end with a blank line and must be followed by
 258         "-----BEGIN PGP SIGNATURE-----".
 259     """
 260
 261     changes_in = open_file(filename)
 262     content = changes_in.read()
 263     changes_in.close()
 264     try:
 265         unicode(content, 'utf-8')
 266     except UnicodeError:
 267         raise ChangesUnicodeError("Changes file not proper utf-8")
 268     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 269
 270
 271     if not dsc_file:
 272         # Finally ensure that everything needed for .changes is there
 273         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 274                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 275
 276         missingfields=[]
 277         for keyword in must_keywords:
 278             if not changes.has_key(keyword.lower()):
 279                 missingfields.append(keyword)
 280
 281                 if len(missingfields):
 282                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 283
 284     return changes
 285
 286 ################################################################################
 287
 288 def hash_key(hashname):
 289     return '%ssum' % hashname
 290
 291 ################################################################################
 292
 293 def create_hash(where, files, hashname, hashfunc):
 294     """
 295     create_hash extends the passed files dict with the given hash by
 296     iterating over all files on disk and passing them to the hashing
 297     function given.
 298     """
 299
 300     rejmsg = []
 301     for f in files.keys():
 302         try:
 303             file_handle = open_file(f)
 304         except CantOpenError:
 305             rejmsg.append("Could not open file %s for checksumming" % (f))
 306             continue
 307
 308         files[f][hash_key(hashname)] = hashfunc(file_handle)
 309
 310         file_handle.close()
 311     return rejmsg
 312
 313 ################################################################################
 314
 315 def check_hash(where, files, hashname, hashfunc):
 316     """
 317     check_hash checks the given hash in the files dict against the actual
 318     files on disk.  The hash values need to be present consistently in
 319     all file entries.  It does not modify its input in any way.
 320     """
 321
 322     rejmsg = []
 323     for f in files.keys():
 324         file_handle = None
 325         try:
 326             try:
 327                 file_handle = open_file(f)
 328
 329                 # Check for the hash entry, to not trigger a KeyError.
 330                 if not files[f].has_key(hash_key(hashname)):
 331                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 332                         where))
 333                     continue
 334
 335                 # Actually check the hash for correctness.
 336                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 337                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 338                         where))
 339             except CantOpenError:
 340                 # TODO: This happens when the file is in the pool.
 341                 # warn("Cannot open file %s" % f)
 342                 continue
 343         finally:
 344             if file_handle:
 345                 file_handle.close()
 346     return rejmsg
 347
 348 ################################################################################
 349
 350 def check_size(where, files):
 351     """
 352     check_size checks the file sizes in the passed files dict against the
 353     files on disk.
 354     """
 355
 356     rejmsg = []
 357     for f in files.keys():
 358         try:
 359             entry = os.stat(f)
 360         except OSError as exc:
 361             if exc.errno == 2:
 362                 # TODO: This happens when the file is in the pool.
 363                 continue
 364             raise
 365
 366         actual_size = entry[stat.ST_SIZE]
 367         size = int(files[f]["size"])
 368         if size != actual_size:
 369             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 370                    % (f, actual_size, size, where))
 371     return rejmsg
 372
 373 ################################################################################
 374
 375 def check_dsc_files(dsc_filename, dsc, dsc_files):
 376     """
 377     Verify that the files listed in the Files field of the .dsc are
 378     those expected given the announced Format.
 379
 380     @type dsc_filename: string
 381     @param dsc_filename: path of .dsc file
 382
 383     @type dsc: dict
 384     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 385
 386     @type dsc_files: dict
 387     @param dsc_files: the file list returned by C{build_file_list()}
 388
 389     @rtype: list
 390     @return: all errors detected
 391     """
 392     rejmsg = []
 393
 394     # Ensure .dsc lists proper set of source files according to the format
 395     # announced
 396     has = defaultdict(lambda: 0)
 397
 398     ftype_lookup = (
 399         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 400         (r'diff.gz',                   ('debian_diff',)),
 401         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 402         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 403         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 404         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 405         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 406     )
 407
 408     for f in dsc_files:
 409         m = re_issource.match(f)
 410         if not m:
 411             rejmsg.append("%s: %s in Files field not recognised as source."
 412                           % (dsc_filename, f))
 413             continue
 414
 415         # Populate 'has' dictionary by resolving keys in lookup table
 416         matched = False
 417         for regex, keys in ftype_lookup:
 418             if re.match(regex, m.group(3)):
 419                 matched = True
 420                 for key in keys:
 421                     has[key] += 1
 422                 break
 423
 424         # File does not match anything in lookup table; reject
 425         if not matched:
 426             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 427
 428     # Check for multiple files
 429     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 430         if has[file_type] > 1:
 431             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 432
 433     # Source format specific tests
 434     try:
 435         format = get_format_from_string(dsc['format'])
 436         rejmsg.extend([
 437             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 438         ])
 439
 440     except UnknownFormatError:
 441         # Not an error here for now
 442         pass
 443
 444     return rejmsg
 445
 446 ################################################################################
 447
 448 def check_hash_fields(what, manifest):
 449     """
 450     check_hash_fields ensures that there are no checksum fields in the
 451     given dict that we do not know about.
 452     """
 453
 454     rejmsg = []
 455     hashes = map(lambda x: x[0], known_hashes)
 456     for field in manifest:
 457         if field.startswith("checksums-"):
 458             hashname = field.split("-",1)[1]
 459             if hashname not in hashes:
 460                 rejmsg.append("Unsupported checksum field for %s "\
 461                     "in %s" % (hashname, what))
 462     return rejmsg
 463
 464 ################################################################################
 465
 466 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 467     if format >= version:
 468         # The version should contain the specified hash.
 469         func = check_hash
 470
 471         # Import hashes from the changes
 472         rejmsg = parse_checksums(".changes", files, changes, hashname)
 473         if len(rejmsg) > 0:
 474             return rejmsg
 475     else:
 476         # We need to calculate the hash because it can't possibly
 477         # be in the file.
 478         func = create_hash
 479     return func(".changes", files, hashname, hashfunc)
 480
 481 # We could add the orig which might be in the pool to the files dict to
 482 # access the checksums easily.
 483
 484 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 485     """
 486     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 487     in the dsc is correct, i.e. identical to the changes file and if necessary
 488     the pool.  The latter task is delegated to check_hash.
 489     """
 490
 491     rejmsg = []
 492     if not dsc.has_key('Checksums-%s' % (hashname,)):
 493         return rejmsg
 494     # Import hashes from the dsc
 495     parse_checksums(".dsc", dsc_files, dsc, hashname)
 496     # And check it...
 497     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 498     return rejmsg
 499
 500 ################################################################################
 501
 502 def parse_checksums(where, files, manifest, hashname):
 503     rejmsg = []
 504     field = 'checksums-%s' % hashname
 505     if not field in manifest:
 506         return rejmsg
 507     for line in manifest[field].split('\n'):
 508         if not line:
 509             break
 510         clist = line.strip().split(' ')
 511         if len(clist) == 3:
 512             checksum, size, checkfile = clist
 513         else:
 514             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 515             continue
 516         if not files.has_key(checkfile):
 517         # TODO: check for the file's entry in the original files dict, not
 518         # the one modified by (auto)byhand and other weird stuff
 519         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 520         #        (file, hashname, where))
 521             continue
 522         if not files[checkfile]["size"] == size:
 523             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 524                 "in %s" % (checkfile, hashname, where))
 525             continue
 526         files[checkfile][hash_key(hashname)] = checksum
 527     for f in files.keys():
 528         if not files[f].has_key(hash_key(hashname)):
 529             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 530     return rejmsg
 531
 532 ################################################################################
 533
 534 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 535
 536 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 537     files = {}
 538
 539     # Make sure we have a Files: field to parse...
 540     if not changes.has_key(field):
 541         raise NoFilesFieldError
 542
 543     # Validate .changes Format: field
 544     if not is_a_dsc:
 545         validate_changes_format(parse_format(changes['format']), field)
 546
 547     includes_section = (not is_a_dsc) and field == "files"
 548
 549     # Parse each entry/line:
 550     for i in changes[field].split('\n'):
 551         if not i:
 552             break
 553         s = i.split()
 554         section = priority = ""
 555         try:
 556             if includes_section:
 557                 (md5, size, section, priority, name) = s
 558             else:
 559                 (md5, size, name) = s
 560         except ValueError:
 561             raise ParseChangesError(i)
 562
 563         if section == "":
 564             section = "-"
 565         if priority == "":
 566             priority = "-"
 567
 568         (section, component) = extract_component_from_section(section)
 569
 570         files[name] = dict(size=size, section=section,
 571                            priority=priority, component=component)
 572         files[name][hashname] = md5
 573
 574     return files
 575
 576 ################################################################################
 577
 578 # see http://bugs.debian.org/619131
 579 def build_package_list(dsc, session = None):
 580     if not dsc.has_key("package-list"):
 581         return {}
 582
 583     packages = {}
 584
 585     for line in dsc["package-list"].split("\n"):
 586         if not line:
 587             break
 588
 589         fields = line.split()
 590         name = fields[0]
 591         package_type = fields[1]
 592         (section, component) = extract_component_from_section(fields[2])
 593         priority = fields[3]
 594
 595         # Validate type if we have a session
 596         if session and get_override_type(package_type, session) is None:
 597             # Maybe just warn and ignore? exit(1) might be a bit hard...
 598             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 599
 600         if name not in packages or packages[name]["type"] == "dsc":
 601             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 602
 603     return packages
 604
 605 ################################################################################
 606
 607 def send_mail (message, filename="", whitelists=None):
 608     """sendmail wrapper, takes _either_ a message string or a file as arguments
 609
 610     @type  whitelists: list of (str or None)
 611     @param whitelists: path to whitelists. C{None} or an empty list whitelists
 612                        everything, otherwise an address is whitelisted if it is
 613                        included in any of the lists.
 614                        In addition a global whitelist can be specified in
 615                        Dinstall::MailWhiteList.
 616     """
 617
 618     maildir = Cnf.get('Dir::Mail')
 619     if maildir:
 620         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 621         path = find_next_free(path)
 622         fh = open(path, 'w')
 623         print >>fh, message,
 624         fh.close()
 625
 626     # Check whether we're supposed to be sending mail
 627     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 628         return
 629
 630     # If we've been passed a string dump it into a temporary file
 631     if message:
 632         (fd, filename) = tempfile.mkstemp()
 633         os.write (fd, message)
 634         os.close (fd)
 635
 636     if whitelists is None or None in whitelists:
 637         whitelists = []
 638     if Cnf.get('Dinstall::MailWhiteList', ''):
 639         whitelists.append(Cnf['Dinstall::MailWhiteList'])
 640     if len(whitelists) != 0:
 641         message_in = open_file(filename)
 642         message_raw = modemail.message_from_file(message_in)
 643         message_in.close();
 644
 645         whitelist = [];
 646         for path in whitelists:
 647           with open_file(path, 'r') as whitelist_in:
 648             for line in whitelist_in:
 649                 if not re_whitespace_comment.match(line):
 650                     if re_re_mark.match(line):
 651                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 652                     else:
 653                         whitelist.append(re.compile(re.escape(line.strip())))
 654
 655         # Fields to check.
 656         fields = ["To", "Bcc", "Cc"]
 657         for field in fields:
 658             # Check each field
 659             value = message_raw.get(field, None)
 660             if value != None:
 661                 match = [];
 662                 for item in value.split(","):
 663                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 664                     mail_whitelisted = 0
 665                     for wr in whitelist:
 666                         if wr.match(email):
 667                             mail_whitelisted = 1
 668                             break
 669                     if not mail_whitelisted:
 670                         print "Skipping {0} since it's not whitelisted".format(item)
 671                         continue
 672                     match.append(item)
 673
 674                 # Doesn't have any mail in whitelist so remove the header
 675                 if len(match) == 0:
 676                     del message_raw[field]
 677                 else:
 678                     message_raw.replace_header(field, ', '.join(match))
 679
 680         # Change message fields in order if we don't have a To header
 681         if not message_raw.has_key("To"):
 682             fields.reverse()
 683             for field in fields:
 684                 if message_raw.has_key(field):
 685                     message_raw[fields[-1]] = message_raw[field]
 686                     del message_raw[field]
 687                     break
 688             else:
 689                 # Clean up any temporary files
 690                 # and return, as we removed all recipients.
 691                 if message:
 692                     os.unlink (filename);
 693                 return;
 694
 695         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 696         os.write (fd, message_raw.as_string(True));
 697         os.close (fd);
 698
 699     # Invoke sendmail
 700     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 701     if (result != 0):
 702         raise SendmailFailedError(output)
 703
 704     # Clean up any temporary files
 705     if message:
 706         os.unlink (filename)
 707
 708 ################################################################################
 709
 710 def poolify (source, component=None):
 711     if source[:3] == "lib":
 712         return source[:4] + '/' + source + '/'
 713     else:
 714         return source[:1] + '/' + source + '/'
 715
 716 ################################################################################
 717
 718 def move (src, dest, overwrite = 0, perms = 0o664):
 719     if os.path.exists(dest) and os.path.isdir(dest):
 720         dest_dir = dest
 721     else:
 722         dest_dir = os.path.dirname(dest)
 723     if not os.path.lexists(dest_dir):
 724         umask = os.umask(00000)
 725         os.makedirs(dest_dir, 0o2775)
 726         os.umask(umask)
 727     #print "Moving %s to %s..." % (src, dest)
 728     if os.path.exists(dest) and os.path.isdir(dest):
 729         dest += '/' + os.path.basename(src)
 730     # Don't overwrite unless forced to
 731     if os.path.lexists(dest):
 732         if not overwrite:
 733             fubar("Can't move %s to %s - file already exists." % (src, dest))
 734         else:
 735             if not os.access(dest, os.W_OK):
 736                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 737     shutil.copy2(src, dest)
 738     os.chmod(dest, perms)
 739     os.unlink(src)
 740
 741 def copy (src, dest, overwrite = 0, perms = 0o664):
 742     if os.path.exists(dest) and os.path.isdir(dest):
 743         dest_dir = dest
 744     else:
 745         dest_dir = os.path.dirname(dest)
 746     if not os.path.exists(dest_dir):
 747         umask = os.umask(00000)
 748         os.makedirs(dest_dir, 0o2775)
 749         os.umask(umask)
 750     #print "Copying %s to %s..." % (src, dest)
 751     if os.path.exists(dest) and os.path.isdir(dest):
 752         dest += '/' + os.path.basename(src)
 753     # Don't overwrite unless forced to
 754     if os.path.lexists(dest):
 755         if not overwrite:
 756             raise FileExistsError
 757         else:
 758             if not os.access(dest, os.W_OK):
 759                 raise CantOverwriteError
 760     shutil.copy2(src, dest)
 761     os.chmod(dest, perms)
 762
 763 ################################################################################
 764
 765 def which_conf_file ():
 766     if os.getenv('DAK_CONFIG'):
 767         return os.getenv('DAK_CONFIG')
 768
 769     res = socket.getfqdn()
 770     # In case we allow local config files per user, try if one exists
 771     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 772         homedir = os.getenv("HOME")
 773         confpath = os.path.join(homedir, "/etc/dak.conf")
 774         if os.path.exists(confpath):
 775             apt_pkg.read_config_file_isc(Cnf,confpath)
 776
 777     # We are still in here, so there is no local config file or we do
 778     # not allow local files. Do the normal stuff.
 779     if Cnf.get("Config::" + res + "::DakConfig"):
 780         return Cnf["Config::" + res + "::DakConfig"]
 781
 782     return default_config
 783
 784 def which_alias_file():
 785     hostname = socket.getfqdn()
 786     aliasfn = '/var/lib/misc/'+hostname+'/forward-alias'
 787     if os.path.exists(aliasfn):
 788         return aliasfn
 789     else:
 790         return None
 791
 792 ################################################################################
 793
 794 def TemplateSubst(subst_map, filename):
 795     """ Perform a substition of template """
 796     templatefile = open_file(filename)
 797     template = templatefile.read()
 798     for k, v in subst_map.iteritems():
 799         template = template.replace(k, str(v))
 800     templatefile.close()
 801     return template
 802
 803 ################################################################################
 804
 805 def fubar(msg, exit_code=1):
 806     sys.stderr.write("E: %s\n" % (msg))
 807     sys.exit(exit_code)
 808
 809 def warn(msg):
 810     sys.stderr.write("W: %s\n" % (msg))
 811
 812 ################################################################################
 813
 814 # Returns the user name with a laughable attempt at rfc822 conformancy
 815 # (read: removing stray periods).
 816 def whoami ():
 817     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 818
 819 def getusername ():
 820     return pwd.getpwuid(os.getuid())[0]
 821
 822 ################################################################################
 823
 824 def size_type (c):
 825     t  = " B"
 826     if c > 10240:
 827         c = c / 1024
 828         t = " KB"
 829     if c > 10240:
 830         c = c / 1024
 831         t = " MB"
 832     return ("%d%s" % (c, t))
 833
 834 ################################################################################
 835
 836 def cc_fix_changes (changes):
 837     o = changes.get("architecture", "")
 838     if o:
 839         del changes["architecture"]
 840     changes["architecture"] = {}
 841     for j in o.split():
 842         changes["architecture"][j] = 1
 843
 844 def changes_compare (a, b):
 845     """ Sort by source name, source version, 'have source', and then by filename """
 846     try:
 847         a_changes = parse_changes(a)
 848     except:
 849         return -1
 850
 851     try:
 852         b_changes = parse_changes(b)
 853     except:
 854         return 1
 855
 856     cc_fix_changes (a_changes)
 857     cc_fix_changes (b_changes)
 858
 859     # Sort by source name
 860     a_source = a_changes.get("source")
 861     b_source = b_changes.get("source")
 862     q = cmp (a_source, b_source)
 863     if q:
 864         return q
 865
 866     # Sort by source version
 867     a_version = a_changes.get("version", "0")
 868     b_version = b_changes.get("version", "0")
 869     q = apt_pkg.version_compare(a_version, b_version)
 870     if q:
 871         return q
 872
 873     # Sort by 'have source'
 874     a_has_source = a_changes["architecture"].get("source")
 875     b_has_source = b_changes["architecture"].get("source")
 876     if a_has_source and not b_has_source:
 877         return -1
 878     elif b_has_source and not a_has_source:
 879         return 1
 880
 881     # Fall back to sort by filename
 882     return cmp(a, b)
 883
 884 ################################################################################
 885
 886 def find_next_free (dest, too_many=100):
 887     extra = 0
 888     orig_dest = dest
 889     while os.path.lexists(dest) and extra < too_many:
 890         dest = orig_dest + '.' + repr(extra)
 891         extra += 1
 892     if extra >= too_many:
 893         raise NoFreeFilenameError
 894     return dest
 895
 896 ################################################################################
 897
 898 def result_join (original, sep = '\t'):
 899     resultlist = []
 900     for i in xrange(len(original)):
 901         if original[i] == None:
 902             resultlist.append("")
 903         else:
 904             resultlist.append(original[i])
 905     return sep.join(resultlist)
 906
 907 ################################################################################
 908
 909 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 910     out = ""
 911     for line in str.split('\n'):
 912         line = line.strip()
 913         if line or include_blank_lines:
 914             out += "%s%s\n" % (prefix, line)
 915     # Strip trailing new line
 916     if out:
 917         out = out[:-1]
 918     return out
 919
 920 ################################################################################
 921
 922 def validate_changes_file_arg(filename, require_changes=1):
 923     """
 924     'filename' is either a .changes or .dak file.  If 'filename' is a
 925     .dak file, it's changed to be the corresponding .changes file.  The
 926     function then checks if the .changes file a) exists and b) is
 927     readable and returns the .changes filename if so.  If there's a
 928     problem, the next action depends on the option 'require_changes'
 929     argument:
 930
 931       - If 'require_changes' == -1, errors are ignored and the .changes
 932         filename is returned.
 933       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 934       - If 'require_changes' == 1, a fatal error is raised.
 935
 936     """
 937     error = None
 938
 939     orig_filename = filename
 940     if filename.endswith(".dak"):
 941         filename = filename[:-4]+".changes"
 942
 943     if not filename.endswith(".changes"):
 944         error = "invalid file type; not a changes file"
 945     else:
 946         if not os.access(filename,os.R_OK):
 947             if os.path.exists(filename):
 948                 error = "permission denied"
 949             else:
 950                 error = "file not found"
 951
 952     if error:
 953         if require_changes == 1:
 954             fubar("%s: %s." % (orig_filename, error))
 955         elif require_changes == 0:
 956             warn("Skipping %s - %s" % (orig_filename, error))
 957             return None
 958         else: # We only care about the .dak file
 959             return filename
 960     else:
 961         return filename
 962
 963 ################################################################################
 964
 965 def real_arch(arch):
 966     return (arch != "source" and arch != "all")
 967
 968 ################################################################################
 969
 970 def join_with_commas_and(list):
 971     if len(list) == 0: return "nothing"
 972     if len(list) == 1: return list[0]
 973     return ", ".join(list[:-1]) + " and " + list[-1]
 974
 975 ################################################################################
 976
 977 def pp_deps (deps):
 978     pp_deps = []
 979     for atom in deps:
 980         (pkg, version, constraint) = atom
 981         if constraint:
 982             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
 983         else:
 984             pp_dep = pkg
 985         pp_deps.append(pp_dep)
 986     return " |".join(pp_deps)
 987
 988 ################################################################################
 989
def get_conf():
    """Return the module-level configuration object 'Cnf'."""
    return Cnf
 992
 993 ################################################################################
 994
 995 def parse_args(Options):
 996     """ Handle -a, -c and -s arguments; returns them as SQL constraints """
 997     # XXX: This should go away and everything which calls it be converted
 998     #      to use SQLA properly.  For now, we'll just fix it not to use
 999     #      the old Pg interface though
1000     session = DBConn().session()
1001     # Process suite
1002     if Options["Suite"]:
1003         suite_ids_list = []
1004         for suitename in split_args(Options["Suite"]):
1005             suite = get_suite(suitename, session=session)
1006             if not suite or suite.suite_id is None:
1007                 warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
1008             else:
1009                 suite_ids_list.append(suite.suite_id)
1010         if suite_ids_list:
1011             con_suites = "AND su.id IN (%s)" % ", ".join([ str(i) for i in suite_ids_list ])
1012         else:
1013             fubar("No valid suite given.")
1014     else:
1015         con_suites = ""
1016
1017     # Process component
1018     if Options["Component"]:
1019         component_ids_list = []
1020         for componentname in split_args(Options["Component"]):
1021             component = get_component(componentname, session=session)
1022             if component is None:
1023                 warn("component '%s' not recognised." % (componentname))
1024             else:
1025                 component_ids_list.append(component.component_id)
1026         if component_ids_list:
1027             con_components = "AND c.id IN (%s)" % ", ".join([ str(i) for i in component_ids_list ])
1028         else:
1029             fubar("No valid component given.")
1030     else:
1031         con_components = ""
1032
1033     # Process architecture
1034     con_architectures = ""
1035     check_source = 0
1036     if Options["Architecture"]:
1037         arch_ids_list = []
1038         for archname in split_args(Options["Architecture"]):
1039             if archname == "source":
1040                 check_source = 1
1041             else:
1042                 arch = get_architecture(archname, session=session)
1043                 if arch is None:
1044                     warn("architecture '%s' not recognised." % (archname))
1045                 else:
1046                     arch_ids_list.append(arch.arch_id)
1047         if arch_ids_list:
1048             con_architectures = "AND a.id IN (%s)" % ", ".join([ str(i) for i in arch_ids_list ])
1049         else:
1050             if not check_source:
1051                 fubar("No valid architecture given.")
1052     else:
1053         check_source = 1
1054
1055     return (con_suites, con_architectures, con_components, check_source)
1056
1057 ################################################################################
1058
def arch_compare_sw (a, b):
    """
    cmp-style comparison for sorting architecture names.

    'source' sorts ahead of every other name; all remaining names are
    ordered with the builtin cmp().
    """
    if a == "source":
        return 0 if b == "source" else -1
    if b == "source":
        return 1
    return cmp (a, b)
1074
1075 ################################################################################
1076
def split_args (s, dwim=1):
    """
    Split a command line argument that may be separated by commas or by
    whitespace.

    Without any comma the string is split on whitespace; otherwise it is
    split on commas only.  When 'dwim' is set, a trailing comma is
    treated as a fatal error: it usually means someone typed
    'dak ls -a i386, m68k foo', and silently treating 'm68k' as a
    separate argument would be confusing.
    """
    if "," not in s:
        return s.split()
    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1092
1093 ################################################################################
1094
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    'cmd' is run through '/bin/sh -c' in a forked child.  The child's
    stdout and stderr are collected together; whatever arrives on
    'status_read' is collected separately.

    @type cmd: str
    @param cmd: shell command line to run

    @type status_read: int
    @param status_read: read end of the status pipe (parent side)

    @type status_write: int
    @param status_write: write end of the status pipe; the only descriptor
                         above 2 that is left open in the child

    @rtype: tuple
    @return: (output, status, exit_status), where output is the combined
             stdout/stderr text, status is the text read from
             'status_read' and exit_status is the value os.waitpid()
             reported for the child
    """

    cmd = ['/bin/sh', '-c', cmd]
    # One pipe each for the child's stdin, stdout and stderr
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # Rewire fds 0, 1 and 2 onto the pipes.  This relies on dup()
        # handing back the lowest free descriptor, hence the close/dup
        # ordering below.
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        os.close(2)
        os.dup(errin)
        # Drop every other inherited descriptor except the status fd,
        # which the command has to be able to write to.
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        try:
            os.execvp(cmd[0], cmd)
        finally:
            # Only reached if the exec failed
            os._exit(1)

    # Parent
    os.close(p2cread)
    # After these dup2() calls the descriptor numbers 'c2pwrite' and
    # 'errin' refer to the *read* ends of their pipes; the parent's own
    # copies of the write ends are closed as a side effect.
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                if fd == c2pwrite or fd == errin:
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        # A round in which no ready descriptor yielded any data is taken
        # as end of output: reap the child and clean up.
        if not more_data:
            pid, exit_status = os.waitpid(pid, 0)
            try:
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                pass
            break

    return output, status, exit_status
1159
1160 ################################################################################
1161
def process_gpgv_output(status):
    """
    Parse the text gpgv wrote to its status fd.

    Every non-blank line must look like "[GNUPG:] KEYWORD [args...]".
    Malformed lines and unexpected duplicate keywords are skipped and
    described in the returned error text.

    @type status: str
    @param status: raw status-fd output

    @rtype: tuple
    @return: (keywords, internal_error), where keywords maps each status
             keyword to the list of its arguments and internal_error is
             "" if everything parsed cleanly, otherwise one
             newline-terminated message per problem
    """
    # Keywords allowed to appear more than once; the last occurrence wins.
    repeatable = ("NODATA", "SIGEXPIRED", "KEYEXPIRED")

    keywords = {}
    errors = []
    for line in status.split('\n'):
        line = line.strip()
        if line == "":
            continue
        atoms = line.split()
        if len(atoms) < 2:
            errors.append("gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line))
            continue
        (gnupg, keyword) = atoms[:2]
        if gnupg != "[GNUPG:]":
            errors.append("gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg))
            continue
        if keyword in keywords and keyword not in repeatable:
            errors.append("found duplicate status token ('%s').\n" % (keyword))
            continue
        keywords[keyword] = atoms[2:]

    return (keywords, "".join(errors))
1186
1187 ################################################################################
1188
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Retrieve the key that signed 'filename' from 'keyserver' and
    add it to 'keyring'.

    @type filename: str
    @param filename: signed file; must match re_taint_free because it is
                     interpolated into a shell command line

    @type keyserver: str
    @param keyserver: keyserver to query; defaults to the
                      Dinstall::KeyServer configuration value

    @type keyring: str
    @param keyring: keyring to import into; defaults to
                    get_primary_keyring_path()

    @rtype: str
    @return: "" on success, or an error message on error
    """

    # Defaults for keyserver and keyring
    if not keyserver:
        keyserver = Cnf["Dinstall::KeyServer"]
    if not keyring:
        keyring = get_primary_keyring_path()

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Invoke gpgv on the file with an empty keyring: the point is only to
    # learn the id of the signing key from the NO_PUBKEY status line.
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    (_, status, _) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"

    no_pubkey_args = keywords["NO_PUBKEY"]
    if not no_pubkey_args:
        # A bare NO_PUBKEY line carries no key id to fetch; report it
        # instead of failing with an IndexError.
        return "NO_PUBKEY in gpgv status-fd output carried no key id"

    fingerprint = no_pubkey_args[0]
    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    cmd = "gpg --no-default-keyring --secret-keyring=%s --no-options" \
          % (Cnf["Dinstall::SigningKeyring"])
    cmd += " --keyring %s --keyserver %s --recv-key %s" \
           % (keyring, keyserver, fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if (result != 0):
        return "'%s' failed with exit code %s" % (cmd, result)

    return ""
1232
1233 ################################################################################
1234
def gpg_keyring_args(keyrings=None):
    """
    Build the "--keyring X --keyring Y ..." portion of a gpg/gpgv
    command line.

    If 'keyrings' is empty or None, the list returned by
    get_active_keyring_paths() is used instead.
    """
    selected = keyrings or get_active_keyring_paths()
    options = []
    for keyring_path in selected:
        options.append("--keyring %s" % keyring_path)
    return " ".join(options)
1240
1241 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type sig_filename: str
    @param sig_filename: file whose signature should be checked

    @type data_filename: str
    @param data_filename: optional; the file a detached signature in
                          'sig_filename' applies to

    @type keyrings: list
    @param keyrings: optional *list* of keyrings to use; by default the
                     keyring_name of every active Keyring found through
                     'session'

    @type autofetch: bool or None
    @param autofetch: whether to try to fetch the signing key first (see
                      retrieve_key).  If None, the Dinstall::KeyAutoFetch
                      configuration value decides.

    @param session: database session used to look up the default
                    keyrings (presumably supplied by the session_wrapper
                    decorator when omitted -- verify there)

    @rtype: tuple
    @return: (fingerprint, []) if the signature is valid, otherwise
             (None, rejects) where rejects is a list of messages
             explaining why the signature was not accepted
    """

    rejects = []

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        # 'rejects' is a plain list: append() takes exactly one message.
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # Fall back to a placeholder so a status line without arguments
        # cannot leave 'key' unbound.
        key = args[0] if len(args) >= 1 else "UNKNOWN"
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        # Same placeholder; never reuse a key id left over from NO_PUBKEY.
        key = args[0] if len(args) >= 1 else "UNKNOWN"
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate=""
        if len(args) >= 1:
            timestamp = args[0]
            # No "T" in the value: treat it as seconds since the epoch;
            # otherwise it is passed through untouched.
            if timestamp.count("T") == 0:
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
                          SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
                          NODATA="",NOTATION_DATA="",NOTATION_NAME="",KEYEXPIRED="",POLICY_URL="")

    for keyword in keywords:
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        # 'fingerprint' is always bound here: a missing or empty VALIDSIG
        # adds a reject above and is caught by the branch before this one.
        return (fingerprint, [])
1374
1375 ################################################################################
1376
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve the email addresses from the gpg key uids of 'fingerprint'.

    Results are remembered in key_uid_email_cache.  @debian.org addresses
    are placed at the front of the returned list.  If gpg exits with a
    non-zero status the (cached) result is an empty list.
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    found = []
    cmd = "gpg --no-default-keyring %s --fingerprint %s" \
                % (gpg_keyring_args(), fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if result == 0:
        for uid_line in output.split('\n'):
            m = re_gpg_uid.match(uid_line)
            if m:
                address = m.group(1)
                if address.endswith('@debian.org'):
                    # prefer @debian.org addresses
                    # TODO: maybe not hardcode the domain
                    found.insert(0, address)
                else:
                    found.append(address)
    key_uid_email_cache[fingerprint] = found
    return found
1400
1401 ################################################################################
1402
def get_logins_from_ldap(fingerprint='*'):
    """
    Retrieve the logins that LDAP links to a given fingerprint.

    Server and base DN come from the Import-LDAP-Fingerprints
    configuration section; the bind is anonymous.

    @rtype: dict
    @return: maps the first 'keyFingerPrint' value of each matching
             entry to the first 'uid' value of that entry
    """
    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    connection.simple_bind_s('','')
    query = '(keyfingerprint=%s)' % fingerprint
    entries = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  query,
                                  ['uid', 'keyfingerprint'])
    logins = {}
    for entry in entries:
        attrs = entry[1]
        logins[attrs['keyFingerPrint'][0]] = attrs['uid'][0]
    return logins
1417
1418 ################################################################################
1419
def get_users_from_ldap():
    """
    Retrieve login and user names from LDAP.

    Server and base DN come from the Import-LDAP-Fingerprints
    configuration section; the bind is anonymous.

    @rtype: dict
    @return: maps each user's full name to their uid.  The name is built
             from the first values of 'cn', 'mn' and 'sn' (in that
             order), leaving out attributes that are missing or '-'.
    """
    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    connection.simple_bind_s('','')
    entries = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  '(uid=*)', ['uid', 'cn', 'mn', 'sn'])
    users = {}
    for entry in entries:
        attrs = entry[1]
        name_parts = []
        for key in ('cn', 'mn', 'sn'):
            try:
                value = attrs[key][0]
            except KeyError:
                # attribute not present on this entry
                continue
            if value != '-':
                name_parts.append(value)
        users[' '.join(name_parts)] = attrs['uid'][0]
    return users
1441
1442 ################################################################################
1443
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of 'root' is removed from both paths; the result
    is 'src' prefixed with one '../' for every '/'-separated component of
    the directory part of 'dest'.

    Returns fixed 'src'
    """
    relative_src = src.replace(root, '', 1)
    dest_dir = os.path.dirname(dest.replace(root, '', 1))
    # Number of '/'-separated pieces, i.e. len(dest_dir.split('/'))
    depth = dest_dir.count('/') + 1
    return ('../' * depth) + relative_src
1454
1455 ################################################################################
1456
def temp_filename(directory=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique filename by pre-creating it with
    tempfile.mkstemp().

    @type directory: str
    @param directory: If non-null it will be the directory the file is pre-created in.

    @type prefix: str
    @param prefix: The filename will be prefixed with this string

    @type suffix: str
    @param suffix: The filename will end with this string

    @type mode: int
    @param mode: If set the file will get chmodded to those permissions

    @type group: str
    @param group: If set the file will get chgrped to the specified group.

    @rtype: tuple
    @return: Returns a pair (fd, name)
    """

    created = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
    path = created[1]
    if mode:
        os.chmod(path, mode)
    if group:
        os.chown(path, -1, grp.getgrnam(group).gr_gid)
    return created
1487
1488 ################################################################################
1489
def temp_dirname(parent=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique directory by pre-creating it with
    tempfile.mkdtemp().

    @type parent: str
    @param parent: If non-null it will be the directory the directory is pre-created in.

    @type prefix: str
    @param prefix: The directory name will be prefixed with this string

    @type suffix: str
    @param suffix: The directory name will end with this string

    @type mode: int
    @param mode: If set the directory will get chmodded to those permissions

    @type group: str
    @param group: If set the directory will get chgrped to the specified group.

    @rtype: str
    @return: Returns the path of the created directory

    """

    path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=parent)
    if mode:
        os.chmod(path, mode)
    if group:
        os.chown(path, -1, grp.getgrnam(group).gr_gid)
    return path
1521
1522 ################################################################################
1523
def is_email_alias(email):
    """
    Check whether the user part of 'email' is listed in the alias file.

    The alias names (the text before the first ':' on each line of the
    file named by which_alias_file()) are loaded on first use and kept in
    the module-level 'alias_cache'.  If there is no alias file the cache
    is an empty set.

    @type email: str
    @param email: address to check; everything before the first '@' is
                  looked up

    @rtype: bool
    @return: True if the user part is a known alias
    """
    global alias_cache
    if alias_cache is None:
        aliasfn = which_alias_file()
        aliases = set()
        if aliasfn:
            # Close the file deterministically instead of leaving that to
            # the garbage collector.
            with open(aliasfn) as alias_file:
                for l in alias_file:
                    aliases.add(l.split(':')[0])
        # Publish the cache only once it is complete, so a failed read
        # cannot leave a half-filled set behind for later calls.
        alias_cache = aliases
    uid = email.split('@')[0]
    return uid in alias_cache
1535
1536 ################################################################################
1537
def get_changes_files(from_dir):
    """
    List all .changes files found in 'from_dir'.

    As a side effect the process chdir()s into 'from_dir'; this is due to
    broken behaviour on the part of p-u/p-a when you're not in the right
    place.  An OSError from either step is reported through fubar().

    Returns a list of filenames (without directory part)
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        found = []
        for entry in os.listdir(from_dir):
            if entry.endswith('.changes'):
                found.append(entry)
        changes_files = found
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return changes_files
1554
1555 ################################################################################
1556
# Module-level configuration object.  The functions in this module read
# their settings through this global name (e.g. Cnf["Dinstall::KeyServer"]).
Cnf = config.Config().Cnf
1558
1559 ################################################################################
1560
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at http://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parses a local copy, but let's document the source
    somewhere ;)

    Each useful line has the form "source: entry|entry|..."; the first run
    of digits in every entry is taken as a bug number.  Lines without
    ": " are ignored, and entries without any digits are skipped.

    @type file: str
    @param file: path of the local copy of the list

    @rtype: dict
    @return: a dict associating source package name with a list of open
             wnpp bugs, as strings (Yes, there might be more than one).
             If the file cannot be opened a warning is printed and the
             dict is empty.
    """
    try:
        with open(file) as f:
            lines = f.readlines()
    except IOError:
        # Parenthesised so the statement means the same under Python 2's
        # print statement and Python 3's print function.
        print("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any." % file)
        lines = []

    wnpp = {}
    for line in lines:
        splited_line = line.split(": ", 1)
        if len(splited_line) < 2:
            continue
        bugs = []
        for wnpp_bug in splited_line[1].split("|"):
            match = re.search(r"\d+", wnpp_bug)
            # An entry without digits has no bug number; skip it rather
            # than crash on a None match.
            if match:
                bugs.append(match.group())
        wnpp[splited_line[0]] = bugs
    return wnpp
1593
1594 ################################################################################
1595
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    concatenating the Packages.gz of the given suite/component/architecture
    with the matching debian-installer Packages.gz, if that one exists.

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
    (fd, temp_file) = temp_filename()
    # Only the name is needed (the shell redirects into it), so close the
    # descriptor that came with it instead of leaking it.
    os.close(fd)
    try:
        (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
        if (result != 0):
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
        if os.path.exists(filename):
            (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
            if (result != 0):
                fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        packages = open_file(temp_file)
        Packages = apt_pkg.TagFile(packages)
    finally:
        # Remove the scratch file on every path, including the fubar()
        # exits above; the already-opened handle stays readable.
        os.unlink(temp_file)
    return Packages
1630
1631 ################################################################################
1632
def deb_extract_control(fh):
    """
    Extract DEBIAN/control from a binary package.

    'fh' is handed to apt_inst.DebFile; the "control" member of its
    control part is returned via extractdata().
    """
    control_part = apt_inst.DebFile(fh).control
    return control_part.extractdata("control")
1636
1637 ################################################################################
1638
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """collect the mail addresses to notify about an upload

    The maintainer is always included, the changer only when different
    from the maintainer.  The first address of the signing key is added
    when the key has addresses and neither the changer's nor the
    maintainer's address is among them.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if changed_by != maintainer:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    if len(key_addresses) > 0:
        # Evaluated in the same order as before: the maintainer is only
        # parsed when the changer's address is not on the key.
        signer_is_known = (fix_maintainer(changed_by)[3] in key_addresses
                           or fix_maintainer(maintainer)[3] in key_addresses)
        if not signer_is_known:
            recipients.append(key_addresses[0])

    encoded = []
    for address in recipients:
        encoded.append(fix_maintainer(address)[1])
    return encoded
1665
1666 ################################################################################
1667
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The editor is taken from the VISUAL environment variable, then from
    EDITOR, and defaults to "vi".  It is run on a temporary file that
    initially holds C{text}; the temporary file is removed afterwards,
    also when the editor fails.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text
    """
    editor = os.environ.get('VISUAL', os.environ.get('EDITOR', 'vi'))
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        # Trailing comma: write the text without appending a newline.
        print >>tmp, text,
        # Close before starting the editor so the text is flushed to disk.
        tmp.close()
        daklib.daksubprocess.check_call([editor, tmp.name])
        # Close the read handle explicitly instead of leaving it to the
        # garbage collector.
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        # Also release the handle when writing the initial text failed;
        # closing an already closed file does nothing.
        tmp.close()
        os.unlink(tmp.name)
1689
1690 ################################################################################
1691
1692 def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
1693     dbsuite = get_suite(suite, session)
1694     overridesuite = dbsuite
1695     if dbsuite.overridesuite is not None:
1696         overridesuite = get_suite(dbsuite.overridesuite, session)
1697     dep_problem = 0
1698     p2c = {}
1699     all_broken = {}
1700     if arches:
1701         all_arches = set(arches)
1702     else:
1703         all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
1704     all_arches -= set(["source", "all"])
1705     metakey_d = get_or_set_metadatakey("Depends", session)
1706     metakey_p = get_or_set_metadatakey("Provides", session)
1707     params = {
1708         'suite_id':     dbsuite.suite_id,
1709         'metakey_d_id': metakey_d.key_id,
1710         'metakey_p_id': metakey_p.key_id,
1711     }
1712     for architecture in all_arches | set(['all']):
1713         deps = {}
1714         sources = {}
1715         virtual_packages = {}
1716         params['arch_id'] = get_architecture(architecture, session).arch_id
1717
1718         statement = '''
1719             SELECT b.id, b.package, s.source, c.name as component,
1720                 (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
1721                 (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
1722                 FROM binaries b
1723                 JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
1724                 JOIN source s ON b.source = s.id
1725                 JOIN files_archive_map af ON b.file = af.file_id
1726                 JOIN component c ON af.component_id = c.id
1727                 WHERE b.architecture = :arch_id'''
1728         query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
1729             from_statement(statement).params(params)
1730         for binary_id, package, source, component, depends, provides in query:
1731             sources[package] = source
1732             p2c[package] = component
1733             if depends is not None:
1734                 deps[package] = depends
1735             # Maintain a counter for each virtual package.  If a
1736             # Provides: exists, set the counter to 0 and count all
1737             # provides by a package not in the list for removal.
1738             # If the counter stays 0 at the end, we know that only
1739             # the to-be-removed packages provided this virtual
1740             # package.
1741             if provides is not None:
1742                 for virtual_pkg in provides.split(","):
1743                     virtual_pkg = virtual_pkg.strip()
1744                     if virtual_pkg == package: continue
1745                     if not virtual_packages.has_key(virtual_pkg):
1746                         virtual_packages[virtual_pkg] = 0
1747                     if package not in removals:
1748                         virtual_packages[virtual_pkg] += 1
1749
1750         # If a virtual package is only provided by the to-be-removed
1751         # packages, treat the virtual package as to-be-removed too.
1752         for virtual_pkg in virtual_packages.keys():
1753             if virtual_packages[virtual_pkg] == 0:
1754                 removals.append(virtual_pkg)
1755
1756         # Check binary dependencies (Depends)
1757         for package in deps.keys():
1758             if package in removals: continue
1759             parsed_dep = []
1760             try:
1761                 parsed_dep += apt_pkg.parse_depends(deps[package])
1762             except ValueError as e:
1763                 print "Error for package %s: %s" % (package, e)
1764             for dep in parsed_dep:
1765                 # Check for partial breakage.  If a package has a ORed
1766                 # dependency, there is only a dependency problem if all
1767                 # packages in the ORed depends will be removed.
1768                 unsat = 0
1769                 for dep_package, _, _ in dep:
1770                     if dep_package in removals:
1771                         unsat += 1
1772                 if unsat == len(dep):
1773                     component = p2c[package]
1774                     source = sources[package]
1775                     if component != "main":
1776                         source = "%s/%s" % (source, component)
1777                     all_broken.setdefault(source, {}).setdefault(package, set()).add(architecture)
1778                     dep_problem = 1
1779
1780     if all_broken:
1781         if cruft:
1782             print "  - broken Depends:"
1783         else:
1784             print "# Broken Depends:"
1785         for source, bindict in sorted(all_broken.items()):
1786             lines = []
1787             for binary, arches in sorted(bindict.items()):
1788                 if arches == all_arches or 'all' in arches:
1789                     lines.append(binary)
1790                 else:
1791                     lines.append('%s [%s]' % (binary, ' '.join(sorted(arches))))
1792             if cruft:
1793                 print '    %s: %s' % (source, lines[0])
1794             else:
1795                 print '%s: %s' % (source, lines[0])
1796             for line in lines[1:]:
1797                 if cruft:
1798                     print '    ' + ' ' * (len(source) + 2) + line
1799                 else:
1800                     print ' ' * (len(source) + 2) + line
1801         if not cruft:
1802             print
1803
1804     # Check source dependencies (Build-Depends and Build-Depends-Indep)
1805     all_broken.clear()
1806     metakey_bd = get_or_set_metadatakey("Build-Depends", session)
1807     metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
1808     params = {
1809         'suite_id':    dbsuite.suite_id,
1810         'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
1811     }
1812     statement = '''
1813         SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
1814            FROM source s
1815            JOIN source_metadata sm ON s.id = sm.src_id
1816            WHERE s.id in
1817                (SELECT source FROM src_associations
1818                    WHERE suite = :suite_id)
1819                AND sm.key_id in :metakey_ids
1820            GROUP BY s.id, s.source'''
1821     query = session.query('id', 'source', 'build_dep').from_statement(statement). \
1822         params(params)
1823     for source_id, source, build_dep in query:
1824         if source in removals: continue
1825         parsed_dep = []
1826         if build_dep is not None:
1827             # Remove [arch] information since we want to see breakage on all arches
1828             build_dep = re_build_dep_arch.sub("", build_dep)
1829             try:
1830                 parsed_dep += apt_pkg.parse_depends(build_dep)
1831             except ValueError as e:
1832                 print "Error for source %s: %s" % (source, e)
1833         for dep in parsed_dep:
1834             unsat = 0
1835             for dep_package, _, _ in dep:
1836                 if dep_package in removals:
1837                     unsat += 1
1838             if unsat == len(dep):
1839                 component, = session.query(Component.component_name) \
1840                     .join(Component.overrides) \
1841                     .filter(Override.suite == overridesuite) \
1842                     .filter(Override.package == re.sub('/(contrib|non-free)$', '', source)) \
1843                     .join(Override.overridetype).filter(OverrideType.overridetype == 'dsc') \
1844                     .first()
1845                 if component != "main":
1846                     source = "%s/%s" % (source, component)
1847                 all_broken.setdefault(source, set()).add(pp_deps(dep))
1848                 dep_problem = 1
1849
1850     if all_broken:
1851         if cruft:
1852             print "  - broken Build-Depends:"
1853         else:
1854             print "# Broken Build-Depends:"
1855         for source, bdeps in sorted(all_broken.items()):
1856             bdeps = sorted(bdeps)
1857             if cruft:
1858                 print '    %s: %s' % (source, bdeps[0])
1859             else:
1860                 print '%s: %s' % (source, bdeps[0])
1861             for bdep in bdeps[1:]:
1862                 if cruft:
1863                     print '    ' + ' ' * (len(source) + 2) + bdep
1864                 else:
1865                     print ' ' * (len(source) + 2) + bdep
1866         if not cruft:
1867             print
1868
1869     return dep_problem