daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import datetime
  27 import email.Header
  28 import os
  29 import pwd
  30 import select
  31 import socket
  32 import shutil
  33 import sys
  34 import tempfile
  35 import traceback
  36 import stat
  37 import apt_inst
  38 import apt_pkg
  39 import time
  40 import re
  41 import email as modemail
  42 import subprocess
  43 import ldap
  44
  45 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  46                    get_override_type, Keyring, session_wrapper, \
  47                    get_active_keyring_paths, get_primary_keyring_path, \
  48                    get_suite_architectures, get_or_set_metadatakey, DBSource, \
  49                    Component, Override, OverrideType
  50 from sqlalchemy import desc
  51 from dak_exceptions import *
  52 from gpg import SignedFile
  53 from textutils import fix_maintainer
  54 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  55                     re_multi_line_field, re_srchasver, re_taint_free, \
  56                     re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
  57                     re_is_orig_source, re_build_dep_arch
  58
  59 from formats import parse_format, validate_changes_format
  60 from srcformats import get_format_from_string
  61 from collections import defaultdict
  62
  63 ################################################################################
  64
# Fallback configuration paths: which_conf_file() and which_apt_conf_file()
# return these when the loaded configuration names no host-specific file.
default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties
default_apt_config = "/etc/dak/apt.conf" #: default apt config, not normally used

# Module-level caches; both start out empty at import time.
alias_cache = None        #: Cache for email alias checks
key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids

# (hashname, function, earliest_changes_version)
# hashname is the suffix of the "checksums-<hashname>" field; see
# check_hash_fields(), which rejects any checksum field not listed here.
# NOTE(review): earliest_changes_version is presumably the first .changes
# Format version carrying that field -- confirm against the callers.
known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  74
  75 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  76 # code in lenny's Python. This also affects commands.getoutput and
  77 # commands.getstatus.
  78 def dak_getstatusoutput(cmd):
  79     pipe = subprocess.Popen(cmd, shell=True, universal_newlines=True,
  80         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  81
  82     output = pipe.stdout.read()
  83
  84     pipe.wait()
  85
  86     if output[-1:] == '\n':
  87         output = output[:-1]
  88
  89     ret = pipe.wait()
  90     if ret is None:
  91         ret = 0
  92
  93     return ret, output
  94 commands.getstatusoutput = dak_getstatusoutput
  95
  96 ################################################################################
  97
  98 def html_escape(s):
  99     """ Escape html chars """
 100     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 101
 102 ################################################################################
 103
 104 def open_file(filename, mode='r'):
 105     """
 106     Open C{file}, return fileobject.
 107
 108     @type filename: string
 109     @param filename: path/filename to open
 110
 111     @type mode: string
 112     @param mode: open mode
 113
 114     @rtype: fileobject
 115     @return: open fileobject
 116
 117     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 118
 119     """
 120     try:
 121         f = open(filename, mode)
 122     except IOError:
 123         raise CantOpenError(filename)
 124     return f
 125
 126 ################################################################################
 127
 128 def our_raw_input(prompt=""):
 129     if prompt:
 130         while 1:
 131             try:
 132                 sys.stdout.write(prompt)
 133                 break
 134             except IOError:
 135                 pass
 136     sys.stdout.flush()
 137     try:
 138         ret = raw_input()
 139         return ret
 140     except EOFError:
 141         sys.stderr.write("\nUser interrupt (^D).\n")
 142         raise SystemExit
 143
 144 ################################################################################
 145
 146 def extract_component_from_section(section, session=None):
 147     component = ""
 148
 149     if section.find('/') != -1:
 150         component = section.split('/')[0]
 151
 152     # Expand default component
 153     if component == "":
 154         comp = get_component(section, session)
 155         if comp is None:
 156             component = "main"
 157         else:
 158             component = comp.component_name
 159
 160     return (section, component)
 161
 162 ################################################################################
 163
 164 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 165     require_signature = True
 166     if keyrings == None:
 167         keyrings = []
 168         require_signature = False
 169
 170     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 171     contents = signed_file.contents
 172
 173     error = ""
 174     changes = {}
 175
 176     # Split the lines in the input, keeping the linebreaks.
 177     lines = contents.splitlines(True)
 178
 179     if len(lines) == 0:
 180         raise ParseChangesError("[Empty changes file]")
 181
 182     # Reindex by line number so we can easily verify the format of
 183     # .dsc files...
 184     index = 0
 185     indexed_lines = {}
 186     for line in lines:
 187         index += 1
 188         indexed_lines[index] = line[:-1]
 189
 190     num_of_lines = len(indexed_lines.keys())
 191     index = 0
 192     first = -1
 193     while index < num_of_lines:
 194         index += 1
 195         line = indexed_lines[index]
 196         if line == "" and signing_rules == 1:
 197             if index != num_of_lines:
 198                 raise InvalidDscError(index)
 199             break
 200         slf = re_single_line_field.match(line)
 201         if slf:
 202             field = slf.groups()[0].lower()
 203             changes[field] = slf.groups()[1]
 204             first = 1
 205             continue
 206         if line == " .":
 207             changes[field] += '\n'
 208             continue
 209         mlf = re_multi_line_field.match(line)
 210         if mlf:
 211             if first == -1:
 212                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 213             if first == 1 and changes[field] != "":
 214                 changes[field] += '\n'
 215             first = 0
 216             changes[field] += mlf.groups()[0] + '\n'
 217             continue
 218         error += line
 219
 220     changes["filecontents"] = armored_contents
 221
 222     if changes.has_key("source"):
 223         # Strip the source version in brackets from the source field,
 224         # put it in the "source-version" field instead.
 225         srcver = re_srchasver.search(changes["source"])
 226         if srcver:
 227             changes["source"] = srcver.group(1)
 228             changes["source-version"] = srcver.group(2)
 229
 230     if error:
 231         raise ParseChangesError(error)
 232
 233     return changes
 234
 235 ################################################################################
 236
 237 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 238     """
 239     Parses a changes file and returns a dictionary where each field is a
 240     key.  The mandatory first argument is the filename of the .changes
 241     file.
 242
 243     signing_rules is an optional argument:
 244
 245       - If signing_rules == -1, no signature is required.
 246       - If signing_rules == 0 (the default), a signature is required.
 247       - If signing_rules == 1, it turns on the same strict format checking
 248         as dpkg-source.
 249
 250     The rules for (signing_rules == 1)-mode are:
 251
 252       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 253         followed by any PGP header data and must end with a blank line.
 254
 255       - The data section must end with a blank line and must be followed by
 256         "-----BEGIN PGP SIGNATURE-----".
 257     """
 258
 259     changes_in = open_file(filename)
 260     content = changes_in.read()
 261     changes_in.close()
 262     try:
 263         unicode(content, 'utf-8')
 264     except UnicodeError:
 265         raise ChangesUnicodeError("Changes file not proper utf-8")
 266     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 267
 268
 269     if not dsc_file:
 270         # Finally ensure that everything needed for .changes is there
 271         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 272                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 273
 274         missingfields=[]
 275         for keyword in must_keywords:
 276             if not changes.has_key(keyword.lower()):
 277                 missingfields.append(keyword)
 278
 279                 if len(missingfields):
 280                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 281
 282     return changes
 283
 284 ################################################################################
 285
 286 def hash_key(hashname):
 287     return '%ssum' % hashname
 288
 289 ################################################################################
 290
 291 def create_hash(where, files, hashname, hashfunc):
 292     """
 293     create_hash extends the passed files dict with the given hash by
 294     iterating over all files on disk and passing them to the hashing
 295     function given.
 296     """
 297
 298     rejmsg = []
 299     for f in files.keys():
 300         try:
 301             file_handle = open_file(f)
 302         except CantOpenError:
 303             rejmsg.append("Could not open file %s for checksumming" % (f))
 304             continue
 305
 306         files[f][hash_key(hashname)] = hashfunc(file_handle)
 307
 308         file_handle.close()
 309     return rejmsg
 310
 311 ################################################################################
 312
 313 def check_hash(where, files, hashname, hashfunc):
 314     """
 315     check_hash checks the given hash in the files dict against the actual
 316     files on disk.  The hash values need to be present consistently in
 317     all file entries.  It does not modify its input in any way.
 318     """
 319
 320     rejmsg = []
 321     for f in files.keys():
 322         file_handle = None
 323         try:
 324             try:
 325                 file_handle = open_file(f)
 326
 327                 # Check for the hash entry, to not trigger a KeyError.
 328                 if not files[f].has_key(hash_key(hashname)):
 329                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 330                         where))
 331                     continue
 332
 333                 # Actually check the hash for correctness.
 334                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 335                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 336                         where))
 337             except CantOpenError:
 338                 # TODO: This happens when the file is in the pool.
 339                 # warn("Cannot open file %s" % f)
 340                 continue
 341         finally:
 342             if file_handle:
 343                 file_handle.close()
 344     return rejmsg
 345
 346 ################################################################################
 347
 348 def check_size(where, files):
 349     """
 350     check_size checks the file sizes in the passed files dict against the
 351     files on disk.
 352     """
 353
 354     rejmsg = []
 355     for f in files.keys():
 356         try:
 357             entry = os.stat(f)
 358         except OSError as exc:
 359             if exc.errno == 2:
 360                 # TODO: This happens when the file is in the pool.
 361                 continue
 362             raise
 363
 364         actual_size = entry[stat.ST_SIZE]
 365         size = int(files[f]["size"])
 366         if size != actual_size:
 367             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 368                    % (f, actual_size, size, where))
 369     return rejmsg
 370
 371 ################################################################################
 372
 373 def check_dsc_files(dsc_filename, dsc, dsc_files):
 374     """
 375     Verify that the files listed in the Files field of the .dsc are
 376     those expected given the announced Format.
 377
 378     @type dsc_filename: string
 379     @param dsc_filename: path of .dsc file
 380
 381     @type dsc: dict
 382     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 383
 384     @type dsc_files: dict
 385     @param dsc_files: the file list returned by C{build_file_list()}
 386
 387     @rtype: list
 388     @return: all errors detected
 389     """
 390     rejmsg = []
 391
 392     # Ensure .dsc lists proper set of source files according to the format
 393     # announced
 394     has = defaultdict(lambda: 0)
 395
 396     ftype_lookup = (
 397         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 398         (r'diff.gz',                   ('debian_diff',)),
 399         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 400         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 401         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 402         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 403         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 404     )
 405
 406     for f in dsc_files:
 407         m = re_issource.match(f)
 408         if not m:
 409             rejmsg.append("%s: %s in Files field not recognised as source."
 410                           % (dsc_filename, f))
 411             continue
 412
 413         # Populate 'has' dictionary by resolving keys in lookup table
 414         matched = False
 415         for regex, keys in ftype_lookup:
 416             if re.match(regex, m.group(3)):
 417                 matched = True
 418                 for key in keys:
 419                     has[key] += 1
 420                 break
 421
 422         # File does not match anything in lookup table; reject
 423         if not matched:
 424             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 425
 426     # Check for multiple files
 427     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 428         if has[file_type] > 1:
 429             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 430
 431     # Source format specific tests
 432     try:
 433         format = get_format_from_string(dsc['format'])
 434         rejmsg.extend([
 435             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 436         ])
 437
 438     except UnknownFormatError:
 439         # Not an error here for now
 440         pass
 441
 442     return rejmsg
 443
 444 ################################################################################
 445
 446 def check_hash_fields(what, manifest):
 447     """
 448     check_hash_fields ensures that there are no checksum fields in the
 449     given dict that we do not know about.
 450     """
 451
 452     rejmsg = []
 453     hashes = map(lambda x: x[0], known_hashes)
 454     for field in manifest:
 455         if field.startswith("checksums-"):
 456             hashname = field.split("-",1)[1]
 457             if hashname not in hashes:
 458                 rejmsg.append("Unsupported checksum field for %s "\
 459                     "in %s" % (hashname, what))
 460     return rejmsg
 461
 462 ################################################################################
 463
 464 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 465     if format >= version:
 466         # The version should contain the specified hash.
 467         func = check_hash
 468
 469         # Import hashes from the changes
 470         rejmsg = parse_checksums(".changes", files, changes, hashname)
 471         if len(rejmsg) > 0:
 472             return rejmsg
 473     else:
 474         # We need to calculate the hash because it can't possibly
 475         # be in the file.
 476         func = create_hash
 477     return func(".changes", files, hashname, hashfunc)
 478
 479 # We could add the orig which might be in the pool to the files dict to
 480 # access the checksums easily.
 481
 482 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 483     """
 484     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 485     in the dsc is correct, i.e. identical to the changes file and if necessary
 486     the pool.  The latter task is delegated to check_hash.
 487     """
 488
 489     rejmsg = []
 490     if not dsc.has_key('Checksums-%s' % (hashname,)):
 491         return rejmsg
 492     # Import hashes from the dsc
 493     parse_checksums(".dsc", dsc_files, dsc, hashname)
 494     # And check it...
 495     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 496     return rejmsg
 497
 498 ################################################################################
 499
 500 def parse_checksums(where, files, manifest, hashname):
 501     rejmsg = []
 502     field = 'checksums-%s' % hashname
 503     if not field in manifest:
 504         return rejmsg
 505     for line in manifest[field].split('\n'):
 506         if not line:
 507             break
 508         clist = line.strip().split(' ')
 509         if len(clist) == 3:
 510             checksum, size, checkfile = clist
 511         else:
 512             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 513             continue
 514         if not files.has_key(checkfile):
 515         # TODO: check for the file's entry in the original files dict, not
 516         # the one modified by (auto)byhand and other weird stuff
 517         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 518         #        (file, hashname, where))
 519             continue
 520         if not files[checkfile]["size"] == size:
 521             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 522                 "in %s" % (checkfile, hashname, where))
 523             continue
 524         files[checkfile][hash_key(hashname)] = checksum
 525     for f in files.keys():
 526         if not files[f].has_key(hash_key(hashname)):
 527             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 528     return rejmsg
 529
 530 ################################################################################
 531
 532 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 533
 534 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 535     files = {}
 536
 537     # Make sure we have a Files: field to parse...
 538     if not changes.has_key(field):
 539         raise NoFilesFieldError
 540
 541     # Validate .changes Format: field
 542     if not is_a_dsc:
 543         validate_changes_format(parse_format(changes['format']), field)
 544
 545     includes_section = (not is_a_dsc) and field == "files"
 546
 547     # Parse each entry/line:
 548     for i in changes[field].split('\n'):
 549         if not i:
 550             break
 551         s = i.split()
 552         section = priority = ""
 553         try:
 554             if includes_section:
 555                 (md5, size, section, priority, name) = s
 556             else:
 557                 (md5, size, name) = s
 558         except ValueError:
 559             raise ParseChangesError(i)
 560
 561         if section == "":
 562             section = "-"
 563         if priority == "":
 564             priority = "-"
 565
 566         (section, component) = extract_component_from_section(section)
 567
 568         files[name] = dict(size=size, section=section,
 569                            priority=priority, component=component)
 570         files[name][hashname] = md5
 571
 572     return files
 573
 574 ################################################################################
 575
 576 # see http://bugs.debian.org/619131
 577 def build_package_list(dsc, session = None):
 578     if not dsc.has_key("package-list"):
 579         return {}
 580
 581     packages = {}
 582
 583     for line in dsc["package-list"].split("\n"):
 584         if not line:
 585             break
 586
 587         fields = line.split()
 588         name = fields[0]
 589         package_type = fields[1]
 590         (section, component) = extract_component_from_section(fields[2])
 591         priority = fields[3]
 592
 593         # Validate type if we have a session
 594         if session and get_override_type(package_type, session) is None:
 595             # Maybe just warn and ignore? exit(1) might be a bit hard...
 596             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 597
 598         if name not in packages or packages[name]["type"] == "dsc":
 599             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 600
 601     return packages
 602
 603 ################################################################################
 604
 605 def send_mail (message, filename=""):
 606     """sendmail wrapper, takes _either_ a message string or a file as arguments"""
 607
 608     maildir = Cnf.get('Dir::Mail')
 609     if maildir:
 610         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 611         path = find_next_free(path)
 612         fh = open(path, 'w')
 613         print >>fh, message,
 614         fh.close()
 615
 616     # Check whether we're supposed to be sending mail
 617     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 618         return
 619
 620     # If we've been passed a string dump it into a temporary file
 621     if message:
 622         (fd, filename) = tempfile.mkstemp()
 623         os.write (fd, message)
 624         os.close (fd)
 625
 626     if Cnf.has_key("Dinstall::MailWhiteList") and \
 627            Cnf["Dinstall::MailWhiteList"] != "":
 628         message_in = open_file(filename)
 629         message_raw = modemail.message_from_file(message_in)
 630         message_in.close();
 631
 632         whitelist = [];
 633         whitelist_in = open_file(Cnf["Dinstall::MailWhiteList"])
 634         try:
 635             for line in whitelist_in:
 636                 if not re_whitespace_comment.match(line):
 637                     if re_re_mark.match(line):
 638                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 639                     else:
 640                         whitelist.append(re.compile(re.escape(line.strip())))
 641         finally:
 642             whitelist_in.close()
 643
 644         # Fields to check.
 645         fields = ["To", "Bcc", "Cc"]
 646         for field in fields:
 647             # Check each field
 648             value = message_raw.get(field, None)
 649             if value != None:
 650                 match = [];
 651                 for item in value.split(","):
 652                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 653                     mail_whitelisted = 0
 654                     for wr in whitelist:
 655                         if wr.match(email):
 656                             mail_whitelisted = 1
 657                             break
 658                     if not mail_whitelisted:
 659                         print "Skipping %s since it's not in %s" % (item, Cnf["Dinstall::MailWhiteList"])
 660                         continue
 661                     match.append(item)
 662
 663                 # Doesn't have any mail in whitelist so remove the header
 664                 if len(match) == 0:
 665                     del message_raw[field]
 666                 else:
 667                     message_raw.replace_header(field, ', '.join(match))
 668
 669         # Change message fields in order if we don't have a To header
 670         if not message_raw.has_key("To"):
 671             fields.reverse()
 672             for field in fields:
 673                 if message_raw.has_key(field):
 674                     message_raw[fields[-1]] = message_raw[field]
 675                     del message_raw[field]
 676                     break
 677             else:
 678                 # Clean up any temporary files
 679                 # and return, as we removed all recipients.
 680                 if message:
 681                     os.unlink (filename);
 682                 return;
 683
 684         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 685         os.write (fd, message_raw.as_string(True));
 686         os.close (fd);
 687
 688     # Invoke sendmail
 689     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 690     if (result != 0):
 691         raise SendmailFailedError(output)
 692
 693     # Clean up any temporary files
 694     if message:
 695         os.unlink (filename)
 696
 697 ################################################################################
 698
 699 def poolify (source, component=None):
 700     if source[:3] == "lib":
 701         return source[:4] + '/' + source + '/'
 702     else:
 703         return source[:1] + '/' + source + '/'
 704
 705 ################################################################################
 706
 707 def move (src, dest, overwrite = 0, perms = 0o664):
 708     if os.path.exists(dest) and os.path.isdir(dest):
 709         dest_dir = dest
 710     else:
 711         dest_dir = os.path.dirname(dest)
 712     if not os.path.exists(dest_dir):
 713         umask = os.umask(00000)
 714         os.makedirs(dest_dir, 0o2775)
 715         os.umask(umask)
 716     #print "Moving %s to %s..." % (src, dest)
 717     if os.path.exists(dest) and os.path.isdir(dest):
 718         dest += '/' + os.path.basename(src)
 719     # Don't overwrite unless forced to
 720     if os.path.exists(dest):
 721         if not overwrite:
 722             fubar("Can't move %s to %s - file already exists." % (src, dest))
 723         else:
 724             if not os.access(dest, os.W_OK):
 725                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 726     shutil.copy2(src, dest)
 727     os.chmod(dest, perms)
 728     os.unlink(src)
 729
 730 def copy (src, dest, overwrite = 0, perms = 0o664):
 731     if os.path.exists(dest) and os.path.isdir(dest):
 732         dest_dir = dest
 733     else:
 734         dest_dir = os.path.dirname(dest)
 735     if not os.path.exists(dest_dir):
 736         umask = os.umask(00000)
 737         os.makedirs(dest_dir, 0o2775)
 738         os.umask(umask)
 739     #print "Copying %s to %s..." % (src, dest)
 740     if os.path.exists(dest) and os.path.isdir(dest):
 741         dest += '/' + os.path.basename(src)
 742     # Don't overwrite unless forced to
 743     if os.path.exists(dest):
 744         if not overwrite:
 745             raise FileExistsError
 746         else:
 747             if not os.access(dest, os.W_OK):
 748                 raise CantOverwriteError
 749     shutil.copy2(src, dest)
 750     os.chmod(dest, perms)
 751
 752 ################################################################################
 753
 754 def where_am_i ():
 755     res = socket.getfqdn()
 756     database_hostname = Cnf.get("Config::" + res + "::DatabaseHostname")
 757     if database_hostname:
 758         return database_hostname
 759     else:
 760         return res
 761
 762 def which_conf_file ():
 763     if os.getenv('DAK_CONFIG'):
 764         return os.getenv('DAK_CONFIG')
 765
 766     res = socket.getfqdn()
 767     # In case we allow local config files per user, try if one exists
 768     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 769         homedir = os.getenv("HOME")
 770         confpath = os.path.join(homedir, "/etc/dak.conf")
 771         if os.path.exists(confpath):
 772             apt_pkg.ReadConfigFileISC(Cnf,confpath)
 773
 774     # We are still in here, so there is no local config file or we do
 775     # not allow local files. Do the normal stuff.
 776     if Cnf.get("Config::" + res + "::DakConfig"):
 777         return Cnf["Config::" + res + "::DakConfig"]
 778
 779     return default_config
 780
 781 def which_apt_conf_file ():
 782     res = socket.getfqdn()
 783     # In case we allow local config files per user, try if one exists
 784     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 785         homedir = os.getenv("HOME")
 786         confpath = os.path.join(homedir, "/etc/dak.conf")
 787         if os.path.exists(confpath):
 788             apt_pkg.ReadConfigFileISC(Cnf,default_config)
 789
 790     if Cnf.get("Config::" + res + "::AptConfig"):
 791         return Cnf["Config::" + res + "::AptConfig"]
 792     else:
 793         return default_apt_config
 794
 795 def which_alias_file():
 796     hostname = socket.getfqdn()
 797     aliasfn = '/var/lib/misc/'+hostname+'/forward-alias'
 798     if os.path.exists(aliasfn):
 799         return aliasfn
 800     else:
 801         return None
 802
 803 ################################################################################
 804
 805 def TemplateSubst(subst_map, filename):
 806     """ Perform a substition of template """
 807     templatefile = open_file(filename)
 808     template = templatefile.read()
 809     for k, v in subst_map.iteritems():
 810         template = template.replace(k, str(v))
 811     templatefile.close()
 812     return template
 813
 814 ################################################################################
 815
 816 def fubar(msg, exit_code=1):
 817     sys.stderr.write("E: %s\n" % (msg))
 818     sys.exit(exit_code)
 819
 820 def warn(msg):
 821     sys.stderr.write("W: %s\n" % (msg))
 822
 823 ################################################################################
 824
 825 # Returns the user name with a laughable attempt at rfc822 conformancy
 826 # (read: removing stray periods).
 827 def whoami ():
 828     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 829
 830 def getusername ():
 831     return pwd.getpwuid(os.getuid())[0]
 832
 833 ################################################################################
 834
 835 def size_type (c):
 836     t  = " B"
 837     if c > 10240:
 838         c = c / 1024
 839         t = " KB"
 840     if c > 10240:
 841         c = c / 1024
 842         t = " MB"
 843     return ("%d%s" % (c, t))
 844
 845 ################################################################################
 846
 847 def cc_fix_changes (changes):
 848     o = changes.get("architecture", "")
 849     if o:
 850         del changes["architecture"]
 851     changes["architecture"] = {}
 852     for j in o.split():
 853         changes["architecture"][j] = 1
 854
 855 def changes_compare (a, b):
 856     """ Sort by source name, source version, 'have source', and then by filename """
 857     try:
 858         a_changes = parse_changes(a)
 859     except:
 860         return -1
 861
 862     try:
 863         b_changes = parse_changes(b)
 864     except:
 865         return 1
 866
 867     cc_fix_changes (a_changes)
 868     cc_fix_changes (b_changes)
 869
 870     # Sort by source name
 871     a_source = a_changes.get("source")
 872     b_source = b_changes.get("source")
 873     q = cmp (a_source, b_source)
 874     if q:
 875         return q
 876
 877     # Sort by source version
 878     a_version = a_changes.get("version", "0")
 879     b_version = b_changes.get("version", "0")
 880     q = apt_pkg.version_compare(a_version, b_version)
 881     if q:
 882         return q
 883
 884     # Sort by 'have source'
 885     a_has_source = a_changes["architecture"].get("source")
 886     b_has_source = b_changes["architecture"].get("source")
 887     if a_has_source and not b_has_source:
 888         return -1
 889     elif b_has_source and not a_has_source:
 890         return 1
 891
 892     # Fall back to sort by filename
 893     return cmp(a, b)
 894
 895 ################################################################################
 896
 897 def find_next_free (dest, too_many=100):
 898     extra = 0
 899     orig_dest = dest
 900     while os.path.exists(dest) and extra < too_many:
 901         dest = orig_dest + '.' + repr(extra)
 902         extra += 1
 903     if extra >= too_many:
 904         raise NoFreeFilenameError
 905     return dest
 906
 907 ################################################################################
 908
 909 def result_join (original, sep = '\t'):
 910     resultlist = []
 911     for i in xrange(len(original)):
 912         if original[i] == None:
 913             resultlist.append("")
 914         else:
 915             resultlist.append(original[i])
 916     return sep.join(resultlist)
 917
 918 ################################################################################
 919
 920 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 921     out = ""
 922     for line in str.split('\n'):
 923         line = line.strip()
 924         if line or include_blank_lines:
 925             out += "%s%s\n" % (prefix, line)
 926     # Strip trailing new line
 927     if out:
 928         out = out[:-1]
 929     return out
 930
 931 ################################################################################
 932
 933 def validate_changes_file_arg(filename, require_changes=1):
 934     """
 935     'filename' is either a .changes or .dak file.  If 'filename' is a
 936     .dak file, it's changed to be the corresponding .changes file.  The
 937     function then checks if the .changes file a) exists and b) is
 938     readable and returns the .changes filename if so.  If there's a
 939     problem, the next action depends on the option 'require_changes'
 940     argument:
 941
 942       - If 'require_changes' == -1, errors are ignored and the .changes
 943         filename is returned.
 944       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 945       - If 'require_changes' == 1, a fatal error is raised.
 946
 947     """
 948     error = None
 949
 950     orig_filename = filename
 951     if filename.endswith(".dak"):
 952         filename = filename[:-4]+".changes"
 953
 954     if not filename.endswith(".changes"):
 955         error = "invalid file type; not a changes file"
 956     else:
 957         if not os.access(filename,os.R_OK):
 958             if os.path.exists(filename):
 959                 error = "permission denied"
 960             else:
 961                 error = "file not found"
 962
 963     if error:
 964         if require_changes == 1:
 965             fubar("%s: %s." % (orig_filename, error))
 966         elif require_changes == 0:
 967             warn("Skipping %s - %s" % (orig_filename, error))
 968             return None
 969         else: # We only care about the .dak file
 970             return filename
 971     else:
 972         return filename
 973
 974 ################################################################################
 975
 976 def real_arch(arch):
 977     return (arch != "source" and arch != "all")
 978
 979 ################################################################################
 980
 981 def join_with_commas_and(list):
 982     if len(list) == 0: return "nothing"
 983     if len(list) == 1: return list[0]
 984     return ", ".join(list[:-1]) + " and " + list[-1]
 985
 986 ################################################################################
 987
 988 def pp_deps (deps):
 989     pp_deps = []
 990     for atom in deps:
 991         (pkg, version, constraint) = atom
 992         if constraint:
 993             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
 994         else:
 995             pp_dep = pkg
 996         pp_deps.append(pp_dep)
 997     return " |".join(pp_deps)
 998
 999 ################################################################################
1000
def get_conf():
    """Return the module-level configuration object C{Cnf}."""
    return Cnf
1003
1004 ################################################################################
1005
def parse_args(Options):
    """
    Handle -a, -c and -s arguments; returns them as SQL constraints

    Names that are not recognised produce a warning; if an option is given
    but none of its names is valid, the program exits via fubar().

    @return: tuple (con_suites, con_architectures, con_components,
             check_source).  The first three are "AND ... IN (...)" SQL
             fragments or empty strings; check_source is 1 if "source" was
             requested or no architecture was given at all, else 0.
    """
    # XXX: This should go away and everything which calls it be converted
    #      to use SQLA properly.  For now, we'll just fix it not to use
    #      the old Pg interface though
    session = DBConn().session()

    # Suites
    if not Options["Suite"]:
        con_suites = ""
    else:
        suite_ids = []
        for suitename in split_args(Options["Suite"]):
            suite = get_suite(suitename, session=session)
            if suite and suite.suite_id is not None:
                suite_ids.append(suite.suite_id)
            else:
                warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
        if not suite_ids:
            fubar("No valid suite given.")
        else:
            con_suites = "AND su.id IN (%s)" % ", ".join(str(suite_id) for suite_id in suite_ids)

    # Components
    if not Options["Component"]:
        con_components = ""
    else:
        component_ids = []
        for componentname in split_args(Options["Component"]):
            component = get_component(componentname, session=session)
            if component is not None:
                component_ids.append(component.component_id)
            else:
                warn("component '%s' not recognised." % (componentname))
        if not component_ids:
            fubar("No valid component given.")
        else:
            con_components = "AND c.id IN (%s)" % ", ".join(str(component_id) for component_id in component_ids)

    # Architectures; "source" is not looked up but reported via check_source
    con_architectures = ""
    check_source = 0
    if not Options["Architecture"]:
        check_source = 1
    else:
        arch_ids = []
        for archname in split_args(Options["Architecture"]):
            if archname == "source":
                check_source = 1
                continue
            arch = get_architecture(archname, session=session)
            if arch is not None:
                arch_ids.append(arch.arch_id)
            else:
                warn("architecture '%s' not recognised." % (archname))
        if arch_ids:
            con_architectures = "AND a.id IN (%s)" % ", ".join(str(arch_id) for arch_id in arch_ids)
        elif not check_source:
            fubar("No valid architecture given.")

    return (con_suites, con_architectures, con_components, check_source)
1067
1068 ################################################################################
1069
def arch_compare_sw (a, b):
    """
    Comparison function for sorting lists of architecture names.

    "source" sorts before every other name; all other names compare
    normally.
    """

    if a == "source":
        return 0 if b == "source" else -1
    if b == "source":
        return 1

    return cmp (a, b)
1085
1086 ################################################################################
1087
def split_args (s, dwim=1):
    """
    Split command line arguments which can be separated by either commas
    or whitespace.  A string without any comma is split on whitespace,
    otherwise it is split on commas only.  If dwim is set, a string
    ending in a comma is treated as a fatal error, since this usually
    means someone did 'dak ls -a i386, m68k foo' or something and the
    inevitable confusion resulting from 'm68k' being treated as an
    argument is undesirable.
    """

    if "," not in s:
        return s.split()

    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1103
1104 ################################################################################
1105
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    Runs 'cmd' through /bin/sh in a forked child and collects what the
    child writes to its stdout/stderr and to the status pipe.

    @type  cmd: string
    @param cmd: shell command line to run

    @type  status_read: int
    @param status_read: read end of the status pipe; read by the parent

    @type  status_write: int
    @param status_write: write end of the status pipe; the only descriptor
                         above 2 that is kept open in the child

    @rtype:  tuple
    @return: (output, status, exit_status): the data read from the child's
             stdout and stderr pipes, the data read from status_read, and
             the status value returned by os.waitpid() for the child.
             Both status pipe descriptors are closed before returning.
    """

    cmd = ['/bin/sh', '-c', cmd]
    # One pipe each for the child's stdin, stdout and stderr
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # After closing 0 and 1 the two os.dup() calls are expected to
        # return exactly those descriptors (lowest free number first), which
        # wires the pipes up as stdin and stdout; same for 2 / stderr below.
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        os.close(2)
        os.dup(errin)
        # Close everything else except the status descriptor the command
        # is going to write to; descriptors that are not open are ignored.
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        try:
            os.execvp(cmd[0], cmd)
        finally:
            # Only reached if the exec failed
            os._exit(1)

    # Parent
    os.close(p2cread)
    # Re-point the parent's copies of the write-end descriptor numbers at the
    # read ends, so c2pwrite / errin are what gets read from below.
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                if fd == c2pwrite or fd == errin:
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        # No ready descriptor delivered any data: reap the child and clean up
        if not more_data:
            pid, exit_status = os.waitpid(pid, 0)
            try:
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                # Best effort: the first failing close skips the remaining ones
                pass
            break

    return output, status, exit_status
1170
1171 ################################################################################
1172
def process_gpgv_output(status):
    """
    Parse gpgv status-fd output into a dictionary.

    Every non-empty line is expected to look like
    "[GNUPG:] KEYWORD [ARG ...]".

    @type  status: string
    @param status: the raw status-fd output

    @rtype:  tuple
    @return: (keywords, internal_error).  'keywords' maps each status
             keyword to the list of its arguments; 'internal_error' is a
             string with one newline-terminated message per malformed or
             duplicated line, empty if there were none.
    """
    # Keywords for which a repeated occurrence is accepted; the last
    # occurrence wins.  Any other repeated keyword is reported and the first
    # occurrence is kept.
    repeatable = ("NODATA", "SIGEXPIRED", "KEYEXPIRED")

    keywords = {}
    errors = []
    for line in status.split('\n'):
        line = line.strip()
        if not line:
            continue
        atoms = line.split()
        if len(atoms) < 2:
            errors.append("gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line))
            continue
        (gnupg, keyword) = atoms[:2]
        if gnupg != "[GNUPG:]":
            errors.append("gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg))
            continue
        if keyword in keywords and keyword not in repeatable:
            errors.append("found duplicate status token ('%s').\n" % (keyword))
            continue
        keywords[keyword] = atoms[2:]

    return (keywords, "".join(errors))
1197
1198 ################################################################################
1199
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Retrieve the key that signed 'filename' from 'keyserver' and
    add it to 'keyring'.

    'keyserver' defaults to the Dinstall::KeyServer configuration value and
    'keyring' to the primary keyring path.

    Returns an empty string on success, or an error message on error.
    """

    # Fill in the defaults for keyserver and keyring
    keyserver = keyserver or Cnf["Dinstall::KeyServer"]
    keyring = keyring or get_primary_keyring_path()

    # The filename ends up on a shell command line, so refuse anything that
    # contains shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Run gpgv against an empty keyring and collect its status-fd output
    status_read, status_write = os.pipe()
    gpgv_cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    gpgv_result = gpgv_get_status_output(gpgv_cmd, status_read, status_write)
    status = gpgv_result[1]

    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"

    fingerprint = keywords["NO_PUBKEY"][0]
    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    cmd = " ".join([
        "gpg --no-default-keyring --secret-keyring=%s --no-options" % (Cnf["Dinstall::SigningKeyring"]),
        "--keyring %s --keyserver %s --recv-key %s" % (keyring, keyserver, fingerprint),
    ])
    (result, output) = commands.getstatusoutput(cmd)
    if result != 0:
        return "'%s' failed with exit code %s" % (cmd, result)

    return ""
1243
1244 ################################################################################
1245
def gpg_keyring_args(keyrings=None):
    """
    Build the "--keyring ..." command line arguments for gpg/gpgv.

    Uses the active keyring paths when 'keyrings' is empty or not given.
    """
    paths = keyrings or get_active_keyring_paths()
    return " ".join("--keyring %s" % path for path in paths)
1251
1252 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type  sig_filename: string
    @param sig_filename: name of the file whose signature should be checked

    @type  data_filename: string
    @param data_filename: optional; name of the file a detached signature
                          applies to

    @type  keyrings: list of strings
    @param keyrings: optional; keyrings to use.  Defaults to the names of
                     all active keyrings in the database.

    @type  autofetch: None, True or False
    @param autofetch: whether to try to retrieve the signing key before
                      checking.  If None, the C{Dinstall::KeyAutoFetch}
                      configuration value is used.

    @param session: database session used to look up the active keyrings

    @rtype:  tuple
    @return: C{(fingerprint, [])} if the signature is valid, otherwise
             C{(None, rejects)} where C{rejects} is a list of strings
             describing why the signature was not accepted
    """

    rejects = []

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        # "== True" is deliberate here: it is part of the query filter
        # expression, not a plain Python comparison.
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        # list.append() takes exactly one argument
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # The token may come without arguments; never leave 'key' unbound
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate=""
        if len(args) >= 1:
            timestamp = args[0]
            # A timestamp without a "T" is treated as seconds since the epoch
            if timestamp.count("T") == 0:
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    fingerprint = None
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
                          SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
                          NODATA="",NOTATION_DATA="",NOTATION_NAME="",KEYEXPIRED="",POLICY_URL="")

    for keyword in keywords.keys():
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        return (fingerprint, [])
1385
1386 ################################################################################
1387
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve email addresses from gpg key uids for a given fingerprint.

    Results are kept in C{key_uid_email_cache}.  Addresses ending in
    '@debian.org' are placed in front of all others.
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    debian_addresses = []
    other_addresses = []
    cmd = "gpg --no-default-keyring %s --fingerprint %s" \
                % (gpg_keyring_args(), fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if result == 0:
        for uid_line in output.split('\n'):
            match = re_gpg_uid.match(uid_line)
            if match is None:
                continue
            address = match.group(1)
            # prefer @debian.org addresses
            # TODO: maybe not hardcode the domain
            if address.endswith('@debian.org'):
                debian_addresses.append(address)
            else:
                other_addresses.append(address)

    # The most recently seen @debian.org address comes first
    addresses = debian_addresses[::-1] + other_addresses
    key_uid_email_cache[fingerprint] = addresses
    return addresses
1411
1412 ################################################################################
1413
def get_logins_from_ldap(fingerprint='*'):
    """
    Retrieve the logins linked to a given fingerprint from LDAP.

    The server and base DN are taken from the Import-LDAP-Fingerprints
    configuration section.

    @rtype:  dict
    @return: maps the first keyFingerPrint value of each entry found to
             the first uid value of that entry
    """

    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    connection.simple_bind_s('','')
    search_filter = '(keyfingerprint=%s)' % fingerprint
    entries = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  search_filter,
                                  ['uid', 'keyfingerprint'])
    return dict((entry[1]['keyFingerPrint'][0], entry[1]['uid'][0])
                for entry in entries)
1428
1429 ################################################################################
1430
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of 'root' is removed from both paths, and one
    '../' is prepended to 'src' for every '/'-separated component of the
    directory part of 'dest'.

    Returns fixed 'src'
    """
    relative_src = src.replace(root, '', 1)
    dest_dir = os.path.dirname(dest.replace(root, '', 1))
    depth = len(dest_dir.split('/'))
    return '../' * depth + relative_src
1441
1442 ################################################################################
1443
def temp_filename(directory=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique filename by pre-creating it.
    If 'directory' is non-null, the file is pre-created in that directory.
    If 'prefix' is non-null, the filename will start with it, default is dak.
    If 'suffix' is non-null, the filename will end with it.
    If 'mode' is non-null, the file is chmod'ed to it.
    If 'group' is non-null, the group of the file is changed to it.

    Returns a pair (fd, name).
    """

    created = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
    path = created[1]
    if mode:
        os.chmod(path, mode)
    if group:
        os.chown(path, -1, group)
    return created
1460
1461 ################################################################################
1462
def temp_dirname(parent=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique directory by pre-creating it.
    If 'parent' is non-null, the directory is pre-created in that directory.
    If 'prefix' is non-null, the directory name will start with it, default is dak.
    If 'suffix' is non-null, the directory name will end with it.
    If 'mode' is non-null, the directory is chmod'ed to it.
    If 'group' is non-null, the group of the directory is changed to it.

    Returns a pathname to the new directory
    """

    # tempfile.mkdtemp() returns only the path; unlike mkstemp() there is no
    # file descriptor to unpack.
    tfname = tempfile.mkdtemp(suffix, prefix, parent)
    if mode:
        os.chmod(tfname, mode)
    if group:
        os.chown(tfname, -1, group)
    return tfname
1479
1480 ################################################################################
1481
def is_email_alias(email):
    """
    Check if the user part of 'email' is listed in the alias file.

    The alias names (the text before the first ':' of every line) are read
    once from the file returned by which_alias_file() and kept in the
    module-level C{alias_cache}.

    @type  email: string
    @param email: mail address to check

    @rtype:  bool
    @return: True if the part of 'email' before the first '@' is an alias
    """
    global alias_cache
    if alias_cache is None:
        # Fill a local set first so that a failure while reading does not
        # leave a half-populated cache behind for later calls.
        aliases = set()
        aliasfn = which_alias_file()
        if aliasfn:
            with open(aliasfn) as aliasfile:
                for l in aliasfile:
                    aliases.add(l.split(':')[0])
        alias_cache = aliases
    uid = email.split('@')[0]
    return uid in alias_cache
1493
1494 ################################################################################
1495
def get_changes_files(from_dir):
    """
    Takes a directory and lists all .changes files in it (as well as chdir'ing
    to the directory; this is due to broken behaviour on the part of p-u/p-a
    when you're not in the right place)

    Exits via fubar() if the directory cannot be entered or read.

    Returns a list of filenames
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        # List the directory we are now in: a relative 'from_dir' would
        # otherwise be resolved a second time against the new cwd.
        changes_files = [x for x in os.listdir(os.curdir) if x.endswith('.changes')]
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return changes_files
1512
1513 ################################################################################
1514
# Module initialisation; everything below runs at import time.
apt_pkg.init()

# Global configuration object used throughout this module.
Cnf = apt_pkg.Configuration()
# The default configuration file is not read when DAK_TEST is set in the
# environment.
if not os.getenv("DAK_TEST"):
    apt_pkg.read_config_file_isc(Cnf,default_config)

# If which_conf_file() selects a file other than the default one, read it
# on top of what has been loaded so far.
if which_conf_file() != default_config:
    apt_pkg.read_config_file_isc(Cnf,which_conf_file())
1523
1524 ################################################################################
1525
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at http://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parses a local copy, but let's document the source
    somewhere ;)

    Each line is expected to look like "source: entry|entry|..."; lines
    without ": " are ignored.  The first run of digits of every entry is
    taken as the bug number, entries without digits are skipped.  If the
    file cannot be opened a warning is printed and an empty dict is
    returned.

    @type  file: string
    @param file: path of the local copy of the wnpp bug list

    @rtype:  dict
    @return: associates each source package name with a list of open wnpp
             bug numbers as strings (Yes, there might be more than one)
    """

    try:
        with open(file) as f:
            lines = f.readlines()
    except IOError:
        print("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any." % file)
        lines = []

    wnpp = {}
    for line in lines:
        splited_line = line.split(": ", 1)
        if len(splited_line) < 2:
            continue
        bugs = []
        for wnpp_bug in splited_line[1].split("|"):
            # re.search() returns None for an entry without any digit
            bug_match = re.search(r"\d+", wnpp_bug)
            if bug_match:
                bugs.append(bug_match.group())
        wnpp[splited_line[0]] = bugs
    return wnpp
1558
1559 ################################################################################
1560
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    aggregating the Packages.gz file of the given architecture with the
    corresponding debian-installer Packages.gz file, if that one exists.

    Exits via fubar() if gunzip fails.

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
    (fd, temp_file) = temp_filename()
    # gunzip writes to the file by name through the shell redirection, so
    # the descriptor is not needed; close it instead of leaking it.
    os.close(fd)
    try:
        (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
        if (result != 0):
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
        if os.path.exists(filename):
            (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
            if (result != 0):
                fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        packages = open_file(temp_file)
        Packages = apt_pkg.ParseTagFile(packages)
    finally:
        # Remove the temporary file on the error paths as well; on success
        # the data stays reachable through the handle opened above.
        os.unlink(temp_file)
    return Packages
1595
1596 ################################################################################
1597
def deb_extract_control(fh):
    """Return the contents of DEBIAN/control of the binary package 'fh'."""
    deb = apt_inst.DebFile(fh)
    control_archive = deb.control
    return control_archive.extractdata("control")
1601
1602 ################################################################################
1603
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """mail addresses to contact for an upload

    The maintainer always comes first, followed by the Changed-By address
    if it differs.  If the signing key has addresses and neither of the
    two is among them, the first address of the key is added as well.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if changed_by != maintainer:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    # Nested on purpose: the fields are only parsed when the key has
    # addresses at all, Changed-By first.
    if len(key_addresses) > 0:
        if fix_maintainer(changed_by)[3] not in key_addresses:
            if fix_maintainer(maintainer)[3] not in key_addresses:
                recipients.append(key_addresses[0])

    return [ fix_maintainer(recipient)[1] for recipient in recipients ]
1630
1631 ################################################################################
1632
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The editor command is taken from the VISUAL environment variable,
    falling back to EDITOR and finally to 'vi'.  The text is written to a
    temporary file, the editor is started on that file and the file's
    contents are read back once the editor has exited.  The temporary file
    is removed in all cases.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text

    @raise subprocess.CalledProcessError: if the editor exits with a
        non-zero status
    """
    editor = os.environ.get('VISUAL', os.environ.get('EDITOR', 'vi'))
    # delete=False: the file has to survive being closed so the editor can
    # open it by name; it is unlinked explicitly below.
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False)
    try:
        # Close the handle even if the write fails, and always before the
        # editor runs so that the contents are flushed to disk.
        with tmp:
            tmp.write(text)
        subprocess.check_call([editor, tmp.name])
        # Use a context manager so the read handle is closed right away
        # instead of whenever the file object gets collected.
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        os.unlink(tmp.name)
1654
1655 ################################################################################
1656
def _dep_group_removed(dep, removal_set):
    """tell whether every alternative of an ORed dependency group is removed"""
    for dep_package, _, _ in dep:
        if dep_package not in removal_set:
            return False
    return True


def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
    """report Depends and Build-Depends that a removal would break

    For every checked architecture the binaries of the suite are examined:
    a binary is reported if one of its Depends groups consists only of
    packages that are going to be removed.  Virtual packages that are
    provided exclusively by to-be-removed packages are treated as removed
    too (this carries over to the architectures checked afterwards and to
    the Build-Depends check).  Afterwards the Build-Depends and
    Build-Depends-Indep of all sources in the suite are checked the same
    way, with architecture restrictions stripped.  The findings are printed
    to standard output.

    @type  removals: list of str
    @param removals: names of the packages to be removed; the list itself
        is not modified

    @type  suite: str
    @param suite: name of the suite to check

    @type  arches: list of str
    @param arches: architectures to check; if empty or None the
        architectures of the suite are used.  "source" and "all" are
        dropped from the list, "all" is always checked in addition.

    @type  session: database session
    @param session: session used for all queries.  NOTE: despite the None
        default the session is used directly (session.query), so callers
        have to pass one.

    @type  cruft: bool
    @param cruft: if true, print the report as an indented list
        ("  - broken Depends:") instead of the "# Broken Depends:" format

    @rtype:  int
    @return: 1 if at least one broken dependency was found, otherwise 0
    """
    dbsuite = get_suite(suite, session)
    overridesuite = dbsuite
    if dbsuite.overridesuite is not None:
        overridesuite = get_suite(dbsuite.overridesuite, session)
    dep_problem = 0
    p2c = {}
    all_broken = {}
    # Work on a private set: virtual packages get added below and the
    # caller's list must not change behind its back.  A set also makes the
    # many membership tests cheap.
    removal_set = set(removals)
    # Report lines are indented by four extra columns in cruft mode.
    indent = '    ' if cruft else ''
    if arches:
        all_arches = set(arches)
    else:
        all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
    all_arches -= set(["source", "all"])
    metakey_d = get_or_set_metadatakey("Depends", session)
    metakey_p = get_or_set_metadatakey("Provides", session)
    params = {
        'suite_id':     dbsuite.suite_id,
        'metakey_d_id': metakey_d.key_id,
        'metakey_p_id': metakey_p.key_id,
    }
    for architecture in all_arches | set(['all']):
        deps = {}
        sources = {}
        virtual_packages = {}
        params['arch_id'] = get_architecture(architecture, session).arch_id

        statement = '''
            SELECT b.id, b.package, s.source, c.name as component,
                (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
                (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
                FROM binaries b
                JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
                JOIN source s ON b.source = s.id
                JOIN files_archive_map af ON b.file = af.file_id
                JOIN component c ON af.component_id = c.id
                WHERE b.architecture = :arch_id'''
        query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
            from_statement(statement).params(params)
        for binary_id, package, source, component, depends, provides in query:
            sources[package] = source
            p2c[package] = component
            if depends is not None:
                deps[package] = depends
            # Maintain a counter for each virtual package.  If a
            # Provides: exists, set the counter to 0 and count all
            # provides by a package not in the list for removal.
            # If the counter stays 0 at the end, we know that only
            # the to-be-removed packages provided this virtual
            # package.
            if provides is not None:
                for virtual_pkg in provides.split(","):
                    virtual_pkg = virtual_pkg.strip()
                    if virtual_pkg == package:
                        continue
                    if virtual_pkg not in virtual_packages:
                        virtual_packages[virtual_pkg] = 0
                    if package not in removal_set:
                        virtual_packages[virtual_pkg] += 1

        # If a virtual package is only provided by the to-be-removed
        # packages, treat the virtual package as to-be-removed too.
        for virtual_pkg, providers in virtual_packages.items():
            if providers == 0:
                removal_set.add(virtual_pkg)

        # Check binary dependencies (Depends)
        for package in deps:
            if package in removal_set:
                continue
            try:
                parsed_dep = apt_pkg.ParseDepends(deps[package])
            except ValueError as e:
                print("Error for package %s: %s" % (package, e))
                parsed_dep = []
            for dep in parsed_dep:
                # Check for partial breakage.  If a package has a ORed
                # dependency, there is only a dependency problem if all
                # packages in the ORed depends will be removed.
                if not _dep_group_removed(dep, removal_set):
                    continue
                component = p2c[package]
                source = sources[package]
                if component != "main":
                    source = "%s/%s" % (source, component)
                all_broken.setdefault(source, {}).setdefault(package, set()).add(architecture)
                dep_problem = 1

    if all_broken:
        if cruft:
            print("  - broken Depends:")
        else:
            print("# Broken Depends:")
        for source, bindict in sorted(all_broken.items()):
            lines = []
            for binary, broken_arches in sorted(bindict.items()):
                # No architecture list is shown when the binary is broken
                # on every checked architecture or is arch: all.
                if broken_arches == all_arches or 'all' in broken_arches:
                    lines.append(binary)
                else:
                    lines.append('%s [%s]' % (binary, ' '.join(sorted(broken_arches))))
            print('%s%s: %s' % (indent, source, lines[0]))
            # Further binaries are aligned below the first one.
            continuation = indent + ' ' * (len(source) + 2)
            for line in lines[1:]:
                print(continuation + line)
        if not cruft:
            print("")

    # Check source dependencies (Build-Depends and Build-Depends-Indep)
    all_broken.clear()
    metakey_bd = get_or_set_metadatakey("Build-Depends", session)
    metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
    params = {
        'suite_id':    dbsuite.suite_id,
        'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
    }
    statement = '''
        SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
           FROM source s
           JOIN source_metadata sm ON s.id = sm.src_id
           WHERE s.id in
               (SELECT source FROM src_associations
                   WHERE suite = :suite_id)
               AND sm.key_id in :metakey_ids
           GROUP BY s.id, s.source'''
    query = session.query('id', 'source', 'build_dep').from_statement(statement). \
        params(params)
    for source_id, source, build_dep in query:
        if source in removal_set:
            continue
        parsed_dep = []
        if build_dep is not None:
            # Remove [arch] information since we want to see breakage on all arches
            build_dep = re_build_dep_arch.sub("", build_dep)
            try:
                parsed_dep = apt_pkg.ParseDepends(build_dep)
            except ValueError as e:
                print("Error for source %s: %s" % (source, e))
        # Label under which this source is reported.  It is computed once,
        # from the unmodified source name: looking up the override with an
        # already suffixed "source/component" name finds nothing.
        broken_key = None
        for dep in parsed_dep:
            if not _dep_group_removed(dep, removal_set):
                continue
            if broken_key is None:
                row = session.query(Component.component_name) \
                    .join(Component.overrides) \
                    .filter(Override.suite == overridesuite) \
                    .filter(Override.package == source) \
                    .join(Override.overridetype).filter(OverrideType.overridetype == 'dsc') \
                    .first()
                broken_key = source
                # A source without a dsc override yields no row; report it
                # under its plain name instead of failing.
                if row is not None:
                    component, = row
                    if component != "main":
                        broken_key = "%s/%s" % (source, component)
            all_broken.setdefault(broken_key, set()).add(pp_deps(dep))
            dep_problem = 1

    if all_broken:
        if cruft:
            print("  - broken Build-Depends:")
        else:
            print("# Broken Build-Depends:")
        for source, bdeps in sorted(all_broken.items()):
            bdeps = sorted(bdeps)
            print('%s%s: %s' % (indent, source, bdeps[0]))
            continuation = indent + ' ' * (len(source) + 2)
            for bdep in bdeps[1:]:
                print(continuation + bdep)
        if not cruft:
            print("")

    return dep_problem