daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import codecs
  27 import datetime
  28 import email.Header
  29 import os
  30 import pwd
  31 import grp
  32 import select
  33 import socket
  34 import shutil
  35 import sys
  36 import tempfile
  37 import traceback
  38 import stat
  39 import apt_inst
  40 import apt_pkg
  41 import time
  42 import re
  43 import email as modemail
  44 import subprocess
  45 import ldap
  46 import errno
  47
  48 import daklib.config as config
  49 import daklib.daksubprocess
  50 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  51                    get_override_type, Keyring, session_wrapper, \
  52                    get_active_keyring_paths, get_primary_keyring_path, \
  53                    get_suite_architectures, get_or_set_metadatakey, DBSource, \
  54                    Component, Override, OverrideType
  55 from sqlalchemy import desc
  56 from dak_exceptions import *
  57 from gpg import SignedFile
  58 from textutils import fix_maintainer
  59 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  60                     re_multi_line_field, re_srchasver, re_taint_free, \
  61                     re_re_mark, re_whitespace_comment, re_issource, \
  62                     re_is_orig_source, re_build_dep_arch, re_parse_maintainer
  63
  64 from formats import parse_format, validate_changes_format
  65 from srcformats import get_format_from_string
  66 from collections import defaultdict
  67
  68 ################################################################################
  69
# Configuration file path returned by which_conf_file() when nothing else
# selects one.
default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties

alias_cache = None        #: Cache for email alias checks
key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids

# Each entry is a tuple of
# (hashname, function, earliest_changes_version)
known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  78
  79 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  80 # code in lenny's Python. This also affects commands.getoutput and
  81 # commands.getstatus.
  82 def dak_getstatusoutput(cmd):
  83     pipe = daklib.daksubprocess.Popen(cmd, shell=True, universal_newlines=True,
  84         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  85
  86     output = pipe.stdout.read()
  87
  88     pipe.wait()
  89
  90     if output[-1:] == '\n':
  91         output = output[:-1]
  92
  93     ret = pipe.wait()
  94     if ret is None:
  95         ret = 0
  96
  97     return ret, output
  98 commands.getstatusoutput = dak_getstatusoutput
  99
 100 ################################################################################
 101
 102 def html_escape(s):
 103     """ Escape html chars """
 104     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 105
 106 ################################################################################
 107
 108 def open_file(filename, mode='r'):
 109     """
 110     Open C{file}, return fileobject.
 111
 112     @type filename: string
 113     @param filename: path/filename to open
 114
 115     @type mode: string
 116     @param mode: open mode
 117
 118     @rtype: fileobject
 119     @return: open fileobject
 120
 121     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 122
 123     """
 124     try:
 125         f = open(filename, mode)
 126     except IOError:
 127         raise CantOpenError(filename)
 128     return f
 129
 130 ################################################################################
 131
 132 def our_raw_input(prompt=""):
 133     if prompt:
 134         while 1:
 135             try:
 136                 sys.stdout.write(prompt)
 137                 break
 138             except IOError:
 139                 pass
 140     sys.stdout.flush()
 141     try:
 142         ret = raw_input()
 143         return ret
 144     except EOFError:
 145         sys.stderr.write("\nUser interrupt (^D).\n")
 146         raise SystemExit
 147
 148 ################################################################################
 149
 150 def extract_component_from_section(section, session=None):
 151     component = ""
 152
 153     if section.find('/') != -1:
 154         component = section.split('/')[0]
 155
 156     # Expand default component
 157     if component == "":
 158         component = "main"
 159
 160     return (section, component)
 161
 162 ################################################################################
 163
 164 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 165     require_signature = True
 166     if keyrings == None:
 167         keyrings = []
 168         require_signature = False
 169
 170     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 171     contents = signed_file.contents
 172
 173     error = ""
 174     changes = {}
 175
 176     # Split the lines in the input, keeping the linebreaks.
 177     lines = contents.splitlines(True)
 178
 179     if len(lines) == 0:
 180         raise ParseChangesError("[Empty changes file]")
 181
 182     # Reindex by line number so we can easily verify the format of
 183     # .dsc files...
 184     index = 0
 185     indexed_lines = {}
 186     for line in lines:
 187         index += 1
 188         indexed_lines[index] = line[:-1]
 189
 190     num_of_lines = len(indexed_lines.keys())
 191     index = 0
 192     first = -1
 193     while index < num_of_lines:
 194         index += 1
 195         line = indexed_lines[index]
 196         if line == "" and signing_rules == 1:
 197             if index != num_of_lines:
 198                 raise InvalidDscError(index)
 199             break
 200         slf = re_single_line_field.match(line)
 201         if slf:
 202             field = slf.groups()[0].lower()
 203             changes[field] = slf.groups()[1]
 204             first = 1
 205             continue
 206         if line == " .":
 207             changes[field] += '\n'
 208             continue
 209         mlf = re_multi_line_field.match(line)
 210         if mlf:
 211             if first == -1:
 212                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 213             if first == 1 and changes[field] != "":
 214                 changes[field] += '\n'
 215             first = 0
 216             changes[field] += mlf.groups()[0] + '\n'
 217             continue
 218         error += line
 219
 220     changes["filecontents"] = armored_contents
 221
 222     if changes.has_key("source"):
 223         # Strip the source version in brackets from the source field,
 224         # put it in the "source-version" field instead.
 225         srcver = re_srchasver.search(changes["source"])
 226         if srcver:
 227             changes["source"] = srcver.group(1)
 228             changes["source-version"] = srcver.group(2)
 229
 230     if error:
 231         raise ParseChangesError(error)
 232
 233     return changes
 234
 235 ################################################################################
 236
 237 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 238     """
 239     Parses a changes file and returns a dictionary where each field is a
 240     key.  The mandatory first argument is the filename of the .changes
 241     file.
 242
 243     signing_rules is an optional argument:
 244
 245       - If signing_rules == -1, no signature is required.
 246       - If signing_rules == 0 (the default), a signature is required.
 247       - If signing_rules == 1, it turns on the same strict format checking
 248         as dpkg-source.
 249
 250     The rules for (signing_rules == 1)-mode are:
 251
 252       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 253         followed by any PGP header data and must end with a blank line.
 254
 255       - The data section must end with a blank line and must be followed by
 256         "-----BEGIN PGP SIGNATURE-----".
 257     """
 258
 259     with open_file(filename) as changes_in:
 260         content = changes_in.read()
 261     try:
 262         unicode(content, 'utf-8')
 263     except UnicodeError:
 264         raise ChangesUnicodeError("Changes file not proper utf-8")
 265     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 266
 267
 268     if not dsc_file:
 269         # Finally ensure that everything needed for .changes is there
 270         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 271                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 272
 273         missingfields=[]
 274         for keyword in must_keywords:
 275             if not changes.has_key(keyword.lower()):
 276                 missingfields.append(keyword)
 277
 278                 if len(missingfields):
 279                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 280
 281     return changes
 282
 283 ################################################################################
 284
 285 def hash_key(hashname):
 286     return '%ssum' % hashname
 287
 288 ################################################################################
 289
 290 def create_hash(where, files, hashname, hashfunc):
 291     """
 292     create_hash extends the passed files dict with the given hash by
 293     iterating over all files on disk and passing them to the hashing
 294     function given.
 295     """
 296
 297     rejmsg = []
 298     for f in files.keys():
 299         try:
 300             file_handle = open_file(f)
 301         except CantOpenError:
 302             rejmsg.append("Could not open file %s for checksumming" % (f))
 303             continue
 304
 305         files[f][hash_key(hashname)] = hashfunc(file_handle)
 306
 307         file_handle.close()
 308     return rejmsg
 309
 310 ################################################################################
 311
 312 def check_hash(where, files, hashname, hashfunc):
 313     """
 314     check_hash checks the given hash in the files dict against the actual
 315     files on disk.  The hash values need to be present consistently in
 316     all file entries.  It does not modify its input in any way.
 317     """
 318
 319     rejmsg = []
 320     for f in files.keys():
 321         try:
 322             with open_file(f) as file_handle:
 323                 # Check for the hash entry, to not trigger a KeyError.
 324                 if not files[f].has_key(hash_key(hashname)):
 325                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 326                         where))
 327                     continue
 328
 329                 # Actually check the hash for correctness.
 330                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 331                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 332                         where))
 333         except CantOpenError:
 334             # TODO: This happens when the file is in the pool.
 335             # warn("Cannot open file %s" % f)
 336             continue
 337     return rejmsg
 338
 339 ################################################################################
 340
 341 def check_size(where, files):
 342     """
 343     check_size checks the file sizes in the passed files dict against the
 344     files on disk.
 345     """
 346
 347     rejmsg = []
 348     for f in files.keys():
 349         try:
 350             entry = os.stat(f)
 351         except OSError as exc:
 352             if exc.errno == errno.ENOENT:
 353                 # TODO: This happens when the file is in the pool.
 354                 continue
 355             raise
 356
 357         actual_size = entry[stat.ST_SIZE]
 358         size = int(files[f]["size"])
 359         if size != actual_size:
 360             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 361                    % (f, actual_size, size, where))
 362     return rejmsg
 363
 364 ################################################################################
 365
 366 def check_dsc_files(dsc_filename, dsc, dsc_files):
 367     """
 368     Verify that the files listed in the Files field of the .dsc are
 369     those expected given the announced Format.
 370
 371     @type dsc_filename: string
 372     @param dsc_filename: path of .dsc file
 373
 374     @type dsc: dict
 375     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 376
 377     @type dsc_files: dict
 378     @param dsc_files: the file list returned by C{build_file_list()}
 379
 380     @rtype: list
 381     @return: all errors detected
 382     """
 383     rejmsg = []
 384
 385     # Ensure .dsc lists proper set of source files according to the format
 386     # announced
 387     has = defaultdict(lambda: 0)
 388
 389     ftype_lookup = (
 390         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 391         (r'diff.gz',                   ('debian_diff',)),
 392         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 393         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 394         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 395         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 396         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 397     )
 398
 399     for f in dsc_files:
 400         m = re_issource.match(f)
 401         if not m:
 402             rejmsg.append("%s: %s in Files field not recognised as source."
 403                           % (dsc_filename, f))
 404             continue
 405
 406         # Populate 'has' dictionary by resolving keys in lookup table
 407         matched = False
 408         for regex, keys in ftype_lookup:
 409             if re.match(regex, m.group(3)):
 410                 matched = True
 411                 for key in keys:
 412                     has[key] += 1
 413                 break
 414
 415         # File does not match anything in lookup table; reject
 416         if not matched:
 417             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 418
 419     # Check for multiple files
 420     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 421         if has[file_type] > 1:
 422             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 423
 424     # Source format specific tests
 425     try:
 426         format = get_format_from_string(dsc['format'])
 427         rejmsg.extend([
 428             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 429         ])
 430
 431     except UnknownFormatError:
 432         # Not an error here for now
 433         pass
 434
 435     return rejmsg
 436
 437 ################################################################################
 438
 439 def check_hash_fields(what, manifest):
 440     """
 441     check_hash_fields ensures that there are no checksum fields in the
 442     given dict that we do not know about.
 443     """
 444
 445     rejmsg = []
 446     hashes = map(lambda x: x[0], known_hashes)
 447     for field in manifest:
 448         if field.startswith("checksums-"):
 449             hashname = field.split("-",1)[1]
 450             if hashname not in hashes:
 451                 rejmsg.append("Unsupported checksum field for %s "\
 452                     "in %s" % (hashname, what))
 453     return rejmsg
 454
 455 ################################################################################
 456
 457 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 458     if format >= version:
 459         # The version should contain the specified hash.
 460         func = check_hash
 461
 462         # Import hashes from the changes
 463         rejmsg = parse_checksums(".changes", files, changes, hashname)
 464         if len(rejmsg) > 0:
 465             return rejmsg
 466     else:
 467         # We need to calculate the hash because it can't possibly
 468         # be in the file.
 469         func = create_hash
 470     return func(".changes", files, hashname, hashfunc)
 471
 472 # We could add the orig which might be in the pool to the files dict to
 473 # access the checksums easily.
 474
 475 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 476     """
 477     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 478     in the dsc is correct, i.e. identical to the changes file and if necessary
 479     the pool.  The latter task is delegated to check_hash.
 480     """
 481
 482     rejmsg = []
 483     if not dsc.has_key('Checksums-%s' % (hashname,)):
 484         return rejmsg
 485     # Import hashes from the dsc
 486     parse_checksums(".dsc", dsc_files, dsc, hashname)
 487     # And check it...
 488     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 489     return rejmsg
 490
 491 ################################################################################
 492
 493 def parse_checksums(where, files, manifest, hashname):
 494     rejmsg = []
 495     field = 'checksums-%s' % hashname
 496     if not field in manifest:
 497         return rejmsg
 498     for line in manifest[field].split('\n'):
 499         if not line:
 500             break
 501         clist = line.strip().split(' ')
 502         if len(clist) == 3:
 503             checksum, size, checkfile = clist
 504         else:
 505             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 506             continue
 507         if not files.has_key(checkfile):
 508         # TODO: check for the file's entry in the original files dict, not
 509         # the one modified by (auto)byhand and other weird stuff
 510         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 511         #        (file, hashname, where))
 512             continue
 513         if not files[checkfile]["size"] == size:
 514             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 515                 "in %s" % (checkfile, hashname, where))
 516             continue
 517         files[checkfile][hash_key(hashname)] = checksum
 518     for f in files.keys():
 519         if not files[f].has_key(hash_key(hashname)):
 520             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 521     return rejmsg
 522
 523 ################################################################################
 524
 525 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 526
 527 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 528     files = {}
 529
 530     # Make sure we have a Files: field to parse...
 531     if not changes.has_key(field):
 532         raise NoFilesFieldError
 533
 534     # Validate .changes Format: field
 535     if not is_a_dsc:
 536         validate_changes_format(parse_format(changes['format']), field)
 537
 538     includes_section = (not is_a_dsc) and field == "files"
 539
 540     # Parse each entry/line:
 541     for i in changes[field].split('\n'):
 542         if not i:
 543             break
 544         s = i.split()
 545         section = priority = ""
 546         try:
 547             if includes_section:
 548                 (md5, size, section, priority, name) = s
 549             else:
 550                 (md5, size, name) = s
 551         except ValueError:
 552             raise ParseChangesError(i)
 553
 554         if section == "":
 555             section = "-"
 556         if priority == "":
 557             priority = "-"
 558
 559         (section, component) = extract_component_from_section(section)
 560
 561         files[name] = dict(size=size, section=section,
 562                            priority=priority, component=component)
 563         files[name][hashname] = md5
 564
 565     return files
 566
 567 ################################################################################
 568
 569 # see https://bugs.debian.org/619131
 570 def build_package_list(dsc, session = None):
 571     if not dsc.has_key("package-list"):
 572         return {}
 573
 574     packages = {}
 575
 576     for line in dsc["package-list"].split("\n"):
 577         if not line:
 578             break
 579
 580         fields = line.split()
 581         name = fields[0]
 582         package_type = fields[1]
 583         (section, component) = extract_component_from_section(fields[2])
 584         priority = fields[3]
 585
 586         # Validate type if we have a session
 587         if session and get_override_type(package_type, session) is None:
 588             # Maybe just warn and ignore? exit(1) might be a bit hard...
 589             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 590
 591         if name not in packages or packages[name]["type"] == "dsc":
 592             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 593
 594     return packages
 595
 596 ################################################################################
 597
 598 def send_mail (message, filename="", whitelists=None):
 599     """sendmail wrapper, takes _either_ a message string or a file as arguments
 600
 601     @type  whitelists: list of (str or None)
 602     @param whitelists: path to whitelists. C{None} or an empty list whitelists
 603                        everything, otherwise an address is whitelisted if it is
 604                        included in any of the lists.
 605                        In addition a global whitelist can be specified in
 606                        Dinstall::MailWhiteList.
 607     """
 608
 609     maildir = Cnf.get('Dir::Mail')
 610     if maildir:
 611         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 612         path = find_next_free(path)
 613         with open(path, 'w') as fh:
 614             print >>fh, message,
 615
 616     # Check whether we're supposed to be sending mail
 617     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 618         return
 619
 620     # If we've been passed a string dump it into a temporary file
 621     if message:
 622         (fd, filename) = tempfile.mkstemp()
 623         os.write (fd, message)
 624         os.close (fd)
 625
 626     if whitelists is None or None in whitelists:
 627         whitelists = []
 628     if Cnf.get('Dinstall::MailWhiteList', ''):
 629         whitelists.append(Cnf['Dinstall::MailWhiteList'])
 630     if len(whitelists) != 0:
 631         with open_file(filename) as message_in:
 632             message_raw = modemail.message_from_file(message_in)
 633
 634         whitelist = [];
 635         for path in whitelists:
 636           with open_file(path, 'r') as whitelist_in:
 637             for line in whitelist_in:
 638                 if not re_whitespace_comment.match(line):
 639                     if re_re_mark.match(line):
 640                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 641                     else:
 642                         whitelist.append(re.compile(re.escape(line.strip())))
 643
 644         # Fields to check.
 645         fields = ["To", "Bcc", "Cc"]
 646         for field in fields:
 647             # Check each field
 648             value = message_raw.get(field, None)
 649             if value != None:
 650                 match = [];
 651                 for item in value.split(","):
 652                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 653                     mail_whitelisted = 0
 654                     for wr in whitelist:
 655                         if wr.match(email):
 656                             mail_whitelisted = 1
 657                             break
 658                     if not mail_whitelisted:
 659                         print "Skipping {0} since it's not whitelisted".format(item)
 660                         continue
 661                     match.append(item)
 662
 663                 # Doesn't have any mail in whitelist so remove the header
 664                 if len(match) == 0:
 665                     del message_raw[field]
 666                 else:
 667                     message_raw.replace_header(field, ', '.join(match))
 668
 669         # Change message fields in order if we don't have a To header
 670         if not message_raw.has_key("To"):
 671             fields.reverse()
 672             for field in fields:
 673                 if message_raw.has_key(field):
 674                     message_raw[fields[-1]] = message_raw[field]
 675                     del message_raw[field]
 676                     break
 677             else:
 678                 # Clean up any temporary files
 679                 # and return, as we removed all recipients.
 680                 if message:
 681                     os.unlink (filename);
 682                 return;
 683
 684         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 685         os.write (fd, message_raw.as_string(True));
 686         os.close (fd);
 687
 688     # Invoke sendmail
 689     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 690     if (result != 0):
 691         raise SendmailFailedError(output)
 692
 693     # Clean up any temporary files
 694     if message:
 695         os.unlink (filename)
 696
 697 ################################################################################
 698
 699 def poolify (source, component=None):
 700     if source[:3] == "lib":
 701         return source[:4] + '/' + source + '/'
 702     else:
 703         return source[:1] + '/' + source + '/'
 704
 705 ################################################################################
 706
 707 def move (src, dest, overwrite = 0, perms = 0o664):
 708     if os.path.exists(dest) and os.path.isdir(dest):
 709         dest_dir = dest
 710     else:
 711         dest_dir = os.path.dirname(dest)
 712     if not os.path.lexists(dest_dir):
 713         umask = os.umask(00000)
 714         os.makedirs(dest_dir, 0o2775)
 715         os.umask(umask)
 716     #print "Moving %s to %s..." % (src, dest)
 717     if os.path.exists(dest) and os.path.isdir(dest):
 718         dest += '/' + os.path.basename(src)
 719     # Don't overwrite unless forced to
 720     if os.path.lexists(dest):
 721         if not overwrite:
 722             fubar("Can't move %s to %s - file already exists." % (src, dest))
 723         else:
 724             if not os.access(dest, os.W_OK):
 725                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 726     shutil.copy2(src, dest)
 727     os.chmod(dest, perms)
 728     os.unlink(src)
 729
 730 def copy (src, dest, overwrite = 0, perms = 0o664):
 731     if os.path.exists(dest) and os.path.isdir(dest):
 732         dest_dir = dest
 733     else:
 734         dest_dir = os.path.dirname(dest)
 735     if not os.path.exists(dest_dir):
 736         umask = os.umask(00000)
 737         os.makedirs(dest_dir, 0o2775)
 738         os.umask(umask)
 739     #print "Copying %s to %s..." % (src, dest)
 740     if os.path.exists(dest) and os.path.isdir(dest):
 741         dest += '/' + os.path.basename(src)
 742     # Don't overwrite unless forced to
 743     if os.path.lexists(dest):
 744         if not overwrite:
 745             raise FileExistsError
 746         else:
 747             if not os.access(dest, os.W_OK):
 748                 raise CantOverwriteError
 749     shutil.copy2(src, dest)
 750     os.chmod(dest, perms)
 751
 752 ################################################################################
 753
 754 def which_conf_file ():
 755     if os.getenv('DAK_CONFIG'):
 756         return os.getenv('DAK_CONFIG')
 757
 758     res = socket.getfqdn()
 759     # In case we allow local config files per user, try if one exists
 760     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 761         homedir = os.getenv("HOME")
 762         confpath = os.path.join(homedir, "/etc/dak.conf")
 763         if os.path.exists(confpath):
 764             apt_pkg.read_config_file_isc(Cnf,confpath)
 765
 766     # We are still in here, so there is no local config file or we do
 767     # not allow local files. Do the normal stuff.
 768     if Cnf.get("Config::" + res + "::DakConfig"):
 769         return Cnf["Config::" + res + "::DakConfig"]
 770
 771     return default_config
 772
 773 ################################################################################
 774
 775 def TemplateSubst(subst_map, filename):
 776     """ Perform a substition of template """
 777     with open_file(filename) as templatefile:
 778         template = templatefile.read()
 779     for k, v in subst_map.iteritems():
 780         template = template.replace(k, str(v))
 781     return template
 782
 783 ################################################################################
 784
 785 def fubar(msg, exit_code=1):
 786     sys.stderr.write("E: %s\n" % (msg))
 787     sys.exit(exit_code)
 788
 789 def warn(msg):
 790     sys.stderr.write("W: %s\n" % (msg))
 791
 792 ################################################################################
 793
 794 # Returns the user name with a laughable attempt at rfc822 conformancy
 795 # (read: removing stray periods).
 796 def whoami ():
 797     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 798
 799 def getusername ():
 800     return pwd.getpwuid(os.getuid())[0]
 801
 802 ################################################################################
 803
 804 def size_type (c):
 805     t  = " B"
 806     if c > 10240:
 807         c = c / 1024
 808         t = " KB"
 809     if c > 10240:
 810         c = c / 1024
 811         t = " MB"
 812     return ("%d%s" % (c, t))
 813
 814 ################################################################################
 815
 816 def cc_fix_changes (changes):
 817     o = changes.get("architecture", "")
 818     if o:
 819         del changes["architecture"]
 820     changes["architecture"] = {}
 821     for j in o.split():
 822         changes["architecture"][j] = 1
 823
 824 def changes_compare (a, b):
 825     """ Sort by source name, source version, 'have source', and then by filename """
 826     try:
 827         a_changes = parse_changes(a)
 828     except:
 829         return -1
 830
 831     try:
 832         b_changes = parse_changes(b)
 833     except:
 834         return 1
 835
 836     cc_fix_changes (a_changes)
 837     cc_fix_changes (b_changes)
 838
 839     # Sort by source name
 840     a_source = a_changes.get("source")
 841     b_source = b_changes.get("source")
 842     q = cmp (a_source, b_source)
 843     if q:
 844         return q
 845
 846     # Sort by source version
 847     a_version = a_changes.get("version", "0")
 848     b_version = b_changes.get("version", "0")
 849     q = apt_pkg.version_compare(a_version, b_version)
 850     if q:
 851         return q
 852
 853     # Sort by 'have source'
 854     a_has_source = a_changes["architecture"].get("source")
 855     b_has_source = b_changes["architecture"].get("source")
 856     if a_has_source and not b_has_source:
 857         return -1
 858     elif b_has_source and not a_has_source:
 859         return 1
 860
 861     # Fall back to sort by filename
 862     return cmp(a, b)
 863
 864 ################################################################################
 865
 866 def find_next_free (dest, too_many=100):
 867     extra = 0
 868     orig_dest = dest
 869     while os.path.lexists(dest) and extra < too_many:
 870         dest = orig_dest + '.' + repr(extra)
 871         extra += 1
 872     if extra >= too_many:
 873         raise NoFreeFilenameError
 874     return dest
 875
 876 ################################################################################
 877
 878 def result_join (original, sep = '\t'):
 879     resultlist = []
 880     for i in xrange(len(original)):
 881         if original[i] == None:
 882             resultlist.append("")
 883         else:
 884             resultlist.append(original[i])
 885     return sep.join(resultlist)
 886
 887 ################################################################################
 888
 889 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 890     out = ""
 891     for line in str.split('\n'):
 892         line = line.strip()
 893         if line or include_blank_lines:
 894             out += "%s%s\n" % (prefix, line)
 895     # Strip trailing new line
 896     if out:
 897         out = out[:-1]
 898     return out
 899
 900 ################################################################################
 901
 902 def validate_changes_file_arg(filename, require_changes=1):
 903     """
 904     'filename' is either a .changes or .dak file.  If 'filename' is a
 905     .dak file, it's changed to be the corresponding .changes file.  The
 906     function then checks if the .changes file a) exists and b) is
 907     readable and returns the .changes filename if so.  If there's a
 908     problem, the next action depends on the option 'require_changes'
 909     argument:
 910
 911       - If 'require_changes' == -1, errors are ignored and the .changes
 912         filename is returned.
 913       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 914       - If 'require_changes' == 1, a fatal error is raised.
 915
 916     """
 917     error = None
 918
 919     orig_filename = filename
 920     if filename.endswith(".dak"):
 921         filename = filename[:-4]+".changes"
 922
 923     if not filename.endswith(".changes"):
 924         error = "invalid file type; not a changes file"
 925     else:
 926         if not os.access(filename,os.R_OK):
 927             if os.path.exists(filename):
 928                 error = "permission denied"
 929             else:
 930                 error = "file not found"
 931
 932     if error:
 933         if require_changes == 1:
 934             fubar("%s: %s." % (orig_filename, error))
 935         elif require_changes == 0:
 936             warn("Skipping %s - %s" % (orig_filename, error))
 937             return None
 938         else: # We only care about the .dak file
 939             return filename
 940     else:
 941         return filename
 942
 943 ################################################################################
 944
 945 def real_arch(arch):
 946     return (arch != "source" and arch != "all")
 947
 948 ################################################################################
 949
 950 def join_with_commas_and(list):
 951     if len(list) == 0: return "nothing"
 952     if len(list) == 1: return list[0]
 953     return ", ".join(list[:-1]) + " and " + list[-1]
 954
 955 ################################################################################
 956
 957 def pp_deps (deps):
 958     pp_deps = []
 959     for atom in deps:
 960         (pkg, version, constraint) = atom
 961         if constraint:
 962             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
 963         else:
 964             pp_dep = pkg
 965         pp_deps.append(pp_dep)
 966     return " |".join(pp_deps)
 967
 968 ################################################################################
 969
def get_conf():
    """Return the module-level configuration object Cnf."""
    return Cnf
 972
 973 ################################################################################
 974
 975 def parse_args(Options):
 976     """ Handle -a, -c and -s arguments; returns them as SQL constraints """
 977     # XXX: This should go away and everything which calls it be converted
 978     #      to use SQLA properly.  For now, we'll just fix it not to use
 979     #      the old Pg interface though
 980     session = DBConn().session()
 981     # Process suite
 982     if Options["Suite"]:
 983         suite_ids_list = []
 984         for suitename in split_args(Options["Suite"]):
 985             suite = get_suite(suitename, session=session)
 986             if not suite or suite.suite_id is None:
 987                 warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
 988             else:
 989                 suite_ids_list.append(suite.suite_id)
 990         if suite_ids_list:
 991             con_suites = "AND su.id IN (%s)" % ", ".join([ str(i) for i in suite_ids_list ])
 992         else:
 993             fubar("No valid suite given.")
 994     else:
 995         con_suites = ""
 996
 997     # Process component
 998     if Options["Component"]:
 999         component_ids_list = []
1000         for componentname in split_args(Options["Component"]):
1001             component = get_component(componentname, session=session)
1002             if component is None:
1003                 warn("component '%s' not recognised." % (componentname))
1004             else:
1005                 component_ids_list.append(component.component_id)
1006         if component_ids_list:
1007             con_components = "AND c.id IN (%s)" % ", ".join([ str(i) for i in component_ids_list ])
1008         else:
1009             fubar("No valid component given.")
1010     else:
1011         con_components = ""
1012
1013     # Process architecture
1014     con_architectures = ""
1015     check_source = 0
1016     if Options["Architecture"]:
1017         arch_ids_list = []
1018         for archname in split_args(Options["Architecture"]):
1019             if archname == "source":
1020                 check_source = 1
1021             else:
1022                 arch = get_architecture(archname, session=session)
1023                 if arch is None:
1024                     warn("architecture '%s' not recognised." % (archname))
1025                 else:
1026                     arch_ids_list.append(arch.arch_id)
1027         if arch_ids_list:
1028             con_architectures = "AND a.id IN (%s)" % ", ".join([ str(i) for i in arch_ids_list ])
1029         else:
1030             if not check_source:
1031                 fubar("No valid architecture given.")
1032     else:
1033         check_source = 1
1034
1035     return (con_suites, con_architectures, con_components, check_source)
1036
1037 ################################################################################
1038
def arch_compare_sw (a, b):
    """
    cmp()-style function for use in sorting lists of architectures.

    Sorts normally except that 'source' dominates all others, i.e.
    'source' sorts before every other name.

    @param a: first architecture name
    @param b: second architecture name

    @rtype: int
    @return: negative, zero or positive, as for cmp()
    """
    if a == "source":
        if b == "source":
            return 0
        return -1
    if b == "source":
        return 1

    return cmp (a, b)
1054
1055 ################################################################################
1056
def split_args (s, dwim=True):
    """
    Split command line arguments which can be separated by either commas
    or whitespace.

    A string without any comma is split on whitespace, any other string
    on commas only.  If dwim is set, it will complain (via fubar) about
    a string ending in a comma since this usually means someone did
    'dak ls -a i386, m68k foo' or something and the inevitable confusion
    resulting from 'm68k' being treated as an argument is undesirable.

    @type s: str
    @param s: the argument string

    @type dwim: bool
    @param dwim: treat a trailing comma as a fatal error

    @rtype: list
    @return: the individual arguments
    """
    if "," not in s:
        return s.split()
    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1072
1073 ################################################################################
1074
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    @type cmd: str
    @param cmd: command line; it is executed via "/bin/sh -c"

    @type status_read: int
    @param status_read: file descriptor the parent reads the status
        output from (the callers in this module pass the read end of an
        os.pipe())

    @type status_write: int
    @param status_write: the one descriptor above 2 that is left open in
        the child (the callers in this module pass the write end of the
        same pipe and name it in cmd via --status-fd)

    @rtype: tuple
    @return: (output, status, exit_status): the data read from the
        child's stdout and stderr, the data read from status_read, and
        the status value returned by os.waitpid()

    On return status_read and status_write have been closed (best
    effort; errors while closing are ignored).
    """

    cmd = ['/bin/sh', '-c', cmd]
    # Three pipes: parent->child stdin, child->parent stdout, child stderr
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # os.dup() hands out the lowest free descriptor, so with 0 and 1
        # closed first the two dup() calls become stdin and stdout.
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        # Same trick for stderr.
        os.close(2)
        os.dup(errin)
        # Close every other descriptor except the status fd
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        try:
            os.execvp(cmd[0], cmd)
        finally:
            # Only reached if execvp() raised
            os._exit(1)

    # Parent
    os.close(p2cread)
    # dup2() makes the descriptor numbers c2pwrite and errin refer to the
    # read ends of their pipes in the parent; the loop below reads from
    # those numbers.
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                # stdout and stderr are merged into a single string
                if fd == c2pwrite or fd == errin:
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        # None of the ready descriptors delivered data: reap the child
        # and clean up.
        if not more_data:
            pid, exit_status = os.waitpid(pid, 0)
            try:
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                pass
            break

    return output, status, exit_status
1139
1140 ################################################################################
1141
def process_gpgv_output(status):
    """
    Process the status-fd output of gpgv.

    Every non-blank line is expected to look like
    "[GNUPG:] KEYWORD [args ...]".

    @type status: str
    @param status: the raw status-fd output

    @rtype: tuple
    @return: (keywords, internal_error).  keywords maps each status
        keyword to the list of its arguments.  internal_error is a
        string with one line per problem found (malformed line, wrong
        prefix, duplicate keyword); it is empty if there was none.
    """
    # Tokens that may legitimately appear more than once; for these the
    # last occurrence wins.
    repeatable = ("NODATA", "SIGEXPIRED", "KEYEXPIRED")

    keywords = {}
    internal_error = ""
    for line in status.split('\n'):
        line = line.strip()
        if line == "":
            continue
        split = line.split()
        if len(split) < 2:
            internal_error += "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line)
            continue
        (gnupg, keyword) = split[:2]
        if gnupg != "[GNUPG:]":
            internal_error += "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg)
            continue
        if keyword in keywords and keyword not in repeatable:
            internal_error += "found duplicate status token ('%s').\n" % (keyword)
            continue
        keywords[keyword] = split[2:]

    return (keywords, internal_error)
1166
1167 ################################################################################
1168
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Retrieve the key that signed 'filename' from 'keyserver' and
    add it to 'keyring'.

    @type filename: str
    @param filename: the signed file

    @type keyserver: str
    @param keyserver: key server to ask; defaults to Dinstall::KeyServer
        from the configuration

    @type keyring: str
    @param keyring: keyring to add the key to; defaults to the result of
        get_primary_keyring_path()

    @rtype: str
    @return: an empty string on success, or an error message on error
    """

    # Defaults for keyserver and keyring
    keyserver = keyserver or Cnf["Dinstall::KeyServer"]
    keyring = keyring or get_primary_keyring_path()

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Invoke gpgv on the file with an empty keyring; it then reports the
    # signing key as missing (NO_PUBKEY)
    status_read, status_write = os.pipe()
    gpgv_cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    (_output, status, _exit_status) = gpgv_get_status_output(gpgv_cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"

    fingerprint = keywords["NO_PUBKEY"][0]
    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    base_cmd = "gpg --no-default-keyring --secret-keyring=%s --no-options" \
               % (Cnf["Dinstall::SigningKeyring"])
    recv_args = " --keyring %s --keyserver %s --recv-key %s" \
                % (keyring, keyserver, fingerprint)
    cmd = base_cmd + recv_args
    (result, output) = commands.getstatusoutput(cmd)
    if (result != 0):
        return "'%s' failed with exit code %s" % (cmd, result)

    return ""
1212
1213 ################################################################################
1214
def gpg_keyring_args(keyrings=None):
    """
    Build the "--keyring" command line options for gpg/gpgv.

    @type keyrings: list
    @param keyrings: keyring paths; if empty or None the result of
        get_active_keyring_paths() is used

    @rtype: str
    @return: one "--keyring <path>" per keyring, separated by spaces
    """
    if not keyrings:
        keyrings = get_active_keyring_paths()

    options = []
    for path in keyrings:
        options.append("--keyring %s" % path)
    return " ".join(options)
1220
1221 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type sig_filename: str
    @param sig_filename: the filename whose signature should be checked

    @type data_filename: str
    @param data_filename: optional; the name of the file the detached
        signature applies to

    @type keyrings: list
    @param keyrings: optional; a *list* of keyrings to use.  If empty,
        the names of all active keyrings in the database are used.

    @type autofetch: bool or None
    @param autofetch: whether to try to retrieve the signing key first.
        If None, the default behaviour specified in the config
        (Dinstall::KeyAutoFetch) will be used.

    @param session: database session used to look up the active keyrings

    @rtype: tuple
    @return: (fingerprint, []) if the signature is valid, otherwise
        (None, rejects) where rejects is a list of messages describing
        what is wrong
    """

    rejects = []

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        # rejects is a plain list: append() takes exactly one argument
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # gpgv normally passes the key id; cope with it being absent
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate=""
        if len(args) >= 1:
            timestamp = args[0]
            # A value without a "T" is treated as seconds since the
            # epoch; anything else is shown verbatim
            if timestamp.count("T") == 0:
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    fingerprint = None
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = ("VALIDSIG", "SIG_ID", "GOODSIG", "BADSIG", "ERRSIG",
                      "SIGEXPIRED", "KEYREVOKED", "NO_PUBKEY", "BADARMOR",
                      "NODATA", "NOTATION_DATA", "NOTATION_NAME", "KEYEXPIRED", "POLICY_URL")

    for keyword in keywords:
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        return (fingerprint, [])
1354
1355 ################################################################################
1356
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve email addresses from gpg key uids for a given fingerprint.

    Results are remembered in key_uid_email_cache.  If the gpg call
    fails, an empty list is returned (and cached).

    @type fingerprint: str
    @param fingerprint: fingerprint of the key to look up

    @rtype: list
    @return: the addresses found in the "uid" and "pub" records of the
        key listing, with @debian.org addresses placed first
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    addresses = []
    try:
        with open(os.devnull, "wb") as devnull:
            cmd = ["gpg", "--no-default-keyring"]
            cmd.extend(gpg_keyring_args().split())
            cmd.extend(["--with-colons", "--list-keys", fingerprint])
            output = daklib.daksubprocess.check_output(cmd, stderr=devnull)
    except subprocess.CalledProcessError:
        # Nothing to parse: the loop below finds no record in ""
        output = ""

    for record in output.split('\n'):
        fields = record.split(':')
        # The user id is the tenth field of "uid" and "pub" records
        if fields[0] not in ("uid", "pub") or len(fields) < 10:
            continue
        raw_uid = fields[9]
        try:
            # Do not use unicode_escape, because it is locale-specific
            uid = codecs.decode(raw_uid, "string_escape").decode("utf-8")
        except UnicodeDecodeError:
            uid = raw_uid.decode("latin1") # does not fail
        m = re_parse_maintainer.match(uid)
        if not m:
            continue
        address = m.group(2).encode("utf8") # dak still uses bytes
        if address.endswith('@debian.org'):
            # prefer @debian.org addresses
            # TODO: maybe not hardcode the domain
            addresses.insert(0, address)
        else:
            addresses.append(address)

    key_uid_email_cache[fingerprint] = addresses
    return addresses
1397
1398 ################################################################################
1399
def get_logins_from_ldap(fingerprint='*'):
    """
    Retrieve login from LDAP linked to a given fingerprint.

    Server and base DN are taken from the Import-LDAP-Fingerprints
    section of the configuration.

    @type fingerprint: str
    @param fingerprint: value to match in the keyfingerprint filter; the
        default '*' is placed in the filter unchanged

    @rtype: dict
    @return: maps the first keyFingerPrint value of each entry found to
        the first uid value of that entry
    """
    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    # Bind with empty DN and password
    connection.simple_bind_s('','')
    query = '(keyfingerprint=%s)' % fingerprint
    entries = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  query,
                                  ['uid', 'keyfingerprint'])
    logins = {}
    for entry in entries:
        attrs = entry[1]
        logins[attrs['keyFingerPrint'][0]] = attrs['uid'][0]
    return logins
1414
1415 ################################################################################
1416
def get_users_from_ldap():
    """
    Retrieve login and user names from LDAP.

    Server and base DN are taken from the Import-LDAP-Fingerprints
    section of the configuration.

    @rtype: dict
    @return: maps the full name of each entry to the first uid value of
        that entry.  The name is built from the first values of the
        'cn', 'mn' and 'sn' attributes, joined by spaces; attributes
        that are missing or whose value is '-' are left out.
    """
    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    # Bind with empty DN and password
    connection.simple_bind_s('','')
    entries = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  '(uid=*)', ['uid', 'cn', 'mn', 'sn'])
    users = {}
    for entry in entries:
        attrs = entry[1]
        name_parts = []
        for key in ('cn', 'mn', 'sn'):
            try:
                value = attrs[key][0]
            except KeyError:
                continue
            if value != '-':
                name_parts.append(value)
        users[' '.join(name_parts)] = attrs['uid'][0]
    return users
1438
1439 ################################################################################
1440
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of root is removed from both paths; src is then
    prefixed with one '../' per path component of the directory part of
    dest.

    @type src: str
    @param src: path the symlink refers to

    @type dest: str
    @param dest: path of the symlink itself

    @type root: str
    @param root: common prefix to strip from both paths

    @rtype: str
    @return: the fixed 'src'
    """
    relative_src = src.replace(root, '', 1)
    dest_dir = os.path.dirname(dest.replace(root, '', 1))
    # Number of '/'-separated components (an empty string counts as one)
    depth = dest_dir.count('/') + 1
    return '../' * depth + relative_src
1451
1452 ################################################################################
1453
def temp_filename(directory=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique filename by pre-creating it
    (with tempfile.mkstemp).

    @type directory: str
    @param directory: If non-null it will be the directory the file is pre-created in.

    @type prefix: str
    @param prefix: The filename will be prefixed with this string

    @type suffix: str
    @param suffix: The filename will end with this string

    @type mode: int
    @param mode: If set the file will get chmodded to those permissions

    @type group: str
    @param group: If set the file will get chgrped to the specified group.

    @rtype: tuple
    @return: Returns a pair (fd, name); fd is the open file descriptor
             of the new file
    """

    (handle, path) = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
    if mode:
        os.chmod(path, mode)
    if group:
        # -1 leaves the owner untouched
        os.chown(path, -1, grp.getgrnam(group).gr_gid)
    return (handle, path)
1484
1485 ################################################################################
1486
def temp_dirname(parent=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique directory by pre-creating it
    (with tempfile.mkdtemp).

    @type parent: str
    @param parent: If non-null it will be the directory the directory is pre-created in.

    @type prefix: str
    @param prefix: The directory name will be prefixed with this string

    @type suffix: str
    @param suffix: The directory name will end with this string

    @type mode: int
    @param mode: If set the directory will get chmodded to those permissions

    @type group: str
    @param group: If set the directory will get chgrped to the specified group.

    @rtype: str
    @return: Returns the name of the new directory

    """

    path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=parent)
    if mode:
        os.chmod(path, mode)
    if group:
        # -1 leaves the owner untouched
        os.chown(path, -1, grp.getgrnam(group).gr_gid)
    return path
1518
1519 ################################################################################
1520
def is_email_alias(email):
    """
    Check if the user part of the email is listed in the alias file.

    The alias file (located via which_alias_file()) is read once; the
    part of each line before the first ':' is remembered in the
    module-level alias_cache.

    @type email: str
    @param email: email address to check

    @rtype: bool
    @return: True if the part of email before the first '@' is one of
        the cached aliases
    """
    global alias_cache
    if alias_cache is None:
        aliasfn = which_alias_file()
        alias_cache = set()
        if aliasfn:
            # Close the file as soon as it has been read
            with open(aliasfn) as aliasfile:
                for line in aliasfile:
                    alias_cache.add(line.split(':')[0])
    uid = email.split('@')[0]
    return uid in alias_cache
1532
1533 ################################################################################
1534
def get_changes_files(from_dir):
    """
    Takes a directory and lists all .changes files in it (as well as chdir'ing
    to the directory; this is due to broken behaviour on the part of p-u/p-a
    when you're not in the right place)

    If the directory cannot be entered or read, fubar() is called.

    @type from_dir: str
    @param from_dir: directory to change to and to list

    @rtype: list
    @return: Returns a list of filenames (without directory part)
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        # List the directory we are now in: resolving from_dir a second
        # time would go wrong for a relative path.
        changes_files = [x for x in os.listdir('.') if x.endswith('.changes')]
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return changes_files
1551
1552 ################################################################################
1553
# Module-level configuration object (the Cnf attribute of a
# daklib.config Config instance).  Functions in this module refer to
# it through the global name Cnf at call time.
Cnf = config.Config().Cnf
1555
1556 ################################################################################
1557
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at https://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parsed a local copy, but let's document the source
    somewhere ;)

    Each line is expected to look like "<source>: <entry>|<entry>|...";
    lines without ": " are ignored.  The first run of digits in an
    entry is taken as its bug number; entries without digits are
    skipped.

    If the file cannot be opened a warning is printed and an empty dict
    is returned.

    @type file: str
    @param file: path of the local copy of the list

    @rtype: dict
    @return: a dict associating source package name with a list of open
        wnpp bugs (Yes, there might be more than one)
    """

    try:
        with open(file) as f:
            lines = f.readlines()
    except IOError:
        print("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any." % file)
        lines = []

    wnpp = {}
    for line in lines:
        fields = line.split(": ", 1)
        if len(fields) < 2:
            continue
        (source, entries) = fields
        bugs = []
        for wnpp_bug in entries.split("|"):
            match = re.search(r"\d+", wnpp_bug)
            # An entry without any digits carries no bug number
            if match:
                bugs.append(match.group())
        wnpp[source] = bugs
    return wnpp
1590
1591 ################################################################################
1592
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    aggregating the Packages.gz files of one architecture: the regular
    one and, if it exists, the debian-installer one.

    If gunzip fails, fubar() is called with gunzip's exit status.

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    # NOTE(review): the paths are interpolated into shell commands without
    # quoting; they must not contain shell meta-characters.
    filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
    (fd, temp_file) = temp_filename()
    # Only the name is needed (the shell redirections below write the
    # file), so do not keep the descriptor open.
    os.close(fd)
    (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
    if (result != 0):
        # fubar() does not return, so clean up first
        os.unlink(temp_file)
        fubar("Gunzip invocation failed!\n%s\n" % (output), result)
    filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
    if os.path.exists(filename):
        (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
        if (result != 0):
            os.unlink(temp_file)
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
    packages = open_file(temp_file)
    Packages = apt_pkg.TagFile(packages)
    # The file is already open, so its name can go right away
    os.unlink(temp_file)
    return Packages
1627
1628 ################################################################################
1629
def deb_extract_control(fh):
    """return the contents of DEBIAN/control of a binary package

    C{fh} is handed to apt_inst.DebFile; the "control" member of the
    resulting object's control part is extracted and returned.
    """
    deb = apt_inst.DebFile(fh)
    control_part = deb.control
    return control_part.extractdata("control")
1633
1634 ################################################################################
1635
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """collect the mail addresses to notify about an upload

    The maintainer is always included, the uploader (Changed-By) only if
    it differs from the maintainer.  The first address of the signing key
    is added when neither of the two is found among the key's addresses.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if changed_by != maintainer:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    # Element 3 of fix_maintainer()'s result is compared against the key's
    # addresses -- presumably the bare e-mail address; verify against
    # fix_maintainer().  The checks are nested so that fix_maintainer() is
    # only called as far as needed.
    if len(key_addresses) > 0:
        if fix_maintainer(changed_by)[3] not in key_addresses:
            if fix_maintainer(maintainer)[3] not in key_addresses:
                recipients.append(key_addresses[0])

    return [fix_maintainer(recipient)[1] for recipient in recipients]
1662
1663 ################################################################################
1664
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The text is written to a temporary file, the editor named by $VISUAL
    (or $EDITOR, or "vi" as a last resort) is run on it, and the file's
    contents are read back.  The temporary file is removed afterwards,
    also if the editor fails.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text
    """
    # "or" instead of nested get() defaults: a variable that is set but
    # empty falls through to the next candidate instead of being used as
    # the command name.
    editor = os.environ.get('VISUAL') or os.environ.get('EDITOR') or 'vi'
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        try:
            # Trailing comma: do not append a newline to the initial text.
            print >>tmp, text,
        finally:
            # Flush and release the handle before the editor opens the file
            # (and before it is unlinked, should the write have failed).
            tmp.close()
        daklib.daksubprocess.check_call([editor, tmp.name])
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        os.unlink(tmp.name)
1686
1687 ################################################################################
1688
def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
    """report dependencies that would break if packages were removed

    Checks the Depends of the binaries in the suite (per architecture) and
    the Build-Depends / Build-Depends-Indep of the sources in the suite
    against the packages to be removed.  A dependency only counts as broken
    if every alternative of an ORed group is going to be removed.  Virtual
    packages that are provided solely by to-be-removed packages are treated
    as removed too.  The findings are printed to stdout.

    @type  removals: list of str
    @param removals: names of the packages to be removed; the list itself
                     is not modified

    @type  suite: str
    @param suite: name of the suite to check

    @type  arches: list of str
    @param arches: architectures to check; if empty or None, all
                   architectures of the suite are used.  "source" and "all"
                   are dropped from the list, "all" is always checked.

    @type  session: SQLAlchemy session
    @param session: database session; queries are issued on it directly,
                    so it must not be None in practice

    @type  cruft: bool
    @param cruft: if true, print the findings as an indented
                  "  - broken ..." list instead of "# Broken ..." sections

    @rtype:  int
    @return: 1 if at least one broken dependency was found, 0 otherwise
    """
    dbsuite = get_suite(suite, session)
    overridesuite = dbsuite
    if dbsuite.overridesuite is not None:
        overridesuite = get_suite(dbsuite.overridesuite, session)
    dep_problem = 0
    p2c = {}
    all_broken = {}
    if arches:
        all_arches = set(arches)
    else:
        all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
    all_arches -= set(["source", "all"])
    # Private copy of the removal list: virtual packages get added to it
    # below, which must not leak into the caller's list, and membership
    # tests inside the nested loops are O(1) on a set.
    removal_set = set(removals)
    # Every line of the report is indented by this prefix in cruft mode.
    indent = '    ' if cruft else ''
    metakey_d = get_or_set_metadatakey("Depends", session)
    metakey_p = get_or_set_metadatakey("Provides", session)
    params = {
        'suite_id':     dbsuite.suite_id,
        'metakey_d_id': metakey_d.key_id,
        'metakey_p_id': metakey_p.key_id,
    }
    for architecture in all_arches | set(['all']):
        deps = {}
        sources = {}
        virtual_packages = {}
        params['arch_id'] = get_architecture(architecture, session).arch_id

        statement = '''
            SELECT b.id, b.package, s.source, c.name as component,
                (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
                (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
                FROM binaries b
                JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
                JOIN source s ON b.source = s.id
                JOIN files_archive_map af ON b.file = af.file_id
                JOIN component c ON af.component_id = c.id
                WHERE b.architecture = :arch_id'''
        query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
            from_statement(statement).params(params)
        for binary_id, package, source, component, depends, provides in query:
            sources[package] = source
            p2c[package] = component
            if depends is not None:
                deps[package] = depends
            # Maintain a counter for each virtual package.  If a
            # Provides: exists, set the counter to 0 and count all
            # provides by a package not in the list for removal.
            # If the counter stays 0 at the end, we know that only
            # the to-be-removed packages provided this virtual
            # package.
            if provides is not None:
                for virtual_pkg in provides.split(","):
                    virtual_pkg = virtual_pkg.strip()
                    if virtual_pkg == package:
                        continue
                    virtual_packages.setdefault(virtual_pkg, 0)
                    if package not in removal_set:
                        virtual_packages[virtual_pkg] += 1

        # If a virtual package is only provided by the to-be-removed
        # packages, treat the virtual package as to-be-removed too.
        for virtual_pkg, providers in virtual_packages.items():
            if providers == 0:
                removal_set.add(virtual_pkg)

        # Check binary dependencies (Depends)
        for package, depends in deps.items():
            if package in removal_set:
                continue
            parsed_dep = []
            try:
                parsed_dep += apt_pkg.parse_depends(depends)
            except ValueError as e:
                print("Error for package %s: %s" % (package, e))
            for dep in parsed_dep:
                # Check for partial breakage.  If a package has a ORed
                # dependency, there is only a dependency problem if all
                # packages in the ORed depends will be removed.
                if all(dep_package in removal_set for dep_package, _, _ in dep):
                    component = p2c[package]
                    source = sources[package]
                    if component != "main":
                        source = "%s/%s" % (source, component)
                    all_broken.setdefault(source, {}).setdefault(package, set()).add(architecture)
                    dep_problem = 1

    if all_broken:
        if cruft:
            print("  - broken Depends:")
        else:
            print("# Broken Depends:")
        for source, bindict in sorted(all_broken.items()):
            lines = []
            for binary, broken_arches in sorted(bindict.items()):
                # No architecture list if the binary is broken everywhere.
                if broken_arches == all_arches or 'all' in broken_arches:
                    lines.append(binary)
                else:
                    lines.append('%s [%s]' % (binary, ' '.join(sorted(broken_arches))))
            print('%s%s: %s' % (indent, source, lines[0]))
            # Further binaries are aligned below the first one.
            for line in lines[1:]:
                print(indent + ' ' * (len(source) + 2) + line)
        if not cruft:
            print("")

    # Check source dependencies (Build-Depends and Build-Depends-Indep)
    all_broken.clear()
    metakey_bd = get_or_set_metadatakey("Build-Depends", session)
    metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
    params = {
        'suite_id':    dbsuite.suite_id,
        'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
    }
    statement = '''
        SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
           FROM source s
           JOIN source_metadata sm ON s.id = sm.src_id
           WHERE s.id in
               (SELECT source FROM src_associations
                   WHERE suite = :suite_id)
               AND sm.key_id in :metakey_ids
           GROUP BY s.id, s.source'''
    query = session.query('id', 'source', 'build_dep').from_statement(statement). \
        params(params)
    for source_id, source, build_dep in query:
        if source in removal_set:
            continue
        parsed_dep = []
        if build_dep is not None:
            # Remove [arch] information since we want to see breakage on all arches
            build_dep = re_build_dep_arch.sub("", build_dep)
            try:
                parsed_dep += apt_pkg.parse_depends(build_dep)
            except ValueError as e:
                print("Error for source %s: %s" % (source, e))
        for dep in parsed_dep:
            if all(dep_package in removal_set for dep_package, _, _ in dep):
                # NOTE(review): .first() yields None if the source has no
                # 'dsc' override in the override suite; the tuple unpacking
                # would then raise TypeError.
                component, = session.query(Component.component_name) \
                    .join(Component.overrides) \
                    .filter(Override.suite == overridesuite) \
                    .filter(Override.package == re.sub('/(contrib|non-free)$', '', source)) \
                    .join(Override.overridetype).filter(OverrideType.overridetype == 'dsc') \
                    .first()
                key = source
                if component != "main":
                    key = "%s/%s" % (source, component)
                all_broken.setdefault(key, set()).add(pp_deps(dep))
                dep_problem = 1

    if all_broken:
        if cruft:
            print("  - broken Build-Depends:")
        else:
            print("# Broken Build-Depends:")
        for source, bdeps in sorted(all_broken.items()):
            bdeps = sorted(bdeps)
            print('%s%s: %s' % (indent, source, bdeps[0]))
            for bdep in bdeps[1:]:
                print(indent + ' ' * (len(source) + 2) + bdep)
        if not cruft:
            print("")

    return dep_problem