daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import datetime
  27 import email.Header
  28 import os
  29 import pwd
  30 import grp
  31 import select
  32 import socket
  33 import shutil
  34 import sys
  35 import tempfile
  36 import traceback
  37 import stat
  38 import apt_inst
  39 import apt_pkg
  40 import time
  41 import re
  42 import email as modemail
  43 import subprocess
  44 import ldap
  45 import errno
  46
  47 import daklib.config as config
  48 import daklib.daksubprocess
  49 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  50                    get_override_type, Keyring, session_wrapper, \
  51                    get_active_keyring_paths, get_primary_keyring_path, \
  52                    get_suite_architectures, get_or_set_metadatakey, DBSource, \
  53                    Component, Override, OverrideType
  54 from sqlalchemy import desc
  55 from dak_exceptions import *
  56 from gpg import SignedFile
  57 from textutils import fix_maintainer
  58 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  59                     re_multi_line_field, re_srchasver, re_taint_free, \
  60                     re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
  61                     re_is_orig_source, re_build_dep_arch
  62
  63 from formats import parse_format, validate_changes_format
  64 from srcformats import get_format_from_string
  65 from collections import defaultdict
  66
  67 ################################################################################
  68
# Fallback configuration path; which_conf_file() returns it when neither the
# DAK_CONFIG environment variable nor a host specific DakConfig entry is set.
default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties

# Module level caches, both start out empty.
alias_cache = None        #: Cache for email alias checks
key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids

# (hashname, function, earliest_changes_version)
# The hash names listed here are the ones check_hash_fields() accepts in
# "checksums-<hashname>" fields.
known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  77
  78 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  79 # code in lenny's Python. This also affects commands.getoutput and
  80 # commands.getstatus.
  81 def dak_getstatusoutput(cmd):
  82     pipe = daklib.daksubprocess.Popen(cmd, shell=True, universal_newlines=True,
  83         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  84
  85     output = pipe.stdout.read()
  86
  87     pipe.wait()
  88
  89     if output[-1:] == '\n':
  90         output = output[:-1]
  91
  92     ret = pipe.wait()
  93     if ret is None:
  94         ret = 0
  95
  96     return ret, output
  97 commands.getstatusoutput = dak_getstatusoutput
  98
  99 ################################################################################
 100
 101 def html_escape(s):
 102     """ Escape html chars """
 103     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 104
 105 ################################################################################
 106
 107 def open_file(filename, mode='r'):
 108     """
 109     Open C{file}, return fileobject.
 110
 111     @type filename: string
 112     @param filename: path/filename to open
 113
 114     @type mode: string
 115     @param mode: open mode
 116
 117     @rtype: fileobject
 118     @return: open fileobject
 119
 120     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 121
 122     """
 123     try:
 124         f = open(filename, mode)
 125     except IOError:
 126         raise CantOpenError(filename)
 127     return f
 128
 129 ################################################################################
 130
 131 def our_raw_input(prompt=""):
 132     if prompt:
 133         while 1:
 134             try:
 135                 sys.stdout.write(prompt)
 136                 break
 137             except IOError:
 138                 pass
 139     sys.stdout.flush()
 140     try:
 141         ret = raw_input()
 142         return ret
 143     except EOFError:
 144         sys.stderr.write("\nUser interrupt (^D).\n")
 145         raise SystemExit
 146
 147 ################################################################################
 148
 149 def extract_component_from_section(section, session=None):
 150     component = ""
 151
 152     if section.find('/') != -1:
 153         component = section.split('/')[0]
 154
 155     # Expand default component
 156     if component == "":
 157         comp = get_component(section, session)
 158         if comp is None:
 159             component = "main"
 160         else:
 161             component = comp.component_name
 162
 163     return (section, component)
 164
 165 ################################################################################
 166
 167 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 168     require_signature = True
 169     if keyrings == None:
 170         keyrings = []
 171         require_signature = False
 172
 173     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 174     contents = signed_file.contents
 175
 176     error = ""
 177     changes = {}
 178
 179     # Split the lines in the input, keeping the linebreaks.
 180     lines = contents.splitlines(True)
 181
 182     if len(lines) == 0:
 183         raise ParseChangesError("[Empty changes file]")
 184
 185     # Reindex by line number so we can easily verify the format of
 186     # .dsc files...
 187     index = 0
 188     indexed_lines = {}
 189     for line in lines:
 190         index += 1
 191         indexed_lines[index] = line[:-1]
 192
 193     num_of_lines = len(indexed_lines.keys())
 194     index = 0
 195     first = -1
 196     while index < num_of_lines:
 197         index += 1
 198         line = indexed_lines[index]
 199         if line == "" and signing_rules == 1:
 200             if index != num_of_lines:
 201                 raise InvalidDscError(index)
 202             break
 203         slf = re_single_line_field.match(line)
 204         if slf:
 205             field = slf.groups()[0].lower()
 206             changes[field] = slf.groups()[1]
 207             first = 1
 208             continue
 209         if line == " .":
 210             changes[field] += '\n'
 211             continue
 212         mlf = re_multi_line_field.match(line)
 213         if mlf:
 214             if first == -1:
 215                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 216             if first == 1 and changes[field] != "":
 217                 changes[field] += '\n'
 218             first = 0
 219             changes[field] += mlf.groups()[0] + '\n'
 220             continue
 221         error += line
 222
 223     changes["filecontents"] = armored_contents
 224
 225     if changes.has_key("source"):
 226         # Strip the source version in brackets from the source field,
 227         # put it in the "source-version" field instead.
 228         srcver = re_srchasver.search(changes["source"])
 229         if srcver:
 230             changes["source"] = srcver.group(1)
 231             changes["source-version"] = srcver.group(2)
 232
 233     if error:
 234         raise ParseChangesError(error)
 235
 236     return changes
 237
 238 ################################################################################
 239
 240 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 241     """
 242     Parses a changes file and returns a dictionary where each field is a
 243     key.  The mandatory first argument is the filename of the .changes
 244     file.
 245
 246     signing_rules is an optional argument:
 247
 248       - If signing_rules == -1, no signature is required.
 249       - If signing_rules == 0 (the default), a signature is required.
 250       - If signing_rules == 1, it turns on the same strict format checking
 251         as dpkg-source.
 252
 253     The rules for (signing_rules == 1)-mode are:
 254
 255       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 256         followed by any PGP header data and must end with a blank line.
 257
 258       - The data section must end with a blank line and must be followed by
 259         "-----BEGIN PGP SIGNATURE-----".
 260     """
 261
 262     changes_in = open_file(filename)
 263     content = changes_in.read()
 264     changes_in.close()
 265     try:
 266         unicode(content, 'utf-8')
 267     except UnicodeError:
 268         raise ChangesUnicodeError("Changes file not proper utf-8")
 269     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 270
 271
 272     if not dsc_file:
 273         # Finally ensure that everything needed for .changes is there
 274         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 275                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 276
 277         missingfields=[]
 278         for keyword in must_keywords:
 279             if not changes.has_key(keyword.lower()):
 280                 missingfields.append(keyword)
 281
 282                 if len(missingfields):
 283                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 284
 285     return changes
 286
 287 ################################################################################
 288
 289 def hash_key(hashname):
 290     return '%ssum' % hashname
 291
 292 ################################################################################
 293
 294 def create_hash(where, files, hashname, hashfunc):
 295     """
 296     create_hash extends the passed files dict with the given hash by
 297     iterating over all files on disk and passing them to the hashing
 298     function given.
 299     """
 300
 301     rejmsg = []
 302     for f in files.keys():
 303         try:
 304             file_handle = open_file(f)
 305         except CantOpenError:
 306             rejmsg.append("Could not open file %s for checksumming" % (f))
 307             continue
 308
 309         files[f][hash_key(hashname)] = hashfunc(file_handle)
 310
 311         file_handle.close()
 312     return rejmsg
 313
 314 ################################################################################
 315
 316 def check_hash(where, files, hashname, hashfunc):
 317     """
 318     check_hash checks the given hash in the files dict against the actual
 319     files on disk.  The hash values need to be present consistently in
 320     all file entries.  It does not modify its input in any way.
 321     """
 322
 323     rejmsg = []
 324     for f in files.keys():
 325         file_handle = None
 326         try:
 327             try:
 328                 file_handle = open_file(f)
 329
 330                 # Check for the hash entry, to not trigger a KeyError.
 331                 if not files[f].has_key(hash_key(hashname)):
 332                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 333                         where))
 334                     continue
 335
 336                 # Actually check the hash for correctness.
 337                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 338                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 339                         where))
 340             except CantOpenError:
 341                 # TODO: This happens when the file is in the pool.
 342                 # warn("Cannot open file %s" % f)
 343                 continue
 344         finally:
 345             if file_handle:
 346                 file_handle.close()
 347     return rejmsg
 348
 349 ################################################################################
 350
 351 def check_size(where, files):
 352     """
 353     check_size checks the file sizes in the passed files dict against the
 354     files on disk.
 355     """
 356
 357     rejmsg = []
 358     for f in files.keys():
 359         try:
 360             entry = os.stat(f)
 361         except OSError as exc:
 362             if exc.errno == errno.ENOENT:
 363                 # TODO: This happens when the file is in the pool.
 364                 continue
 365             raise
 366
 367         actual_size = entry[stat.ST_SIZE]
 368         size = int(files[f]["size"])
 369         if size != actual_size:
 370             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 371                    % (f, actual_size, size, where))
 372     return rejmsg
 373
 374 ################################################################################
 375
 376 def check_dsc_files(dsc_filename, dsc, dsc_files):
 377     """
 378     Verify that the files listed in the Files field of the .dsc are
 379     those expected given the announced Format.
 380
 381     @type dsc_filename: string
 382     @param dsc_filename: path of .dsc file
 383
 384     @type dsc: dict
 385     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 386
 387     @type dsc_files: dict
 388     @param dsc_files: the file list returned by C{build_file_list()}
 389
 390     @rtype: list
 391     @return: all errors detected
 392     """
 393     rejmsg = []
 394
 395     # Ensure .dsc lists proper set of source files according to the format
 396     # announced
 397     has = defaultdict(lambda: 0)
 398
 399     ftype_lookup = (
 400         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 401         (r'diff.gz',                   ('debian_diff',)),
 402         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 403         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 404         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 405         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 406         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 407     )
 408
 409     for f in dsc_files:
 410         m = re_issource.match(f)
 411         if not m:
 412             rejmsg.append("%s: %s in Files field not recognised as source."
 413                           % (dsc_filename, f))
 414             continue
 415
 416         # Populate 'has' dictionary by resolving keys in lookup table
 417         matched = False
 418         for regex, keys in ftype_lookup:
 419             if re.match(regex, m.group(3)):
 420                 matched = True
 421                 for key in keys:
 422                     has[key] += 1
 423                 break
 424
 425         # File does not match anything in lookup table; reject
 426         if not matched:
 427             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 428
 429     # Check for multiple files
 430     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 431         if has[file_type] > 1:
 432             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 433
 434     # Source format specific tests
 435     try:
 436         format = get_format_from_string(dsc['format'])
 437         rejmsg.extend([
 438             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 439         ])
 440
 441     except UnknownFormatError:
 442         # Not an error here for now
 443         pass
 444
 445     return rejmsg
 446
 447 ################################################################################
 448
 449 def check_hash_fields(what, manifest):
 450     """
 451     check_hash_fields ensures that there are no checksum fields in the
 452     given dict that we do not know about.
 453     """
 454
 455     rejmsg = []
 456     hashes = map(lambda x: x[0], known_hashes)
 457     for field in manifest:
 458         if field.startswith("checksums-"):
 459             hashname = field.split("-",1)[1]
 460             if hashname not in hashes:
 461                 rejmsg.append("Unsupported checksum field for %s "\
 462                     "in %s" % (hashname, what))
 463     return rejmsg
 464
 465 ################################################################################
 466
 467 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 468     if format >= version:
 469         # The version should contain the specified hash.
 470         func = check_hash
 471
 472         # Import hashes from the changes
 473         rejmsg = parse_checksums(".changes", files, changes, hashname)
 474         if len(rejmsg) > 0:
 475             return rejmsg
 476     else:
 477         # We need to calculate the hash because it can't possibly
 478         # be in the file.
 479         func = create_hash
 480     return func(".changes", files, hashname, hashfunc)
 481
 482 # We could add the orig which might be in the pool to the files dict to
 483 # access the checksums easily.
 484
 485 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 486     """
 487     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 488     in the dsc is correct, i.e. identical to the changes file and if necessary
 489     the pool.  The latter task is delegated to check_hash.
 490     """
 491
 492     rejmsg = []
 493     if not dsc.has_key('Checksums-%s' % (hashname,)):
 494         return rejmsg
 495     # Import hashes from the dsc
 496     parse_checksums(".dsc", dsc_files, dsc, hashname)
 497     # And check it...
 498     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 499     return rejmsg
 500
 501 ################################################################################
 502
 503 def parse_checksums(where, files, manifest, hashname):
 504     rejmsg = []
 505     field = 'checksums-%s' % hashname
 506     if not field in manifest:
 507         return rejmsg
 508     for line in manifest[field].split('\n'):
 509         if not line:
 510             break
 511         clist = line.strip().split(' ')
 512         if len(clist) == 3:
 513             checksum, size, checkfile = clist
 514         else:
 515             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 516             continue
 517         if not files.has_key(checkfile):
 518         # TODO: check for the file's entry in the original files dict, not
 519         # the one modified by (auto)byhand and other weird stuff
 520         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 521         #        (file, hashname, where))
 522             continue
 523         if not files[checkfile]["size"] == size:
 524             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 525                 "in %s" % (checkfile, hashname, where))
 526             continue
 527         files[checkfile][hash_key(hashname)] = checksum
 528     for f in files.keys():
 529         if not files[f].has_key(hash_key(hashname)):
 530             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 531     return rejmsg
 532
 533 ################################################################################
 534
 535 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 536
 537 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 538     files = {}
 539
 540     # Make sure we have a Files: field to parse...
 541     if not changes.has_key(field):
 542         raise NoFilesFieldError
 543
 544     # Validate .changes Format: field
 545     if not is_a_dsc:
 546         validate_changes_format(parse_format(changes['format']), field)
 547
 548     includes_section = (not is_a_dsc) and field == "files"
 549
 550     # Parse each entry/line:
 551     for i in changes[field].split('\n'):
 552         if not i:
 553             break
 554         s = i.split()
 555         section = priority = ""
 556         try:
 557             if includes_section:
 558                 (md5, size, section, priority, name) = s
 559             else:
 560                 (md5, size, name) = s
 561         except ValueError:
 562             raise ParseChangesError(i)
 563
 564         if section == "":
 565             section = "-"
 566         if priority == "":
 567             priority = "-"
 568
 569         (section, component) = extract_component_from_section(section)
 570
 571         files[name] = dict(size=size, section=section,
 572                            priority=priority, component=component)
 573         files[name][hashname] = md5
 574
 575     return files
 576
 577 ################################################################################
 578
 579 # see http://bugs.debian.org/619131
 580 def build_package_list(dsc, session = None):
 581     if not dsc.has_key("package-list"):
 582         return {}
 583
 584     packages = {}
 585
 586     for line in dsc["package-list"].split("\n"):
 587         if not line:
 588             break
 589
 590         fields = line.split()
 591         name = fields[0]
 592         package_type = fields[1]
 593         (section, component) = extract_component_from_section(fields[2])
 594         priority = fields[3]
 595
 596         # Validate type if we have a session
 597         if session and get_override_type(package_type, session) is None:
 598             # Maybe just warn and ignore? exit(1) might be a bit hard...
 599             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 600
 601         if name not in packages or packages[name]["type"] == "dsc":
 602             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 603
 604     return packages
 605
 606 ################################################################################
 607
 608 def send_mail (message, filename="", whitelists=None):
 609     """sendmail wrapper, takes _either_ a message string or a file as arguments
 610
 611     @type  whitelists: list of (str or None)
 612     @param whitelists: path to whitelists. C{None} or an empty list whitelists
 613                        everything, otherwise an address is whitelisted if it is
 614                        included in any of the lists.
 615                        In addition a global whitelist can be specified in
 616                        Dinstall::MailWhiteList.
 617     """
 618
 619     maildir = Cnf.get('Dir::Mail')
 620     if maildir:
 621         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 622         path = find_next_free(path)
 623         fh = open(path, 'w')
 624         print >>fh, message,
 625         fh.close()
 626
 627     # Check whether we're supposed to be sending mail
 628     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 629         return
 630
 631     # If we've been passed a string dump it into a temporary file
 632     if message:
 633         (fd, filename) = tempfile.mkstemp()
 634         os.write (fd, message)
 635         os.close (fd)
 636
 637     if whitelists is None or None in whitelists:
 638         whitelists = []
 639     if Cnf.get('Dinstall::MailWhiteList', ''):
 640         whitelists.append(Cnf['Dinstall::MailWhiteList'])
 641     if len(whitelists) != 0:
 642         message_in = open_file(filename)
 643         message_raw = modemail.message_from_file(message_in)
 644         message_in.close();
 645
 646         whitelist = [];
 647         for path in whitelists:
 648           with open_file(path, 'r') as whitelist_in:
 649             for line in whitelist_in:
 650                 if not re_whitespace_comment.match(line):
 651                     if re_re_mark.match(line):
 652                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 653                     else:
 654                         whitelist.append(re.compile(re.escape(line.strip())))
 655
 656         # Fields to check.
 657         fields = ["To", "Bcc", "Cc"]
 658         for field in fields:
 659             # Check each field
 660             value = message_raw.get(field, None)
 661             if value != None:
 662                 match = [];
 663                 for item in value.split(","):
 664                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 665                     mail_whitelisted = 0
 666                     for wr in whitelist:
 667                         if wr.match(email):
 668                             mail_whitelisted = 1
 669                             break
 670                     if not mail_whitelisted:
 671                         print "Skipping {0} since it's not whitelisted".format(item)
 672                         continue
 673                     match.append(item)
 674
 675                 # Doesn't have any mail in whitelist so remove the header
 676                 if len(match) == 0:
 677                     del message_raw[field]
 678                 else:
 679                     message_raw.replace_header(field, ', '.join(match))
 680
 681         # Change message fields in order if we don't have a To header
 682         if not message_raw.has_key("To"):
 683             fields.reverse()
 684             for field in fields:
 685                 if message_raw.has_key(field):
 686                     message_raw[fields[-1]] = message_raw[field]
 687                     del message_raw[field]
 688                     break
 689             else:
 690                 # Clean up any temporary files
 691                 # and return, as we removed all recipients.
 692                 if message:
 693                     os.unlink (filename);
 694                 return;
 695
 696         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 697         os.write (fd, message_raw.as_string(True));
 698         os.close (fd);
 699
 700     # Invoke sendmail
 701     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 702     if (result != 0):
 703         raise SendmailFailedError(output)
 704
 705     # Clean up any temporary files
 706     if message:
 707         os.unlink (filename)
 708
 709 ################################################################################
 710
 711 def poolify (source, component=None):
 712     if source[:3] == "lib":
 713         return source[:4] + '/' + source + '/'
 714     else:
 715         return source[:1] + '/' + source + '/'
 716
 717 ################################################################################
 718
 719 def move (src, dest, overwrite = 0, perms = 0o664):
 720     if os.path.exists(dest) and os.path.isdir(dest):
 721         dest_dir = dest
 722     else:
 723         dest_dir = os.path.dirname(dest)
 724     if not os.path.lexists(dest_dir):
 725         umask = os.umask(00000)
 726         os.makedirs(dest_dir, 0o2775)
 727         os.umask(umask)
 728     #print "Moving %s to %s..." % (src, dest)
 729     if os.path.exists(dest) and os.path.isdir(dest):
 730         dest += '/' + os.path.basename(src)
 731     # Don't overwrite unless forced to
 732     if os.path.lexists(dest):
 733         if not overwrite:
 734             fubar("Can't move %s to %s - file already exists." % (src, dest))
 735         else:
 736             if not os.access(dest, os.W_OK):
 737                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 738     shutil.copy2(src, dest)
 739     os.chmod(dest, perms)
 740     os.unlink(src)
 741
 742 def copy (src, dest, overwrite = 0, perms = 0o664):
 743     if os.path.exists(dest) and os.path.isdir(dest):
 744         dest_dir = dest
 745     else:
 746         dest_dir = os.path.dirname(dest)
 747     if not os.path.exists(dest_dir):
 748         umask = os.umask(00000)
 749         os.makedirs(dest_dir, 0o2775)
 750         os.umask(umask)
 751     #print "Copying %s to %s..." % (src, dest)
 752     if os.path.exists(dest) and os.path.isdir(dest):
 753         dest += '/' + os.path.basename(src)
 754     # Don't overwrite unless forced to
 755     if os.path.lexists(dest):
 756         if not overwrite:
 757             raise FileExistsError
 758         else:
 759             if not os.access(dest, os.W_OK):
 760                 raise CantOverwriteError
 761     shutil.copy2(src, dest)
 762     os.chmod(dest, perms)
 763
 764 ################################################################################
 765
 766 def which_conf_file ():
 767     if os.getenv('DAK_CONFIG'):
 768         return os.getenv('DAK_CONFIG')
 769
 770     res = socket.getfqdn()
 771     # In case we allow local config files per user, try if one exists
 772     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 773         homedir = os.getenv("HOME")
 774         confpath = os.path.join(homedir, "/etc/dak.conf")
 775         if os.path.exists(confpath):
 776             apt_pkg.read_config_file_isc(Cnf,confpath)
 777
 778     # We are still in here, so there is no local config file or we do
 779     # not allow local files. Do the normal stuff.
 780     if Cnf.get("Config::" + res + "::DakConfig"):
 781         return Cnf["Config::" + res + "::DakConfig"]
 782
 783     return default_config
 784
 785 ################################################################################
 786
 787 def TemplateSubst(subst_map, filename):
 788     """ Perform a substition of template """
 789     templatefile = open_file(filename)
 790     template = templatefile.read()
 791     for k, v in subst_map.iteritems():
 792         template = template.replace(k, str(v))
 793     templatefile.close()
 794     return template
 795
 796 ################################################################################
 797
 798 def fubar(msg, exit_code=1):
 799     sys.stderr.write("E: %s\n" % (msg))
 800     sys.exit(exit_code)
 801
 802 def warn(msg):
 803     sys.stderr.write("W: %s\n" % (msg))
 804
 805 ################################################################################
 806
 807 # Returns the user name with a laughable attempt at rfc822 conformancy
 808 # (read: removing stray periods).
 809 def whoami ():
 810     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 811
 812 def getusername ():
 813     return pwd.getpwuid(os.getuid())[0]
 814
 815 ################################################################################
 816
 817 def size_type (c):
 818     t  = " B"
 819     if c > 10240:
 820         c = c / 1024
 821         t = " KB"
 822     if c > 10240:
 823         c = c / 1024
 824         t = " MB"
 825     return ("%d%s" % (c, t))
 826
 827 ################################################################################
 828
 829 def cc_fix_changes (changes):
 830     o = changes.get("architecture", "")
 831     if o:
 832         del changes["architecture"]
 833     changes["architecture"] = {}
 834     for j in o.split():
 835         changes["architecture"][j] = 1
 836
 837 def changes_compare (a, b):
 838     """ Sort by source name, source version, 'have source', and then by filename """
 839     try:
 840         a_changes = parse_changes(a)
 841     except:
 842         return -1
 843
 844     try:
 845         b_changes = parse_changes(b)
 846     except:
 847         return 1
 848
 849     cc_fix_changes (a_changes)
 850     cc_fix_changes (b_changes)
 851
 852     # Sort by source name
 853     a_source = a_changes.get("source")
 854     b_source = b_changes.get("source")
 855     q = cmp (a_source, b_source)
 856     if q:
 857         return q
 858
 859     # Sort by source version
 860     a_version = a_changes.get("version", "0")
 861     b_version = b_changes.get("version", "0")
 862     q = apt_pkg.version_compare(a_version, b_version)
 863     if q:
 864         return q
 865
 866     # Sort by 'have source'
 867     a_has_source = a_changes["architecture"].get("source")
 868     b_has_source = b_changes["architecture"].get("source")
 869     if a_has_source and not b_has_source:
 870         return -1
 871     elif b_has_source and not a_has_source:
 872         return 1
 873
 874     # Fall back to sort by filename
 875     return cmp(a, b)
 876
 877 ################################################################################
 878
 879 def find_next_free (dest, too_many=100):
 880     extra = 0
 881     orig_dest = dest
 882     while os.path.lexists(dest) and extra < too_many:
 883         dest = orig_dest + '.' + repr(extra)
 884         extra += 1
 885     if extra >= too_many:
 886         raise NoFreeFilenameError
 887     return dest
 888
 889 ################################################################################
 890
 891 def result_join (original, sep = '\t'):
 892     resultlist = []
 893     for i in xrange(len(original)):
 894         if original[i] == None:
 895             resultlist.append("")
 896         else:
 897             resultlist.append(original[i])
 898     return sep.join(resultlist)
 899
 900 ################################################################################
 901
 902 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 903     out = ""
 904     for line in str.split('\n'):
 905         line = line.strip()
 906         if line or include_blank_lines:
 907             out += "%s%s\n" % (prefix, line)
 908     # Strip trailing new line
 909     if out:
 910         out = out[:-1]
 911     return out
 912
 913 ################################################################################
 914
 915 def validate_changes_file_arg(filename, require_changes=1):
 916     """
 917     'filename' is either a .changes or .dak file.  If 'filename' is a
 918     .dak file, it's changed to be the corresponding .changes file.  The
 919     function then checks if the .changes file a) exists and b) is
 920     readable and returns the .changes filename if so.  If there's a
 921     problem, the next action depends on the option 'require_changes'
 922     argument:
 923
 924       - If 'require_changes' == -1, errors are ignored and the .changes
 925         filename is returned.
 926       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 927       - If 'require_changes' == 1, a fatal error is raised.
 928
 929     """
 930     error = None
 931
 932     orig_filename = filename
 933     if filename.endswith(".dak"):
 934         filename = filename[:-4]+".changes"
 935
 936     if not filename.endswith(".changes"):
 937         error = "invalid file type; not a changes file"
 938     else:
 939         if not os.access(filename,os.R_OK):
 940             if os.path.exists(filename):
 941                 error = "permission denied"
 942             else:
 943                 error = "file not found"
 944
 945     if error:
 946         if require_changes == 1:
 947             fubar("%s: %s." % (orig_filename, error))
 948         elif require_changes == 0:
 949             warn("Skipping %s - %s" % (orig_filename, error))
 950             return None
 951         else: # We only care about the .dak file
 952             return filename
 953     else:
 954         return filename
 955
 956 ################################################################################
 957
 958 def real_arch(arch):
 959     return (arch != "source" and arch != "all")
 960
 961 ################################################################################
 962
 963 def join_with_commas_and(list):
 964     if len(list) == 0: return "nothing"
 965     if len(list) == 1: return list[0]
 966     return ", ".join(list[:-1]) + " and " + list[-1]
 967
 968 ################################################################################
 969
 970 def pp_deps (deps):
 971     pp_deps = []
 972     for atom in deps:
 973         (pkg, version, constraint) = atom
 974         if constraint:
 975             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
 976         else:
 977             pp_dep = pkg
 978         pp_deps.append(pp_dep)
 979     return " |".join(pp_deps)
 980
 981 ################################################################################
 982
def get_conf():
    """Return the module-level C{Cnf} configuration object."""
    return Cnf
 985
 986 ################################################################################
 987
 988 def parse_args(Options):
 989     """ Handle -a, -c and -s arguments; returns them as SQL constraints """
 990     # XXX: This should go away and everything which calls it be converted
 991     #      to use SQLA properly.  For now, we'll just fix it not to use
 992     #      the old Pg interface though
 993     session = DBConn().session()
 994     # Process suite
 995     if Options["Suite"]:
 996         suite_ids_list = []
 997         for suitename in split_args(Options["Suite"]):
 998             suite = get_suite(suitename, session=session)
 999             if not suite or suite.suite_id is None:
1000                 warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
1001             else:
1002                 suite_ids_list.append(suite.suite_id)
1003         if suite_ids_list:
1004             con_suites = "AND su.id IN (%s)" % ", ".join([ str(i) for i in suite_ids_list ])
1005         else:
1006             fubar("No valid suite given.")
1007     else:
1008         con_suites = ""
1009
1010     # Process component
1011     if Options["Component"]:
1012         component_ids_list = []
1013         for componentname in split_args(Options["Component"]):
1014             component = get_component(componentname, session=session)
1015             if component is None:
1016                 warn("component '%s' not recognised." % (componentname))
1017             else:
1018                 component_ids_list.append(component.component_id)
1019         if component_ids_list:
1020             con_components = "AND c.id IN (%s)" % ", ".join([ str(i) for i in component_ids_list ])
1021         else:
1022             fubar("No valid component given.")
1023     else:
1024         con_components = ""
1025
1026     # Process architecture
1027     con_architectures = ""
1028     check_source = 0
1029     if Options["Architecture"]:
1030         arch_ids_list = []
1031         for archname in split_args(Options["Architecture"]):
1032             if archname == "source":
1033                 check_source = 1
1034             else:
1035                 arch = get_architecture(archname, session=session)
1036                 if arch is None:
1037                     warn("architecture '%s' not recognised." % (archname))
1038                 else:
1039                     arch_ids_list.append(arch.arch_id)
1040         if arch_ids_list:
1041             con_architectures = "AND a.id IN (%s)" % ", ".join([ str(i) for i in arch_ids_list ])
1042         else:
1043             if not check_source:
1044                 fubar("No valid architecture given.")
1045     else:
1046         check_source = 1
1047
1048     return (con_suites, con_architectures, con_components, check_source)
1049
1050 ################################################################################
1051
def arch_compare_sw (a, b):
    """
    Comparison function for sorting lists of architectures.

    'source' sorts before everything else; all other names are ordered
    with cmp().

    @type a: str
    @param a: first architecture name

    @type b: str
    @param b: second architecture name

    @rtype: int
    @return: negative, zero or positive value in the style of cmp()
    """
    a_is_source = (a == "source")
    b_is_source = (b == "source")
    if a_is_source or b_is_source:
        # 0 if both are 'source', -1 if only a is, 1 if only b is
        return int(b_is_source) - int(a_is_source)

    return cmp (a, b)
1067
1068 ################################################################################
1069
def split_args (s, dwim=1):
    """
    Split command line arguments which can be separated by either commas
    or whitespace.

    A string without any comma is split on whitespace; otherwise it is
    split on commas only.  If dwim is set, a string ending in a comma is a
    fatal error, since this usually means someone did 'dak ls -a i386, m68k
    foo' or something and the inevitable confusion resulting from 'm68k'
    being treated as an argument is undesirable.

    @type s: str
    @param s: the argument string

    @type dwim: bool
    @param dwim: complain about a trailing comma

    @rtype: list of str
    @return: the individual arguments
    """

    if "," not in s:
        return s.split()

    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1085
1086 ################################################################################
1087
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    C{cmd} is run through '/bin/sh -c' in a forked child whose stdin,
    stdout and stderr are connected to pipes; C{status_write} is the only
    other descriptor below 256 that is left open in the child.

    @type cmd: str
    @param cmd: shell command line to run

    @type status_read: int
    @param status_read: read end of the status pipe; read by the parent

    @type status_write: int
    @param status_write: write end of the status pipe; kept open for the
                         child

    @rtype: tuple
    @return: (output, status, exit_status): what the child wrote to
             stdout/stderr, what was read from C{status_read}, and the
             status value returned by os.waitpid()
    """

    cmd = ['/bin/sh', '-c', cmd]
    # One pipe each for the child's stdin, stdout and stderr
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # dup() hands out the lowest free descriptor (POSIX), so after the
        # close() calls the pipe ends land on fds 0, 1 and 2 in turn.
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        os.close(2)
        os.dup(errin)
        # Close everything else except the status fd; descriptors that are
        # not open simply fail to close and are ignored.
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        try:
            os.execvp(cmd[0], cmd)
        finally:
            # Only reached if the exec failed
            os._exit(1)

    # Parent
    os.close(p2cread)
    # From here on c2pwrite and errin refer to the *read* ends in the
    # parent: dup2() replaces the parent's copies of the write ends.
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                # stdout and stderr of the child are merged into 'output'
                if fd == c2pwrite or fd == errin:
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        # No readable descriptor produced any data in this round: collect
        # the child and clean up.
        if not more_data:
            pid, exit_status = os.waitpid(pid, 0)
            # NOTE(review): the first close() that fails skips all the
            # remaining ones because of the shared bare except.
            try:
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                pass
            break

    return output, status, exit_status
1152
1153 ################################################################################
1154
def process_gpgv_output(status):
    """
    Process the status-fd output of gpgv.

    Every non-empty line must look like "[GNUPG:] KEYWORD [args...]".
    A keyword may only occur once, except for NODATA, SIGEXPIRED and
    KEYEXPIRED, for which the last occurrence wins.

    @type status: str
    @param status: the text read from gpgv's status fd

    @rtype: tuple
    @return: (keywords, internal_error) where keywords maps each status
             keyword to the list of its arguments and internal_error is a
             string with one line per problem found ("" if none)
    """
    repeatable = ("NODATA", "SIGEXPIRED", "KEYEXPIRED")
    keywords = {}
    internal_error = ""
    for line in status.split('\n'):
        line = line.strip()
        if line == "":
            continue
        split = line.split()
        if len(split) < 2:
            internal_error += "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line)
            continue
        (gnupg, keyword) = split[:2]
        if gnupg != "[GNUPG:]":
            internal_error += "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg)
            continue
        args = split[2:]
        if keyword in keywords and keyword not in repeatable:
            # Keep the first occurrence and report the duplicate
            internal_error += "found duplicate status token ('%s').\n" % (keyword)
            continue
        keywords[keyword] = args

    return (keywords, internal_error)
1179
1180 ################################################################################
1181
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Retrieve the key that signed 'filename' from 'keyserver' and
    add it to 'keyring'.

    @type filename: str
    @param filename: the signed file; must match re_taint_free

    @type keyserver: str
    @param keyserver: key server to ask; defaults to Dinstall::KeyServer

    @type keyring: str
    @param keyring: keyring the key is added to; defaults to the result of
                    get_primary_keyring_path()

    @rtype: str
    @return: an empty string on success, or an error message on error
    """

    # Defaults for keyserver and keyring
    if not keyserver:
        keyserver = Cnf["Dinstall::KeyServer"]
    if not keyring:
        keyring = get_primary_keyring_path()

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Invoke gpgv on the file with an empty keyring, so that it reports
    # the id of the key it is missing.
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    (_, status, _) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"

    no_pubkey_args = keywords["NO_PUBKEY"]
    if not no_pubkey_args:
        # A bare NO_PUBKEY line used to end in an IndexError below
        return "NO_PUBKEY in gpgv status-fd output carries no key id"

    fingerprint = no_pubkey_args[0]
    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    cmd = "gpg --no-default-keyring --secret-keyring=%s --no-options" \
          % (Cnf["Dinstall::SigningKeyring"])
    cmd += " --keyring %s --keyserver %s --recv-key %s" \
           % (keyring, keyserver, fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if (result != 0):
        return "'%s' failed with exit code %s" % (cmd, result)

    return ""
1225
1226 ################################################################################
1227
def gpg_keyring_args(keyrings=None):
    """
    Build the "--keyring" part of a gpg/gpgv command line.

    @type keyrings: list of str
    @param keyrings: keyring paths; if empty or None the result of
                     get_active_keyring_paths() is used

    @rtype: str
    @return: one "--keyring PATH" per keyring, separated by spaces
    """
    if not keyrings:
        keyrings = get_active_keyring_paths()

    options = []
    for keyring_path in keyrings:
        options.append("--keyring %s" % keyring_path)
    return " ".join(options)
1233
1234 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type sig_filename: str
    @param sig_filename: name of the file whose signature should be checked

    @type data_filename: str
    @param data_filename: optional name of the file a detached signature
                          applies to

    @type keyrings: list
    @param keyrings: optional *list* of keyrings to use; if empty, the
                     names of all active keyrings in the database are used

    @type autofetch: bool or None
    @param autofetch: whether to fetch the signing key with retrieve_key()
                      first.  If None, the default behaviour specified in
                      the config (Dinstall::KeyAutoFetch) is used.

    @param session: database session used to look up the active keyrings

    @rtype: tuple
    @return: (fingerprint, []) if the signature is valid, otherwise
             (None, rejects) where rejects is a list of messages that
             explain why the signature was not accepted
    """

    rejects = []

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        # list.append() takes exactly one argument; the two-argument calls
        # that used to be here raised TypeError instead of reporting.
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # Do not rely on gpgv always naming the key
        key = args[0] if len(args) >= 1 else "<unknown>"
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        key = args[0] if len(args) >= 1 else "<unknown>"
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate=""
        if len(args) >= 1:
            timestamp = args[0]
            # A value without a "T" is taken to be seconds since the epoch
            if timestamp.count("T") == 0:
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    fingerprint = None
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
                          SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
                          NODATA="",NOTATION_DATA="",NOTATION_NAME="",KEYEXPIRED="",POLICY_URL="")

    for keyword in keywords:
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        return (fingerprint, [])
1367
1368 ################################################################################
1369
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve email addresses from gpg key uids for a given fingerprint.

    Results are remembered in key_uid_email_cache, so gpg is run at most
    once per fingerprint.  An empty list is cached if gpg fails.

    @type fingerprint: str
    @param fingerprint: fingerprint of the key to look up in the keyrings
                        given by gpg_keyring_args()

    @rtype: list of str
    @return: the addresses matched by re_gpg_uid, @debian.org ones first
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    debian_addresses = []
    other_addresses = []
    cmd = "gpg --no-default-keyring %s --fingerprint %s" \
                % (gpg_keyring_args(), fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if result == 0:
        for uid_line in output.split('\n'):
            m = re_gpg_uid.match(uid_line)
            if m is None:
                continue
            address = m.group(1)
            if address.endswith('@debian.org'):
                # prefer @debian.org addresses
                # TODO: maybe not hardcode the domain
                debian_addresses.insert(0, address)
            else:
                other_addresses.append(address)

    addresses = debian_addresses + other_addresses
    key_uid_email_cache[fingerprint] = addresses
    return addresses
1393
1394 ################################################################################
1395
def get_logins_from_ldap(fingerprint='*'):
    """
    Retrieve logins from LDAP linked to a given fingerprint.

    Server and base DN are read from the Import-LDAP-Fingerprints section
    of the configuration; the bind is anonymous.

    @type fingerprint: str
    @param fingerprint: value for the keyfingerprint search filter; the
                        default '*' is inserted into the filter unchanged

    @rtype: dict
    @return: maps the first keyFingerPrint value of every entry found to
             the first uid value of that entry
    """

    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    connection.simple_bind_s('','')
    search_filter = '(keyfingerprint=%s)' % fingerprint
    entries = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  search_filter,
                                  ['uid', 'keyfingerprint'])
    # Every result is indexed as (dn, attributes); only the latter is used
    return dict((entry[1]['keyFingerPrint'][0], entry[1]['uid'][0])
                for entry in entries)
1410
1411 ################################################################################
1412
def get_users_from_ldap():
    """
    Retrieve login and user names from LDAP.

    Server and base DN are read from the Import-LDAP-Fingerprints section
    of the configuration; the bind is anonymous.

    @rtype: dict
    @return: maps the name of every entry (the first values of cn, mn and
             sn joined by spaces, leaving out missing attributes and '-')
             to the first uid value of that entry
    """

    base_dn = Cnf['Import-LDAP-Fingerprints::LDAPDn']
    server = Cnf['Import-LDAP-Fingerprints::LDAPServer']
    connection = ldap.open(server)
    connection.simple_bind_s('','')
    entries = connection.search_s(base_dn, ldap.SCOPE_ONELEVEL,
                                  '(uid=*)', ['uid', 'cn', 'mn', 'sn'])
    users = {}
    for entry in entries:
        attributes = entry[1]
        name_parts = []
        for attribute in ('cn', 'mn', 'sn'):
            try:
                value = attributes[attribute][0]
            except KeyError:
                # Not every entry carries all three name attributes
                continue
            if value != '-':
                name_parts.append(value)
        users[' '.join(name_parts)] = attributes['uid'][0]
    return users
1434
1435 ################################################################################
1436
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of 'root' is removed from both paths; the result
    is 'src' prefixed with one '../' for every '/'-separated component of
    the directory part of 'dest'.

    @type src: str
    @param src: absolute path the link points to

    @type dest: str
    @param dest: absolute path of the link itself

    @type root: str
    @param root: prefix to remove from both paths

    @rtype: str
    @return: the fixed 'src'
    """
    relative_src = src.replace(root, '', 1)
    link_dir = os.path.dirname(dest.replace(root, '', 1))
    # n separators delimit n + 1 components
    depth = link_dir.count('/') + 1
    return '../' * depth + relative_src
1447
1448 ################################################################################
1449
def temp_filename(directory=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique filename by pre-creating it.

    The file is created with tempfile.mkstemp() and is left open.

    @type directory: str
    @param directory: If non-null it will be the directory the file is pre-created in.

    @type prefix: str
    @param prefix: The filename will be prefixed with this string

    @type suffix: str
    @param suffix: The filename will end with this string

    @type mode: int
    @param mode: If set the file will get chmodded to those permissions

    @type group: str
    @param group: If set the file will get chgrped to the specified group.

    @rtype: tuple
    @return: Returns a pair (fd, name)
    """

    handle, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
    if mode:
        os.chmod(path, mode)
    if group:
        # -1 leaves the owner of the file untouched
        os.chown(path, -1, grp.getgrnam(group).gr_gid)
    return (handle, path)
1480
1481 ################################################################################
1482
def temp_dirname(parent=None, prefix="dak", suffix="", mode=None, group=None):
    """
    Return a secure and unique directory by pre-creating it.

    The directory is created with tempfile.mkdtemp().

    @type parent: str
    @param parent: If non-null it will be the directory the directory is pre-created in.

    @type prefix: str
    @param prefix: The directory name will be prefixed with this string

    @type suffix: str
    @param suffix: The directory name will end with this string

    @type mode: int
    @param mode: If set the directory will get chmodded to those permissions

    @type group: str
    @param group: If set the directory will get chgrped to the specified group.

    @rtype: str
    @return: Returns the name of the new directory

    """

    path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=parent)
    if mode:
        os.chmod(path, mode)
    if group:
        # -1 leaves the owner of the directory untouched
        os.chown(path, -1, grp.getgrnam(group).gr_gid)
    return path
1514
1515 ################################################################################
1516
def is_email_alias(email):
    """
    Check if the user part of the email is listed in the alias file.

    The alias file named by which_alias_file() is read on first use; the
    part before the first ':' of every line is remembered in the
    module-level alias_cache.

    @type email: str
    @param email: the address to check; only the part before the first
                  '@' is looked at

    @rtype: bool
    @return: True if the user part is a known alias
    """
    global alias_cache
    if alias_cache is None:
        aliases = set()
        aliasfn = which_alias_file()
        if aliasfn:
            with open(aliasfn) as aliasfile:
                for l in aliasfile:
                    aliases.add(l.split(':')[0])
        # Publish the cache only once it is complete, so that a failed
        # read is retried by the next call instead of caching "no aliases".
        alias_cache = aliases
    uid = email.split('@')[0]
    return uid in alias_cache
1528
1529 ################################################################################
1530
def get_changes_files(from_dir):
    """
    Takes a directory and lists all .changes files in it (as well as chdir'ing
    to the directory; this is due to broken behaviour on the part of p-u/p-a
    when you're not in the right place)

    @type from_dir: str
    @param from_dir: the directory to change to and to list

    @rtype: list of str
    @return: the names (without directory) of the .changes files found
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        entries = os.listdir(from_dir)
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return [entry for entry in entries if entry.endswith('.changes')]
1547
1548 ################################################################################
1549
# Module-wide configuration object, taken from daklib.config's Config();
# functions in this module look up their settings in it.
Cnf = config.Config().Cnf
1551
1552 ################################################################################
1553
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at http://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parses a local copy, but let's document the source
    somewhere ;)

    Every line is expected to look like "source: bug|bug|..."; lines
    without ": " are ignored.  If the file cannot be read a warning is
    printed and an empty dict is returned.

    @type file: str
    @param file: path of the local copy of the list

    @rtype: dict
    @return: associates each source package name with a list of open wnpp
             bug numbers as strings (Yes, there might be more than one)
    """

    try:
        with open(file) as f:
            lines = f.readlines()
    except IOError:
        print("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any." % file)
        lines = []

    wnpp = {}
    for line in lines:
        splited_line = line.split(": ", 1)
        if len(splited_line) > 1:
            bugs = []
            for wnpp_bug in splited_line[1].split("|"):
                # An entry without any digits is skipped; it used to end
                # in an AttributeError on the failed match.
                match = re.search(r"\d+", wnpp_bug)
                if match:
                    bugs.append(match.group())
            wnpp[splited_line[0]] = bugs
    return wnpp
1586
1587 ################################################################################
1588
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    aggregating the Packages.gz files of one architecture: the regular
    one and, if it exists, the debian-installer one.

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
    (fd, temp_file) = temp_filename()
    # The file is filled through shell redirection and re-opened by name
    # below, so the descriptor itself is not needed; do not leak it.
    os.close(fd)
    (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
    if (result != 0):
        fubar("Gunzip invocation failed!\n%s\n" % (output), result)
    filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
    if os.path.exists(filename):
        (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
        if (result != 0):
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
    packages = open_file(temp_file)
    Packages = apt_pkg.TagFile(packages)
    # The data stays reachable through the open handle
    os.unlink(temp_file)
    return Packages
1623
1624 ################################################################################
1625
def deb_extract_control(fh):
    """
    Extract DEBIAN/control from a binary package.

    @param fh: the binary package, handed to apt_inst.DebFile()

    @return: the data of the "control" member of the package's control part
    """
    package = apt_inst.DebFile(fh)
    control_part = package.control
    return control_part.extractdata("control")
1629
1630 ################################################################################
1631
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """Collect the mail addresses to contact about an upload.

    The maintainer always comes first, followed by the changer if that is
    someone else.  The first address attached to the signing key is added as
    well, but only if the key has addresses and neither the changer nor the
    maintainer is among them.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if changed_by != maintainer:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    if len(key_addresses) != 0:
        # Element 3 of fix_maintainer()'s result is compared against the key
        # addresses (presumably the bare e-mail address).
        def on_key(person):
            return fix_maintainer(person)[3] in key_addresses

        # The changer is looked at first; the maintainer is only parsed when
        # the changer is not on the key.
        if not (on_key(changed_by) or on_key(maintainer)):
            recipients.append(key_addresses[0])

    return [fix_maintainer(recipient)[1] for recipient in recipients]
1658
1659 ################################################################################
1660
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The editor is taken from $VISUAL, then $EDITOR, and finally defaults to
    vi; a variable that is unset or empty is skipped.  The text is edited in
    a temporary file which is removed afterwards, whether or not the editor
    run succeeded.  Errors raised by daklib.daksubprocess.check_call are
    passed on to the caller.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text
    """
    editor = os.environ.get('VISUAL') or os.environ.get('EDITOR') or 'vi'
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        try:
            # Trailing comma: write the text without appending a newline.
            print >>tmp, text,
        finally:
            # Close before the editor runs so the content is flushed, and
            # do so even if writing failed.
            tmp.close()
        daklib.daksubprocess.check_call([editor, tmp.name])
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        os.unlink(tmp.name)
1682
1683 ################################################################################
1684
1685 def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
1686     dbsuite = get_suite(suite, session)
1687     overridesuite = dbsuite
1688     if dbsuite.overridesuite is not None:
1689         overridesuite = get_suite(dbsuite.overridesuite, session)
1690     dep_problem = 0
1691     p2c = {}
1692     all_broken = {}
1693     if arches:
1694         all_arches = set(arches)
1695     else:
1696         all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
1697     all_arches -= set(["source", "all"])
1698     metakey_d = get_or_set_metadatakey("Depends", session)
1699     metakey_p = get_or_set_metadatakey("Provides", session)
1700     params = {
1701         'suite_id':     dbsuite.suite_id,
1702         'metakey_d_id': metakey_d.key_id,
1703         'metakey_p_id': metakey_p.key_id,
1704     }
1705     for architecture in all_arches | set(['all']):
1706         deps = {}
1707         sources = {}
1708         virtual_packages = {}
1709         params['arch_id'] = get_architecture(architecture, session).arch_id
1710
1711         statement = '''
1712             SELECT b.id, b.package, s.source, c.name as component,
1713                 (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
1714                 (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
1715                 FROM binaries b
1716                 JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
1717                 JOIN source s ON b.source = s.id
1718                 JOIN files_archive_map af ON b.file = af.file_id
1719                 JOIN component c ON af.component_id = c.id
1720                 WHERE b.architecture = :arch_id'''
1721         query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
1722             from_statement(statement).params(params)
1723         for binary_id, package, source, component, depends, provides in query:
1724             sources[package] = source
1725             p2c[package] = component
1726             if depends is not None:
1727                 deps[package] = depends
1728             # Maintain a counter for each virtual package.  If a
1729             # Provides: exists, set the counter to 0 and count all
1730             # provides by a package not in the list for removal.
1731             # If the counter stays 0 at the end, we know that only
1732             # the to-be-removed packages provided this virtual
1733             # package.
1734             if provides is not None:
1735                 for virtual_pkg in provides.split(","):
1736                     virtual_pkg = virtual_pkg.strip()
1737                     if virtual_pkg == package: continue
1738                     if not virtual_packages.has_key(virtual_pkg):
1739                         virtual_packages[virtual_pkg] = 0
1740                     if package not in removals:
1741                         virtual_packages[virtual_pkg] += 1
1742
1743         # If a virtual package is only provided by the to-be-removed
1744         # packages, treat the virtual package as to-be-removed too.
1745         for virtual_pkg in virtual_packages.keys():
1746             if virtual_packages[virtual_pkg] == 0:
1747                 removals.append(virtual_pkg)
1748
1749         # Check binary dependencies (Depends)
1750         for package in deps.keys():
1751             if package in removals: continue
1752             parsed_dep = []
1753             try:
1754                 parsed_dep += apt_pkg.parse_depends(deps[package])
1755             except ValueError as e:
1756                 print "Error for package %s: %s" % (package, e)
1757             for dep in parsed_dep:
1758                 # Check for partial breakage.  If a package has a ORed
1759                 # dependency, there is only a dependency problem if all
1760                 # packages in the ORed depends will be removed.
1761                 unsat = 0
1762                 for dep_package, _, _ in dep:
1763                     if dep_package in removals:
1764                         unsat += 1
1765                 if unsat == len(dep):
1766                     component = p2c[package]
1767                     source = sources[package]
1768                     if component != "main":
1769                         source = "%s/%s" % (source, component)
1770                     all_broken.setdefault(source, {}).setdefault(package, set()).add(architecture)
1771                     dep_problem = 1
1772
1773     if all_broken:
1774         if cruft:
1775             print "  - broken Depends:"
1776         else:
1777             print "# Broken Depends:"
1778         for source, bindict in sorted(all_broken.items()):
1779             lines = []
1780             for binary, arches in sorted(bindict.items()):
1781                 if arches == all_arches or 'all' in arches:
1782                     lines.append(binary)
1783                 else:
1784                     lines.append('%s [%s]' % (binary, ' '.join(sorted(arches))))
1785             if cruft:
1786                 print '    %s: %s' % (source, lines[0])
1787             else:
1788                 print '%s: %s' % (source, lines[0])
1789             for line in lines[1:]:
1790                 if cruft:
1791                     print '    ' + ' ' * (len(source) + 2) + line
1792                 else:
1793                     print ' ' * (len(source) + 2) + line
1794         if not cruft:
1795             print
1796
1797     # Check source dependencies (Build-Depends and Build-Depends-Indep)
1798     all_broken.clear()
1799     metakey_bd = get_or_set_metadatakey("Build-Depends", session)
1800     metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
1801     params = {
1802         'suite_id':    dbsuite.suite_id,
1803         'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
1804     }
1805     statement = '''
1806         SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
1807            FROM source s
1808            JOIN source_metadata sm ON s.id = sm.src_id
1809            WHERE s.id in
1810                (SELECT source FROM src_associations
1811                    WHERE suite = :suite_id)
1812                AND sm.key_id in :metakey_ids
1813            GROUP BY s.id, s.source'''
1814     query = session.query('id', 'source', 'build_dep').from_statement(statement). \
1815         params(params)
1816     for source_id, source, build_dep in query:
1817         if source in removals: continue
1818         parsed_dep = []
1819         if build_dep is not None:
1820             # Remove [arch] information since we want to see breakage on all arches
1821             build_dep = re_build_dep_arch.sub("", build_dep)
1822             try:
1823                 parsed_dep += apt_pkg.parse_depends(build_dep)
1824             except ValueError as e:
1825                 print "Error for source %s: %s" % (source, e)
1826         for dep in parsed_dep:
1827             unsat = 0
1828             for dep_package, _, _ in dep:
1829                 if dep_package in removals:
1830                     unsat += 1
1831             if unsat == len(dep):
1832                 component, = session.query(Component.component_name) \
1833                     .join(Component.overrides) \
1834                     .filter(Override.suite == overridesuite) \
1835                     .filter(Override.package == re.sub('/(contrib|non-free)$', '', source)) \
1836                     .join(Override.overridetype).filter(OverrideType.overridetype == 'dsc') \
1837                     .first()
1838                 key = source
1839                 if component != "main":
1840                     key = "%s/%s" % (source, component)
1841                 all_broken.setdefault(key, set()).add(pp_deps(dep))
1842                 dep_problem = 1
1843
1844     if all_broken:
1845         if cruft:
1846             print "  - broken Build-Depends:"
1847         else:
1848             print "# Broken Build-Depends:"
1849         for source, bdeps in sorted(all_broken.items()):
1850             bdeps = sorted(bdeps)
1851             if cruft:
1852                 print '    %s: %s' % (source, bdeps[0])
1853             else:
1854                 print '%s: %s' % (source, bdeps[0])
1855             for bdep in bdeps[1:]:
1856                 if cruft:
1857                     print '    ' + ' ' * (len(source) + 2) + bdep
1858                 else:
1859                     print ' ' * (len(source) + 2) + bdep
1860         if not cruft:
1861             print
1862
1863     return dep_problem