daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import datetime
  27 import email.Header
  28 import os
  29 import pwd
  30 import select
  31 import socket
  32 import shutil
  33 import sys
  34 import tempfile
  35 import traceback
  36 import stat
  37 import apt_inst
  38 import apt_pkg
  39 import time
  40 import re
  41 import email as modemail
  42 import subprocess
  43
  44 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  45                    get_override_type, Keyring, session_wrapper, \
  46                    get_active_keyring_paths, get_primary_keyring_path, \
  47                    get_suite_architectures, get_or_set_metadatakey, DBSource, \
  48                    Component, Override, OverrideType
  49 from sqlalchemy import desc
  50 from dak_exceptions import *
  51 from gpg import SignedFile
  52 from textutils import fix_maintainer
  53 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  54                     re_multi_line_field, re_srchasver, re_taint_free, \
  55                     re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
  56                     re_is_orig_source, re_build_dep_arch
  57
  58 from formats import parse_format, validate_changes_format
  59 from srcformats import get_format_from_string
  60 from collections import defaultdict
  61
  62 ################################################################################
  63
# Fallback configuration paths returned by which_conf_file() and
# which_apt_conf_file() when no host specific setting is found.
default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties
default_apt_config = "/etc/dak/apt.conf" #: default apt config, not normally used

alias_cache = None        #: Cache for email alias checks
key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids

# (hashname, function, earliest_changes_version)
# check_hash_fields() accepts a "checksums-<hashname>" field only if
# <hashname> is the first element of one of these tuples.
known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  73
  74 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  75 # code in lenny's Python. This also affects commands.getoutput and
  76 # commands.getstatus.
  77 def dak_getstatusoutput(cmd):
  78     pipe = subprocess.Popen(cmd, shell=True, universal_newlines=True,
  79         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  80
  81     output = pipe.stdout.read()
  82
  83     pipe.wait()
  84
  85     if output[-1:] == '\n':
  86         output = output[:-1]
  87
  88     ret = pipe.wait()
  89     if ret is None:
  90         ret = 0
  91
  92     return ret, output
# Install the replacement so every caller of commands.getstatusoutput in this
# process goes through dak_getstatusoutput.
commands.getstatusoutput = dak_getstatusoutput
  94
  95 ################################################################################
  96
  97 def html_escape(s):
  98     """ Escape html chars """
  99     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 100
 101 ################################################################################
 102
 103 def open_file(filename, mode='r'):
 104     """
 105     Open C{file}, return fileobject.
 106
 107     @type filename: string
 108     @param filename: path/filename to open
 109
 110     @type mode: string
 111     @param mode: open mode
 112
 113     @rtype: fileobject
 114     @return: open fileobject
 115
 116     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 117
 118     """
 119     try:
 120         f = open(filename, mode)
 121     except IOError:
 122         raise CantOpenError(filename)
 123     return f
 124
 125 ################################################################################
 126
 127 def our_raw_input(prompt=""):
 128     if prompt:
 129         while 1:
 130             try:
 131                 sys.stdout.write(prompt)
 132                 break
 133             except IOError:
 134                 pass
 135     sys.stdout.flush()
 136     try:
 137         ret = raw_input()
 138         return ret
 139     except EOFError:
 140         sys.stderr.write("\nUser interrupt (^D).\n")
 141         raise SystemExit
 142
 143 ################################################################################
 144
 145 def extract_component_from_section(section, session=None):
 146     component = ""
 147
 148     if section.find('/') != -1:
 149         component = section.split('/')[0]
 150
 151     # Expand default component
 152     if component == "":
 153         comp = get_component(section, session)
 154         if comp is None:
 155             component = "main"
 156         else:
 157             component = comp.component_name
 158
 159     return (section, component)
 160
 161 ################################################################################
 162
 163 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 164     require_signature = True
 165     if keyrings == None:
 166         keyrings = []
 167         require_signature = False
 168
 169     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 170     contents = signed_file.contents
 171
 172     error = ""
 173     changes = {}
 174
 175     # Split the lines in the input, keeping the linebreaks.
 176     lines = contents.splitlines(True)
 177
 178     if len(lines) == 0:
 179         raise ParseChangesError("[Empty changes file]")
 180
 181     # Reindex by line number so we can easily verify the format of
 182     # .dsc files...
 183     index = 0
 184     indexed_lines = {}
 185     for line in lines:
 186         index += 1
 187         indexed_lines[index] = line[:-1]
 188
 189     num_of_lines = len(indexed_lines.keys())
 190     index = 0
 191     first = -1
 192     while index < num_of_lines:
 193         index += 1
 194         line = indexed_lines[index]
 195         if line == "" and signing_rules == 1:
 196             if index != num_of_lines:
 197                 raise InvalidDscError(index)
 198             break
 199         slf = re_single_line_field.match(line)
 200         if slf:
 201             field = slf.groups()[0].lower()
 202             changes[field] = slf.groups()[1]
 203             first = 1
 204             continue
 205         if line == " .":
 206             changes[field] += '\n'
 207             continue
 208         mlf = re_multi_line_field.match(line)
 209         if mlf:
 210             if first == -1:
 211                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 212             if first == 1 and changes[field] != "":
 213                 changes[field] += '\n'
 214             first = 0
 215             changes[field] += mlf.groups()[0] + '\n'
 216             continue
 217         error += line
 218
 219     changes["filecontents"] = armored_contents
 220
 221     if changes.has_key("source"):
 222         # Strip the source version in brackets from the source field,
 223         # put it in the "source-version" field instead.
 224         srcver = re_srchasver.search(changes["source"])
 225         if srcver:
 226             changes["source"] = srcver.group(1)
 227             changes["source-version"] = srcver.group(2)
 228
 229     if error:
 230         raise ParseChangesError(error)
 231
 232     return changes
 233
 234 ################################################################################
 235
 236 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 237     """
 238     Parses a changes file and returns a dictionary where each field is a
 239     key.  The mandatory first argument is the filename of the .changes
 240     file.
 241
 242     signing_rules is an optional argument:
 243
 244       - If signing_rules == -1, no signature is required.
 245       - If signing_rules == 0 (the default), a signature is required.
 246       - If signing_rules == 1, it turns on the same strict format checking
 247         as dpkg-source.
 248
 249     The rules for (signing_rules == 1)-mode are:
 250
 251       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 252         followed by any PGP header data and must end with a blank line.
 253
 254       - The data section must end with a blank line and must be followed by
 255         "-----BEGIN PGP SIGNATURE-----".
 256     """
 257
 258     changes_in = open_file(filename)
 259     content = changes_in.read()
 260     changes_in.close()
 261     try:
 262         unicode(content, 'utf-8')
 263     except UnicodeError:
 264         raise ChangesUnicodeError("Changes file not proper utf-8")
 265     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 266
 267
 268     if not dsc_file:
 269         # Finally ensure that everything needed for .changes is there
 270         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 271                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 272
 273         missingfields=[]
 274         for keyword in must_keywords:
 275             if not changes.has_key(keyword.lower()):
 276                 missingfields.append(keyword)
 277
 278                 if len(missingfields):
 279                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 280
 281     return changes
 282
 283 ################################################################################
 284
 285 def hash_key(hashname):
 286     return '%ssum' % hashname
 287
 288 ################################################################################
 289
 290 def create_hash(where, files, hashname, hashfunc):
 291     """
 292     create_hash extends the passed files dict with the given hash by
 293     iterating over all files on disk and passing them to the hashing
 294     function given.
 295     """
 296
 297     rejmsg = []
 298     for f in files.keys():
 299         try:
 300             file_handle = open_file(f)
 301         except CantOpenError:
 302             rejmsg.append("Could not open file %s for checksumming" % (f))
 303             continue
 304
 305         files[f][hash_key(hashname)] = hashfunc(file_handle)
 306
 307         file_handle.close()
 308     return rejmsg
 309
 310 ################################################################################
 311
 312 def check_hash(where, files, hashname, hashfunc):
 313     """
 314     check_hash checks the given hash in the files dict against the actual
 315     files on disk.  The hash values need to be present consistently in
 316     all file entries.  It does not modify its input in any way.
 317     """
 318
 319     rejmsg = []
 320     for f in files.keys():
 321         file_handle = None
 322         try:
 323             try:
 324                 file_handle = open_file(f)
 325
 326                 # Check for the hash entry, to not trigger a KeyError.
 327                 if not files[f].has_key(hash_key(hashname)):
 328                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 329                         where))
 330                     continue
 331
 332                 # Actually check the hash for correctness.
 333                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 334                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 335                         where))
 336             except CantOpenError:
 337                 # TODO: This happens when the file is in the pool.
 338                 # warn("Cannot open file %s" % f)
 339                 continue
 340         finally:
 341             if file_handle:
 342                 file_handle.close()
 343     return rejmsg
 344
 345 ################################################################################
 346
 347 def check_size(where, files):
 348     """
 349     check_size checks the file sizes in the passed files dict against the
 350     files on disk.
 351     """
 352
 353     rejmsg = []
 354     for f in files.keys():
 355         try:
 356             entry = os.stat(f)
 357         except OSError as exc:
 358             if exc.errno == 2:
 359                 # TODO: This happens when the file is in the pool.
 360                 continue
 361             raise
 362
 363         actual_size = entry[stat.ST_SIZE]
 364         size = int(files[f]["size"])
 365         if size != actual_size:
 366             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 367                    % (f, actual_size, size, where))
 368     return rejmsg
 369
 370 ################################################################################
 371
 372 def check_dsc_files(dsc_filename, dsc, dsc_files):
 373     """
 374     Verify that the files listed in the Files field of the .dsc are
 375     those expected given the announced Format.
 376
 377     @type dsc_filename: string
 378     @param dsc_filename: path of .dsc file
 379
 380     @type dsc: dict
 381     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 382
 383     @type dsc_files: dict
 384     @param dsc_files: the file list returned by C{build_file_list()}
 385
 386     @rtype: list
 387     @return: all errors detected
 388     """
 389     rejmsg = []
 390
 391     # Ensure .dsc lists proper set of source files according to the format
 392     # announced
 393     has = defaultdict(lambda: 0)
 394
 395     ftype_lookup = (
 396         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 397         (r'diff.gz',                   ('debian_diff',)),
 398         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 399         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 400         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 401         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 402         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 403     )
 404
 405     for f in dsc_files:
 406         m = re_issource.match(f)
 407         if not m:
 408             rejmsg.append("%s: %s in Files field not recognised as source."
 409                           % (dsc_filename, f))
 410             continue
 411
 412         # Populate 'has' dictionary by resolving keys in lookup table
 413         matched = False
 414         for regex, keys in ftype_lookup:
 415             if re.match(regex, m.group(3)):
 416                 matched = True
 417                 for key in keys:
 418                     has[key] += 1
 419                 break
 420
 421         # File does not match anything in lookup table; reject
 422         if not matched:
 423             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 424
 425     # Check for multiple files
 426     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 427         if has[file_type] > 1:
 428             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 429
 430     # Source format specific tests
 431     try:
 432         format = get_format_from_string(dsc['format'])
 433         rejmsg.extend([
 434             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 435         ])
 436
 437     except UnknownFormatError:
 438         # Not an error here for now
 439         pass
 440
 441     return rejmsg
 442
 443 ################################################################################
 444
 445 def check_hash_fields(what, manifest):
 446     """
 447     check_hash_fields ensures that there are no checksum fields in the
 448     given dict that we do not know about.
 449     """
 450
 451     rejmsg = []
 452     hashes = map(lambda x: x[0], known_hashes)
 453     for field in manifest:
 454         if field.startswith("checksums-"):
 455             hashname = field.split("-",1)[1]
 456             if hashname not in hashes:
 457                 rejmsg.append("Unsupported checksum field for %s "\
 458                     "in %s" % (hashname, what))
 459     return rejmsg
 460
 461 ################################################################################
 462
 463 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 464     if format >= version:
 465         # The version should contain the specified hash.
 466         func = check_hash
 467
 468         # Import hashes from the changes
 469         rejmsg = parse_checksums(".changes", files, changes, hashname)
 470         if len(rejmsg) > 0:
 471             return rejmsg
 472     else:
 473         # We need to calculate the hash because it can't possibly
 474         # be in the file.
 475         func = create_hash
 476     return func(".changes", files, hashname, hashfunc)
 477
 478 # We could add the orig which might be in the pool to the files dict to
 479 # access the checksums easily.
 480
 481 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 482     """
 483     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 484     in the dsc is correct, i.e. identical to the changes file and if necessary
 485     the pool.  The latter task is delegated to check_hash.
 486     """
 487
 488     rejmsg = []
 489     if not dsc.has_key('Checksums-%s' % (hashname,)):
 490         return rejmsg
 491     # Import hashes from the dsc
 492     parse_checksums(".dsc", dsc_files, dsc, hashname)
 493     # And check it...
 494     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 495     return rejmsg
 496
 497 ################################################################################
 498
 499 def parse_checksums(where, files, manifest, hashname):
 500     rejmsg = []
 501     field = 'checksums-%s' % hashname
 502     if not field in manifest:
 503         return rejmsg
 504     for line in manifest[field].split('\n'):
 505         if not line:
 506             break
 507         clist = line.strip().split(' ')
 508         if len(clist) == 3:
 509             checksum, size, checkfile = clist
 510         else:
 511             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 512             continue
 513         if not files.has_key(checkfile):
 514         # TODO: check for the file's entry in the original files dict, not
 515         # the one modified by (auto)byhand and other weird stuff
 516         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 517         #        (file, hashname, where))
 518             continue
 519         if not files[checkfile]["size"] == size:
 520             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 521                 "in %s" % (checkfile, hashname, where))
 522             continue
 523         files[checkfile][hash_key(hashname)] = checksum
 524     for f in files.keys():
 525         if not files[f].has_key(hash_key(hashname)):
 526             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 527     return rejmsg
 528
 529 ################################################################################
 530
 531 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 532
 533 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 534     files = {}
 535
 536     # Make sure we have a Files: field to parse...
 537     if not changes.has_key(field):
 538         raise NoFilesFieldError
 539
 540     # Validate .changes Format: field
 541     if not is_a_dsc:
 542         validate_changes_format(parse_format(changes['format']), field)
 543
 544     includes_section = (not is_a_dsc) and field == "files"
 545
 546     # Parse each entry/line:
 547     for i in changes[field].split('\n'):
 548         if not i:
 549             break
 550         s = i.split()
 551         section = priority = ""
 552         try:
 553             if includes_section:
 554                 (md5, size, section, priority, name) = s
 555             else:
 556                 (md5, size, name) = s
 557         except ValueError:
 558             raise ParseChangesError(i)
 559
 560         if section == "":
 561             section = "-"
 562         if priority == "":
 563             priority = "-"
 564
 565         (section, component) = extract_component_from_section(section)
 566
 567         files[name] = dict(size=size, section=section,
 568                            priority=priority, component=component)
 569         files[name][hashname] = md5
 570
 571     return files
 572
 573 ################################################################################
 574
 575 # see http://bugs.debian.org/619131
 576 def build_package_list(dsc, session = None):
 577     if not dsc.has_key("package-list"):
 578         return {}
 579
 580     packages = {}
 581
 582     for line in dsc["package-list"].split("\n"):
 583         if not line:
 584             break
 585
 586         fields = line.split()
 587         name = fields[0]
 588         package_type = fields[1]
 589         (section, component) = extract_component_from_section(fields[2])
 590         priority = fields[3]
 591
 592         # Validate type if we have a session
 593         if session and get_override_type(package_type, session) is None:
 594             # Maybe just warn and ignore? exit(1) might be a bit hard...
 595             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 596
 597         if name not in packages or packages[name]["type"] == "dsc":
 598             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 599
 600     return packages
 601
 602 ################################################################################
 603
 604 def send_mail (message, filename=""):
 605     """sendmail wrapper, takes _either_ a message string or a file as arguments"""
 606
 607     maildir = Cnf.get('Dir::Mail')
 608     if maildir:
 609         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 610         path = find_next_free(path)
 611         fh = open(path, 'w')
 612         print >>fh, message,
 613         fh.close()
 614
 615     # Check whether we're supposed to be sending mail
 616     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 617         return
 618
 619     # If we've been passed a string dump it into a temporary file
 620     if message:
 621         (fd, filename) = tempfile.mkstemp()
 622         os.write (fd, message)
 623         os.close (fd)
 624
 625     if Cnf.has_key("Dinstall::MailWhiteList") and \
 626            Cnf["Dinstall::MailWhiteList"] != "":
 627         message_in = open_file(filename)
 628         message_raw = modemail.message_from_file(message_in)
 629         message_in.close();
 630
 631         whitelist = [];
 632         whitelist_in = open_file(Cnf["Dinstall::MailWhiteList"])
 633         try:
 634             for line in whitelist_in:
 635                 if not re_whitespace_comment.match(line):
 636                     if re_re_mark.match(line):
 637                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 638                     else:
 639                         whitelist.append(re.compile(re.escape(line.strip())))
 640         finally:
 641             whitelist_in.close()
 642
 643         # Fields to check.
 644         fields = ["To", "Bcc", "Cc"]
 645         for field in fields:
 646             # Check each field
 647             value = message_raw.get(field, None)
 648             if value != None:
 649                 match = [];
 650                 for item in value.split(","):
 651                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 652                     mail_whitelisted = 0
 653                     for wr in whitelist:
 654                         if wr.match(email):
 655                             mail_whitelisted = 1
 656                             break
 657                     if not mail_whitelisted:
 658                         print "Skipping %s since it's not in %s" % (item, Cnf["Dinstall::MailWhiteList"])
 659                         continue
 660                     match.append(item)
 661
 662                 # Doesn't have any mail in whitelist so remove the header
 663                 if len(match) == 0:
 664                     del message_raw[field]
 665                 else:
 666                     message_raw.replace_header(field, ', '.join(match))
 667
 668         # Change message fields in order if we don't have a To header
 669         if not message_raw.has_key("To"):
 670             fields.reverse()
 671             for field in fields:
 672                 if message_raw.has_key(field):
 673                     message_raw[fields[-1]] = message_raw[field]
 674                     del message_raw[field]
 675                     break
 676             else:
 677                 # Clean up any temporary files
 678                 # and return, as we removed all recipients.
 679                 if message:
 680                     os.unlink (filename);
 681                 return;
 682
 683         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 684         os.write (fd, message_raw.as_string(True));
 685         os.close (fd);
 686
 687     # Invoke sendmail
 688     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 689     if (result != 0):
 690         raise SendmailFailedError(output)
 691
 692     # Clean up any temporary files
 693     if message:
 694         os.unlink (filename)
 695
 696 ################################################################################
 697
 698 def poolify (source, component=None):
 699     if source[:3] == "lib":
 700         return source[:4] + '/' + source + '/'
 701     else:
 702         return source[:1] + '/' + source + '/'
 703
 704 ################################################################################
 705
 706 def move (src, dest, overwrite = 0, perms = 0o664):
 707     if os.path.exists(dest) and os.path.isdir(dest):
 708         dest_dir = dest
 709     else:
 710         dest_dir = os.path.dirname(dest)
 711     if not os.path.exists(dest_dir):
 712         umask = os.umask(00000)
 713         os.makedirs(dest_dir, 0o2775)
 714         os.umask(umask)
 715     #print "Moving %s to %s..." % (src, dest)
 716     if os.path.exists(dest) and os.path.isdir(dest):
 717         dest += '/' + os.path.basename(src)
 718     # Don't overwrite unless forced to
 719     if os.path.exists(dest):
 720         if not overwrite:
 721             fubar("Can't move %s to %s - file already exists." % (src, dest))
 722         else:
 723             if not os.access(dest, os.W_OK):
 724                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 725     shutil.copy2(src, dest)
 726     os.chmod(dest, perms)
 727     os.unlink(src)
 728
 729 def copy (src, dest, overwrite = 0, perms = 0o664):
 730     if os.path.exists(dest) and os.path.isdir(dest):
 731         dest_dir = dest
 732     else:
 733         dest_dir = os.path.dirname(dest)
 734     if not os.path.exists(dest_dir):
 735         umask = os.umask(00000)
 736         os.makedirs(dest_dir, 0o2775)
 737         os.umask(umask)
 738     #print "Copying %s to %s..." % (src, dest)
 739     if os.path.exists(dest) and os.path.isdir(dest):
 740         dest += '/' + os.path.basename(src)
 741     # Don't overwrite unless forced to
 742     if os.path.exists(dest):
 743         if not overwrite:
 744             raise FileExistsError
 745         else:
 746             if not os.access(dest, os.W_OK):
 747                 raise CantOverwriteError
 748     shutil.copy2(src, dest)
 749     os.chmod(dest, perms)
 750
 751 ################################################################################
 752
 753 def where_am_i ():
 754     res = socket.getfqdn()
 755     database_hostname = Cnf.get("Config::" + res + "::DatabaseHostname")
 756     if database_hostname:
 757         return database_hostname
 758     else:
 759         return res
 760
 761 def which_conf_file ():
 762     if os.getenv('DAK_CONFIG'):
 763         return os.getenv('DAK_CONFIG')
 764
 765     res = socket.getfqdn()
 766     # In case we allow local config files per user, try if one exists
 767     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 768         homedir = os.getenv("HOME")
 769         confpath = os.path.join(homedir, "/etc/dak.conf")
 770         if os.path.exists(confpath):
 771             apt_pkg.ReadConfigFileISC(Cnf,confpath)
 772
 773     # We are still in here, so there is no local config file or we do
 774     # not allow local files. Do the normal stuff.
 775     if Cnf.get("Config::" + res + "::DakConfig"):
 776         return Cnf["Config::" + res + "::DakConfig"]
 777
 778     return default_config
 779
 780 def which_apt_conf_file ():
 781     res = socket.getfqdn()
 782     # In case we allow local config files per user, try if one exists
 783     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 784         homedir = os.getenv("HOME")
 785         confpath = os.path.join(homedir, "/etc/dak.conf")
 786         if os.path.exists(confpath):
 787             apt_pkg.ReadConfigFileISC(Cnf,default_config)
 788
 789     if Cnf.get("Config::" + res + "::AptConfig"):
 790         return Cnf["Config::" + res + "::AptConfig"]
 791     else:
 792         return default_apt_config
 793
 794 def which_alias_file():
 795     hostname = socket.getfqdn()
 796     aliasfn = '/var/lib/misc/'+hostname+'/forward-alias'
 797     if os.path.exists(aliasfn):
 798         return aliasfn
 799     else:
 800         return None
 801
 802 ################################################################################
 803
 804 def TemplateSubst(subst_map, filename):
 805     """ Perform a substition of template """
 806     templatefile = open_file(filename)
 807     template = templatefile.read()
 808     for k, v in subst_map.iteritems():
 809         template = template.replace(k, str(v))
 810     templatefile.close()
 811     return template
 812
 813 ################################################################################
 814
 815 def fubar(msg, exit_code=1):
 816     sys.stderr.write("E: %s\n" % (msg))
 817     sys.exit(exit_code)
 818
 819 def warn(msg):
 820     sys.stderr.write("W: %s\n" % (msg))
 821
 822 ################################################################################
 823
 824 # Returns the user name with a laughable attempt at rfc822 conformancy
 825 # (read: removing stray periods).
 826 def whoami ():
 827     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 828
 829 def getusername ():
 830     return pwd.getpwuid(os.getuid())[0]
 831
 832 ################################################################################
 833
 834 def size_type (c):
 835     t  = " B"
 836     if c > 10240:
 837         c = c / 1024
 838         t = " KB"
 839     if c > 10240:
 840         c = c / 1024
 841         t = " MB"
 842     return ("%d%s" % (c, t))
 843
 844 ################################################################################
 845
 846 def cc_fix_changes (changes):
 847     o = changes.get("architecture", "")
 848     if o:
 849         del changes["architecture"]
 850     changes["architecture"] = {}
 851     for j in o.split():
 852         changes["architecture"][j] = 1
 853
 854 def changes_compare (a, b):
 855     """ Sort by source name, source version, 'have source', and then by filename """
 856     try:
 857         a_changes = parse_changes(a)
 858     except:
 859         return -1
 860
 861     try:
 862         b_changes = parse_changes(b)
 863     except:
 864         return 1
 865
 866     cc_fix_changes (a_changes)
 867     cc_fix_changes (b_changes)
 868
 869     # Sort by source name
 870     a_source = a_changes.get("source")
 871     b_source = b_changes.get("source")
 872     q = cmp (a_source, b_source)
 873     if q:
 874         return q
 875
 876     # Sort by source version
 877     a_version = a_changes.get("version", "0")
 878     b_version = b_changes.get("version", "0")
 879     q = apt_pkg.version_compare(a_version, b_version)
 880     if q:
 881         return q
 882
 883     # Sort by 'have source'
 884     a_has_source = a_changes["architecture"].get("source")
 885     b_has_source = b_changes["architecture"].get("source")
 886     if a_has_source and not b_has_source:
 887         return -1
 888     elif b_has_source and not a_has_source:
 889         return 1
 890
 891     # Fall back to sort by filename
 892     return cmp(a, b)
 893
 894 ################################################################################
 895
 896 def find_next_free (dest, too_many=100):
 897     extra = 0
 898     orig_dest = dest
 899     while os.path.exists(dest) and extra < too_many:
 900         dest = orig_dest + '.' + repr(extra)
 901         extra += 1
 902     if extra >= too_many:
 903         raise NoFreeFilenameError
 904     return dest
 905
 906 ################################################################################
 907
 908 def result_join (original, sep = '\t'):
 909     resultlist = []
 910     for i in xrange(len(original)):
 911         if original[i] == None:
 912             resultlist.append("")
 913         else:
 914             resultlist.append(original[i])
 915     return sep.join(resultlist)
 916
 917 ################################################################################
 918
 919 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 920     out = ""
 921     for line in str.split('\n'):
 922         line = line.strip()
 923         if line or include_blank_lines:
 924             out += "%s%s\n" % (prefix, line)
 925     # Strip trailing new line
 926     if out:
 927         out = out[:-1]
 928     return out
 929
 930 ################################################################################
 931
 932 def validate_changes_file_arg(filename, require_changes=1):
 933     """
 934     'filename' is either a .changes or .dak file.  If 'filename' is a
 935     .dak file, it's changed to be the corresponding .changes file.  The
 936     function then checks if the .changes file a) exists and b) is
 937     readable and returns the .changes filename if so.  If there's a
 938     problem, the next action depends on the option 'require_changes'
 939     argument:
 940
 941       - If 'require_changes' == -1, errors are ignored and the .changes
 942         filename is returned.
 943       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 944       - If 'require_changes' == 1, a fatal error is raised.
 945
 946     """
 947     error = None
 948
 949     orig_filename = filename
 950     if filename.endswith(".dak"):
 951         filename = filename[:-4]+".changes"
 952
 953     if not filename.endswith(".changes"):
 954         error = "invalid file type; not a changes file"
 955     else:
 956         if not os.access(filename,os.R_OK):
 957             if os.path.exists(filename):
 958                 error = "permission denied"
 959             else:
 960                 error = "file not found"
 961
 962     if error:
 963         if require_changes == 1:
 964             fubar("%s: %s." % (orig_filename, error))
 965         elif require_changes == 0:
 966             warn("Skipping %s - %s" % (orig_filename, error))
 967             return None
 968         else: # We only care about the .dak file
 969             return filename
 970     else:
 971         return filename
 972
 973 ################################################################################
 974
 975 def real_arch(arch):
 976     return (arch != "source" and arch != "all")
 977
 978 ################################################################################
 979
 980 def join_with_commas_and(list):
 981     if len(list) == 0: return "nothing"
 982     if len(list) == 1: return list[0]
 983     return ", ".join(list[:-1]) + " and " + list[-1]
 984
 985 ################################################################################
 986
 987 def pp_deps (deps):
 988     pp_deps = []
 989     for atom in deps:
 990         (pkg, version, constraint) = atom
 991         if constraint:
 992             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
 993         else:
 994             pp_dep = pkg
 995         pp_deps.append(pp_dep)
 996     return " |".join(pp_deps)
 997
 998 ################################################################################
 999
def get_conf():
    """Return the module-level configuration object, Cnf."""
    return Cnf
1002
1003 ################################################################################
1004
def parse_args(Options):
    """
    Handle -a, -c and -s arguments; returns them as SQL constraints.

    Reads Options["Suite"], Options["Component"] and
    Options["Architecture"].  Unknown names produce a warning; an option
    that yields no valid entry at all is fatal (except that "source" alone
    is an acceptable architecture).

    Returns the tuple (con_suites, con_architectures, con_components,
    check_source).  The first three are "AND ... IN (...)" SQL fragments,
    or empty strings.  check_source is 1 when no architecture was given or
    "source" was among them, else 0.
    """
    # XXX: This should go away and everything which calls it be converted
    #      to use SQLA properly.  For now, we'll just fix it not to use
    #      the old Pg interface though
    session = DBConn().session()

    # Suites
    if not Options["Suite"]:
        con_suites = ""
    else:
        suite_ids = []
        for suitename in split_args(Options["Suite"]):
            suite = get_suite(suitename, session=session)
            if suite and suite.suite_id is not None:
                suite_ids.append(suite.suite_id)
            else:
                warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
        if not suite_ids:
            fubar("No valid suite given.")
        else:
            con_suites = "AND su.id IN (%s)" % ", ".join([ str(i) for i in suite_ids ])

    # Components
    if not Options["Component"]:
        con_components = ""
    else:
        component_ids = []
        for componentname in split_args(Options["Component"]):
            component = get_component(componentname, session=session)
            if component is not None:
                component_ids.append(component.component_id)
            else:
                warn("component '%s' not recognised." % (componentname))
        if not component_ids:
            fubar("No valid component given.")
        else:
            con_components = "AND c.id IN (%s)" % ", ".join([ str(i) for i in component_ids ])

    # Architectures
    con_architectures = ""
    check_source = 0
    if not Options["Architecture"]:
        check_source = 1
    else:
        arch_ids = []
        for archname in split_args(Options["Architecture"]):
            if archname == "source":
                # "source" is not looked up; it only sets the flag.
                check_source = 1
                continue
            arch = get_architecture(archname, session=session)
            if arch is not None:
                arch_ids.append(arch.arch_id)
            else:
                warn("architecture '%s' not recognised." % (archname))
        if arch_ids:
            con_architectures = "AND a.id IN (%s)" % ", ".join([ str(i) for i in arch_ids ])
        elif not check_source:
            fubar("No valid architecture given.")

    return (con_suites, con_architectures, con_components, check_source)
1066
1067 ################################################################################
1068
def arch_compare_sw (a, b):
    """
    cmp()-style comparison for sorting lists of architectures.

    'source' sorts before every other architecture; all other names are
    compared normally.
    """

    if a == "source":
        return 0 if b == "source" else -1
    if b == "source":
        return 1

    return cmp (a, b)
1084
1085 ################################################################################
1086
def split_args (s, dwim=1):
    """
    Split command line arguments which can be separated by either commas
    or whitespace.  If dwim is set, it will complain about a string ending
    in a comma, since this usually means someone did 'dak ls -a i386, m68k
    foo' or something and the inevitable confusion resulting from 'm68k'
    being treated as an argument is undesirable.
    """

    # Without any comma the string is split on whitespace.
    if "," not in s:
        return s.split()

    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1102
1103 ################################################################################
1104
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    'cmd' is a shell command line; it is run through '/bin/sh -c' in a
    forked child.  'status_read' and 'status_write' are the two ends of a
    pipe created by the caller; 'status_write' is the only descriptor above
    2 that is left open in the child.

    Returns the tuple (output, status, exit_status): 'output' is everything
    read from the child's stdout and stderr, 'status' is everything read
    from 'status_read', and 'exit_status' is the status value returned by
    os.waitpid().  Both descriptors passed in are closed before returning.
    """

    cmd = ['/bin/sh', '-c', cmd]
    # Pipes for the child's stdin, stdout and stderr respectively.
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # os.dup() returns the lowest free descriptor, so after closing 0
        # and 1 the two dups land on stdin and stdout, and after closing 2
        # the third one lands on stderr.
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        os.close(2)
        os.dup(errin)
        # Close every other descriptor except the status pipe's write end.
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        try:
            os.execvp(cmd[0], cmd)
        finally:
            # Only reached when the exec failed.
            os._exit(1)

    # Parent
    os.close(p2cread)
    # After these dup2() calls the descriptor numbers c2pwrite and errin
    # refer to the *read* ends of their pipes in the parent (the parent's
    # copies of the write ends are closed as a side effect), which is why
    # the loop below reads from c2pwrite and errin.
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                if fd == c2pwrite or fd == errin:
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        if not more_data:
            # No ready descriptor produced any data this round: collect the
            # child and close all descriptors.
            pid, exit_status = os.waitpid(pid, 0)
            try:
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                # The first failing close skips the remaining ones.
                pass
            break

    return output, status, exit_status
1169
1170 ################################################################################
1171
def process_gpgv_output(status):
    """
    Parse the text gpgv wrote to its status fd.

    Every non-blank line is expected to look like
    "[GNUPG:] KEYWORD [args...]".  Returns the tuple
    (keywords, internal_error): 'keywords' maps each keyword to its list of
    arguments, and 'internal_error' holds one message line per malformed or
    unexpectedly duplicated status line (an empty string when there were
    none).
    """
    # These tokens may be repeated; a later occurrence replaces the
    # arguments stored for an earlier one.
    repeatable = ("NODATA", "SIGEXPIRED", "KEYEXPIRED")

    keywords = {}
    errors = []
    for line in status.split('\n'):
        line = line.strip()
        if not line:
            continue
        atoms = line.split()
        if len(atoms) < 2:
            errors.append("gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line))
            continue
        gnupg, keyword = atoms[0], atoms[1]
        if gnupg != "[GNUPG:]":
            errors.append("gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg))
            continue
        if keyword in keywords and keyword not in repeatable:
            errors.append("found duplicate status token ('%s').\n" % (keyword))
            continue
        keywords[keyword] = atoms[2:]

    return (keywords, "".join(errors))
1196
1197 ################################################################################
1198
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Retrieve the key that signed 'filename' from 'keyserver' and
    add it to 'keyring'.  Returns an empty string on success, or an error
    message on error.

    'keyserver' defaults to the Dinstall::KeyServer configuration value
    and 'keyring' to the primary keyring path.
    """

    # Fall back to the configured keyserver and the primary keyring
    keyserver = keyserver or Cnf["Dinstall::KeyServer"]
    keyring = keyring or get_primary_keyring_path()

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Run gpgv against an empty keyring; only the status-fd output is used
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    status = gpgv_get_status_output(cmd, status_read, status_write)[1]

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"

    fingerprint = keywords["NO_PUBKEY"][0]
    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    cmd = "gpg --no-default-keyring --secret-keyring=%s --no-options" \
          " --keyring %s --keyserver %s --recv-key %s" \
          % (Cnf["Dinstall::SigningKeyring"], keyring, keyserver, fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if result != 0:
        return "'%s' failed with exit code %s" % (cmd, result)

    return ""
1242
1243 ################################################################################
1244
def gpg_keyring_args(keyrings=None):
    """
    Return the "--keyring <path>" command line arguments for 'keyrings' as
    one space separated string.  The active keyring paths are used when
    'keyrings' is empty or not given.
    """
    selected = keyrings or get_active_keyring_paths()
    options = []
    for x in selected:
        options.append("--keyring %s" % x)
    return " ".join(options)
1250
1251 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type sig_filename: string
    @param sig_filename: name of the file whose signature should be checked

    @type data_filename: string
    @param data_filename: optional name of the file a detached signature
                          applies to

    @type keyrings: list
    @param keyrings: optional *list* of keyrings to use; the names of all
                     active keyrings in the database are used when empty

    @type autofetch: bool or None
    @param autofetch: whether to try to fetch the signing key first.  If
                      None, the Dinstall::KeyAutoFetch configuration value
                      is used.

    @param session: database session used to look up the active keyrings

    @rtype: tuple
    @return: (fingerprint, []) if the signature is valid, otherwise
             (None, rejects) where rejects is a list of messages
             describing what is wrong
    """

    rejects = []

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        # list.append() takes exactly one argument; each message is added
        # on its own.
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # gpgv may report the token without a key id; never leave 'key'
        # unbound or carried over from another token.
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        key = args[0] if len(args) >= 1 else "unknown"
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate = ""
        if len(args) >= 1:
            timestamp = args[0]
            if timestamp.count("T") == 0:
                # No "T" in the value: treat it as seconds since the epoch
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    fingerprint = None
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
                          SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
                          NODATA="",NOTATION_DATA="",NOTATION_NAME="",KEYEXPIRED="",POLICY_URL="")

    for keyword in keywords.keys():
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        return (fingerprint, [])
1384
1385 ################################################################################
1386
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve email addresses from gpg key uids for a given fingerprint.

    Results are kept in key_uid_email_cache.  Addresses ending in
    '@debian.org' are placed at the front of the returned list.  The list
    is empty when the gpg call fails or no uid matches.
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    found = []
    cmd = "gpg --no-default-keyring %s --fingerprint %s" \
                % (gpg_keyring_args(), fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if result == 0:
        for uid_line in output.split('\n'):
            match = re_gpg_uid.match(uid_line)
            if match:
                address = match.group(1)
                if address.endswith('@debian.org'):
                    # prefer @debian.org addresses
                    # TODO: maybe not hardcode the domain
                    found.insert(0, address)
                else:
                    found.append(address)
    key_uid_email_cache[fingerprint] = found
    return found
1410
1411 ################################################################################
1412
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of 'root' is removed from both paths and one '../'
    is put in front of 'src' for every component of the directory part of
    'dest'.  Returns the fixed 'src'.
    """
    rel_src = src.replace(root, '', 1)
    rel_dest_dir = os.path.dirname(dest.replace(root, '', 1))
    depth = rel_dest_dir.count('/') + 1
    return ('../' * depth) + rel_src
1423
1424 ################################################################################
1425
def temp_filename(directory=None, prefix="dak", suffix=""):
    """
    Create a new, uniquely named temporary file and return it.

    If 'directory' is non-null, the file is created in that directory.
    If 'prefix' is non-null, the filename starts with it (default: dak).
    If 'suffix' is non-null, the filename ends with it.

    Returns a pair (fd, name).
    """

    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
1437
1438 ################################################################################
1439
def temp_dirname(parent=None, prefix="dak", suffix=""):
    """
    Create a new, uniquely named temporary directory and return its path.

    If 'parent' is non-null, the directory is created inside it.
    If 'prefix' is non-null, the directory name starts with it (default: dak).
    If 'suffix' is non-null, the directory name ends with it.

    Returns a pathname to the new directory
    """

    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=parent)
1451
1452 ################################################################################
1453
def is_email_alias(email):
    """
    Check if the user part of 'email' is listed in the alias file.

    The alias names (the text before the first ':' of every line of the
    file named by which_alias_file()) are loaded on the first call and
    kept in the module-level alias_cache.  Without an alias file no
    address is an alias.
    """
    global alias_cache
    if alias_cache is None:
        aliases = set()
        aliasfn = which_alias_file()
        if aliasfn:
            # Close the file explicitly instead of relying on garbage
            # collection.
            with open(aliasfn) as alias_file:
                for l in alias_file:
                    aliases.add(l.split(':')[0])
        # Publish the cache only once it is complete, so that a failed
        # read is retried on the next call instead of leaving an empty
        # cache behind.
        alias_cache = aliases
    uid = email.split('@')[0]
    return uid in alias_cache
1465
1466 ################################################################################
1467
def get_changes_files(from_dir):
    """
    Change into the directory 'from_dir' and list all .changes files in it.
    (The chdir is deliberate; it works around broken behaviour on the part
    of p-u/p-a when you're not in the right place.)

    A directory that cannot be entered or read is a fatal error.

    Returns a list of filenames
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        entries = os.listdir(from_dir)
        changes_files = [entry for entry in entries if entry.endswith('.changes')]
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return changes_files
1484
1485 ################################################################################
1486
# Module-level setup, executed when this module is imported.
apt_pkg.init()

# Global configuration object, returned by get_conf().
Cnf = apt_pkg.Configuration()
# The default configuration file is not read when DAK_TEST is set in the
# environment.
if not os.getenv("DAK_TEST"):
    apt_pkg.read_config_file_isc(Cnf,default_config)

# Additionally read the file named by which_conf_file() when it differs
# from the default one.
if which_conf_file() != default_config:
    apt_pkg.read_config_file_isc(Cnf,which_conf_file())
1495
1496 ################################################################################
1497
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at http://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parses a local copy, but let's document the source
    somewhere ;)

    Every usable line has the form "<source>: <entry>|<entry>|..."; the
    first run of digits of each entry is taken as the bug number.  Lines
    without ": " and entries without any digits are skipped.

    If the file cannot be read, a warning is printed and an empty dict is
    returned.

    returns a dict associating source package name with a list of open wnpp
    bugs (Yes, there might be more than one)
    """

    lines = []
    try:
        with open(file) as f:
            lines = f.readlines()
    except IOError:
        print("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any." % file)

    wnpp = {}
    for line in lines:
        splited_line = line.split(": ", 1)
        if len(splited_line) < 2:
            continue
        source, entries = splited_line
        bugs = []
        for wnpp_bug in entries.split("|"):
            # re.search() returns None for an entry without digits.
            match = re.search(r"\d+", wnpp_bug)
            if match:
                bugs.append(match.group())
        wnpp[source] = bugs
    return wnpp
1530
1531 ################################################################################
1532
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    aggregating the Packages.gz files of one architecture (the regular
    file and, if present, the debian-installer one).

    The data is unpacked with gunzip into a temporary file, which is
    removed again before returning.  A failing gunzip call is a fatal
    error.

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
    (fd, temp_file) = temp_filename()
    # Only the name is needed (the shell redirections below write to it);
    # close the descriptor so it is not leaked.
    os.close(fd)
    try:
        (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
        if (result != 0):
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
        if os.path.exists(filename):
            (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
            if (result != 0):
                fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        packages = open_file(temp_file)
        Packages = apt_pkg.ParseTagFile(packages)
    finally:
        # Also runs when fubar() exits, so the temporary file never stays
        # behind.  The already opened file object remains usable.
        os.unlink(temp_file)
    return Packages
1567
1568 ################################################################################
1569
def deb_extract_control(fh):
    """
    Extract DEBIAN/control from a binary package.

    'fh' is handed to apt_inst.DebFile; the data of the "control" member
    of its control part is returned.
    """
    deb = apt_inst.DebFile(fh)
    control_part = deb.control
    return control_part.extractdata("control")
1573
1574 ################################################################################
1575
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """mail addresses to contact for an upload

    The result always contains the maintainer, plus the Changed-By address
    when it differs.  The first address found on the signing key is added
    as well, unless the key already carries the maintainer's or the
    Changed-By address.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if changed_by != maintainer:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    # Nested tests keep the evaluation order: nothing is parsed when the
    # key has no addresses, and the maintainer only when Changed-By is not
    # on the key.
    if len(key_addresses) > 0:
        if fix_maintainer(changed_by)[3] not in key_addresses:
            if fix_maintainer(maintainer)[3] not in key_addresses:
                recipients.append(key_addresses[0])

    encoded_addresses = []
    for address in recipients:
        encoded_addresses.append(fix_maintainer(address)[1])
    return encoded_addresses
1602
1603 ################################################################################
1604
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The editor is taken from $VISUAL, then $EDITOR, and defaults to vi.
    'text' is written to a temporary file, the editor is run on that file,
    and the file is removed again afterwards, whether or not the editor
    succeeded.  subprocess.CalledProcessError is raised when the editor
    exits with a non-zero status.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text
    """
    editor = os.environ.get('VISUAL', os.environ.get('EDITOR', 'vi'))
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False)
    try:
        tmp.write(text)
        # Close (and thereby flush) before the editor opens the file.
        tmp.close()
        subprocess.check_call([editor, tmp.name])
        # Close the handle used for reading the result as well.
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        os.unlink(tmp.name)
1626
1627 ################################################################################
1628
1629 def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
1630     dbsuite = get_suite(suite, session)
1631     overridesuite = dbsuite
1632     if dbsuite.overridesuite is not None:
1633         overridesuite = get_suite(dbsuite.overridesuite, session)
1634     dep_problem = 0
1635     p2c = {}
1636     all_broken = {}
1637     if arches:
1638         all_arches = set(arches)
1639     else:
1640         all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
1641     all_arches -= set(["source", "all"])
1642     metakey_d = get_or_set_metadatakey("Depends", session)
1643     metakey_p = get_or_set_metadatakey("Provides", session)
1644     params = {
1645         'suite_id':     dbsuite.suite_id,
1646         'metakey_d_id': metakey_d.key_id,
1647         'metakey_p_id': metakey_p.key_id,
1648     }
1649     for architecture in all_arches | set(['all']):
1650         deps = {}
1651         sources = {}
1652         virtual_packages = {}
1653         params['arch_id'] = get_architecture(architecture, session).arch_id
1654
1655         statement = '''
1656             SELECT b.id, b.package, s.source, c.name as component,
1657                 (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
1658                 (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
1659                 FROM binaries b
1660                 JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
1661                 JOIN source s ON b.source = s.id
1662                 JOIN files_archive_map af ON b.file = af.file_id
1663                 JOIN component c ON af.component_id = c.id
1664                 WHERE b.architecture = :arch_id'''
1665         query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
1666             from_statement(statement).params(params)
1667         for binary_id, package, source, component, depends, provides in query:
1668             sources[package] = source
1669             p2c[package] = component
1670             if depends is not None:
1671                 deps[package] = depends
1672             # Maintain a counter for each virtual package.  If a
1673             # Provides: exists, set the counter to 0 and count all
1674             # provides by a package not in the list for removal.
1675             # If the counter stays 0 at the end, we know that only
1676             # the to-be-removed packages provided this virtual
1677             # package.
1678             if provides is not None:
1679                 for virtual_pkg in provides.split(","):
1680                     virtual_pkg = virtual_pkg.strip()
1681                     if virtual_pkg == package: continue
1682                     if not virtual_packages.has_key(virtual_pkg):
1683                         virtual_packages[virtual_pkg] = 0
1684                     if package not in removals:
1685                         virtual_packages[virtual_pkg] += 1
1686
1687         # If a virtual package is only provided by the to-be-removed
1688         # packages, treat the virtual package as to-be-removed too.
1689         for virtual_pkg in virtual_packages.keys():
1690             if virtual_packages[virtual_pkg] == 0:
1691                 removals.append(virtual_pkg)
1692
1693         # Check binary dependencies (Depends)
1694         for package in deps.keys():
1695             if package in removals: continue
1696             parsed_dep = []
1697             try:
1698                 parsed_dep += apt_pkg.ParseDepends(deps[package])
1699             except ValueError as e:
1700                 print "Error for package %s: %s" % (package, e)
1701             for dep in parsed_dep:
1702                 # Check for partial breakage.  If a package has a ORed
1703                 # dependency, there is only a dependency problem if all
1704                 # packages in the ORed depends will be removed.
1705                 unsat = 0
1706                 for dep_package, _, _ in dep:
1707                     if dep_package in removals:
1708                         unsat += 1
1709                 if unsat == len(dep):
1710                     component = p2c[package]
1711                     source = sources[package]
1712                     if component != "main":
1713                         source = "%s/%s" % (source, component)
1714                     all_broken.setdefault(source, {}).setdefault(package, set()).add(architecture)
1715                     dep_problem = 1
1716
1717     if all_broken:
1718         if cruft:
1719             print "  - broken Depends:"
1720         else:
1721             print "# Broken Depends:"
1722         for source, bindict in sorted(all_broken.items()):
1723             lines = []
1724             for binary, arches in sorted(bindict.items()):
1725                 if arches == all_arches or 'all' in arches:
1726                     lines.append(binary)
1727                 else:
1728                     lines.append('%s [%s]' % (binary, ' '.join(sorted(arches))))
1729             if cruft:
1730                 print '    %s: %s' % (source, lines[0])
1731             else:
1732                 print '%s: %s' % (source, lines[0])
1733             for line in lines[1:]:
1734                 if cruft:
1735                     print '    ' + ' ' * (len(source) + 2) + line
1736                 else:
1737                     print ' ' * (len(source) + 2) + line
1738         if not cruft:
1739             print
1740
1741     # Check source dependencies (Build-Depends and Build-Depends-Indep)
1742     all_broken.clear()
1743     metakey_bd = get_or_set_metadatakey("Build-Depends", session)
1744     metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
1745     params = {
1746         'suite_id':    dbsuite.suite_id,
1747         'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
1748     }
1749     statement = '''
1750         SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
1751            FROM source s
1752            JOIN source_metadata sm ON s.id = sm.src_id
1753            WHERE s.id in
1754                (SELECT source FROM src_associations
1755                    WHERE suite = :suite_id)
1756                AND sm.key_id in :metakey_ids
1757            GROUP BY s.id, s.source'''
1758     query = session.query('id', 'source', 'build_dep').from_statement(statement). \
1759         params(params)
1760     for source_id, source, build_dep in query:
1761         if source in removals: continue
1762         parsed_dep = []
1763         if build_dep is not None:
1764             # Remove [arch] information since we want to see breakage on all arches
1765             build_dep = re_build_dep_arch.sub("", build_dep)
1766             try:
1767                 parsed_dep += apt_pkg.ParseDepends(build_dep)
1768             except ValueError as e:
1769                 print "Error for source %s: %s" % (source, e)
1770         for dep in parsed_dep:
1771             unsat = 0
1772             for dep_package, _, _ in dep:
1773                 if dep_package in removals:
1774                     unsat += 1
1775             if unsat == len(dep):
1776                 component, = session.query(Component.component_name) \
1777                     .join(Component.overrides) \
1778                     .filter(Override.suite == overridesuite) \
1779                     .filter(Override.package == source) \
1780                     .join(Override.overridetype).filter(OverrideType.overridetype == 'dsc') \
1781                     .first()
1782                 if component != "main":
1783                     source = "%s/%s" % (source, component)
1784                 all_broken.setdefault(source, set()).add(pp_deps(dep))
1785                 dep_problem = 1
1786
1787     if all_broken:
1788         if cruft:
1789             print "  - broken Build-Depends:"
1790         else:
1791             print "# Broken Build-Depends:"
1792         for source, bdeps in sorted(all_broken.items()):
1793             bdeps = sorted(bdeps)
1794             if cruft:
1795                 print '    %s: %s' % (source, bdeps[0])
1796             else:
1797                 print '%s: %s' % (source, bdeps[0])
1798             for bdep in bdeps[1:]:
1799                 if cruft:
1800                     print '    ' + ' ' * (len(source) + 2) + bdep
1801                 else:
1802                     print ' ' * (len(source) + 2) + bdep
1803         if not cruft:
1804             print
1805
1806     return dep_problem