daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import datetime
  27 import email.Header
  28 import os
  29 import pwd
  30 import select
  31 import socket
  32 import shutil
  33 import sys
  34 import tempfile
  35 import traceback
  36 import stat
  37 import apt_inst
  38 import apt_pkg
  39 import time
  40 import re
  41 import email as modemail
  42 import subprocess
  43
  44 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  45                    get_override_type, Keyring, session_wrapper, \
  46                    get_active_keyring_paths, get_primary_keyring_path, \
  47                    get_suite_architectures, get_or_set_metadatakey, DBSource, \
  48                    Component, Override, OverrideType
  49 from sqlalchemy import desc
  50 from dak_exceptions import *
  51 from gpg import SignedFile
  52 from textutils import fix_maintainer
  53 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  54                     re_multi_line_field, re_srchasver, re_taint_free, \
  55                     re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
  56                     re_is_orig_source, re_build_dep_arch
  57
  58 from formats import parse_format, validate_changes_format
  59 from srcformats import get_format_from_string
  60 from collections import defaultdict
  61
  62 ################################################################################
  63
# Fallback configuration paths: which_conf_file() and which_apt_conf_file()
# return these when no host-specific entry is configured.
default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties
default_apt_config = "/etc/dak/apt.conf" #: default apt config, not normally used

alias_cache = None        #: Cache for email alias checks
key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids

# (hashname, function, earliest_changes_version)
# The version is a (major, minor) tuple; check_hash_fields() uses the
# hashname column to decide which "checksums-*" fields are supported.
known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  73
  74 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  75 # code in lenny's Python. This also affects commands.getoutput and
  76 # commands.getstatus.
  77 def dak_getstatusoutput(cmd):
  78     pipe = subprocess.Popen(cmd, shell=True, universal_newlines=True,
  79         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  80
  81     output = pipe.stdout.read()
  82
  83     pipe.wait()
  84
  85     if output[-1:] == '\n':
  86         output = output[:-1]
  87
  88     ret = pipe.wait()
  89     if ret is None:
  90         ret = 0
  91
  92     return ret, output
# Install the replacement: later lookups of commands.getstatusoutput
# (including the one in send_mail below) resolve to dak_getstatusoutput.
commands.getstatusoutput = dak_getstatusoutput
  94
  95 ################################################################################
  96
  97 def html_escape(s):
  98     """ Escape html chars """
  99     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
 100
 101 ################################################################################
 102
 103 def open_file(filename, mode='r'):
 104     """
 105     Open C{file}, return fileobject.
 106
 107     @type filename: string
 108     @param filename: path/filename to open
 109
 110     @type mode: string
 111     @param mode: open mode
 112
 113     @rtype: fileobject
 114     @return: open fileobject
 115
 116     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 117
 118     """
 119     try:
 120         f = open(filename, mode)
 121     except IOError:
 122         raise CantOpenError(filename)
 123     return f
 124
 125 ################################################################################
 126
 127 def our_raw_input(prompt=""):
 128     if prompt:
 129         while 1:
 130             try:
 131                 sys.stdout.write(prompt)
 132                 break
 133             except IOError:
 134                 pass
 135     sys.stdout.flush()
 136     try:
 137         ret = raw_input()
 138         return ret
 139     except EOFError:
 140         sys.stderr.write("\nUser interrupt (^D).\n")
 141         raise SystemExit
 142
 143 ################################################################################
 144
 145 def extract_component_from_section(section, session=None):
 146     component = ""
 147
 148     if section.find('/') != -1:
 149         component = section.split('/')[0]
 150
 151     # Expand default component
 152     if component == "":
 153         comp = get_component(section, session)
 154         if comp is None:
 155             component = "main"
 156         else:
 157             component = comp.component_name
 158
 159     return (section, component)
 160
 161 ################################################################################
 162
 163 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 164     require_signature = True
 165     if keyrings == None:
 166         keyrings = []
 167         require_signature = False
 168
 169     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 170     contents = signed_file.contents
 171
 172     error = ""
 173     changes = {}
 174
 175     # Split the lines in the input, keeping the linebreaks.
 176     lines = contents.splitlines(True)
 177
 178     if len(lines) == 0:
 179         raise ParseChangesError("[Empty changes file]")
 180
 181     # Reindex by line number so we can easily verify the format of
 182     # .dsc files...
 183     index = 0
 184     indexed_lines = {}
 185     for line in lines:
 186         index += 1
 187         indexed_lines[index] = line[:-1]
 188
 189     num_of_lines = len(indexed_lines.keys())
 190     index = 0
 191     first = -1
 192     while index < num_of_lines:
 193         index += 1
 194         line = indexed_lines[index]
 195         if line == "" and signing_rules == 1:
 196             if index != num_of_lines:
 197                 raise InvalidDscError(index)
 198             break
 199         slf = re_single_line_field.match(line)
 200         if slf:
 201             field = slf.groups()[0].lower()
 202             changes[field] = slf.groups()[1]
 203             first = 1
 204             continue
 205         if line == " .":
 206             changes[field] += '\n'
 207             continue
 208         mlf = re_multi_line_field.match(line)
 209         if mlf:
 210             if first == -1:
 211                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 212             if first == 1 and changes[field] != "":
 213                 changes[field] += '\n'
 214             first = 0
 215             changes[field] += mlf.groups()[0] + '\n'
 216             continue
 217         error += line
 218
 219     changes["filecontents"] = armored_contents
 220
 221     if changes.has_key("source"):
 222         # Strip the source version in brackets from the source field,
 223         # put it in the "source-version" field instead.
 224         srcver = re_srchasver.search(changes["source"])
 225         if srcver:
 226             changes["source"] = srcver.group(1)
 227             changes["source-version"] = srcver.group(2)
 228
 229     if error:
 230         raise ParseChangesError(error)
 231
 232     return changes
 233
 234 ################################################################################
 235
 236 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 237     """
 238     Parses a changes file and returns a dictionary where each field is a
 239     key.  The mandatory first argument is the filename of the .changes
 240     file.
 241
 242     signing_rules is an optional argument:
 243
 244       - If signing_rules == -1, no signature is required.
 245       - If signing_rules == 0 (the default), a signature is required.
 246       - If signing_rules == 1, it turns on the same strict format checking
 247         as dpkg-source.
 248
 249     The rules for (signing_rules == 1)-mode are:
 250
 251       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 252         followed by any PGP header data and must end with a blank line.
 253
 254       - The data section must end with a blank line and must be followed by
 255         "-----BEGIN PGP SIGNATURE-----".
 256     """
 257
 258     changes_in = open_file(filename)
 259     content = changes_in.read()
 260     changes_in.close()
 261     try:
 262         unicode(content, 'utf-8')
 263     except UnicodeError:
 264         raise ChangesUnicodeError("Changes file not proper utf-8")
 265     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 266
 267
 268     if not dsc_file:
 269         # Finally ensure that everything needed for .changes is there
 270         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 271                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 272
 273         missingfields=[]
 274         for keyword in must_keywords:
 275             if not changes.has_key(keyword.lower()):
 276                 missingfields.append(keyword)
 277
 278                 if len(missingfields):
 279                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 280
 281     return changes
 282
 283 ################################################################################
 284
 285 def hash_key(hashname):
 286     return '%ssum' % hashname
 287
 288 ################################################################################
 289
 290 def create_hash(where, files, hashname, hashfunc):
 291     """
 292     create_hash extends the passed files dict with the given hash by
 293     iterating over all files on disk and passing them to the hashing
 294     function given.
 295     """
 296
 297     rejmsg = []
 298     for f in files.keys():
 299         try:
 300             file_handle = open_file(f)
 301         except CantOpenError:
 302             rejmsg.append("Could not open file %s for checksumming" % (f))
 303             continue
 304
 305         files[f][hash_key(hashname)] = hashfunc(file_handle)
 306
 307         file_handle.close()
 308     return rejmsg
 309
 310 ################################################################################
 311
 312 def check_hash(where, files, hashname, hashfunc):
 313     """
 314     check_hash checks the given hash in the files dict against the actual
 315     files on disk.  The hash values need to be present consistently in
 316     all file entries.  It does not modify its input in any way.
 317     """
 318
 319     rejmsg = []
 320     for f in files.keys():
 321         file_handle = None
 322         try:
 323             try:
 324                 file_handle = open_file(f)
 325
 326                 # Check for the hash entry, to not trigger a KeyError.
 327                 if not files[f].has_key(hash_key(hashname)):
 328                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 329                         where))
 330                     continue
 331
 332                 # Actually check the hash for correctness.
 333                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 334                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 335                         where))
 336             except CantOpenError:
 337                 # TODO: This happens when the file is in the pool.
 338                 # warn("Cannot open file %s" % f)
 339                 continue
 340         finally:
 341             if file_handle:
 342                 file_handle.close()
 343     return rejmsg
 344
 345 ################################################################################
 346
 347 def check_size(where, files):
 348     """
 349     check_size checks the file sizes in the passed files dict against the
 350     files on disk.
 351     """
 352
 353     rejmsg = []
 354     for f in files.keys():
 355         try:
 356             entry = os.stat(f)
 357         except OSError as exc:
 358             if exc.errno == 2:
 359                 # TODO: This happens when the file is in the pool.
 360                 continue
 361             raise
 362
 363         actual_size = entry[stat.ST_SIZE]
 364         size = int(files[f]["size"])
 365         if size != actual_size:
 366             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 367                    % (f, actual_size, size, where))
 368     return rejmsg
 369
 370 ################################################################################
 371
 372 def check_dsc_files(dsc_filename, dsc, dsc_files):
 373     """
 374     Verify that the files listed in the Files field of the .dsc are
 375     those expected given the announced Format.
 376
 377     @type dsc_filename: string
 378     @param dsc_filename: path of .dsc file
 379
 380     @type dsc: dict
 381     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 382
 383     @type dsc_files: dict
 384     @param dsc_files: the file list returned by C{build_file_list()}
 385
 386     @rtype: list
 387     @return: all errors detected
 388     """
 389     rejmsg = []
 390
 391     # Ensure .dsc lists proper set of source files according to the format
 392     # announced
 393     has = defaultdict(lambda: 0)
 394
 395     ftype_lookup = (
 396         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 397         (r'diff.gz',                   ('debian_diff',)),
 398         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 399         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 400         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 401         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 402         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 403     )
 404
 405     for f in dsc_files:
 406         m = re_issource.match(f)
 407         if not m:
 408             rejmsg.append("%s: %s in Files field not recognised as source."
 409                           % (dsc_filename, f))
 410             continue
 411
 412         # Populate 'has' dictionary by resolving keys in lookup table
 413         matched = False
 414         for regex, keys in ftype_lookup:
 415             if re.match(regex, m.group(3)):
 416                 matched = True
 417                 for key in keys:
 418                     has[key] += 1
 419                 break
 420
 421         # File does not match anything in lookup table; reject
 422         if not matched:
 423             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 424
 425     # Check for multiple files
 426     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 427         if has[file_type] > 1:
 428             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 429
 430     # Source format specific tests
 431     try:
 432         format = get_format_from_string(dsc['format'])
 433         rejmsg.extend([
 434             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 435         ])
 436
 437     except UnknownFormatError:
 438         # Not an error here for now
 439         pass
 440
 441     return rejmsg
 442
 443 ################################################################################
 444
 445 def check_hash_fields(what, manifest):
 446     """
 447     check_hash_fields ensures that there are no checksum fields in the
 448     given dict that we do not know about.
 449     """
 450
 451     rejmsg = []
 452     hashes = map(lambda x: x[0], known_hashes)
 453     for field in manifest:
 454         if field.startswith("checksums-"):
 455             hashname = field.split("-",1)[1]
 456             if hashname not in hashes:
 457                 rejmsg.append("Unsupported checksum field for %s "\
 458                     "in %s" % (hashname, what))
 459     return rejmsg
 460
 461 ################################################################################
 462
 463 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 464     if format >= version:
 465         # The version should contain the specified hash.
 466         func = check_hash
 467
 468         # Import hashes from the changes
 469         rejmsg = parse_checksums(".changes", files, changes, hashname)
 470         if len(rejmsg) > 0:
 471             return rejmsg
 472     else:
 473         # We need to calculate the hash because it can't possibly
 474         # be in the file.
 475         func = create_hash
 476     return func(".changes", files, hashname, hashfunc)
 477
 478 # We could add the orig which might be in the pool to the files dict to
 479 # access the checksums easily.
 480
 481 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 482     """
 483     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 484     in the dsc is correct, i.e. identical to the changes file and if necessary
 485     the pool.  The latter task is delegated to check_hash.
 486     """
 487
 488     rejmsg = []
 489     if not dsc.has_key('Checksums-%s' % (hashname,)):
 490         return rejmsg
 491     # Import hashes from the dsc
 492     parse_checksums(".dsc", dsc_files, dsc, hashname)
 493     # And check it...
 494     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 495     return rejmsg
 496
 497 ################################################################################
 498
 499 def parse_checksums(where, files, manifest, hashname):
 500     rejmsg = []
 501     field = 'checksums-%s' % hashname
 502     if not field in manifest:
 503         return rejmsg
 504     for line in manifest[field].split('\n'):
 505         if not line:
 506             break
 507         clist = line.strip().split(' ')
 508         if len(clist) == 3:
 509             checksum, size, checkfile = clist
 510         else:
 511             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 512             continue
 513         if not files.has_key(checkfile):
 514         # TODO: check for the file's entry in the original files dict, not
 515         # the one modified by (auto)byhand and other weird stuff
 516         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 517         #        (file, hashname, where))
 518             continue
 519         if not files[checkfile]["size"] == size:
 520             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 521                 "in %s" % (checkfile, hashname, where))
 522             continue
 523         files[checkfile][hash_key(hashname)] = checksum
 524     for f in files.keys():
 525         if not files[f].has_key(hash_key(hashname)):
 526             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 527     return rejmsg
 528
 529 ################################################################################
 530
 531 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 532
 533 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 534     files = {}
 535
 536     # Make sure we have a Files: field to parse...
 537     if not changes.has_key(field):
 538         raise NoFilesFieldError
 539
 540     # Validate .changes Format: field
 541     if not is_a_dsc:
 542         validate_changes_format(parse_format(changes['format']), field)
 543
 544     includes_section = (not is_a_dsc) and field == "files"
 545
 546     # Parse each entry/line:
 547     for i in changes[field].split('\n'):
 548         if not i:
 549             break
 550         s = i.split()
 551         section = priority = ""
 552         try:
 553             if includes_section:
 554                 (md5, size, section, priority, name) = s
 555             else:
 556                 (md5, size, name) = s
 557         except ValueError:
 558             raise ParseChangesError(i)
 559
 560         if section == "":
 561             section = "-"
 562         if priority == "":
 563             priority = "-"
 564
 565         (section, component) = extract_component_from_section(section)
 566
 567         files[name] = dict(size=size, section=section,
 568                            priority=priority, component=component)
 569         files[name][hashname] = md5
 570
 571     return files
 572
 573 ################################################################################
 574
 575 # see http://bugs.debian.org/619131
 576 def build_package_list(dsc, session = None):
 577     if not dsc.has_key("package-list"):
 578         return {}
 579
 580     packages = {}
 581
 582     for line in dsc["package-list"].split("\n"):
 583         if not line:
 584             break
 585
 586         fields = line.split()
 587         name = fields[0]
 588         package_type = fields[1]
 589         (section, component) = extract_component_from_section(fields[2])
 590         priority = fields[3]
 591
 592         # Validate type if we have a session
 593         if session and get_override_type(package_type, session) is None:
 594             # Maybe just warn and ignore? exit(1) might be a bit hard...
 595             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 596
 597         if name not in packages or packages[name]["type"] == "dsc":
 598             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 599
 600     return packages
 601
 602 ################################################################################
 603
 604 def send_mail (message, filename=""):
 605     """sendmail wrapper, takes _either_ a message string or a file as arguments"""
 606
 607     maildir = Cnf.get('Dir::Mail')
 608     if maildir:
 609         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 610         path = find_next_free(path)
 611         fh = open(path, 'w')
 612         print >>fh, message,
 613         fh.close()
 614
 615     # Check whether we're supposed to be sending mail
 616     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 617         return
 618
 619     # If we've been passed a string dump it into a temporary file
 620     if message:
 621         (fd, filename) = tempfile.mkstemp()
 622         os.write (fd, message)
 623         os.close (fd)
 624
 625     if Cnf.has_key("Dinstall::MailWhiteList") and \
 626            Cnf["Dinstall::MailWhiteList"] != "":
 627         message_in = open_file(filename)
 628         message_raw = modemail.message_from_file(message_in)
 629         message_in.close();
 630
 631         whitelist = [];
 632         whitelist_in = open_file(Cnf["Dinstall::MailWhiteList"])
 633         try:
 634             for line in whitelist_in:
 635                 if not re_whitespace_comment.match(line):
 636                     if re_re_mark.match(line):
 637                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 638                     else:
 639                         whitelist.append(re.compile(re.escape(line.strip())))
 640         finally:
 641             whitelist_in.close()
 642
 643         # Fields to check.
 644         fields = ["To", "Bcc", "Cc"]
 645         for field in fields:
 646             # Check each field
 647             value = message_raw.get(field, None)
 648             if value != None:
 649                 match = [];
 650                 for item in value.split(","):
 651                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 652                     mail_whitelisted = 0
 653                     for wr in whitelist:
 654                         if wr.match(email):
 655                             mail_whitelisted = 1
 656                             break
 657                     if not mail_whitelisted:
 658                         print "Skipping %s since it's not in %s" % (item, Cnf["Dinstall::MailWhiteList"])
 659                         continue
 660                     match.append(item)
 661
 662                 # Doesn't have any mail in whitelist so remove the header
 663                 if len(match) == 0:
 664                     del message_raw[field]
 665                 else:
 666                     message_raw.replace_header(field, ', '.join(match))
 667
 668         # Change message fields in order if we don't have a To header
 669         if not message_raw.has_key("To"):
 670             fields.reverse()
 671             for field in fields:
 672                 if message_raw.has_key(field):
 673                     message_raw[fields[-1]] = message_raw[field]
 674                     del message_raw[field]
 675                     break
 676             else:
 677                 # Clean up any temporary files
 678                 # and return, as we removed all recipients.
 679                 if message:
 680                     os.unlink (filename);
 681                 return;
 682
 683         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 684         os.write (fd, message_raw.as_string(True));
 685         os.close (fd);
 686
 687     # Invoke sendmail
 688     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 689     if (result != 0):
 690         raise SendmailFailedError(output)
 691
 692     # Clean up any temporary files
 693     if message:
 694         os.unlink (filename)
 695
 696 ################################################################################
 697
 698 def poolify (source, component=None):
 699     if source[:3] == "lib":
 700         return source[:4] + '/' + source + '/'
 701     else:
 702         return source[:1] + '/' + source + '/'
 703
 704 ################################################################################
 705
 706 def move (src, dest, overwrite = 0, perms = 0o664):
 707     if os.path.exists(dest) and os.path.isdir(dest):
 708         dest_dir = dest
 709     else:
 710         dest_dir = os.path.dirname(dest)
 711     if not os.path.exists(dest_dir):
 712         umask = os.umask(00000)
 713         os.makedirs(dest_dir, 0o2775)
 714         os.umask(umask)
 715     #print "Moving %s to %s..." % (src, dest)
 716     if os.path.exists(dest) and os.path.isdir(dest):
 717         dest += '/' + os.path.basename(src)
 718     # Don't overwrite unless forced to
 719     if os.path.exists(dest):
 720         if not overwrite:
 721             fubar("Can't move %s to %s - file already exists." % (src, dest))
 722         else:
 723             if not os.access(dest, os.W_OK):
 724                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 725     shutil.copy2(src, dest)
 726     os.chmod(dest, perms)
 727     os.unlink(src)
 728
 729 def copy (src, dest, overwrite = 0, perms = 0o664):
 730     if os.path.exists(dest) and os.path.isdir(dest):
 731         dest_dir = dest
 732     else:
 733         dest_dir = os.path.dirname(dest)
 734     if not os.path.exists(dest_dir):
 735         umask = os.umask(00000)
 736         os.makedirs(dest_dir, 0o2775)
 737         os.umask(umask)
 738     #print "Copying %s to %s..." % (src, dest)
 739     if os.path.exists(dest) and os.path.isdir(dest):
 740         dest += '/' + os.path.basename(src)
 741     # Don't overwrite unless forced to
 742     if os.path.exists(dest):
 743         if not overwrite:
 744             raise FileExistsError
 745         else:
 746             if not os.access(dest, os.W_OK):
 747                 raise CantOverwriteError
 748     shutil.copy2(src, dest)
 749     os.chmod(dest, perms)
 750
 751 ################################################################################
 752
 753 def where_am_i ():
 754     res = socket.getfqdn()
 755     database_hostname = Cnf.get("Config::" + res + "::DatabaseHostname")
 756     if database_hostname:
 757         return database_hostname
 758     else:
 759         return res
 760
 761 def which_conf_file ():
 762     if os.getenv('DAK_CONFIG'):
 763         return os.getenv('DAK_CONFIG')
 764
 765     res = socket.getfqdn()
 766     # In case we allow local config files per user, try if one exists
 767     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 768         homedir = os.getenv("HOME")
 769         confpath = os.path.join(homedir, "/etc/dak.conf")
 770         if os.path.exists(confpath):
 771             apt_pkg.ReadConfigFileISC(Cnf,confpath)
 772
 773     # We are still in here, so there is no local config file or we do
 774     # not allow local files. Do the normal stuff.
 775     if Cnf.get("Config::" + res + "::DakConfig"):
 776         return Cnf["Config::" + res + "::DakConfig"]
 777
 778     return default_config
 779
 780 def which_apt_conf_file ():
 781     res = socket.getfqdn()
 782     # In case we allow local config files per user, try if one exists
 783     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 784         homedir = os.getenv("HOME")
 785         confpath = os.path.join(homedir, "/etc/dak.conf")
 786         if os.path.exists(confpath):
 787             apt_pkg.ReadConfigFileISC(Cnf,default_config)
 788
 789     if Cnf.get("Config::" + res + "::AptConfig"):
 790         return Cnf["Config::" + res + "::AptConfig"]
 791     else:
 792         return default_apt_config
 793
 794 def which_alias_file():
 795     hostname = socket.getfqdn()
 796     aliasfn = '/var/lib/misc/'+hostname+'/forward-alias'
 797     if os.path.exists(aliasfn):
 798         return aliasfn
 799     else:
 800         return None
 801
 802 ################################################################################
 803
 804 def TemplateSubst(subst_map, filename):
 805     """ Perform a substition of template """
 806     templatefile = open_file(filename)
 807     template = templatefile.read()
 808     for k, v in subst_map.iteritems():
 809         template = template.replace(k, str(v))
 810     templatefile.close()
 811     return template
 812
 813 ################################################################################
 814
 815 def fubar(msg, exit_code=1):
 816     sys.stderr.write("E: %s\n" % (msg))
 817     sys.exit(exit_code)
 818
 819 def warn(msg):
 820     sys.stderr.write("W: %s\n" % (msg))
 821
 822 ################################################################################
 823
 824 # Returns the user name with a laughable attempt at rfc822 conformancy
 825 # (read: removing stray periods).
 826 def whoami ():
 827     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 828
 829 def getusername ():
 830     return pwd.getpwuid(os.getuid())[0]
 831
 832 ################################################################################
 833
 834 def size_type (c):
 835     t  = " B"
 836     if c > 10240:
 837         c = c / 1024
 838         t = " KB"
 839     if c > 10240:
 840         c = c / 1024
 841         t = " MB"
 842     return ("%d%s" % (c, t))
 843
 844 ################################################################################
 845
 846 def cc_fix_changes (changes):
 847     o = changes.get("architecture", "")
 848     if o:
 849         del changes["architecture"]
 850     changes["architecture"] = {}
 851     for j in o.split():
 852         changes["architecture"][j] = 1
 853
 854 def changes_compare (a, b):
 855     """ Sort by source name, source version, 'have source', and then by filename """
 856     try:
 857         a_changes = parse_changes(a)
 858     except:
 859         return -1
 860
 861     try:
 862         b_changes = parse_changes(b)
 863     except:
 864         return 1
 865
 866     cc_fix_changes (a_changes)
 867     cc_fix_changes (b_changes)
 868
 869     # Sort by source name
 870     a_source = a_changes.get("source")
 871     b_source = b_changes.get("source")
 872     q = cmp (a_source, b_source)
 873     if q:
 874         return q
 875
 876     # Sort by source version
 877     a_version = a_changes.get("version", "0")
 878     b_version = b_changes.get("version", "0")
 879     q = apt_pkg.version_compare(a_version, b_version)
 880     if q:
 881         return q
 882
 883     # Sort by 'have source'
 884     a_has_source = a_changes["architecture"].get("source")
 885     b_has_source = b_changes["architecture"].get("source")
 886     if a_has_source and not b_has_source:
 887         return -1
 888     elif b_has_source and not a_has_source:
 889         return 1
 890
 891     # Fall back to sort by filename
 892     return cmp(a, b)
 893
 894 ################################################################################
 895
 896 def find_next_free (dest, too_many=100):
 897     extra = 0
 898     orig_dest = dest
 899     while os.path.exists(dest) and extra < too_many:
 900         dest = orig_dest + '.' + repr(extra)
 901         extra += 1
 902     if extra >= too_many:
 903         raise NoFreeFilenameError
 904     return dest
 905
 906 ################################################################################
 907
 908 def result_join (original, sep = '\t'):
 909     resultlist = []
 910     for i in xrange(len(original)):
 911         if original[i] == None:
 912             resultlist.append("")
 913         else:
 914             resultlist.append(original[i])
 915     return sep.join(resultlist)
 916
 917 ################################################################################
 918
 919 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 920     out = ""
 921     for line in str.split('\n'):
 922         line = line.strip()
 923         if line or include_blank_lines:
 924             out += "%s%s\n" % (prefix, line)
 925     # Strip trailing new line
 926     if out:
 927         out = out[:-1]
 928     return out
 929
 930 ################################################################################
 931
 932 def validate_changes_file_arg(filename, require_changes=1):
 933     """
 934     'filename' is either a .changes or .dak file.  If 'filename' is a
 935     .dak file, it's changed to be the corresponding .changes file.  The
 936     function then checks if the .changes file a) exists and b) is
 937     readable and returns the .changes filename if so.  If there's a
 938     problem, the next action depends on the option 'require_changes'
 939     argument:
 940
 941       - If 'require_changes' == -1, errors are ignored and the .changes
 942         filename is returned.
 943       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 944       - If 'require_changes' == 1, a fatal error is raised.
 945
 946     """
 947     error = None
 948
 949     orig_filename = filename
 950     if filename.endswith(".dak"):
 951         filename = filename[:-4]+".changes"
 952
 953     if not filename.endswith(".changes"):
 954         error = "invalid file type; not a changes file"
 955     else:
 956         if not os.access(filename,os.R_OK):
 957             if os.path.exists(filename):
 958                 error = "permission denied"
 959             else:
 960                 error = "file not found"
 961
 962     if error:
 963         if require_changes == 1:
 964             fubar("%s: %s." % (orig_filename, error))
 965         elif require_changes == 0:
 966             warn("Skipping %s - %s" % (orig_filename, error))
 967             return None
 968         else: # We only care about the .dak file
 969             return filename
 970     else:
 971         return filename
 972
 973 ################################################################################
 974
 975 def real_arch(arch):
 976     return (arch != "source" and arch != "all")
 977
 978 ################################################################################
 979
 980 def join_with_commas_and(list):
 981     if len(list) == 0: return "nothing"
 982     if len(list) == 1: return list[0]
 983     return ", ".join(list[:-1]) + " and " + list[-1]
 984
 985 ################################################################################
 986
 987 def pp_deps (deps):
 988     pp_deps = []
 989     for atom in deps:
 990         (pkg, version, constraint) = atom
 991         if constraint:
 992             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
 993         else:
 994             pp_dep = pkg
 995         pp_deps.append(pp_dep)
 996     return " |".join(pp_deps)
 997
 998 ################################################################################
 999
def get_conf():
    """ Return the module-wide configuration object Cnf """
    return Cnf
1002
1003 ################################################################################
1004
def parse_args(Options):
    """
    Handle -a, -c and -s arguments; returns them as SQL constraints

    Options["Suite"], Options["Component"] and Options["Architecture"]
    are split with split_args() and every name is looked up in the
    database.  Unknown names only produce a warning, but it is a fatal
    error if an option was given and none of its names is valid (for
    architectures: unless "source" was among them).

    Returns the tuple (con_suites, con_architectures, con_components,
    check_source).  The first three are "AND ... IN (...)" SQL fragments
    or empty strings; check_source is 1 if no architecture was given or
    "source" was requested, else 0.
    """
    # XXX: This should go away and everything which calls it be converted
    #      to use SQLA properly.  For now, we'll just fix it not to use
    #      the old Pg interface though
    session = DBConn().session()

    # Process suite
    con_suites = ""
    if Options["Suite"]:
        suite_ids = []
        for suitename in split_args(Options["Suite"]):
            suite = get_suite(suitename, session=session)
            if suite and suite.suite_id is not None:
                suite_ids.append(suite.suite_id)
            else:
                warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
        if suite_ids:
            con_suites = "AND su.id IN (%s)" % ", ".join(map(str, suite_ids))
        else:
            fubar("No valid suite given.")

    # Process component
    con_components = ""
    if Options["Component"]:
        component_ids = []
        for componentname in split_args(Options["Component"]):
            component = get_component(componentname, session=session)
            if component is not None:
                component_ids.append(component.component_id)
            else:
                warn("component '%s' not recognised." % (componentname))
        if component_ids:
            con_components = "AND c.id IN (%s)" % ", ".join(map(str, component_ids))
        else:
            fubar("No valid component given.")

    # Process architecture
    con_architectures = ""
    check_source = 0
    if not Options["Architecture"]:
        check_source = 1
    else:
        arch_ids = []
        for archname in split_args(Options["Architecture"]):
            # "source" is not looked up, it only switches on check_source
            if archname == "source":
                check_source = 1
                continue
            arch = get_architecture(archname, session=session)
            if arch is not None:
                arch_ids.append(arch.arch_id)
            else:
                warn("architecture '%s' not recognised." % (archname))
        if arch_ids:
            con_architectures = "AND a.id IN (%s)" % ", ".join(map(str, arch_ids))
        elif not check_source:
            fubar("No valid architecture given.")

    return (con_suites, con_architectures, con_components, check_source)
1066
1067 ################################################################################
1068
def arch_compare_sw (a, b):
    """
    Comparison function for sorting lists of architecture names.

    'source' sorts before every other name; all other names are
    compared normally with cmp().
    """
    a_is_source = (a == "source")
    b_is_source = (b == "source")
    if a_is_source or b_is_source:
        # 0 if both are 'source', -1 if only a is, 1 if only b is
        return int(b_is_source) - int(a_is_source)

    return cmp (a, b)
1084
1085 ################################################################################
1086
def split_args (s, dwim=1):
    """
    Split command line arguments which can be separated by either commas
    or whitespace.

    A string without any comma is split on whitespace, every other
    string is split on commas only.  If dwim is set, a string ending in
    a comma is a fatal error, since this usually means someone did
    'dak ls -a i386, m68k foo' or something and the inevitable confusion
    resulting from 'm68k' being treated as an argument is undesirable.
    """
    if "," not in s:
        return s.split()

    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1102
1103 ################################################################################
1104
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    'cmd' is a command line that is run through '/bin/sh -c' in a forked
    child.  'status_read' and 'status_write' are the two ends of a pipe
    created by the caller; 'status_write' is the only descriptor above 2
    that is left open in the child.

    Returns the tuple (output, status, exit_status): everything read
    from the child's stdout and stderr (collected in one string),
    everything read from 'status_read', and the status value returned by
    os.waitpid() for the child.  Both descriptors passed in are closed
    before returning.
    """

    cmd = ['/bin/sh', '-c', cmd]
    # One pipe each for the child's stdin, stdout and stderr
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # os.dup() returns the lowest free descriptor, so closing a
        # standard descriptor right before the dup wires the pipe end
        # to it: p2cread -> stdin, c2pwrite -> stdout, errin -> stderr.
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        os.close(2)
        os.dup(errin)
        # Close everything else apart from the status fd; errors from
        # descriptors that are not open are ignored.
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        # Only reached past execvp() if the exec failed: make sure the
        # child never continues running the parent's code.
        try:
            os.execvp(cmd[0], cmd)
        finally:
            os._exit(1)

    # Parent
    os.close(p2cread)
    # Replace the parent's copies of the write ends with duplicates of
    # the read ends; from here on c2pwrite and errin are used for reading.
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        # Block until at least one of the three descriptors is readable
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                if fd == c2pwrite or fd == errin:
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        # None of the readable descriptors delivered any data: treat this
        # as the end of the output, reap the child and clean up.
        if not more_data:
            pid, exit_status = os.waitpid(pid, 0)
            # NOTE(review): one try block covers all closes, so the first
            # failing close() skips the remaining ones.
            try:
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                pass
            break

    return output, status, exit_status
1169
1170 ################################################################################
1171
def process_gpgv_output(status):
    """
    Parse the output gpgv wrote to its status fd.

    Every non-empty line of 'status' has to look like
    "[GNUPG:] KEYWORD [args...]".

    Returns the tuple (keywords, internal_error).  'keywords' maps each
    keyword to the list of its arguments.  'internal_error' is a string
    with one newline terminated message per malformed line or duplicated
    keyword; it is empty if the whole output could be parsed.
    """
    # Keywords which may show up more than once; the last occurrence wins
    repeatable = ("NODATA", "SIGEXPIRED", "KEYEXPIRED")

    keywords = {}
    errors = []
    for line in status.split('\n'):
        line = line.strip()
        if not line:
            continue
        atoms = line.split()
        if len(atoms) < 2:
            errors.append("gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line))
            continue
        (gnupg, keyword) = atoms[:2]
        if gnupg != "[GNUPG:]":
            errors.append("gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg))
            continue
        if keyword in keywords and keyword not in repeatable:
            # Keep the arguments of the first occurrence
            errors.append("found duplicate status token ('%s').\n" % (keyword))
            continue
        keywords[keyword] = atoms[2:]

    return (keywords, "".join(errors))
1196
1197 ################################################################################
1198
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Fetch the key 'filename' was signed with from 'keyserver' and add it
    to 'keyring'.

    'keyserver' defaults to Dinstall::KeyServer from the configuration,
    'keyring' to the primary keyring.  The key is identified by running
    gpgv on the file with an empty keyring and looking at the NO_PUBKEY
    token it reports.

    Returns an empty string on success, or an error message on error.
    """

    # Defaults for keyserver and keyring
    keyserver = keyserver or Cnf["Dinstall::KeyServer"]
    keyring = keyring or get_primary_keyring_path()

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Invoke gpgv on the file; only the status-fd output is of interest
    status_read, status_write = os.pipe()
    gpgv_cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    status = gpgv_get_status_output(gpgv_cmd, status_read, status_write)[1]

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"

    fingerprint = keywords["NO_PUBKEY"][0]
    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    cmd = "gpg --no-default-keyring --secret-keyring=%s --no-options --keyring %s --keyserver %s --recv-key %s" \
          % (Cnf["Dinstall::SigningKeyring"], keyring, keyserver, fingerprint)
    (result, output) = commands.getstatusoutput(cmd)
    if result != 0:
        return "'%s' failed with exit code %s" % (cmd, result)

    return ""
1242
1243 ################################################################################
1244
def gpg_keyring_args(keyrings=None):
    """
    Build the "--keyring ..." command line arguments for gpg/gpgv.

    'keyrings' is a list of keyring paths; if it is empty or None the
    paths returned by get_active_keyring_paths() are used.  Returns one
    "--keyring <path>" option per keyring, separated by spaces.
    """
    if not keyrings:
        keyrings = get_active_keyring_paths()

    options = ["--keyring %s" % path for path in keyrings]
    return " ".join(options)
1250
1251 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type  sig_filename: string
    @param sig_filename: name of the file whose signature should be checked

    @type  data_filename: string
    @param data_filename: optional; name of the file a detached signature
                          applies to

    @type  keyrings: list of strings
    @param keyrings: optional; a *list* of keyrings to use.  Defaults to
                     the names of all active keyrings in the database.

    @param autofetch: None, True or False.  If None, the default behaviour
                      specified in the config (Dinstall::KeyAutoFetch) is
                      used.  If enabled, retrieve_key() is called for the
                      file before the signature is checked.

    @param session: database session used to look up the keyrings

    @rtype:  tuple
    @return: (fingerprint, []) if the signature is valid, otherwise
             (None, rejects) where rejects is a list of messages that
             describe what is wrong.
    """

    rejects = []

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        # list.append() takes exactly one argument, every message is an
        # entry of its own
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # Never leave 'key' unbound (or stale) if gpgv gave no key id
        key = args[0] if args else "unknown"
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        key = args[0] if args else "unknown"
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate=""
        if len(args) >= 1:
            timestamp = args[0]
            # A value without a "T" is taken as seconds since the epoch,
            # anything else is reported as given
            if timestamp.count("T") == 0:
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    fingerprint = None
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = frozenset([
        "VALIDSIG", "SIG_ID", "GOODSIG", "BADSIG", "ERRSIG",
        "SIGEXPIRED", "KEYREVOKED", "NO_PUBKEY", "BADARMOR",
        "NODATA", "NOTATION_DATA", "NOTATION_NAME", "KEYEXPIRED", "POLICY_URL",
    ])

    for keyword in keywords:
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        return (fingerprint, [])
1384
1385 ################################################################################
1386
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve email addresses from gpg key uids for a given fingerprint.

    gpg is run on the active keyrings and every output line matching
    re_gpg_uid contributes one address.  The result (an empty list if
    gpg fails) is stored in key_uid_email_cache, and later calls for the
    same fingerprint are answered from there.
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    cmd = "gpg --no-default-keyring %s --fingerprint %s" \
                % (gpg_keyring_args(), fingerprint)
    (result, output) = commands.getstatusoutput(cmd)

    found = []
    if result == 0:
        uid_matches = [re_gpg_uid.match(line) for line in output.split('\n')]
        found = [m.group(1) for m in uid_matches if m]

    key_uid_email_cache[fingerprint] = found
    return found
1403
1404 ################################################################################
1405
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of 'root' is removed from both paths; the
    remaining 'src' is then prefixed with one '../' for every path
    component of the directory part of 'dest'.

    Returns fixed 'src'
    """
    relative_src = src.replace(root, '', 1)
    dest_dir = os.path.dirname(dest.replace(root, '', 1))
    # Number of '/' separated components of the directory
    depth = dest_dir.count('/') + 1
    return '../' * depth + relative_src
1416
1417 ################################################################################
1418
def temp_filename(directory=None, prefix="dak", suffix=""):
    """
    Create a new, uniquely named temporary file with tempfile.mkstemp().

    If 'directory' is non-null, the file is created in that directory.
    If 'prefix' is non-null, the filename starts with it, default is dak.
    If 'suffix' is non-null, the filename ends with it.

    Returns a pair (fd, name): the open descriptor and the filename.
    """
    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
1430
1431 ################################################################################
1432
def temp_dirname(parent=None, prefix="dak", suffix=""):
    """
    Create a new, uniquely named temporary directory with tempfile.mkdtemp().

    If 'parent' is non-null, the directory is created inside of it.
    If 'prefix' is non-null, the directory name starts with it, default is dak.
    If 'suffix' is non-null, the directory name ends with it.

    Returns a pathname to the new directory
    """
    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=parent)
1444
1445 ################################################################################
1446
def is_email_alias(email):
    """
    checks if the user part of the email is listed in the alias file

    The alias file (see which_alias_file()) is read on the first call
    only: the text in front of the first ':' of every line is stored in
    the module level set alias_cache.  Without an alias file the set
    stays empty.

    Returns True if the part of 'email' in front of the first '@' is in
    the set, False otherwise.
    """
    global alias_cache
    if alias_cache is None:
        aliasfn = which_alias_file()
        aliases = set()
        if aliasfn:
            # Make sure the alias file is closed again
            with open(aliasfn) as aliasfile:
                for l in aliasfile:
                    aliases.add(l.split(':')[0])
        # Publish the cache only once it is complete, so that a failed
        # read is retried instead of leaving an empty cache behind
        alias_cache = aliases
    uid = email.split('@')[0]
    return uid in alias_cache
1458
1459 ################################################################################
1460
def get_changes_files(from_dir):
    """
    Change into the directory 'from_dir' and list all .changes files in
    it.  (The chdir is due to broken behaviour on the part of p-u/p-a
    when you're not in the right place.)

    It is a fatal error if the directory cannot be entered or read.

    Returns a list of filenames
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        entries = os.listdir(from_dir)
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return [name for name in entries if name.endswith('.changes')]
1477
1478 ################################################################################
1479
# Module level setup, executed once when this module is imported
apt_pkg.init()

# The configuration object shared by the functions of this module
Cnf = apt_pkg.Configuration()
# The default configuration is skipped if DAK_TEST is set in the environment
if not os.getenv("DAK_TEST"):
    apt_pkg.read_config_file_isc(Cnf,default_config)

# Additionally load the file chosen by which_conf_file() if that is a
# different one
if which_conf_file() != default_config:
    apt_pkg.read_config_file_isc(Cnf,which_conf_file())
1488
1489 ################################################################################
1490
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at http://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parsed a local copy, but let's document the source
    somewhere ;)

    Every line of the form "source: bug|bug|..." is used, other lines are
    ignored.  The bug number of an entry is the first run of digits in
    it; entries without any digits are skipped.  If the file cannot be
    opened, a warning is printed and an empty dict is returned.

    returns a dict associating source package name with a list of open wnpp
    bugs (Yes, there might be more than one)
    """
    try:
        # The with statement closes the file again, even on errors
        with open(file) as f:
            lines = f.readlines()
    except IOError:
        sys.stdout.write("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any.\n" % file)
        lines = []

    wnpp = {}
    for line in lines:
        splited_line = line.split(": ", 1)
        if len(splited_line) < 2:
            continue
        bugs = []
        for wnpp_bug in splited_line[1].split("|"):
            # re.search() returns None for an entry without a number
            found = re.search(r"\d+", wnpp_bug)
            if found:
                bugs.append(found.group())
        wnpp[splited_line[0]] = bugs
    return wnpp
1523
1524 ################################################################################
1525
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    aggregating Packages.gz files gathered for each architecture.

    The Packages.gz of the given suite/component/architecture and, if it
    exists, the matching debian-installer Packages.gz are unpacked into
    one temporary file, which is removed again before returning.  It is
    a fatal error if gunzip fails.

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    (fd, temp_file) = temp_filename()
    # gunzip writes to the file through shell redirection, so only the
    # name is needed: close the descriptor instead of leaking it.
    os.close(fd)
    try:
        filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
        (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
        if (result != 0):
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
        if os.path.exists(filename):
            (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
            if (result != 0):
                fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        packages = open_file(temp_file)
        Packages = apt_pkg.ParseTagFile(packages)
    finally:
        # Remove the temporary file on failure as well; 'packages' stays
        # open, so the parser can still read the unlinked file
        os.unlink(temp_file)
    return Packages
1560
1561 ################################################################################
1562
def deb_extract_control(fh):
    """
    extract DEBIAN/control from a binary package

    'fh' is passed on to apt_inst.DebFile; the data of the member
    "control" of its control part is returned.
    """
    control_part = apt_inst.DebFile(fh).control
    return control_part.extractdata("control")
1566
1567 ################################################################################
1568
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """mail addresses to contact for an upload

    The result always contains the maintainer, plus the changed-by
    address if it differs.  If the signing key has addresses and neither
    of the two is among them, the first address of the key is added too.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if changed_by != maintainer:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    if len(key_addresses) > 0:
        # Is the uploader or the maintainer already one of the key's uids?
        already_covered = (fix_maintainer(changed_by)[3] in key_addresses
                           or fix_maintainer(maintainer)[3] in key_addresses)
        if not already_covered:
            recipients.append(key_addresses[0])

    return [fix_maintainer(recipient)[1] for recipient in recipients]
1595
1596 ################################################################################
1597
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The text is written to a temporary file, which is handed to the
    editor named by $VISUAL, else $EDITOR, else 'vi'.  The temporary
    file is removed afterwards.  subprocess.CalledProcessError is raised
    if the editor exits with a non-zero status.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text
    """
    editor = os.environ.get('VISUAL', os.environ.get('EDITOR', 'vi'))
    # Text mode, to match the text mode read below
    tmp = tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False)
    try:
        try:
            tmp.write(text)
        finally:
            # The file has to be closed (and flushed) before the editor
            # opens it, and must not stay open if writing fails
            tmp.close()
        subprocess.check_call([editor, tmp.name])
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        os.unlink(tmp.name)
1619
1620 ################################################################################
1621
def _print_broken_deps(title, broken, cruft):
    """print one section of the reverse dependency report

    @type  title: str
    @param title: kind of dependency being reported, e.g. "Depends"

    @type  broken: dict
    @param broken: maps a source label to a non-empty list of lines
                   describing what would break for that source

    @type  cruft: bool
    @param cruft: if True use the indented layout ("  - broken ..." header,
                  no trailing blank line), otherwise a "# Broken ..." header
                  and a trailing blank line
    """
    if cruft:
        indent = '    '
        print("  - broken %s:" % title)
    else:
        indent = ''
        print("# Broken %s:" % title)
    for label, lines in sorted(broken.items()):
        print('%s%s: %s' % (indent, label, lines[0]))
        # Continuation lines are aligned below the first entry.
        for line in lines[1:]:
            print(indent + ' ' * (len(label) + 2) + line)
    if not cruft:
        print("")


def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
    """check which Depends and Build-Depends break if packages are removed

    A report of broken Depends (per binary and architecture) and broken
    Build-Depends (per source) is printed to stdout.  A virtual package
    that is only provided by to-be-removed packages is treated as removed
    as well.  An ORed dependency only counts as broken if all of its
    alternatives are removed.

    @type  removals: list of str
    @param removals: names of the packages to be removed; not modified

    @type  suite: str
    @param suite: name of the suite to check

    @type  arches: list of str
    @param arches: architectures to check; if empty or None, the
                   architectures of the suite are used.  "source" and "all"
                   are dropped from this set, but binaries of architecture
                   "all" are always checked.

    @type  session: SQLAlchemy session
    @param session: database session; it is used directly for the queries,
                    so despite the default it must not be None

    @type  cruft: bool
    @param cruft: if True, print the report in the indented layout

    @rtype:  int
    @return: 1 if at least one broken dependency was found, 0 otherwise
    """
    dbsuite = get_suite(suite, session)
    overridesuite = dbsuite
    if dbsuite.overridesuite is not None:
        overridesuite = get_suite(dbsuite.overridesuite, session)
    dep_problem = 0
    p2c = {}
    if arches:
        all_arches = set(arches)
    else:
        all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
    all_arches -= set(["source", "all"])

    # Work on a private set: virtual packages get added below, which must
    # not leak into the caller's list, and membership tests stay cheap.
    removal_set = set(removals)

    metakey_d = get_or_set_metadatakey("Depends", session)
    metakey_p = get_or_set_metadatakey("Provides", session)
    params = {
        'suite_id':     dbsuite.suite_id,
        'metakey_d_id': metakey_d.key_id,
        'metakey_p_id': metakey_p.key_id,
    }
    statement = '''
            SELECT b.id, b.package, s.source, c.name as component,
                (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
                (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
                FROM binaries b
                JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
                JOIN source s ON b.source = s.id
                JOIN files_archive_map af ON b.file = af.file_id
                JOIN component c ON af.component_id = c.id
                WHERE b.architecture = :arch_id'''

    # source label -> binary package -> set of architectures it breaks on
    broken_depends = {}
    for architecture in all_arches | set(['all']):
        deps = {}
        sources = {}
        virtual_packages = {}
        params['arch_id'] = get_architecture(architecture, session).arch_id

        query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
            from_statement(statement).params(params)
        for binary_id, package, source, component, depends, provides in query:
            sources[package] = source
            p2c[package] = component
            if depends is not None:
                deps[package] = depends
            # Maintain a counter for each virtual package.  If a
            # Provides: exists, set the counter to 0 and count all
            # provides by a package not in the list for removal.
            # If the counter stays 0 at the end, we know that only
            # the to-be-removed packages provided this virtual
            # package.
            if provides is not None:
                for virtual_pkg in provides.split(","):
                    virtual_pkg = virtual_pkg.strip()
                    if virtual_pkg == package:
                        continue
                    if virtual_pkg not in virtual_packages:
                        virtual_packages[virtual_pkg] = 0
                    if package not in removal_set:
                        virtual_packages[virtual_pkg] += 1

        # If a virtual package is only provided by the to-be-removed
        # packages, treat the virtual package as to-be-removed too.
        for virtual_pkg, remaining_providers in virtual_packages.items():
            if remaining_providers == 0:
                removal_set.add(virtual_pkg)

        # Check binary dependencies (Depends)
        for package, depends in deps.items():
            if package in removal_set:
                continue
            try:
                parsed_dep = apt_pkg.ParseDepends(depends)
            except ValueError as e:
                print("Error for package %s: %s" % (package, e))
                continue
            for dep in parsed_dep:
                # Check for partial breakage.  If a package has a ORed
                # dependency, there is only a dependency problem if all
                # packages in the ORed depends will be removed.
                if not all(dep_package in removal_set for dep_package, _, _ in dep):
                    continue
                component = p2c[package]
                label = sources[package]
                if component != "main":
                    label = "%s/%s" % (label, component)
                broken_depends.setdefault(label, {}).setdefault(package, set()).add(architecture)
                dep_problem = 1

    if broken_depends:
        report = {}
        for label, bindict in broken_depends.items():
            lines = []
            for binary, broken_arches in sorted(bindict.items()):
                # Only list the architectures if the breakage is partial.
                if broken_arches == all_arches or 'all' in broken_arches:
                    lines.append(binary)
                else:
                    lines.append('%s [%s]' % (binary, ' '.join(sorted(broken_arches))))
            report[label] = lines
        _print_broken_deps("Depends", report, cruft)

    # Check source dependencies (Build-Depends and Build-Depends-Indep)
    broken_build_depends = {}
    metakey_bd = get_or_set_metadatakey("Build-Depends", session)
    metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
    params = {
        'suite_id':    dbsuite.suite_id,
        'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
    }
    statement = '''
        SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
           FROM source s
           JOIN source_metadata sm ON s.id = sm.src_id
           WHERE s.id in
               (SELECT source FROM src_associations
                   WHERE suite = :suite_id)
               AND sm.key_id in :metakey_ids
           GROUP BY s.id, s.source'''
    query = session.query('id', 'source', 'build_dep').from_statement(statement). \
        params(params)
    for source_id, source, build_dep in query:
        if source in removal_set:
            continue
        if build_dep is None:
            continue
        # Remove [arch] information since we want to see breakage on all arches
        build_dep = re_build_dep_arch.sub("", build_dep)
        try:
            parsed_dep = apt_pkg.ParseDepends(build_dep)
        except ValueError as e:
            print("Error for source %s: %s" % (source, e))
            continue
        # The label is looked up at most once per source and kept separate
        # from `source`, so the override query always sees the plain
        # source package name.
        label = None
        for dep in parsed_dep:
            if not all(dep_package in removal_set for dep_package, _, _ in dep):
                continue
            if label is None:
                label = source
                row = session.query(Component.component_name) \
                    .join(Component.overrides) \
                    .filter(Override.suite == overridesuite) \
                    .filter(Override.package == source) \
                    .join(Override.overridetype).filter(OverrideType.overridetype == 'dsc') \
                    .first()
                # Without a 'dsc' override the component is unknown; report
                # the source without a component suffix instead of failing.
                if row is not None and row[0] != "main":
                    label = "%s/%s" % (source, row[0])
            broken_build_depends.setdefault(label, set()).add(pp_deps(dep))
            dep_problem = 1

    if broken_build_depends:
        report = dict((label, sorted(bdeps))
                      for label, bdeps in broken_build_depends.items())
        _print_broken_deps("Build-Depends", report, cruft)

    return dep_problem