daklib/utils.py

   1 #!/usr/bin/env python
   2 # vim:set et ts=4 sw=4:
   3
   4 """Utility functions
   5
   6 @contact: Debian FTP Master <ftpmaster@debian.org>
   7 @copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
   8 @license: GNU General Public License version 2 or later
   9 """
  10
  11 # This program is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15
  16 # This program is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  24
  25 import commands
  26 import datetime
  27 import email.Header
  28 import os
  29 import pwd
  30 import select
  31 import socket
  32 import shutil
  33 import sys
  34 import tempfile
  35 import traceback
  36 import stat
  37 import apt_inst
  38 import apt_pkg
  39 import time
  40 import re
  41 import email as modemail
  42 import subprocess
  43
  44 from dbconn import DBConn, get_architecture, get_component, get_suite, \
  45                    get_override_type, Keyring, session_wrapper, \
  46                    get_active_keyring_paths, get_primary_keyring_path, \
  47                    get_suite_architectures, get_or_set_metadatakey, DBSource
  48 from sqlalchemy import desc
  49 from dak_exceptions import *
  50 from gpg import SignedFile
  51 from textutils import fix_maintainer
  52 from regexes import re_html_escaping, html_escaping, re_single_line_field, \
  53                     re_multi_line_field, re_srchasver, re_taint_free, \
  54                     re_gpg_uid, re_re_mark, re_whitespace_comment, re_issource, \
  55                     re_is_orig_source, re_build_dep_arch
  56
  57 from formats import parse_format, validate_changes_format
  58 from srcformats import get_format_from_string
  59 from collections import defaultdict
  60
  61 ################################################################################
  62
default_config = "/etc/dak/dak.conf"     #: default dak config, defines host properties
default_apt_config = "/etc/dak/apt.conf" #: default apt config, not normally used

alias_cache = None        #: Cache for email alias checks
key_uid_email_cache = {}  #: Cache for email addresses from gpg key uids

# Each entry is a (hashname, function, earliest_changes_version) tuple:
# the checksum name, the apt_pkg function that computes it, and a version
# tuple (presumably the first .changes Format version that carries the
# checksum -- confirm against the callers of _ensure_changes_hash).
known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
                ("sha256", apt_pkg.sha256sum, (1, 8))] #: hashes we accept for entries in .changes/.dsc
  72
  73 # Monkeypatch commands.getstatusoutput as it may not return the correct exit
  74 # code in lenny's Python. This also affects commands.getoutput and
  75 # commands.getstatus.
  76 def dak_getstatusoutput(cmd):
  77     pipe = subprocess.Popen(cmd, shell=True, universal_newlines=True,
  78         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  79
  80     output = pipe.stdout.read()
  81
  82     pipe.wait()
  83
  84     if output[-1:] == '\n':
  85         output = output[:-1]
  86
  87     ret = pipe.wait()
  88     if ret is None:
  89         ret = 0
  90
  91     return ret, output
# Install the replacement so that every caller of commands.getstatusoutput
# in this process goes through dak_getstatusoutput.
commands.getstatusoutput = dak_getstatusoutput
  93
  94 ################################################################################
  95
  96 def html_escape(s):
  97     """ Escape html chars """
  98     return re_html_escaping.sub(lambda x: html_escaping.get(x.group(0)), s)
  99
 100 ################################################################################
 101
 102 def open_file(filename, mode='r'):
 103     """
 104     Open C{file}, return fileobject.
 105
 106     @type filename: string
 107     @param filename: path/filename to open
 108
 109     @type mode: string
 110     @param mode: open mode
 111
 112     @rtype: fileobject
 113     @return: open fileobject
 114
 115     @raise CantOpenError: If IOError is raised by open, reraise it as CantOpenError.
 116
 117     """
 118     try:
 119         f = open(filename, mode)
 120     except IOError:
 121         raise CantOpenError(filename)
 122     return f
 123
 124 ################################################################################
 125
 126 def our_raw_input(prompt=""):
 127     if prompt:
 128         while 1:
 129             try:
 130                 sys.stdout.write(prompt)
 131                 break
 132             except IOError:
 133                 pass
 134     sys.stdout.flush()
 135     try:
 136         ret = raw_input()
 137         return ret
 138     except EOFError:
 139         sys.stderr.write("\nUser interrupt (^D).\n")
 140         raise SystemExit
 141
 142 ################################################################################
 143
 144 def extract_component_from_section(section, session=None):
 145     component = ""
 146
 147     if section.find('/') != -1:
 148         component = section.split('/')[0]
 149
 150     # Expand default component
 151     if component == "":
 152         comp = get_component(section, session)
 153         if comp is None:
 154             component = "main"
 155         else:
 156             component = comp.component_name
 157
 158     return (section, component)
 159
 160 ################################################################################
 161
 162 def parse_deb822(armored_contents, signing_rules=0, keyrings=None, session=None):
 163     require_signature = True
 164     if keyrings == None:
 165         keyrings = []
 166         require_signature = False
 167
 168     signed_file = SignedFile(armored_contents, keyrings=keyrings, require_signature=require_signature)
 169     contents = signed_file.contents
 170
 171     error = ""
 172     changes = {}
 173
 174     # Split the lines in the input, keeping the linebreaks.
 175     lines = contents.splitlines(True)
 176
 177     if len(lines) == 0:
 178         raise ParseChangesError("[Empty changes file]")
 179
 180     # Reindex by line number so we can easily verify the format of
 181     # .dsc files...
 182     index = 0
 183     indexed_lines = {}
 184     for line in lines:
 185         index += 1
 186         indexed_lines[index] = line[:-1]
 187
 188     num_of_lines = len(indexed_lines.keys())
 189     index = 0
 190     first = -1
 191     while index < num_of_lines:
 192         index += 1
 193         line = indexed_lines[index]
 194         if line == "" and signing_rules == 1:
 195             if index != num_of_lines:
 196                 raise InvalidDscError(index)
 197             break
 198         slf = re_single_line_field.match(line)
 199         if slf:
 200             field = slf.groups()[0].lower()
 201             changes[field] = slf.groups()[1]
 202             first = 1
 203             continue
 204         if line == " .":
 205             changes[field] += '\n'
 206             continue
 207         mlf = re_multi_line_field.match(line)
 208         if mlf:
 209             if first == -1:
 210                 raise ParseChangesError("'%s'\n [Multi-line field continuing on from nothing?]" % (line))
 211             if first == 1 and changes[field] != "":
 212                 changes[field] += '\n'
 213             first = 0
 214             changes[field] += mlf.groups()[0] + '\n'
 215             continue
 216         error += line
 217
 218     changes["filecontents"] = armored_contents
 219
 220     if changes.has_key("source"):
 221         # Strip the source version in brackets from the source field,
 222         # put it in the "source-version" field instead.
 223         srcver = re_srchasver.search(changes["source"])
 224         if srcver:
 225             changes["source"] = srcver.group(1)
 226             changes["source-version"] = srcver.group(2)
 227
 228     if error:
 229         raise ParseChangesError(error)
 230
 231     return changes
 232
 233 ################################################################################
 234
 235 def parse_changes(filename, signing_rules=0, dsc_file=0, keyrings=None):
 236     """
 237     Parses a changes file and returns a dictionary where each field is a
 238     key.  The mandatory first argument is the filename of the .changes
 239     file.
 240
 241     signing_rules is an optional argument:
 242
 243       - If signing_rules == -1, no signature is required.
 244       - If signing_rules == 0 (the default), a signature is required.
 245       - If signing_rules == 1, it turns on the same strict format checking
 246         as dpkg-source.
 247
 248     The rules for (signing_rules == 1)-mode are:
 249
 250       - The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
 251         followed by any PGP header data and must end with a blank line.
 252
 253       - The data section must end with a blank line and must be followed by
 254         "-----BEGIN PGP SIGNATURE-----".
 255     """
 256
 257     changes_in = open_file(filename)
 258     content = changes_in.read()
 259     changes_in.close()
 260     try:
 261         unicode(content, 'utf-8')
 262     except UnicodeError:
 263         raise ChangesUnicodeError("Changes file not proper utf-8")
 264     changes = parse_deb822(content, signing_rules, keyrings=keyrings)
 265
 266
 267     if not dsc_file:
 268         # Finally ensure that everything needed for .changes is there
 269         must_keywords = ('Format', 'Date', 'Source', 'Binary', 'Architecture', 'Version',
 270                          'Distribution', 'Maintainer', 'Description', 'Changes', 'Files')
 271
 272         missingfields=[]
 273         for keyword in must_keywords:
 274             if not changes.has_key(keyword.lower()):
 275                 missingfields.append(keyword)
 276
 277                 if len(missingfields):
 278                     raise ParseChangesError("Missing mandantory field(s) in changes file (policy 5.5): %s" % (missingfields))
 279
 280     return changes
 281
 282 ################################################################################
 283
 284 def hash_key(hashname):
 285     return '%ssum' % hashname
 286
 287 ################################################################################
 288
 289 def create_hash(where, files, hashname, hashfunc):
 290     """
 291     create_hash extends the passed files dict with the given hash by
 292     iterating over all files on disk and passing them to the hashing
 293     function given.
 294     """
 295
 296     rejmsg = []
 297     for f in files.keys():
 298         try:
 299             file_handle = open_file(f)
 300         except CantOpenError:
 301             rejmsg.append("Could not open file %s for checksumming" % (f))
 302             continue
 303
 304         files[f][hash_key(hashname)] = hashfunc(file_handle)
 305
 306         file_handle.close()
 307     return rejmsg
 308
 309 ################################################################################
 310
 311 def check_hash(where, files, hashname, hashfunc):
 312     """
 313     check_hash checks the given hash in the files dict against the actual
 314     files on disk.  The hash values need to be present consistently in
 315     all file entries.  It does not modify its input in any way.
 316     """
 317
 318     rejmsg = []
 319     for f in files.keys():
 320         file_handle = None
 321         try:
 322             try:
 323                 file_handle = open_file(f)
 324
 325                 # Check for the hash entry, to not trigger a KeyError.
 326                 if not files[f].has_key(hash_key(hashname)):
 327                     rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
 328                         where))
 329                     continue
 330
 331                 # Actually check the hash for correctness.
 332                 if hashfunc(file_handle) != files[f][hash_key(hashname)]:
 333                     rejmsg.append("%s: %s check failed in %s" % (f, hashname,
 334                         where))
 335             except CantOpenError:
 336                 # TODO: This happens when the file is in the pool.
 337                 # warn("Cannot open file %s" % f)
 338                 continue
 339         finally:
 340             if file_handle:
 341                 file_handle.close()
 342     return rejmsg
 343
 344 ################################################################################
 345
 346 def check_size(where, files):
 347     """
 348     check_size checks the file sizes in the passed files dict against the
 349     files on disk.
 350     """
 351
 352     rejmsg = []
 353     for f in files.keys():
 354         try:
 355             entry = os.stat(f)
 356         except OSError as exc:
 357             if exc.errno == 2:
 358                 # TODO: This happens when the file is in the pool.
 359                 continue
 360             raise
 361
 362         actual_size = entry[stat.ST_SIZE]
 363         size = int(files[f]["size"])
 364         if size != actual_size:
 365             rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
 366                    % (f, actual_size, size, where))
 367     return rejmsg
 368
 369 ################################################################################
 370
 371 def check_dsc_files(dsc_filename, dsc, dsc_files):
 372     """
 373     Verify that the files listed in the Files field of the .dsc are
 374     those expected given the announced Format.
 375
 376     @type dsc_filename: string
 377     @param dsc_filename: path of .dsc file
 378
 379     @type dsc: dict
 380     @param dsc: the content of the .dsc parsed by C{parse_changes()}
 381
 382     @type dsc_files: dict
 383     @param dsc_files: the file list returned by C{build_file_list()}
 384
 385     @rtype: list
 386     @return: all errors detected
 387     """
 388     rejmsg = []
 389
 390     # Ensure .dsc lists proper set of source files according to the format
 391     # announced
 392     has = defaultdict(lambda: 0)
 393
 394     ftype_lookup = (
 395         (r'orig.tar.gz',               ('orig_tar_gz', 'orig_tar')),
 396         (r'diff.gz',                   ('debian_diff',)),
 397         (r'tar.gz',                    ('native_tar_gz', 'native_tar')),
 398         (r'debian\.tar\.(gz|bz2|xz)',  ('debian_tar',)),
 399         (r'orig\.tar\.(gz|bz2|xz)',    ('orig_tar',)),
 400         (r'tar\.(gz|bz2|xz)',          ('native_tar',)),
 401         (r'orig-.+\.tar\.(gz|bz2|xz)', ('more_orig_tar',)),
 402     )
 403
 404     for f in dsc_files:
 405         m = re_issource.match(f)
 406         if not m:
 407             rejmsg.append("%s: %s in Files field not recognised as source."
 408                           % (dsc_filename, f))
 409             continue
 410
 411         # Populate 'has' dictionary by resolving keys in lookup table
 412         matched = False
 413         for regex, keys in ftype_lookup:
 414             if re.match(regex, m.group(3)):
 415                 matched = True
 416                 for key in keys:
 417                     has[key] += 1
 418                 break
 419
 420         # File does not match anything in lookup table; reject
 421         if not matched:
 422             reject("%s: unexpected source file '%s'" % (dsc_filename, f))
 423
 424     # Check for multiple files
 425     for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'):
 426         if has[file_type] > 1:
 427             rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type))
 428
 429     # Source format specific tests
 430     try:
 431         format = get_format_from_string(dsc['format'])
 432         rejmsg.extend([
 433             '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has)
 434         ])
 435
 436     except UnknownFormatError:
 437         # Not an error here for now
 438         pass
 439
 440     return rejmsg
 441
 442 ################################################################################
 443
 444 def check_hash_fields(what, manifest):
 445     """
 446     check_hash_fields ensures that there are no checksum fields in the
 447     given dict that we do not know about.
 448     """
 449
 450     rejmsg = []
 451     hashes = map(lambda x: x[0], known_hashes)
 452     for field in manifest:
 453         if field.startswith("checksums-"):
 454             hashname = field.split("-",1)[1]
 455             if hashname not in hashes:
 456                 rejmsg.append("Unsupported checksum field for %s "\
 457                     "in %s" % (hashname, what))
 458     return rejmsg
 459
 460 ################################################################################
 461
 462 def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
 463     if format >= version:
 464         # The version should contain the specified hash.
 465         func = check_hash
 466
 467         # Import hashes from the changes
 468         rejmsg = parse_checksums(".changes", files, changes, hashname)
 469         if len(rejmsg) > 0:
 470             return rejmsg
 471     else:
 472         # We need to calculate the hash because it can't possibly
 473         # be in the file.
 474         func = create_hash
 475     return func(".changes", files, hashname, hashfunc)
 476
 477 # We could add the orig which might be in the pool to the files dict to
 478 # access the checksums easily.
 479
 480 def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
 481     """
 482     ensure_dsc_hashes' task is to ensure that each and every *present* hash
 483     in the dsc is correct, i.e. identical to the changes file and if necessary
 484     the pool.  The latter task is delegated to check_hash.
 485     """
 486
 487     rejmsg = []
 488     if not dsc.has_key('Checksums-%s' % (hashname,)):
 489         return rejmsg
 490     # Import hashes from the dsc
 491     parse_checksums(".dsc", dsc_files, dsc, hashname)
 492     # And check it...
 493     rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
 494     return rejmsg
 495
 496 ################################################################################
 497
 498 def parse_checksums(where, files, manifest, hashname):
 499     rejmsg = []
 500     field = 'checksums-%s' % hashname
 501     if not field in manifest:
 502         return rejmsg
 503     for line in manifest[field].split('\n'):
 504         if not line:
 505             break
 506         clist = line.strip().split(' ')
 507         if len(clist) == 3:
 508             checksum, size, checkfile = clist
 509         else:
 510             rejmsg.append("Cannot parse checksum line [%s]" % (line))
 511             continue
 512         if not files.has_key(checkfile):
 513         # TODO: check for the file's entry in the original files dict, not
 514         # the one modified by (auto)byhand and other weird stuff
 515         #    rejmsg.append("%s: not present in files but in checksums-%s in %s" %
 516         #        (file, hashname, where))
 517             continue
 518         if not files[checkfile]["size"] == size:
 519             rejmsg.append("%s: size differs for files and checksums-%s entry "\
 520                 "in %s" % (checkfile, hashname, where))
 521             continue
 522         files[checkfile][hash_key(hashname)] = checksum
 523     for f in files.keys():
 524         if not files[f].has_key(hash_key(hashname)):
 525             rejmsg.append("%s: no entry in checksums-%s in %s" % (f, hashname, where))
 526     return rejmsg
 527
 528 ################################################################################
 529
 530 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
 531
 532 def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
 533     files = {}
 534
 535     # Make sure we have a Files: field to parse...
 536     if not changes.has_key(field):
 537         raise NoFilesFieldError
 538
 539     # Validate .changes Format: field
 540     if not is_a_dsc:
 541         validate_changes_format(parse_format(changes['format']), field)
 542
 543     includes_section = (not is_a_dsc) and field == "files"
 544
 545     # Parse each entry/line:
 546     for i in changes[field].split('\n'):
 547         if not i:
 548             break
 549         s = i.split()
 550         section = priority = ""
 551         try:
 552             if includes_section:
 553                 (md5, size, section, priority, name) = s
 554             else:
 555                 (md5, size, name) = s
 556         except ValueError:
 557             raise ParseChangesError(i)
 558
 559         if section == "":
 560             section = "-"
 561         if priority == "":
 562             priority = "-"
 563
 564         (section, component) = extract_component_from_section(section)
 565
 566         files[name] = dict(size=size, section=section,
 567                            priority=priority, component=component)
 568         files[name][hashname] = md5
 569
 570     return files
 571
 572 ################################################################################
 573
 574 # see http://bugs.debian.org/619131
 575 def build_package_list(dsc, session = None):
 576     if not dsc.has_key("package-list"):
 577         return {}
 578
 579     packages = {}
 580
 581     for line in dsc["package-list"].split("\n"):
 582         if not line:
 583             break
 584
 585         fields = line.split()
 586         name = fields[0]
 587         package_type = fields[1]
 588         (section, component) = extract_component_from_section(fields[2])
 589         priority = fields[3]
 590
 591         # Validate type if we have a session
 592         if session and get_override_type(package_type, session) is None:
 593             # Maybe just warn and ignore? exit(1) might be a bit hard...
 594             utils.fubar("invalid type (%s) in Package-List." % (package_type))
 595
 596         if name not in packages or packages[name]["type"] == "dsc":
 597             packages[name] = dict(priority=priority, section=section, type=package_type, component=component, files=[])
 598
 599     return packages
 600
 601 ################################################################################
 602
 603 def send_mail (message, filename=""):
 604     """sendmail wrapper, takes _either_ a message string or a file as arguments"""
 605
 606     maildir = Cnf.get('Dir::Mail')
 607     if maildir:
 608         path = os.path.join(maildir, datetime.datetime.now().isoformat())
 609         path = find_next_free(path)
 610         fh = open(path, 'w')
 611         print >>fh, message,
 612         fh.close()
 613
 614     # Check whether we're supposed to be sending mail
 615     if Cnf.has_key("Dinstall::Options::No-Mail") and Cnf["Dinstall::Options::No-Mail"]:
 616         return
 617
 618     # If we've been passed a string dump it into a temporary file
 619     if message:
 620         (fd, filename) = tempfile.mkstemp()
 621         os.write (fd, message)
 622         os.close (fd)
 623
 624     if Cnf.has_key("Dinstall::MailWhiteList") and \
 625            Cnf["Dinstall::MailWhiteList"] != "":
 626         message_in = open_file(filename)
 627         message_raw = modemail.message_from_file(message_in)
 628         message_in.close();
 629
 630         whitelist = [];
 631         whitelist_in = open_file(Cnf["Dinstall::MailWhiteList"])
 632         try:
 633             for line in whitelist_in:
 634                 if not re_whitespace_comment.match(line):
 635                     if re_re_mark.match(line):
 636                         whitelist.append(re.compile(re_re_mark.sub("", line.strip(), 1)))
 637                     else:
 638                         whitelist.append(re.compile(re.escape(line.strip())))
 639         finally:
 640             whitelist_in.close()
 641
 642         # Fields to check.
 643         fields = ["To", "Bcc", "Cc"]
 644         for field in fields:
 645             # Check each field
 646             value = message_raw.get(field, None)
 647             if value != None:
 648                 match = [];
 649                 for item in value.split(","):
 650                     (rfc822_maint, rfc2047_maint, name, email) = fix_maintainer(item.strip())
 651                     mail_whitelisted = 0
 652                     for wr in whitelist:
 653                         if wr.match(email):
 654                             mail_whitelisted = 1
 655                             break
 656                     if not mail_whitelisted:
 657                         print "Skipping %s since it's not in %s" % (item, Cnf["Dinstall::MailWhiteList"])
 658                         continue
 659                     match.append(item)
 660
 661                 # Doesn't have any mail in whitelist so remove the header
 662                 if len(match) == 0:
 663                     del message_raw[field]
 664                 else:
 665                     message_raw.replace_header(field, ', '.join(match))
 666
 667         # Change message fields in order if we don't have a To header
 668         if not message_raw.has_key("To"):
 669             fields.reverse()
 670             for field in fields:
 671                 if message_raw.has_key(field):
 672                     message_raw[fields[-1]] = message_raw[field]
 673                     del message_raw[field]
 674                     break
 675             else:
 676                 # Clean up any temporary files
 677                 # and return, as we removed all recipients.
 678                 if message:
 679                     os.unlink (filename);
 680                 return;
 681
 682         fd = os.open(filename, os.O_RDWR|os.O_EXCL, 0o700);
 683         os.write (fd, message_raw.as_string(True));
 684         os.close (fd);
 685
 686     # Invoke sendmail
 687     (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename))
 688     if (result != 0):
 689         raise SendmailFailedError(output)
 690
 691     # Clean up any temporary files
 692     if message:
 693         os.unlink (filename)
 694
 695 ################################################################################
 696
 697 def poolify (source, component=None):
 698     if source[:3] == "lib":
 699         return source[:4] + '/' + source + '/'
 700     else:
 701         return source[:1] + '/' + source + '/'
 702
 703 ################################################################################
 704
 705 def move (src, dest, overwrite = 0, perms = 0o664):
 706     if os.path.exists(dest) and os.path.isdir(dest):
 707         dest_dir = dest
 708     else:
 709         dest_dir = os.path.dirname(dest)
 710     if not os.path.exists(dest_dir):
 711         umask = os.umask(00000)
 712         os.makedirs(dest_dir, 0o2775)
 713         os.umask(umask)
 714     #print "Moving %s to %s..." % (src, dest)
 715     if os.path.exists(dest) and os.path.isdir(dest):
 716         dest += '/' + os.path.basename(src)
 717     # Don't overwrite unless forced to
 718     if os.path.exists(dest):
 719         if not overwrite:
 720             fubar("Can't move %s to %s - file already exists." % (src, dest))
 721         else:
 722             if not os.access(dest, os.W_OK):
 723                 fubar("Can't move %s to %s - can't write to existing file." % (src, dest))
 724     shutil.copy2(src, dest)
 725     os.chmod(dest, perms)
 726     os.unlink(src)
 727
 728 def copy (src, dest, overwrite = 0, perms = 0o664):
 729     if os.path.exists(dest) and os.path.isdir(dest):
 730         dest_dir = dest
 731     else:
 732         dest_dir = os.path.dirname(dest)
 733     if not os.path.exists(dest_dir):
 734         umask = os.umask(00000)
 735         os.makedirs(dest_dir, 0o2775)
 736         os.umask(umask)
 737     #print "Copying %s to %s..." % (src, dest)
 738     if os.path.exists(dest) and os.path.isdir(dest):
 739         dest += '/' + os.path.basename(src)
 740     # Don't overwrite unless forced to
 741     if os.path.exists(dest):
 742         if not overwrite:
 743             raise FileExistsError
 744         else:
 745             if not os.access(dest, os.W_OK):
 746                 raise CantOverwriteError
 747     shutil.copy2(src, dest)
 748     os.chmod(dest, perms)
 749
 750 ################################################################################
 751
 752 def where_am_i ():
 753     res = socket.getfqdn()
 754     database_hostname = Cnf.get("Config::" + res + "::DatabaseHostname")
 755     if database_hostname:
 756         return database_hostname
 757     else:
 758         return res
 759
 760 def which_conf_file ():
 761     if os.getenv('DAK_CONFIG'):
 762         return os.getenv('DAK_CONFIG')
 763
 764     res = socket.getfqdn()
 765     # In case we allow local config files per user, try if one exists
 766     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 767         homedir = os.getenv("HOME")
 768         confpath = os.path.join(homedir, "/etc/dak.conf")
 769         if os.path.exists(confpath):
 770             apt_pkg.ReadConfigFileISC(Cnf,confpath)
 771
 772     # We are still in here, so there is no local config file or we do
 773     # not allow local files. Do the normal stuff.
 774     if Cnf.get("Config::" + res + "::DakConfig"):
 775         return Cnf["Config::" + res + "::DakConfig"]
 776
 777     return default_config
 778
 779 def which_apt_conf_file ():
 780     res = socket.getfqdn()
 781     # In case we allow local config files per user, try if one exists
 782     if Cnf.find_b("Config::" + res + "::AllowLocalConfig"):
 783         homedir = os.getenv("HOME")
 784         confpath = os.path.join(homedir, "/etc/dak.conf")
 785         if os.path.exists(confpath):
 786             apt_pkg.ReadConfigFileISC(Cnf,default_config)
 787
 788     if Cnf.get("Config::" + res + "::AptConfig"):
 789         return Cnf["Config::" + res + "::AptConfig"]
 790     else:
 791         return default_apt_config
 792
 793 def which_alias_file():
 794     hostname = socket.getfqdn()
 795     aliasfn = '/var/lib/misc/'+hostname+'/forward-alias'
 796     if os.path.exists(aliasfn):
 797         return aliasfn
 798     else:
 799         return None
 800
 801 ################################################################################
 802
 803 def TemplateSubst(subst_map, filename):
 804     """ Perform a substition of template """
 805     templatefile = open_file(filename)
 806     template = templatefile.read()
 807     for k, v in subst_map.iteritems():
 808         template = template.replace(k, str(v))
 809     templatefile.close()
 810     return template
 811
 812 ################################################################################
 813
 814 def fubar(msg, exit_code=1):
 815     sys.stderr.write("E: %s\n" % (msg))
 816     sys.exit(exit_code)
 817
 818 def warn(msg):
 819     sys.stderr.write("W: %s\n" % (msg))
 820
 821 ################################################################################
 822
 823 # Returns the user name with a laughable attempt at rfc822 conformancy
 824 # (read: removing stray periods).
 825 def whoami ():
 826     return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '')
 827
 828 def getusername ():
 829     return pwd.getpwuid(os.getuid())[0]
 830
 831 ################################################################################
 832
 833 def size_type (c):
 834     t  = " B"
 835     if c > 10240:
 836         c = c / 1024
 837         t = " KB"
 838     if c > 10240:
 839         c = c / 1024
 840         t = " MB"
 841     return ("%d%s" % (c, t))
 842
 843 ################################################################################
 844
 845 def cc_fix_changes (changes):
 846     o = changes.get("architecture", "")
 847     if o:
 848         del changes["architecture"]
 849     changes["architecture"] = {}
 850     for j in o.split():
 851         changes["architecture"][j] = 1
 852
 853 def changes_compare (a, b):
 854     """ Sort by source name, source version, 'have source', and then by filename """
 855     try:
 856         a_changes = parse_changes(a)
 857     except:
 858         return -1
 859
 860     try:
 861         b_changes = parse_changes(b)
 862     except:
 863         return 1
 864
 865     cc_fix_changes (a_changes)
 866     cc_fix_changes (b_changes)
 867
 868     # Sort by source name
 869     a_source = a_changes.get("source")
 870     b_source = b_changes.get("source")
 871     q = cmp (a_source, b_source)
 872     if q:
 873         return q
 874
 875     # Sort by source version
 876     a_version = a_changes.get("version", "0")
 877     b_version = b_changes.get("version", "0")
 878     q = apt_pkg.version_compare(a_version, b_version)
 879     if q:
 880         return q
 881
 882     # Sort by 'have source'
 883     a_has_source = a_changes["architecture"].get("source")
 884     b_has_source = b_changes["architecture"].get("source")
 885     if a_has_source and not b_has_source:
 886         return -1
 887     elif b_has_source and not a_has_source:
 888         return 1
 889
 890     # Fall back to sort by filename
 891     return cmp(a, b)
 892
 893 ################################################################################
 894
 895 def find_next_free (dest, too_many=100):
 896     extra = 0
 897     orig_dest = dest
 898     while os.path.exists(dest) and extra < too_many:
 899         dest = orig_dest + '.' + repr(extra)
 900         extra += 1
 901     if extra >= too_many:
 902         raise NoFreeFilenameError
 903     return dest
 904
 905 ################################################################################
 906
 907 def result_join (original, sep = '\t'):
 908     resultlist = []
 909     for i in xrange(len(original)):
 910         if original[i] == None:
 911             resultlist.append("")
 912         else:
 913             resultlist.append(original[i])
 914     return sep.join(resultlist)
 915
 916 ################################################################################
 917
 918 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
 919     out = ""
 920     for line in str.split('\n'):
 921         line = line.strip()
 922         if line or include_blank_lines:
 923             out += "%s%s\n" % (prefix, line)
 924     # Strip trailing new line
 925     if out:
 926         out = out[:-1]
 927     return out
 928
 929 ################################################################################
 930
 931 def validate_changes_file_arg(filename, require_changes=1):
 932     """
 933     'filename' is either a .changes or .dak file.  If 'filename' is a
 934     .dak file, it's changed to be the corresponding .changes file.  The
 935     function then checks if the .changes file a) exists and b) is
 936     readable and returns the .changes filename if so.  If there's a
 937     problem, the next action depends on the option 'require_changes'
 938     argument:
 939
 940       - If 'require_changes' == -1, errors are ignored and the .changes
 941         filename is returned.
 942       - If 'require_changes' == 0, a warning is given and 'None' is returned.
 943       - If 'require_changes' == 1, a fatal error is raised.
 944
 945     """
 946     error = None
 947
 948     orig_filename = filename
 949     if filename.endswith(".dak"):
 950         filename = filename[:-4]+".changes"
 951
 952     if not filename.endswith(".changes"):
 953         error = "invalid file type; not a changes file"
 954     else:
 955         if not os.access(filename,os.R_OK):
 956             if os.path.exists(filename):
 957                 error = "permission denied"
 958             else:
 959                 error = "file not found"
 960
 961     if error:
 962         if require_changes == 1:
 963             fubar("%s: %s." % (orig_filename, error))
 964         elif require_changes == 0:
 965             warn("Skipping %s - %s" % (orig_filename, error))
 966             return None
 967         else: # We only care about the .dak file
 968             return filename
 969     else:
 970         return filename
 971
 972 ################################################################################
 973
 974 def real_arch(arch):
 975     return (arch != "source" and arch != "all")
 976
 977 ################################################################################
 978
 979 def join_with_commas_and(list):
 980     if len(list) == 0: return "nothing"
 981     if len(list) == 1: return list[0]
 982     return ", ".join(list[:-1]) + " and " + list[-1]
 983
 984 ################################################################################
 985
 986 def pp_deps (deps):
 987     pp_deps = []
 988     for atom in deps:
 989         (pkg, version, constraint) = atom
 990         if constraint:
 991             pp_dep = "%s (%s %s)" % (pkg, constraint, version)
 992         else:
 993             pp_dep = pkg
 994         pp_deps.append(pp_dep)
 995     return " |".join(pp_deps)
 996
 997 ################################################################################
 998
 999 def get_conf():
1000     return Cnf
1001
1002 ################################################################################
1003
def parse_args(Options):
    """
    Handle -a, -c and -s arguments; returns them as SQL constraints

    Reads Options["Suite"], Options["Component"] and
    Options["Architecture"].  Unknown names produce a warning; if an option
    is given but none of its names is valid the program is terminated via
    fubar().

    @rtype: tuple
    @return: (con_suites, con_architectures, con_components, check_source).
             The first three are "AND ... IN (...)" SQL fragments or empty
             strings; check_source is 1 if "source" was requested or no
             architecture was given at all, otherwise 0.
    """
    # XXX: This should go away and everything which calls it be converted
    #      to use SQLA properly.  For now, we'll just fix it not to use
    #      the old Pg interface though
    session = DBConn().session()

    # Suites
    con_suites = ""
    if Options["Suite"]:
        suite_ids = []
        for suitename in split_args(Options["Suite"]):
            suite = get_suite(suitename, session=session)
            if suite and suite.suite_id is not None:
                suite_ids.append(suite.suite_id)
            else:
                warn("suite '%s' not recognised." % (suite and suite.suite_name or suitename))
        if not suite_ids:
            fubar("No valid suite given.")
        else:
            con_suites = "AND su.id IN (%s)" % ", ".join(str(i) for i in suite_ids)

    # Components
    con_components = ""
    if Options["Component"]:
        component_ids = []
        for componentname in split_args(Options["Component"]):
            component = get_component(componentname, session=session)
            if component is not None:
                component_ids.append(component.component_id)
            else:
                warn("component '%s' not recognised." % (componentname))
        if not component_ids:
            fubar("No valid component given.")
        else:
            con_components = "AND c.id IN (%s)" % ", ".join(str(i) for i in component_ids)

    # Architectures; "source" is not looked up but only sets a flag
    con_architectures = ""
    check_source = 0
    if not Options["Architecture"]:
        check_source = 1
    else:
        arch_ids = []
        for archname in split_args(Options["Architecture"]):
            if archname == "source":
                check_source = 1
                continue
            arch = get_architecture(archname, session=session)
            if arch is not None:
                arch_ids.append(arch.arch_id)
            else:
                warn("architecture '%s' not recognised." % (archname))
        if arch_ids:
            con_architectures = "AND a.id IN (%s)" % ", ".join(str(i) for i in arch_ids)
        elif not check_source:
            fubar("No valid architecture given.")

    return (con_suites, con_architectures, con_components, check_source)
1065
1066 ################################################################################
1067
def arch_compare_sw (a, b):
    """
    Comparison function for sorting lists of architecture names.

    'source' sorts before everything else; all other names are ordered
    by the ordinary comparison.
    """

    if a == "source":
        return 0 if b == "source" else -1
    if b == "source":
        return 1

    return cmp (a, b)
1083
1084 ################################################################################
1085
def split_args (s, dwim=1):
    """
    Split command line arguments which are separated by either commas or
    whitespace.

    A string containing a comma is split on commas only; any other string
    is split on whitespace.  If dwim is set, a string ending in a comma is
    treated as a fatal error, since this usually means someone did
    'dak ls -a i386, m68k foo' or something and 'm68k' being treated as a
    separate argument would only cause confusion.

    @rtype: list of strings
    """

    if "," not in s:
        return s.split()

    if dwim and s.endswith(","):
        fubar("split_args: found trailing comma, spurious space maybe?")
    return s.split(",")
1101
1102 ################################################################################
1103
def gpgv_get_status_output(cmd, status_read, status_write):
    """
    Our very own version of commands.getstatusoutput(), hacked to support
    gpgv's status fd.

    Runs 'cmd' through /bin/sh in a forked child and collects what the
    child writes to stdout/stderr and what arrives on the status pipe.
    Both ends of the status pipe are closed before returning.

    @type cmd: string
    @param cmd: shell command line to run

    @type status_read: int
    @param status_read: read end of the status pipe; read by the parent

    @type status_write: int
    @param status_write: write end of the status pipe; the only descriptor
                         above 2 which is left open in the child

    @rtype: tuple
    @return: (output, status, exit_status): the combined stdout and stderr
             data, the data read from status_read, and the status value
             returned by os.waitpid()
    """

    cmd = ['/bin/sh', '-c', cmd]
    # Three pipes: parent->child stdin, child->parent stdout, child stderr
    p2cread, p2cwrite = os.pipe()
    c2pread, c2pwrite = os.pipe()
    errout, errin = os.pipe()
    pid = os.fork()
    if pid == 0:
        # Child
        # dup() returns the lowest free descriptor, so after closing 0 and 1
        # the two dup() calls install the pipes as stdin and stdout.
        os.close(0)
        os.close(1)
        os.dup(p2cread)
        os.dup(c2pwrite)
        # Same trick for stderr
        os.close(2)
        os.dup(errin)
        # Close everything else except the status descriptor the command
        # is going to write to; descriptors that are not open are ignored.
        for i in range(3, 256):
            if i != status_write:
                try:
                    os.close(i)
                except:
                    pass
        try:
            os.execvp(cmd[0], cmd)
        finally:
            # Only reached if exec failed; never return into the parent's code
            os._exit(1)

    # Parent
    os.close(p2cread)
    # dup2() replaces the parent's copies of the write ends with the read
    # ends, so from here on c2pwrite and errin are read from.
    os.dup2(c2pread, c2pwrite)
    os.dup2(errout, errin)

    output = status = ""
    while 1:
        i, o, e = select.select([c2pwrite, errin, status_read], [], [])
        more_data = []
        for fd in i:
            r = os.read(fd, 8196)
            if len(r) > 0:
                more_data.append(fd)
                # stdout and stderr are merged into one buffer
                if fd == c2pwrite or fd == errin:
                    output += r
                elif fd == status_read:
                    status += r
                else:
                    fubar("Unexpected file descriptor [%s] returned from select\n" % (fd))
        # select() returned but nothing could be read from any descriptor:
        # treat that as the child being done and collect its exit status.
        if not more_data:
            pid, exit_status = os.waitpid(pid, 0)
            # Best-effort cleanup; the first failing close() ends it
            try:
                os.close(status_write)
                os.close(status_read)
                os.close(c2pread)
                os.close(c2pwrite)
                os.close(p2cwrite)
                os.close(errin)
                os.close(errout)
            except:
                pass
            break

    return output, status, exit_status
1168
1169 ################################################################################
1170
def process_gpgv_output(status):
    """
    Parse the status-fd output of gpgv.

    Every non-empty line has to look like "[GNUPG:] KEYWORD [args...]".
    Malformed lines and repeated keywords are reported in the returned
    error text; NODATA, SIGEXPIRED and KEYEXPIRED may occur repeatedly, in
    which case the arguments of the last occurrence are kept.

    @type status: string
    @param status: text read from gpgv's status fd

    @rtype: tuple
    @return: (keywords, internal_error): a dict mapping each keyword to
             the list of its arguments, and a string with one line per
             problem found (empty if there was none)
    """
    # Process the status-fd output
    keywords = {}
    internal_error = ""
    for line in status.split('\n'):
        line = line.strip()
        if line == "":
            continue
        split = line.split()
        if len(split) < 2:
            internal_error += "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line)
            continue
        (gnupg, keyword) = split[:2]
        if gnupg != "[GNUPG:]":
            internal_error += "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg)
            continue
        args = split[2:]
        if keyword in keywords and keyword not in [ "NODATA", "SIGEXPIRED", "KEYEXPIRED" ]:
            internal_error += "found duplicate status token ('%s').\n" % (keyword)
            continue
        keywords[keyword] = args

    return (keywords, internal_error)
1195
1196 ################################################################################
1197
def retrieve_key (filename, keyserver=None, keyring=None):
    """
    Retrieve the key that signed 'filename' from 'keyserver' and
    add it to 'keyring'.

    'keyserver' defaults to Dinstall::KeyServer from the configuration and
    'keyring' to the primary keyring path.

    @rtype: string
    @return: an empty string on success, or an error message on error
    """

    # Fall back to the configured defaults
    keyserver = keyserver or Cnf["Dinstall::KeyServer"]
    keyring = keyring or get_primary_keyring_path()

    # The filename ends up in a shell command line, so refuse anything
    # containing shell meta-characters or other badness
    if not re_taint_free.match(filename):
        return "%s: tainted filename" % (filename)

    # Run gpgv against an empty keyring to learn which key is missing
    status_read, status_write = os.pipe()
    gpgv_cmd = "gpgv --status-fd %s --keyring /dev/null %s" % (status_write, filename)
    status = gpgv_get_status_output(gpgv_cmd, status_read, status_write)[1]

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)
    if internal_error:
        return internal_error

    if "NO_PUBKEY" not in keywords:
        return "didn't find expected NO_PUBKEY in gpgv status-fd output"

    fingerprint = keywords["NO_PUBKEY"][0]
    # XXX - gpg sucks.  You can't use --secret-keyring=/dev/null as
    # it'll try to create a lockfile in /dev.  A better solution might
    # be a tempfile or something.
    cmd = ("gpg --no-default-keyring --secret-keyring=%s --no-options"
           % (Cnf["Dinstall::SigningKeyring"])
           + " --keyring %s --keyserver %s --recv-key %s"
           % (keyring, keyserver, fingerprint))
    (result, output) = commands.getstatusoutput(cmd)
    if result == 0:
        return ""

    return "'%s' failed with exit code %s" % (cmd, result)
1241
1242 ################################################################################
1243
def gpg_keyring_args(keyrings=None):
    """
    Build the "--keyring ..." command line arguments for gpg/gpgv.

    @type keyrings: list of strings
    @param keyrings: keyring paths; if empty or None the active keyring
                     paths are used

    @rtype: string
    @return: one "--keyring PATH" per keyring, separated by spaces
    """
    paths = keyrings if keyrings else get_active_keyring_paths()
    return " ".join("--keyring %s" % path for path in paths)
1249
1250 ################################################################################
@session_wrapper
def check_signature (sig_filename, data_filename="", keyrings=None, autofetch=None, session=None):
    """
    Check the signature of a file with gpgv.

    @type sig_filename: string
    @param sig_filename: name of the file whose signature should be checked

    @type data_filename: string
    @param data_filename: optional; name of the file a detached signature
                          applies to

    @type keyrings: list of strings
    @param keyrings: optional; a *list* of keyrings to use.  If empty, the
                     names of all active keyrings in the database are used.

    @type autofetch: bool or None
    @param autofetch: whether to try to retrieve the signing key first.  If
                      None, the default behaviour specified in the config
                      (Dinstall::KeyAutoFetch) is used.

    @param session: database session used to look up the active keyrings

    @rtype: tuple
    @return: C{(fingerprint, [])} if the signature is valid, otherwise
             C{(None, rejects)} where C{rejects} is a list of messages
             describing what is wrong
    """

    rejects = []

    # Ensure the filename contains no shell meta-characters or other badness
    if not re_taint_free.match(sig_filename):
        rejects.append("!!WARNING!! tainted signature filename: '%s'." % (sig_filename))
        return (None, rejects)

    if data_filename and not re_taint_free.match(data_filename):
        rejects.append("!!WARNING!! tainted data filename: '%s'." % (data_filename))
        return (None, rejects)

    if not keyrings:
        keyrings = [ x.keyring_name for x in session.query(Keyring).filter(Keyring.active == True).all() ]

    # Autofetch the signing key if that's enabled
    if autofetch is None:
        autofetch = Cnf.get("Dinstall::KeyAutoFetch")
    if autofetch:
        error_msg = retrieve_key(sig_filename)
        if error_msg:
            rejects.append(error_msg)
            return (None, rejects)

    # Build the command line
    status_read, status_write = os.pipe()
    cmd = "gpgv --status-fd %s %s %s %s" % (
        status_write, gpg_keyring_args(keyrings), sig_filename, data_filename)

    # Invoke gpgv on the file
    (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write)

    # Process the status-fd output
    (keywords, internal_error) = process_gpgv_output(status)

    # If we failed to parse the status-fd output, let's just whine and bail now
    if internal_error:
        # list.append() takes exactly one argument
        rejects.append("internal error while performing signature check on %s." % (sig_filename))
        rejects.append(internal_error)
        rejects.append("Please report the above errors to the Archive maintainers by replying to this mail.")
        return (None, rejects)

    # Now check for obviously bad things in the processed output
    if "KEYREVOKED" in keywords:
        rejects.append("The key used to sign %s has been revoked." % (sig_filename))
    if "BADSIG" in keywords:
        rejects.append("bad signature on %s." % (sig_filename))
    if "ERRSIG" in keywords and "NO_PUBKEY" not in keywords:
        rejects.append("failed to check signature on %s." % (sig_filename))
    if "NO_PUBKEY" in keywords:
        args = keywords["NO_PUBKEY"]
        # Don't fail with an unbound name if gpgv gave us no key id
        key = "unknown"
        if len(args) >= 1:
            key = args[0]
        rejects.append("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, sig_filename))
    if "BADARMOR" in keywords:
        rejects.append("ASCII armour of signature was corrupt in %s." % (sig_filename))
    if "NODATA" in keywords:
        rejects.append("no signature found in %s." % (sig_filename))
    if "EXPKEYSIG" in keywords:
        args = keywords["EXPKEYSIG"]
        key = "unknown"
        if len(args) >= 1:
            key = args[0]
        rejects.append("Signature made by expired key 0x%s" % (key))
    if "KEYEXPIRED" in keywords and "GOODSIG" not in keywords:
        args = keywords["KEYEXPIRED"]
        expiredate=""
        if len(args) >= 1:
            timestamp = args[0]
            # A timestamp without a "T" is taken as seconds since the epoch
            # and converted; one containing a "T" is reported as given.
            if timestamp.count("T") == 0:
                try:
                    expiredate = time.strftime("%Y-%m-%d", time.gmtime(float(timestamp)))
                except ValueError:
                    expiredate = "unknown (%s)" % (timestamp)
            else:
                expiredate = timestamp
        rejects.append("The key used to sign %s has expired on %s" % (sig_filename, expiredate))

    if len(rejects) > 0:
        return (None, rejects)

    # Next check gpgv exited with a zero return code
    if exit_status:
        rejects.append("gpgv failed while checking %s." % (sig_filename))
        if status.strip():
            rejects.append(prefix_multi_line_string(status, " [GPG status-fd output:] "))
        else:
            rejects.append(prefix_multi_line_string(output, " [GPG output:] "))
        return (None, rejects)

    # Sanity check the good stuff we expect
    if "VALIDSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No VALIDSIG]." % (sig_filename))
    else:
        args = keywords["VALIDSIG"]
        if len(args) < 1:
            rejects.append("internal error while checking signature on %s." % (sig_filename))
        else:
            fingerprint = args[0]
    if "GOODSIG" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No GOODSIG]." % (sig_filename))
    if "SIG_ID" not in keywords:
        rejects.append("signature on %s does not appear to be valid [No SIG_ID]." % (sig_filename))

    # Finally ensure there's not something we don't recognise
    known_keywords = dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
                          SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
                          NODATA="",NOTATION_DATA="",NOTATION_NAME="",KEYEXPIRED="",POLICY_URL="")

    for keyword in keywords.keys():
        if keyword not in known_keywords:
            rejects.append("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], sig_filename))

    if len(rejects) > 0:
        return (None, rejects)
    else:
        return (fingerprint, [])
1383
1384 ################################################################################
1385
def gpg_get_key_addresses(fingerprint):
    """
    Retrieve email addresses from gpg key uids for a given fingerprint.

    Results are remembered in key_uid_email_cache, so gpg is run at most
    once per fingerprint.  If gpg fails, an empty list is returned (and
    cached).

    @rtype: list of strings
    """
    cached = key_uid_email_cache.get(fingerprint)
    if cached is not None:
        return cached

    cmd = "gpg --no-default-keyring %s --fingerprint %s" \
                % (gpg_keyring_args(), fingerprint)
    (result, output) = commands.getstatusoutput(cmd)

    found = []
    if result == 0:
        # Keep the first group of every output line matching re_gpg_uid
        matches = [re_gpg_uid.match(l) for l in output.split('\n')]
        found = [m.group(1) for m in matches if m]

    key_uid_email_cache[fingerprint] = found
    return found
1402
1403 ################################################################################
1404
def clean_symlink (src, dest, root):
    """
    Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.

    The first occurrence of 'root' is removed from both paths; 'src' is
    then prefixed with one '../' per path component of the directory part
    of 'dest'.

    @rtype: string
    @return: the fixed 'src'
    """
    relative_src = src.replace(root, '', 1)
    dest_dir = os.path.dirname(dest.replace(root, '', 1))
    # Number of '/'-separated components, i.e. levels to climb
    depth = dest_dir.count('/') + 1
    return '../' * depth + relative_src
1415
1416 ################################################################################
1417
def temp_filename(directory=None, prefix="dak", suffix=""):
    """
    Create a new, uniquely named temporary file via tempfile.mkstemp().

    @param directory: if non-null, the directory the file is created in
    @param prefix: if non-null, the start of the filename; default is dak
    @param suffix: if non-null, the end of the filename

    @rtype: tuple
    @return: a pair (fd, name): an open file descriptor for the file and
             its path
    """

    return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=directory)
1429
1430 ################################################################################
1431
def temp_dirname(parent=None, prefix="dak", suffix=""):
    """
    Create a new, uniquely named temporary directory via
    tempfile.mkdtemp().

    @param parent: if non-null, the directory the new one is created in
    @param prefix: if non-null, the start of the directory name; default
                   is dak
    @param suffix: if non-null, the end of the directory name

    @rtype: string
    @return: pathname of the new directory
    """

    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=parent)
1443
1444 ################################################################################
1445
def is_email_alias(email):
    """
    Check if the user part of the email is listed in the alias file.

    The alias file (see which_alias_file()) is read on the first call
    only; the part of every line before the first ':' is remembered in the
    module-level alias_cache.  Without an alias file nothing is an alias.

    @type email: string
    @param email: mail address; everything before the first '@' is looked up

    @rtype: bool
    """
    global alias_cache
    if alias_cache is None:
        aliases = set()
        aliasfn = which_alias_file()
        if aliasfn:
            # Close the file again once it has been read
            with open(aliasfn) as aliasfile:
                for l in aliasfile:
                    aliases.add(l.split(':')[0])
        # Only publish the cache once it is complete, so that a failed
        # read is retried on the next call instead of leaving a partial set
        alias_cache = aliases
    uid = email.split('@')[0]
    return uid in alias_cache
1457
1458 ################################################################################
1459
def get_changes_files(from_dir):
    """
    Takes a directory and lists all .changes files in it (as well as chdir'ing
    to the directory; this is due to broken behaviour on the part of p-u/p-a
    when you're not in the right place)

    Terminates the program via fubar() if the directory cannot be entered
    or read.

    @type from_dir: string
    @param from_dir: directory to change into and to list

    @rtype: list of strings
    @return: names (without directory) of the .changes files
    """
    try:
        # Much of the rest of p-u/p-a depends on being in the right place
        os.chdir(from_dir)
        # List the new working directory rather than from_dir: a relative
        # from_dir no longer resolves once we have changed into it.
        changes_files = [x for x in os.listdir(os.curdir) if x.endswith('.changes')]
    except OSError as e:
        fubar("Failed to read list from directory %s (%s)" % (from_dir, e))

    return changes_files
1476
1477 ################################################################################
1478
# Module-level setup, executed at import time: initialise apt_pkg and
# build the configuration object (Cnf) used throughout this module.
apt_pkg.init()

Cnf = apt_pkg.Configuration()
# The default configuration file is not read when DAK_TEST is set in the
# environment.
if not os.getenv("DAK_TEST"):
    apt_pkg.read_config_file_isc(Cnf,default_config)

# If which_conf_file() selects a file other than the default one, read it
# into the same Cnf object as well.
if which_conf_file() != default_config:
    apt_pkg.read_config_file_isc(Cnf,which_conf_file())
1487
1488 ################################################################################
1489
def parse_wnpp_bug_file(file = "/srv/ftp-master.debian.org/scripts/masterfiles/wnpp_rm"):
    """
    Parses the wnpp bug list available at http://qa.debian.org/data/bts/wnpp_rm
    Well, actually it parses a local copy, but let's document the source
    somewhere ;)

    Each useful line has the form "source: entry|entry|..."; lines without
    ": " are ignored.  The first run of digits in an entry is taken as the
    bug number, entries without any digits are skipped.  If the file cannot
    be opened a warning is printed and an empty dict is returned.

    @type file: string
    @param file: path of the local copy of the wnpp list

    @rtype: dict
    @return: dict associating source package name with a list of open wnpp
             bugs (Yes, there might be more than one)
    """

    try:
        with open(file) as f:
            lines = f.readlines()
    except IOError:
        print("Warning:  Couldn't open %s; don't know about WNPP bugs, so won't close any." % file)
        lines = []

    wnpp = {}
    for line in lines:
        splited_line = line.split(": ", 1)
        if len(splited_line) > 1:
            bugs = []
            for wnpp_bug in splited_line[1].split("|"):
                # re.search() returns None for an entry without digits, so
                # test the match object before asking for the matched text.
                match = re.search(r"\d+", wnpp_bug)
                if match:
                    bugs.append(match.group())
            wnpp[splited_line[0]] = bugs
    return wnpp
1522
1523 ################################################################################
1524
def get_packages_from_ftp(root, suite, component, architecture):
    """
    Returns an object containing apt_pkg-parseable data collected by
    aggregating the Packages.gz file of the given architecture and, if it
    exists, the corresponding debian-installer Packages.gz file.

    The data is unpacked with gunzip into a temporary file which is removed
    again before returning.  A failing gunzip terminates the program via
    fubar().

    @type root: string
    @param root: path to ftp archive root directory

    @type suite: string
    @param suite: suite to extract files from

    @type component: string
    @param component: component to extract files from

    @type architecture: string
    @param architecture: architecture to extract files from

    @rtype: TagFile
    @return: apt_pkg class containing package data
    """
    filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (root, suite, component, architecture)
    (fd, temp_file) = temp_filename()
    # Only the name is needed, the shell redirections below do the writing;
    # close the descriptor instead of leaking it.
    os.close(fd)
    try:
        (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_file))
        if (result != 0):
            fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        filename = "%s/dists/%s/%s/debian-installer/binary-%s/Packages.gz" % (root, suite, component, architecture)
        if os.path.exists(filename):
            (result, output) = commands.getstatusoutput("gunzip -c %s >> %s" % (filename, temp_file))
            if (result != 0):
                fubar("Gunzip invocation failed!\n%s\n" % (output), result)
        packages = open_file(temp_file)
        Packages = apt_pkg.ParseTagFile(packages)
    finally:
        # Also runs when fubar() exits, so the temporary file never stays
        # behind.  The already opened file remains readable after unlink.
        os.unlink(temp_file)
    return Packages
1559
1560 ################################################################################
1561
def deb_extract_control(fh):
    """
    Extract DEBIAN/control from a binary package.

    @param fh: the binary package, in a form accepted by apt_inst.DebFile

    @return: the data of the "control" member of the package's control part
    """
    deb = apt_inst.DebFile(fh)
    return deb.control.extractdata("control")
1565
1566 ################################################################################
1567
def mail_addresses_for_upload(maintainer, changed_by, fingerprint):
    """mail addresses to contact for an upload

    The maintainer is always included, and so is the changer if different
    from the maintainer.  If the signing key has addresses and neither the
    changer's nor the maintainer's address is among them, the first address
    of the key is included as well.

    @type  maintainer: str
    @param maintainer: Maintainer field of the .changes file

    @type  changed_by: str
    @param changed_by: Changed-By field of the .changes file

    @type  fingerprint: str
    @param fingerprint: fingerprint of the key used to sign the upload

    @rtype:  list of str
    @return: list of RFC 2047-encoded mail addresses to contact regarding
             this upload
    """
    recipients = [maintainer]
    if changed_by != maintainer:
        recipients.append(changed_by)

    key_addresses = gpg_get_key_addresses(fingerprint)
    if key_addresses:
        # Changer first, then maintainer; stops at the first one found
        listed_on_key = any(fix_maintainer(person)[3] in key_addresses
                            for person in (changed_by, maintainer))
        if not listed_on_key:
            recipients.append(key_addresses[0])

    return [ fix_maintainer(entry)[1] for entry in recipients ]
1594
1595 ################################################################################
1596
def call_editor(text="", suffix=".txt"):
    """run editor and return the result as a string

    The editor is taken from the VISUAL environment variable, then from
    EDITOR, and defaults to vi.  The text is handed over in a temporary
    file which is removed again afterwards.

    @type  text: str
    @param text: initial text

    @type  suffix: str
    @param suffix: extension for temporary file

    @rtype:  str
    @return: string with the edited text

    @raise subprocess.CalledProcessError: if the editor exits with a
                                          non-zero status
    """
    editor = os.environ.get('VISUAL', os.environ.get('EDITOR', 'vi'))
    # Text mode, as text is what gets written
    tmp = tempfile.NamedTemporaryFile(mode='w+', suffix=suffix, delete=False)
    try:
        tmp.write(text)
        # Flush and release the file before the editor touches it
        tmp.close()
        subprocess.check_call([editor, tmp.name])
        with open(tmp.name, 'r') as edited:
            return edited.read()
    finally:
        # close() is harmless if the file is closed already
        tmp.close()
        os.unlink(tmp.name)
1618
1619 ################################################################################
1620
def _dep_group_removed(dep, removal_set):
    """Tell whether every alternative of one ORed dependency is being removed.

    @type  dep: iterable of 3-tuples
    @param dep: one ORed dependency group; the first item of every tuple is
                the package name

    @type  removal_set: set of str
    @param removal_set: names of the packages that are going away

    @rtype:  bool
    @return: True if no alternative outside C{removal_set} is left
    """
    for dep_package, _, _ in dep:
        if dep_package not in removal_set:
            return False
    return True


def _print_broken_dependencies(kind, entries, cruft):
    """Print one section of the reverse dependency report to stdout.

    @type  kind: str
    @param kind: dependency field named in the section header
                 ("Depends" or "Build-Depends")

    @type  entries: list of (str, list of str)
    @param entries: (source, lines) pairs in output order; every C{lines}
                    list must hold at least one item

    @type  cruft: bool
    @param cruft: if true, print the indented "  - broken ..." form without
                  a trailing blank line; otherwise print the "# Broken ..."
                  form followed by a blank line
    """
    if cruft:
        print("  - broken %s:" % kind)
        indent = '    '
    else:
        print("# Broken %s:" % kind)
        indent = ''
    for source, lines in entries:
        print('%s%s: %s' % (indent, source, lines[0]))
        # Continuation lines are aligned under the first item, i.e. they
        # skip the width of "<source>: ".
        padding = indent + ' ' * (len(source) + 2)
        for line in lines[1:]:
            print(padding + line)
    if not cruft:
        print("")


def check_reverse_depends(removals, suite, arches=None, session=None, cruft=False):
    """check which packages would be broken by removing the given packages

    Looks at the Depends of all binaries and at the Build-Depends and
    Build-Depends-Indep of all sources in the suite and prints a report of
    those that can no longer be satisfied once C{removals} are gone.  An
    ORed dependency only counts as broken if all of its alternatives are
    removed.  A virtual package that is provided solely by to-be-removed
    packages is treated as removed as well.

    @type  removals: iterable of str
    @param removals: names of the packages to be removed; it is not modified

    @type  suite: str
    @param suite: name of the suite to check

    @type  arches: iterable of str
    @param arches: architectures to check; if empty or None the
                   architectures of the suite are used.  "source" and "all"
                   are dropped from the list, "all" is always checked in
                   addition.

    @type  session: SQLAlchemy session
    @param session: database session; it is used directly for the queries,
                    so a real session has to be passed in spite of the
                    default value

    @type  cruft: bool
    @param cruft: select the indented output format instead of the
                  "# Broken ..." format

    @rtype:  int
    @return: 1 if at least one broken dependency was found, 0 otherwise
    """
    dbsuite = get_suite(suite, session)
    dep_problem = 0
    p2c = {}
    all_broken = {}
    if arches:
        all_arches = set(arches)
    else:
        all_arches = set([x.arch_string for x in get_suite_architectures(suite)])
    all_arches -= set(["source", "all"])
    # Work on a private set: the virtual packages added below must not leak
    # into the caller's list, and membership tests are O(1).
    # NOTE(review): a virtual package found to be orphaned on one
    # architecture stays in this set for the architectures processed later
    # and for the Build-Depends check -- confirm that this is intended.
    removal_set = set(removals)
    metakey_d = get_or_set_metadatakey("Depends", session)
    metakey_p = get_or_set_metadatakey("Provides", session)
    params = {
        'suite_id':     dbsuite.suite_id,
        'metakey_d_id': metakey_d.key_id,
        'metakey_p_id': metakey_p.key_id,
    }
    for architecture in all_arches | set(['all']):
        deps = {}
        sources = {}
        virtual_packages = {}
        params['arch_id'] = get_architecture(architecture, session).arch_id

        statement = '''
            SELECT b.id, b.package, s.source, c.name as component,
                (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
                (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
                FROM binaries b
                JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
                JOIN source s ON b.source = s.id
                JOIN files f ON b.file = f.id
                JOIN location l ON f.location = l.id
                JOIN component c ON l.component = c.id
                WHERE b.architecture = :arch_id'''
        query = session.query('id', 'package', 'source', 'component', 'depends', 'provides'). \
            from_statement(statement).params(params)
        for binary_id, package, source, component, depends, provides in query:
            sources[package] = source
            p2c[package] = component
            if depends is not None:
                deps[package] = depends
            # Maintain a counter for each virtual package.  If a
            # Provides: exists, set the counter to 0 and count all
            # provides by a package not in the list for removal.
            # If the counter stays 0 at the end, we know that only
            # the to-be-removed packages provided this virtual
            # package.
            if provides is not None:
                for virtual_pkg in provides.split(","):
                    virtual_pkg = virtual_pkg.strip()
                    if virtual_pkg == package:
                        continue
                    virtual_packages.setdefault(virtual_pkg, 0)
                    if package not in removal_set:
                        virtual_packages[virtual_pkg] += 1

        # If a virtual package is only provided by the to-be-removed
        # packages, treat the virtual package as to-be-removed too.
        for virtual_pkg, providers in virtual_packages.items():
            if providers == 0:
                removal_set.add(virtual_pkg)

        # Check binary dependencies (Depends)
        for package, depends in deps.items():
            if package in removal_set:
                continue
            try:
                parsed_dep = apt_pkg.ParseDepends(depends)
            except ValueError as e:
                print("Error for package %s: %s" % (package, e))
                continue
            # Check for partial breakage.  If a package has a ORed
            # dependency, there is only a dependency problem if all
            # packages in the ORed depends will be removed.  One broken
            # dependency is enough to list the package for this
            # architecture.
            if any(_dep_group_removed(dep, removal_set) for dep in parsed_dep):
                component = p2c[package]
                source = sources[package]
                if component != "main":
                    source = "%s/%s" % (source, component)
                all_broken.setdefault(source, {}).setdefault(package, set()).add(architecture)
                dep_problem = 1

    if all_broken:
        entries = []
        for source in sorted(all_broken):
            bindict = all_broken[source]
            lines = []
            for binary in sorted(bindict):
                broken_arches = bindict[binary]
                # Only list the architectures if the breakage is partial.
                if broken_arches == all_arches or 'all' in broken_arches:
                    lines.append(binary)
                else:
                    lines.append('%s [%s]' % (binary, ' '.join(sorted(broken_arches))))
            entries.append((source, lines))
        _print_broken_dependencies("Depends", entries, cruft)

    # Check source dependencies (Build-Depends and Build-Depends-Indep)
    broken_bdeps = {}
    metakey_bd = get_or_set_metadatakey("Build-Depends", session)
    metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
    params = {
        'suite_id':    dbsuite.suite_id,
        'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
    }
    statement = '''
        SELECT s.id, s.source, string_agg(sm.value, ', ') as build_dep
           FROM source s
           JOIN source_metadata sm ON s.id = sm.src_id
           WHERE s.id in
               (SELECT source FROM src_associations
                   WHERE suite = :suite_id)
               AND sm.key_id in :metakey_ids
           GROUP BY s.id, s.source'''
    query = session.query('id', 'source', 'build_dep').from_statement(statement). \
        params(params)
    for source_id, source, build_dep in query:
        if source in removal_set:
            continue
        if build_dep is None:
            continue
        # Remove [arch] information since we want to see breakage on all arches
        build_dep = re_build_dep_arch.sub("", build_dep)
        try:
            parsed_dep = apt_pkg.ParseDepends(build_dep)
        except ValueError as e:
            print("Error for source %s: %s" % (source, e))
            continue
        # The report key is kept apart from "source" and computed at most
        # once per source: appending the component to "source" itself would
        # add it again for every further broken build-dependency
        # ("src/contrib/contrib").
        key = None
        for dep in parsed_dep:
            if not _dep_group_removed(dep, removal_set):
                continue
            if key is None:
                component = DBSource.get(source_id, session).get_component_name()
                key = source
                if component != "main":
                    key = "%s/%s" % (source, component)
            broken_bdeps.setdefault(key, set()).add(pp_deps(dep))
            dep_problem = 1

    if broken_bdeps:
        entries = [(source, sorted(broken_bdeps[source]))
                   for source in sorted(broken_bdeps)]
        _print_broken_dependencies("Build-Depends", entries, cruft)

    return dep_problem