X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=daklib%2Fupload.py;h=4c10f45f4e433e36bfd40a40fcff31673f2fc5b9;hb=391f5ec09a119131dc846b796ca791f4cecc69e4;hp=5c19478bcf97931826c10d27bc181e96032cce2f;hpb=4174286376153578f1444aa0afcfa2b5bfe1fe61;p=dak.git diff --git a/daklib/upload.py b/daklib/upload.py index 5c19478b..4c10f45f 100644 --- a/daklib/upload.py +++ b/daklib/upload.py @@ -17,110 +17,188 @@ """module to handle uploads not yet installed to the archive This module provides classes to handle uploads not yet installed to the -archive. Central is the `Changes` class which represents a changes file. +archive. Central is the L{Changes} class which represents a changes file. It provides methods to access the included binary and source packages. """ import apt_inst import apt_pkg +import errno import os import re -from .gpg import SignedFile -from .regexes import * -class InvalidChangesException(Exception): +from daklib.gpg import SignedFile +from daklib.regexes import * +import daklib.packagelist + +class UploadException(Exception): + pass + +class InvalidChangesException(UploadException): pass -class InvalidBinaryException(Exception): +class InvalidBinaryException(UploadException): pass -class InvalidSourceException(Exception): +class InvalidSourceException(UploadException): pass -class InvalidHashException(Exception): +class InvalidHashException(UploadException): def __init__(self, filename, hash_name, expected, actual): self.filename = filename self.hash_name = hash_name self.expected = expected self.actual = actual def __str__(self): - return "Invalid {0} hash for {1}: expected {2}, but got {3}.".format(self.hash_name, self.filename, self.expected, self.actual) - -class InvalidFilenameException(Exception): + return ("Invalid {0} hash for {1}:\n" + "According to the control file the {0} hash should be {2},\n" + "but {1} has {3}.\n" + "\n" + "If you did not include {1} in your upload, a different version\n" + "might already be known to the archive software.") \ + .format(self.hash_name, self.filename, self.expected, self.actual) + +class InvalidFilenameException(UploadException): def __init__(self, filename): self.filename = filename def __str__(self): return "Invalid filename '{0}'.".format(self.filename) +class FileDoesNotExist(UploadException): + def __init__(self, filename): + self.filename = filename + def __str__(self): + return "Refers to non-existing file '{0}'".format(self.filename) + class HashedFile(object): """file with checksums - - Attributes: - filename (str): name of the file - size (long): size in bytes - md5sum (str): MD5 hash in hexdigits - sha1sum (str): SHA1 hash in hexdigits - sha256sum (str): SHA256 hash in hexdigits - section (str): section or None - priority (str): priority or None """ - def __init__(self, filename, size, md5sum, sha1sum, sha256sum, section=None, priority=None): + def __init__(self, filename, size, md5sum, sha1sum, sha256sum, section=None, priority=None, input_filename=None): self.filename = filename + """name of the file + @type: str + """ + + if input_filename is None: + input_filename = filename + self.input_filename = input_filename + """name of the file on disk + + Used for temporary files that should not be installed using their on-disk name. + @type: str + """ + self.size = size + """size in bytes + @type: long + """ + self.md5sum = md5sum + """MD5 hash in hexdigits + @type: str + """ + self.sha1sum = sha1sum + """SHA1 hash in hexdigits + @type: str + """ + self.sha256sum = sha256sum + """SHA256 hash in hexdigits + @type: str + """ + self.section = section + """section or C{None} + @type: str or C{None} + """ + self.priority = priority + """priority or C{None} + @type: str of C{None} + """ + + @classmethod + def from_file(cls, directory, filename, section=None, priority=None): + """create with values for an existing file + + Create a C{HashedFile} object that refers to an already existing file. + + @type directory: str + @param directory: directory the file is located in + + @type filename: str + @param filename: filename + + @type section: str or C{None} + @param section: optional section as given in .changes files + + @type priority: str or C{None} + @param priority: optional priority as given in .changes files + + @rtype: L{HashedFile} + @return: C{HashedFile} object for the given file + """ + path = os.path.join(directory, filename) + with open(path, 'r') as fh: + size = os.fstat(fh.fileno()).st_size + hashes = apt_pkg.Hashes(fh) + return cls(filename, size, hashes.md5, hashes.sha1, hashes.sha256, section, priority) def check(self, directory): """Validate hashes Check if size and hashes match the expected value. - Args: - directory (str): directory the file is located in + @type directory: str + @param directory: directory the file is located in - Raises: - InvalidHashException: hash mismatch + @raise InvalidHashException: hash mismatch """ - path = os.path.join(directory, self.filename) - fh = open(path, 'r') + path = os.path.join(directory, self.input_filename) + try: + with open(path) as fh: + self.check_fh(fh) + except IOError as e: + if e.errno == errno.ENOENT: + raise FileDoesNotExist(self.input_filename) + raise + + def check_fh(self, fh): + size = os.fstat(fh.fileno()).st_size + fh.seek(0) + hashes = apt_pkg.Hashes(fh) - size = os.stat(path).st_size if size != self.size: raise InvalidHashException(self.filename, 'size', self.size, size) - md5sum = apt_pkg.md5sum(fh) - if md5sum != self.md5sum: - raise InvalidHashException(self.filename, 'md5sum', self.md5sum, md5sum) + if hashes.md5 != self.md5sum: + raise InvalidHashException(self.filename, 'md5sum', self.md5sum, hashes.md5) - fh.seek(0) - sha1sum = apt_pkg.sha1sum(fh) - if sha1sum != self.sha1sum: - raise InvalidHashException(self.filename, 'sha1sum', self.sha1sum, sha1sum) + if hashes.sha1 != self.sha1sum: + raise InvalidHashException(self.filename, 'sha1sum', self.sha1sum, hashes.sha1) - fh.seek(0) - sha256sum = apt_pkg.sha256sum(fh) - if sha256sum != self.sha256sum: - raise InvalidHashException(self.filename, 'sha256sum', self.sha256sum, sha256sum) + if hashes.sha256 != self.sha256sum: + raise InvalidHashException(self.filename, 'sha256sum', self.sha256sum, hashes.sha256) -def parse_file_list(control, has_priority_and_section): +def parse_file_list(control, has_priority_and_section, safe_file_regexp = re_file_safe, fields = ('Files', 'Checksums-Sha1', 'Checksums-Sha256')): """Parse Files and Checksums-* fields - Args: - control (dict-like): control file to take fields from - has_priority_and_section (bool): Files include section and priority (as in .changes) + @type control: dict-like + @param control: control file to take fields from - Raises: - InvalidChangesException: missing fields or other grave errors + @type has_priority_and_section: bool + @param has_priority_and_section: Files field include section and priority + (as in .changes) - Returns: - dictonary mapping filenames to `daklib.upload.HashedFile` objects + @raise InvalidChangesException: missing fields or other grave errors + + @rtype: dict + @return: dict mapping filenames to L{daklib.upload.HashedFile} objects """ entries = {} - for line in control["Files"].split('\n'): + for line in control.get(fields[0], "").split('\n'): if len(line) == 0: continue @@ -133,24 +211,26 @@ def parse_file_list(control, has_priority_and_section): entries[filename] = entry - for line in control["Checksums-Sha1"].split('\n'): + for line in control.get(fields[1], "").split('\n'): if len(line) == 0: continue (sha1sum, size, filename) = line.split() entry = entries.get(filename, None) - if entry.get('size', None) != long(size): - raise InvalidChangesException('Size for {0} in Files and Checksum-Sha1 fields differ.'.format(filename)) + if entry is None: + raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[1], fields[0])) + if entry is not None and entry.get('size', None) != long(size): + raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[1])) entry['sha1sum'] = sha1sum - for line in control["Checksums-Sha256"].split('\n'): + for line in control.get(fields[2], "").split('\n'): if len(line) == 0: continue (sha256sum, size, filename) = line.split() entry = entries.get(filename, None) if entry is None: - raise InvalidChangesException('No sha256sum for {0}.'.format(filename)) - if entry.get('size', None) != long(size): - raise InvalidChangesException('Size for {0} in Files and Checksum-Sha256 fields differ.'.format(filename)) + raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[2], fields[0])) + if entry is not None and entry.get('size', None) != long(size): + raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[2])) entry['sha256sum'] = sha256sum files = {} @@ -164,7 +244,7 @@ def parse_file_list(control, has_priority_and_section): raise InvalidChangesException('No sha1sum for {0}.'.format(filename)) if 'sha256sum' not in entry: raise InvalidChangesException('No sha256sum for {0}.'.format(filename)) - if not re_file_safe.match(filename): + if safe_file_regexp is not None and not safe_file_regexp.match(filename): raise InvalidChangesException("{0}: References file with unsafe filename {1}.".format(self.filename, filename)) f = files[filename] = HashedFile(**entry) @@ -172,32 +252,28 @@ def parse_file_list(control, has_priority_and_section): class Changes(object): """Representation of a .changes file - - Attributes: - architectures (list of str): list of architectures included in the upload - binaries (list of daklib.upload.Binary): included binary packages - binary_names (list of str): names of included binary packages - byhand_files (list of daklib.upload.HashedFile): included byhand files - bytes (int): total size of files included in this upload in bytes - changes (dict-like): dict to access fields of the .changes file - closed_bugs (list of str): list of bugs closed by this upload - directory (str): directory the .changes is located in - distributions (list of str): list of target distributions for the upload - filename (str): name of the .changes file - files (dict): dict mapping filenames to daklib.upload.HashedFile objects - path (str): path to the .changes files - primary_fingerprint (str): fingerprint of the PGP key used for the signature - source (daklib.upload.Source or None): included source - valid_signature (bool): True if the changes has a valid signature """ def __init__(self, directory, filename, keyrings, require_signature=True): if not re_file_safe.match(filename): raise InvalidChangesException('{0}: unsafe filename'.format(filename)) + self.directory = directory + """directory the .changes is located in + @type: str + """ + self.filename = filename + """name of the .changes file + @type: str + """ + data = open(self.path).read() self._signed_file = SignedFile(data, keyrings, require_signature) self.changes = apt_pkg.TagSection(self._signed_file.contents) + """dict to access fields of the .changes file + @type: dict-like + """ + self._binaries = None self._source = None self._files = None @@ -206,26 +282,52 @@ class Changes(object): @property def path(self): + """path to the .changes file + @type: str + """ return os.path.join(self.directory, self.filename) @property def primary_fingerprint(self): + """fingerprint of the key used for signing the .changes file + @type: str + """ return self._signed_file.primary_fingerprint @property def valid_signature(self): + """C{True} if the .changes has a valid signature + @type: bool + """ return self._signed_file.valid + @property + def signature_timestamp(self): + return self._signed_file.signature_timestamp + + @property + def contents_sha1(self): + return self._signed_file.contents_sha1 + @property def architectures(self): - return self.changes['Architecture'].split() + """list of architectures included in the upload + @type: list of str + """ + return self.changes.get('Architecture', '').split() @property def distributions(self): + """list of target distributions for the upload + @type: list of str + """ return self.changes['Distribution'].split() @property def source(self): + """included source or C{None} + @type: L{daklib.upload.Source} or C{None} + """ if self._source is None: source_files = [] for f in self.files.itervalues(): @@ -235,8 +337,25 @@ class Changes(object): self._source = Source(self.directory, source_files, self._keyrings, self._require_signature) return self._source + @property + def sourceful(self): + """C{True} if the upload includes source + @type: bool + """ + return "source" in self.architectures + + @property + def source_name(self): + """source package name + @type: str + """ + return re_field_source.match(self.changes['Source']).group('package') + @property def binaries(self): + """included binary packages + @type: list of L{daklib.upload.Binary} + """ if self._binaries is None: binaries = [] for f in self.files.itervalues(): @@ -247,11 +366,16 @@ class Changes(object): @property def byhand_files(self): + """included byhand files + @type: list of L{daklib.upload.HashedFile} + """ byhand = [] for f in self.files.itervalues(): if re_file_dsc.match(f.filename) or re_file_source.match(f.filename) or re_file_binary.match(f.filename): continue + if re_file_buildinfo.match(f.filename): + continue if f.section != 'byhand' and f.section[:4] != 'raw-': raise InvalidChangesException("{0}: {1} looks like a byhand package, but is in section {2}".format(self.filename, f.filename, f.section)) byhand.append(f) @@ -260,35 +384,47 @@ class Changes(object): @property def binary_names(self): + """names of included binary packages + @type: list of str + """ return self.changes['Binary'].split() @property def closed_bugs(self): + """bugs closed by this upload + @type: list of str + """ return self.changes.get('Closes', '').split() @property def files(self): + """dict mapping filenames to L{daklib.upload.HashedFile} objects + @type: dict + """ if self._files is None: self._files = parse_file_list(self.changes, True) return self._files @property def bytes(self): + """total size of files included in this upload in bytes + @type: number + """ count = 0 for f in self.files.itervalues(): count += f.size return count def __cmp__(self, other): - """Compare two changes packages + """compare two changes files We sort by source name and version first. If these are identical, we sort changes that include source before those without source (so that sourceful uploads get processed first), and finally fall back to the filename (this should really never happen). - Returns: - -1 if self < other, 0 if self == other, 1 if self > other + @rtype: number + @return: n where n < 0 if self < other, n = 0 if self == other, n > 0 if self > other """ ret = cmp(self.changes.get('Source'), other.changes.get('Source')) @@ -313,25 +449,30 @@ class Changes(object): class Binary(object): """Representation of a binary package - - Attributes: - component (str): component name - control (dict-like): dict to access fields in DEBIAN/control - hashed_file (HashedFile): HashedFile object for the .deb """ def __init__(self, directory, hashed_file): self.hashed_file = hashed_file + """file object for the .deb + @type: HashedFile + """ - path = os.path.join(directory, hashed_file.filename) + path = os.path.join(directory, hashed_file.input_filename) data = apt_inst.DebFile(path).control.extractdata("control") + self.control = apt_pkg.TagSection(data) + """dict to access fields in DEBIAN/control + @type: dict-like + """ + + @classmethod + def from_file(cls, directory, filename): + hashed_file = HashedFile.from_file(directory, filename) + return cls(directory, hashed_file) @property def source(self): - """Get source package name and version - - Returns: - tuple containing source package name and version + """get tuple with source package name and version + @type: tuple of str """ source = self.control.get("Source", None) if source is None: @@ -345,11 +486,13 @@ class Binary(object): return (match.group('package'), version) @property - def type(self): - """Get package type + def name(self): + return self.control['Package'] - Returns: - String with the package type ('deb' or 'udeb') + @property + def type(self): + """package type ('deb' or 'udeb') + @type: str """ match = re_file_binary.match(self.hashed_file.filename) if not match: @@ -358,6 +501,9 @@ class Binary(object): @property def component(self): + """component name + @type: str + """ fields = self.control['Section'].split('/') if len(fields) > 1: return fields[0] @@ -365,18 +511,13 @@ class Binary(object): class Source(object): """Representation of a source package - - Attributes: - component (str): guessed component name. Might be wrong! - dsc (dict-like): dict to access fields in the .dsc file - hashed_files (list of daklib.upload.HashedFile): list of source files (including .dsc) - files (dict): dictonary mapping filenames to HashedFile objects for - additional source files (not including .dsc) - primary_fingerprint (str): fingerprint of the PGP key used for the signature - valid_signature (bool): True if the dsc has a valid signature """ def __init__(self, directory, hashed_files, keyrings, require_signature=True): self.hashed_files = hashed_files + """list of source files (including the .dsc itself) + @type: list of L{HashedFile} + """ + self._dsc_file = None for f in hashed_files: if re_file_dsc.match(f.filename): @@ -384,31 +525,74 @@ class Source(object): raise InvalidSourceException("Multiple .dsc found ({0} and {1})".format(self._dsc_file.filename, f.filename)) else: self._dsc_file = f - dsc_file_path = os.path.join(directory, self._dsc_file.filename) + + # make sure the hash for the dsc is valid before we use it + self._dsc_file.check(directory) + + dsc_file_path = os.path.join(directory, self._dsc_file.input_filename) data = open(dsc_file_path, 'r').read() self._signed_file = SignedFile(data, keyrings, require_signature) self.dsc = apt_pkg.TagSection(self._signed_file.contents) + """dict to access fields in the .dsc file + @type: dict-like + """ + + self.package_list = daklib.packagelist.PackageList(self.dsc) + """Information about packages built by the source. + @type: daklib.packagelist.PackageList + """ + self._files = None + @classmethod + def from_file(cls, directory, filename, keyrings, require_signature=True): + hashed_file = HashedFile.from_file(directory, filename) + return cls(directory, [hashed_file], keyrings, require_signature) + @property def files(self): + """dict mapping filenames to L{HashedFile} objects for additional source files + + This list does not include the .dsc itself. + + @type: dict + """ if self._files is None: self._files = parse_file_list(self.dsc, False) return self._files @property def primary_fingerprint(self): + """fingerprint of the key used to sign the .dsc + @type: str + """ return self._signed_file.primary_fingerprint @property def valid_signature(self): + """C{True} if the .dsc has a valid signature + @type: bool + """ return self._signed_file.valid @property def component(self): + """guessed component name + + Might be wrong. Don't rely on this. + + @type: str + """ if 'Section' not in self.dsc: return 'main' fields = self.dsc['Section'].split('/') if len(fields) > 1: return fields[0] return "main" + + @property + def filename(self): + """filename of .dsc file + @type: str + """ + return self._dsc_file.filename