From: Torsten Werner
Date: Thu, 24 Mar 2011 08:35:23 +0000 (+0100)
Subject: Merge branch 'master' into contents
X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=64763c8d5153817bf68e07f2fb34f0d1870a1dc0;hp=-c;p=dak.git

Merge branch 'master' into contents

Conflicts:
	daklib/dbconn.py
---

64763c8d5153817bf68e07f2fb34f0d1870a1dc0
diff --combined daklib/dbconn.py
index 014f1082,6317b584..d4caf01c
--- a/daklib/dbconn.py
+++ b/daklib/dbconn.py
@@@ -204,7 -204,9 +204,9 @@@ class ORMObject(object)
                  # list
                  value = len(value)
              elif hasattr(value, 'count'):
-                 # query
+                 # query (but not during validation)
+                 if self.in_validation:
+                     continue
                  value = value.count()
              else:
                  raise KeyError('Do not understand property %s.' % property)
@@@ -258,6 -260,8 +260,8 @@@
      validation_message = \
          "Validation failed because property '%s' must not be empty in object\n%s"
  
+     in_validation = False
+ 
      def validate(self):
          '''
          This function validates the not NULL constraints as returned by
@@@ -272,8 -276,11 +276,11 @@@
                  getattr(self, property + '_id') is not None:
                  continue
              if not hasattr(self, property) or getattr(self, property) is None:
-                 raise DBUpdateError(self.validation_message % \
-                     (property, str(self)))
+                 # str() might lead to races due to a 2nd flush
+                 self.in_validation = True
+                 message = self.validation_message % (property, str(self))
+                 self.in_validation = False
+                 raise DBUpdateError(message)
  
      @classmethod
      @session_wrapper
@@@ -492,6 -499,10 +499,10 @@@ class DBBinary(ORMObject)
          self.poolfile = poolfile
          self.binarytype = binarytype
  
+     @property
+     def pkid(self):
+         return self.binary_id
+ 
      def properties(self):
          return ['package', 'version', 'maintainer', 'source', 'architecture', \
              'poolfile', 'binarytype', 'fingerprint', 'install_date', \
@@@ -529,6 -540,33 +540,33 @@@
          dpkg.stdout.close()
          dpkg.wait()
  
+     def read_control(self):
+         '''
+         Reads the control information from a binary.
+ 
+         @rtype: text
+         @return: stanza text of the control section.
+         '''
+         import apt_inst
+         fullpath = self.poolfile.fullpath
+         deb_file = open(fullpath, 'r')
+         stanza = apt_inst.debExtractControl(deb_file)
+         deb_file.close()
+ 
+         return stanza
+ 
+     def read_control_fields(self):
+         '''
+         Reads the control information from a binary and returns it
+         as a dictionary.
+ 
+         @rtype: dict
+         @return: fields of the control section as a dictionary.
+         '''
+         import apt_pkg
+         stanza = self.read_control()
+         return apt_pkg.TagSection(stanza)
+ 
  __all__.append('DBBinary')
  
  @session_wrapper
@@@ -2157,18 -2195,60 +2195,72 @@@ __all__.append('get_sections'
  
  ################################################################################
  
 +class SrcContents(ORMObject):
 +    def __init__(self, file = None, source = None):
 +        self.file = file
 +        self.source = source
 +
 +    def properties(self):
 +        return ['file', 'source']
 +
 +__all__.append('SrcContents')
 +
 +################################################################################
 +
+ from debian.debfile import Deb822
+ 
+ # Temporary Deb822 subclass to fix bugs with : handling; see #597249
+ class Dak822(Deb822):
+     def _internal_parser(self, sequence, fields=None):
+         # The key is non-whitespace, non-colon characters before any colon.
+         key_part = r"^(?P<key>[^: \t\n\r\f\v]+)\s*:\s*"
+         single = re.compile(key_part + r"(?P<data>\S.*?)\s*$")
+         multi = re.compile(key_part + r"$")
+         multidata = re.compile(r"^\s(?P<data>.+?)\s*$")
+ 
+         wanted_field = lambda f: fields is None or f in fields
+ 
+         if isinstance(sequence, basestring):
+             sequence = sequence.splitlines()
+ 
+         curkey = None
+         content = ""
+         for line in self.gpg_stripped_paragraph(sequence):
+             m = single.match(line)
+             if m:
+                 if curkey:
+                     self[curkey] = content
+ 
+                 if not wanted_field(m.group('key')):
+                     curkey = None
+                     continue
+ 
+                 curkey = m.group('key')
+                 content = m.group('data')
+                 continue
+ 
+             m = multi.match(line)
+             if m:
+                 if curkey:
+                     self[curkey] = content
+ 
+                 if not wanted_field(m.group('key')):
+                     curkey = None
+                     continue
+ 
+                 curkey = m.group('key')
+                 content = ""
+                 continue
+ 
+             m = multidata.match(line)
+             if m:
+                 content += '\n' + line # XXX not m.group('data')?
+                 continue
+ 
+         if curkey:
+             self[curkey] = content
+ 
+ 
  class DBSource(ORMObject):
      def __init__(self, source = None, version = None, maintainer = None, \
          changedby = None, poolfile = None, install_date = None):
@@@ -2179,6 -2259,10 +2271,10 @@@
          self.poolfile = poolfile
          self.install_date = install_date
  
+     @property
+     def pkid(self):
+         return self.source_id
+ 
      def properties(self):
          return ['source', 'source_id', 'maintainer', 'changedby', \
              'fingerprint', 'poolfile', 'version', 'suites_count', \
@@@ -2188,27 -2272,19 +2284,38 @@@
          return ['source', 'version', 'install_date', 'maintainer', \
              'changedby', 'poolfile', 'install_date']
  
+     def read_control_fields(self):
+         '''
+         Reads the control information from a dsc.
+ 
+         @rtype: dict
+         @return: fields is the dsc information in a dictionary form
+         '''
+         fullpath = self.poolfile.fullpath
+         fields = Dak822(open(self.poolfile.fullpath, 'r'))
+         return fields
+     metadata = association_proxy('key', 'value')
  
 +    def scan_contents(self):
 +        '''
 +        Returns a set of names for non-directories. The path names are
 +        normalized after converting them from either utf-8 or iso8859-1
 +        encoding.
 +        '''
 +        fullpath = self.poolfile.fullpath
 +        from daklib.contents import UnpackedSource
 +        unpacked = UnpackedSource(fullpath)
 +        fileset = set()
 +        for name in unpacked.get_all_filenames():
 +            # enforce proper utf-8 encoding
 +            try:
 +                name.decode('utf-8')
 +            except UnicodeDecodeError:
 +                name = name.decode('iso8859-1').encode('utf-8')
 +            fileset.add(name)
 +        return fileset
 +
  __all__.append('DBSource')
  
  @session_wrapper
@@@ -2352,6 -2428,34 +2459,34 @@@ def get_source_in_suite(source, suite,
  
  __all__.append('get_source_in_suite')
  
+ @session_wrapper
+ def import_metadata_into_db(obj, session=None):
+     """
+     This routine works on either DBBinary or DBSource objects and imports
+     their metadata into the database
+     """
+     fields = obj.read_control_fields()
+     for k in fields.keys():
+         try:
+             # Try raw ASCII
+             val = str(fields[k])
+         except UnicodeEncodeError:
+             # Fall back to UTF-8
+             try:
+                 val = fields[k].encode('utf-8')
+             except UnicodeEncodeError:
+                 # Finally try iso8859-1
+                 val = fields[k].encode('iso8859-1')
+             # Otherwise we allow the exception to percolate up and we cause
+             # a reject as someone is playing silly buggers
+ 
+         obj.metadata[get_or_set_metadatakey(k, session)] = val
+ 
+     session.commit_or_flush()
+ 
+ __all__.append('import_metadata_into_db')
+ 
+ 
  ################################################################################
  
  @session_wrapper
@@@ -2528,7 -2632,7 +2663,7 @@@ def add_deb_to_db(u, filename, session=
      #     session.rollback()
      #     raise MissingContents, "No contents stored for package %s, and couldn't determine contents of %s" % (bin.package, filename)
  
-     return poolfile
+     return bin, poolfile
  
  __all__.append('add_deb_to_db')
  
@@@ -2595,7 -2699,7 +2730,7 @@@ class Suite(ORMObject)
          'overrides_count']
  
      def not_null_constraints(self):
-         return ['suite_name', 'version']
+         return ['suite_name']
  
      def __eq__(self, val):
          if isinstance(val, str):
@@@ -2851,6 -2955,38 +2986,38 @@@ class MetadataKey(ORMObject)
  
  __all__.append('MetadataKey')
  
+ @session_wrapper
+ def get_or_set_metadatakey(keyname, session=None):
+     """
+     Returns MetadataKey object for given keyname.
+ 
+     If no matching keyname is found, a row is inserted.
+ 
+     @type keyname: string
+     @param keyname: The keyname to add
+ 
+     @type session: SQLAlchemy
+     @param session: Optional SQL session object (a temporary one will be
+     generated if not supplied).  If not passed, a commit will be performed at
+     the end of the function, otherwise the caller is responsible for committing.
+ 
+     @rtype: MetadataKey
+     @return: the metadatakey object for the given keyname
+     """
+ 
+     q = session.query(MetadataKey).filter_by(key=keyname)
+ 
+     try:
+         ret = q.one()
+     except NoResultFound:
+         ret = MetadataKey(keyname)
+         session.add(ret)
+         session.commit_or_flush()
+ 
+     return ret
+ 
+ __all__.append('get_or_set_metadatakey')
+ 
  ################################################################################
  
  class BinaryMetadata(ORMObject):
@@@ -2941,7 -3077,6 +3108,7 @@@ class DBConn(object)
              'source_acl',
              'source_metadata',
              'src_associations',
 +            'src_contents',
              'src_format',
              'src_uploaders',
              'suite',
@@@ -3245,12 -3380,6 +3412,12 @@@
                      backref=backref('contents', lazy='dynamic', cascade='all')),
                  file = self.tbl_bin_contents.c.file))
  
 +        mapper(SrcContents, self.tbl_src_contents,
 +            properties = dict(
 +                source = relation(DBSource,
 +                    backref=backref('contents', lazy='dynamic', cascade='all')),
 +                file = self.tbl_src_contents.c.file))
 +
          mapper(MetadataKey, self.tbl_metadata_keys,
              properties = dict(
                  key_id = self.tbl_metadata_keys.c.key_id,
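
Below is a minimal usage sketch, not part of the commit, of how the helpers touched by this merge could fit together. It assumes a configured dak installation and database; the 'upload' object and the package, file and suite names are placeholders for illustration only.

    from daklib.dbconn import DBConn, add_deb_to_db, get_source_in_suite, \
        import_metadata_into_db

    session = DBConn().session()

    # add_deb_to_db() now returns the DBBinary as well as the pool file, so the
    # caller can import the binary's control fields straight away.
    binary, poolfile = add_deb_to_db(upload, 'hello_2.8-1_amd64.deb', session)
    import_metadata_into_db(binary, session=session)

    # The new DBSource helpers expose the .dsc fields and the shipped file
    # names; the latter are intended to feed the new src_contents table.
    source = get_source_in_suite('hello', 'unstable', session)
    fields = source.read_control_fields()    # Dak822 mapping of .dsc fields
    filenames = source.scan_contents()       # set of utf-8 normalised names

    session.commit()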