X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=daklib%2Fdbconn.py;h=d4caf01c9176d60980c13933090950efa477aab9;hb=64763c8d5153817bf68e07f2fb34f0d1870a1dc0;hp=4c6f38340676670e27b41373e7780aa81b5ce1cf;hpb=93746045d5a3a4943e1e464c77880a7070a31aba;p=dak.git
diff --git a/daklib/dbconn.py b/daklib/dbconn.py
index 4c6f3834..d4caf01c 100755
--- a/daklib/dbconn.py
+++ b/daklib/dbconn.py
@@ -61,6 +61,8 @@ from sqlalchemy import create_engine, Table, MetaData, Column, Integer, desc, \
 from sqlalchemy.orm import sessionmaker, mapper, relation, object_session, \
     backref, MapperExtension, EXT_CONTINUE, object_mapper, clear_mappers
 from sqlalchemy import types as sqltypes
+from sqlalchemy.orm.collections import attribute_mapped_collection
+from sqlalchemy.ext.associationproxy import association_proxy

 # Don't remove this, we re-export the exceptions to scripts which import us
 from sqlalchemy.exc import *
@@ -202,7 +204,9 @@ class ORMObject(object):
                 # list
                 value = len(value)
             elif hasattr(value, 'count'):
-                # query
+                # query (but not during validation)
+                if self.in_validation:
+                    continue
                 value = value.count()
             else:
                 raise KeyError('Do not understand property %s.' % property)
@@ -256,6 +260,8 @@ class ORMObject(object):
     validation_message = \
         "Validation failed because property '%s' must not be empty in object\n%s"

+    in_validation = False
+
     def validate(self):
         '''
         This function validates the not NULL constraints as returned by
@@ -270,8 +276,11 @@ class ORMObject(object):
                 getattr(self, property + '_id') is not None:
                 continue
             if not hasattr(self, property) or getattr(self, property) is None:
-                raise DBUpdateError(self.validation_message % \
-                    (property, str(self)))
+                # str() might lead to races due to a 2nd flush
+                self.in_validation = True
+                message = self.validation_message % (property, str(self))
+                self.in_validation = False
+                raise DBUpdateError(message)

     @classmethod
     @session_wrapper
@@ -490,15 +499,21 @@ class DBBinary(ORMObject):
         self.poolfile = poolfile
         self.binarytype = binarytype

+    @property
+    def pkid(self):
+        return self.binary_id
+
     def properties(self):
         return ['package', 'version', 'maintainer', 'source', 'architecture', \
             'poolfile', 'binarytype', 'fingerprint', 'install_date', \
-            'suites_count', 'binary_id', 'contents_count']
+            'suites_count', 'binary_id', 'contents_count', 'extra_sources']

     def not_null_constraints(self):
         return ['package', 'version', 'maintainer', 'source', 'poolfile', \
             'binarytype']

+    metadata = association_proxy('key', 'value')
+
     def get_component_name(self):
         return self.poolfile.location.component.component_name

@@ -525,6 +540,33 @@ class DBBinary(ORMObject):
         dpkg.stdout.close()
         dpkg.wait()

+    def read_control(self):
+        '''
+        Reads the control information from a binary.
+
+        @rtype: text
+        @return: stanza text of the control section.
+        '''
+        import apt_inst
+        fullpath = self.poolfile.fullpath
+        deb_file = open(fullpath, 'r')
+        stanza = apt_inst.debExtractControl(deb_file)
+        deb_file.close()
+
+        return stanza
+
+    def read_control_fields(self):
+        '''
+        Reads the control information from a binary and returns it
+        as a dictionary.
+
+        @rtype: dict
+        @return: fields of the control section as a dictionary.
+        '''
+        import apt_pkg
+        stanza = self.read_control()
+        return apt_pkg.TagSection(stanza)
+
 __all__.append('DBBinary')
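
The two methods above are thin wrappers around python-apt. A minimal standalone sketch of the same technique, assuming only python-apt is installed; the .deb filename below is illustrative:

    import apt_inst
    import apt_pkg

    def control_fields(path):
        # Pull the raw control stanza out of the .deb's control member ...
        deb_file = open(path, 'r')
        try:
            stanza = apt_inst.debExtractControl(deb_file)
        finally:
            deb_file.close()
        # ... and expose it as a dict-like, case-insensitive mapping.
        return apt_pkg.TagSection(stanza)

    fields = control_fields('hello_2.10_amd64.deb')
    print fields['Package'], fields['Version']
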
 @session_wrapper
@@ -2153,6 +2195,72 @@ __all__.append('get_sections')

 ################################################################################

+class SrcContents(ORMObject):
+    def __init__(self, file = None, source = None):
+        self.file = file
+        self.source = source
+
+    def properties(self):
+        return ['file', 'source']
+
+__all__.append('SrcContents')
+
+################################################################################
+
+from debian.debfile import Deb822
+
+# Temporary Deb822 subclass to fix bugs with : handling; see #597249
+class Dak822(Deb822):
+    def _internal_parser(self, sequence, fields=None):
+        # The key is non-whitespace, non-colon characters before any colon.
+        key_part = r"^(?P<key>[^: \t\n\r\f\v]+)\s*:\s*"
+        single = re.compile(key_part + r"(?P<data>\S.*?)\s*$")
+        multi = re.compile(key_part + r"$")
+        multidata = re.compile(r"^\s(?P<data>.+?)\s*$")
+
+        wanted_field = lambda f: fields is None or f in fields
+
+        if isinstance(sequence, basestring):
+            sequence = sequence.splitlines()
+
+        curkey = None
+        content = ""
+        for line in self.gpg_stripped_paragraph(sequence):
+            m = single.match(line)
+            if m:
+                if curkey:
+                    self[curkey] = content
+
+                if not wanted_field(m.group('key')):
+                    curkey = None
+                    continue
+
+                curkey = m.group('key')
+                content = m.group('data')
+                continue
+
+            m = multi.match(line)
+            if m:
+                if curkey:
+                    self[curkey] = content
+
+                if not wanted_field(m.group('key')):
+                    curkey = None
+                    continue
+
+                curkey = m.group('key')
+                content = ""
+                continue
+
+            m = multidata.match(line)
+            if m:
+                content += '\n' + line # XXX not m.group('data')?
+                continue
+
+        if curkey:
+            self[curkey] = content
+
+
 class DBSource(ORMObject):
     def __init__(self, source = None, version = None, maintainer = None, \
         changedby = None, poolfile = None, install_date = None):
@@ -2163,6 +2271,10 @@ class DBSource(ORMObject):
         self.poolfile = poolfile
         self.install_date = install_date

+    @property
+    def pkid(self):
+        return self.source_id
+
     def properties(self):
         return ['source', 'source_id', 'maintainer', 'changedby', \
             'fingerprint', 'poolfile', 'version', 'suites_count', \
@@ -2172,6 +2284,38 @@ class DBSource(ORMObject):
         return ['source', 'version', 'install_date', 'maintainer', \
             'changedby', 'poolfile', 'install_date']

+    def read_control_fields(self):
+        '''
+        Reads the control information from a dsc.
+
+        @rtype: Dak822
+        @return: fields of the dsc as a dictionary-like object
+        '''
+        fullpath = self.poolfile.fullpath
+        fields = Dak822(open(fullpath, 'r'))
+        return fields
+
+    metadata = association_proxy('key', 'value')
+
+    def scan_contents(self):
+        '''
+        Returns a set of names for non-directories. The path names are
+        normalized after converting them from either utf-8 or iso8859-1
+        encoding.
+        '''
+        fullpath = self.poolfile.fullpath
+        from daklib.contents import UnpackedSource
+        unpacked = UnpackedSource(fullpath)
+        fileset = set()
+        for name in unpacked.get_all_filenames():
+            # enforce proper utf-8 encoding
+            try:
+                name.decode('utf-8')
+            except UnicodeDecodeError:
+                name = name.decode('iso8859-1').encode('utf-8')
+            fileset.add(name)
+        return fileset
+
 __all__.append('DBSource')
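
The try/except in scan_contents() is a small, reusable normalization idiom for byte strings of unknown encoding. As a standalone sketch (the function name is made up for illustration):

    def normalize_to_utf8(name):
        # Keep the byte string if it already decodes as UTF-8;
        # otherwise assume ISO-8859-1 and re-encode to UTF-8.
        try:
            name.decode('utf-8')
        except UnicodeDecodeError:
            name = name.decode('iso8859-1').encode('utf-8')
        return name

    normalize_to_utf8('caf\xe9')   # ISO-8859-1 input becomes 'caf\xc3\xa9'
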
 @session_wrapper
@@ -2315,6 +2459,34 @@ def get_source_in_suite(source, suite, session=None):

 __all__.append('get_source_in_suite')

+@session_wrapper
+def import_metadata_into_db(obj, session=None):
+    """
+    This routine works on either DBBinary or DBSource objects and imports
+    their metadata into the database.
+    """
+    fields = obj.read_control_fields()
+    for k in fields.keys():
+        try:
+            # Try raw ASCII
+            val = str(fields[k])
+        except UnicodeEncodeError:
+            # Fall back to UTF-8
+            try:
+                val = fields[k].encode('utf-8')
+            except UnicodeEncodeError:
+                # Finally try iso8859-1
+                val = fields[k].encode('iso8859-1')
+                # Otherwise we allow the exception to percolate up and we cause
+                # a reject as someone is playing silly buggers
+
+        obj.metadata[get_or_set_metadatakey(k, session)] = val
+
+    session.commit_or_flush()
+
+__all__.append('import_metadata_into_db')
+
+
 ################################################################################

 @session_wrapper
@@ -2465,6 +2637,16 @@ def add_deb_to_db(u, filename, session=None):

         bin.source_id = bin_sources[0].source_id

+        if entry.has_key("built-using"):
+            for srcname, version in entry["built-using"]:
+                exsources = get_sources_from_name(srcname, version, session=session)
+                if len(exsources) != 1:
+                    raise NoSourceFieldError, "Unable to find source package (%s = %s) in Built-Using for %s (%s), %s, file %s, type %s, signed by %s" % \
+                        (srcname, version, bin.package, bin.version, entry["architecture"],
+                         filename, bin.binarytype, u.pkg.changes["fingerprint"])
+
+                bin.extra_sources.append(exsources[0])
+
     # Add and flush object so it has an ID
     session.add(bin)

@@ -2481,7 +2663,7 @@ def add_deb_to_db(u, filename, session=None):
 #        session.rollback()
 #        raise MissingContents, "No contents stored for package %s, and couldn't determine contents of %s" % (bin.package, filename)

-    return poolfile
+    return bin, poolfile

 __all__.append('add_deb_to_db')

@@ -2548,7 +2730,7 @@ class Suite(ORMObject):
             'overrides_count']

     def not_null_constraints(self):
-        return ['suite_name', 'version']
+        return ['suite_name']

     def __eq__(self, val):
         if isinstance(val, str):
@@ -2792,6 +2974,84 @@ __all__.append('UploadBlock')

 ################################################################################

+class MetadataKey(ORMObject):
+    def __init__(self, key = None):
+        self.key = key
+
+    def properties(self):
+        return ['key']
+
+    def not_null_constraints(self):
+        return ['key']
+
+__all__.append('MetadataKey')
+
+@session_wrapper
+def get_or_set_metadatakey(keyname, session=None):
+    """
+    Returns MetadataKey object for given keyname.
+
+    If no matching keyname is found, a row is inserted.
+
+    @type keyname: string
+    @param keyname: The keyname to add
+
+    @type session: SQLAlchemy
+    @param session: Optional SQL session object (a temporary one will be
+    generated if not supplied).  If not passed, a commit will be performed at
+    the end of the function, otherwise the caller is responsible for committing.
+
+    @rtype: MetadataKey
+    @return: the metadatakey object for the given keyname
+    """
+
+    q = session.query(MetadataKey).filter_by(key=keyname)
+
+    try:
+        ret = q.one()
+    except NoResultFound:
+        ret = MetadataKey(keyname)
+        session.add(ret)
+        session.commit_or_flush()
+
+    return ret
+
+__all__.append('get_or_set_metadatakey')
+
+################################################################################
+
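
get_or_set_metadatakey() is another instance of dak's get-or-set idiom: query for the row, insert it on NoResultFound. A generic sketch of the pattern assuming a plain SQLAlchemy session (the helper name is illustrative; dak's variant defers commit/flush policy to @session_wrapper via commit_or_flush()):

    from sqlalchemy.orm.exc import NoResultFound

    def get_or_create(session, model, **filters):
        # Look the row up first; only insert when nothing matches.
        try:
            return session.query(model).filter_by(**filters).one()
        except NoResultFound:
            obj = model(**filters)
            session.add(obj)
            session.flush()    # assigns the primary key without committing
            return obj

    # e.g. get_or_create(session, MetadataKey, key='Package')
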
+class BinaryMetadata(ORMObject):
+    def __init__(self, key = None, value = None, binary = None):
+        self.key = key
+        self.value = value
+        self.binary = binary
+
+    def properties(self):
+        return ['binary', 'key', 'value']
+
+    def not_null_constraints(self):
+        return ['value']
+
+__all__.append('BinaryMetadata')
+
+################################################################################
+
+class SourceMetadata(ORMObject):
+    def __init__(self, key = None, value = None, source = None):
+        self.key = key
+        self.value = value
+        self.source = source
+
+    def properties(self):
+        return ['source', 'key', 'value']
+
+    def not_null_constraints(self):
+        return ['value']
+
+__all__.append('SourceMetadata')
+
+################################################################################
+
 class DBConn(object):
     """
     database module init.
@@ -2807,11 +3067,13 @@ class DBConn(object):
         self.__createconn()

     def __setuptables(self):
-        tables_with_primary = (
+        tables = (
             'architecture',
             'archive',
             'bin_associations',
+            'bin_contents',
             'binaries',
+            'binaries_metadata',
             'binary_acl',
             'binary_acl_map',
             'build_queue',
@@ -2823,37 +3085,38 @@ class DBConn(object):
             'changes_pending_binaries',
             'changes_pending_files',
             'changes_pending_source',
+            'changes_pending_files_map',
+            'changes_pending_source_files',
+            'changes_pool_files',
             'dsc_files',
+            'extra_src_references',
             'files',
             'fingerprint',
             'keyrings',
             'keyring_acl_map',
             'location',
             'maintainer',
+            'metadata_keys',
             'new_comments',
+            # TODO: the maintainer column in table override should be removed.
+            'override',
             'override_type',
             'policy_queue',
             'priority',
             'section',
             'source',
             'source_acl',
+            'source_metadata',
             'src_associations',
+            'src_contents',
             'src_format',
             'src_uploaders',
             'suite',
-            'uid',
-            'upload_blocks',
-        )
-
-        tables_no_primary = (
-            'changes_pending_files_map',
-            'changes_pending_source_files',
-            'changes_pool_files',
-            # TODO: the maintainer column in table override should be removed.
-            'override',
             'suite_architectures',
-            'suite_src_formats',
             'suite_build_queue_copy',
+            'suite_src_formats',
+            'uid',
+            'upload_blocks',
         )

         views = (
@@ -2880,28 +3143,11 @@ class DBConn(object):
             'suite_arch_by_name',
         )

-        # Sqlalchemy version 0.5 fails to reflect the SERIAL type
-        # correctly and that is why we have to use a workaround. It can
-        # be removed as soon as we switch to version 0.6.
-        for table_name in tables_with_primary:
+        for table_name in tables:
             table = Table(table_name, self.db_meta, \
-                Column('id', Integer, primary_key = True), \
                 autoload=True, useexisting=True)
             setattr(self, 'tbl_%s' % table_name, table)

-        for table_name in tables_no_primary:
-            table = Table(table_name, self.db_meta, autoload=True)
-            setattr(self, 'tbl_%s' % table_name, table)
-
-        # bin_contents needs special attention until the SERIAL type is
-        # correctly detected and the workaround has been removed; see comment
-        # above
-        self.tbl_bin_contents = Table('bin_contents', self.db_meta, \
-            Column('file', Text, primary_key = True),
-            Column('binary_id', Integer, ForeignKey('binaries.id'), \
-                primary_key = True),
-            autoload=True, useexisting=True)
-
         for view_name in views:
             view = Table(view_name, self.db_meta, autoload=True)
             setattr(self, 'view_%s' % view_name, view)
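
The simplified loop above relies on SQLAlchemy 0.6 reflecting PostgreSQL SERIAL primary keys correctly, which is what made the old Column('id', ...) workaround removable. A minimal reflection sketch in the 0.6-era API; the database name here is an assumption for illustration:

    from sqlalchemy import MetaData, Table, create_engine

    engine = create_engine('postgresql:///projectb')
    meta = MetaData(engine)
    # autoload=True reads the column and primary-key definitions from
    # the live database instead of declaring them in Python.
    suite = Table('suite', meta, autoload=True)
    print suite.primary_key
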
@@ -2942,7 +3188,11 @@ class DBConn(object):
                 fingerprint = relation(Fingerprint),
                 install_date = self.tbl_binaries.c.install_date,
                 suites = relation(Suite, secondary=self.tbl_bin_associations,
-                    backref=backref('binaries', lazy='dynamic'))),
+                    backref=backref('binaries', lazy='dynamic')),
+                extra_sources = relation(DBSource, secondary=self.tbl_extra_src_references,
+                    backref=backref('extra_binary_references', lazy='dynamic')),
+                key = relation(BinaryMetadata, cascade='all',
+                    collection_class=attribute_mapped_collection('key'))),
             extension = validator)

         mapper(BinaryACL, self.tbl_binary_acl,
@@ -3112,7 +3362,9 @@ class DBConn(object):
                     primaryjoin=(self.tbl_source.c.id==self.tbl_dsc_files.c.source)),
                 suites = relation(Suite, secondary=self.tbl_src_associations,
                     backref=backref('sources', lazy='dynamic')),
-                srcuploaders = relation(SrcUploader)),
+                srcuploaders = relation(SrcUploader),
+                key = relation(SourceMetadata, cascade='all',
+                    collection_class=attribute_mapped_collection('key'))),
             extension = validator)

         mapper(SourceACL, self.tbl_source_acl,
@@ -3160,22 +3412,49 @@ class DBConn(object):
                     backref=backref('contents', lazy='dynamic', cascade='all')),
                 file = self.tbl_bin_contents.c.file))

+        mapper(SrcContents, self.tbl_src_contents,
+            properties = dict(
+                source = relation(DBSource,
+                    backref=backref('contents', lazy='dynamic', cascade='all')),
+                file = self.tbl_src_contents.c.file))
+
+        mapper(MetadataKey, self.tbl_metadata_keys,
+            properties = dict(
+                key_id = self.tbl_metadata_keys.c.key_id,
+                key = self.tbl_metadata_keys.c.key))
+
+        mapper(BinaryMetadata, self.tbl_binaries_metadata,
+            properties = dict(
+                binary_id = self.tbl_binaries_metadata.c.bin_id,
+                binary = relation(DBBinary),
+                key_id = self.tbl_binaries_metadata.c.key_id,
+                key = relation(MetadataKey),
+                value = self.tbl_binaries_metadata.c.value))
+
+        mapper(SourceMetadata, self.tbl_source_metadata,
+            properties = dict(
+                source_id = self.tbl_source_metadata.c.src_id,
+                source = relation(DBSource),
+                key_id = self.tbl_source_metadata.c.key_id,
+                key = relation(MetadataKey),
+                value = self.tbl_source_metadata.c.value))
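
The key relations with attribute_mapped_collection, together with the metadata = association_proxy('key', 'value') declarations on DBBinary and DBSource, are what make obj.metadata behave like a plain dictionary. A self-contained sketch of that pattern on a toy schema, in the same 0.6-era classic-mapper style as this file (table and class names are illustrative, not dak's):

    from sqlalchemy import MetaData, Table, Column, Integer, Text, \
        ForeignKey, create_engine
    from sqlalchemy.orm import mapper, relation, sessionmaker
    from sqlalchemy.orm.collections import attribute_mapped_collection
    from sqlalchemy.ext.associationproxy import association_proxy

    meta = MetaData()
    items = Table('item', meta,
        Column('id', Integer, primary_key=True))
    item_metadata = Table('item_metadata', meta,
        Column('item_id', Integer, ForeignKey('item.id'), primary_key=True),
        Column('key', Text, primary_key=True),
        Column('value', Text))

    class ItemMetadata(object):
        def __init__(self, key=None, value=None):
            self.key = key
            self.value = value

    class Item(object):
        # 'key' maps ItemMetadata rows by their key column; the proxy
        # then collapses the collection to {key: value}.
        metadata = association_proxy('key', 'value')

    mapper(ItemMetadata, item_metadata)
    mapper(Item, items, properties = dict(
        key = relation(ItemMetadata,
            collection_class=attribute_mapped_collection('key'))))

    engine = create_engine('sqlite://')
    meta.create_all(engine)
    session = sessionmaker(bind=engine)()
    i = Item()
    i.metadata['Package'] = 'hello'   # creates an ItemMetadata row
    session.add(i)
    session.commit()
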
     ## Connection functions
     def __createconn(self):
         from config import Config
         cnf = Config()
-        if cnf["DB::Service"]:
+        if cnf.has_key("DB::Service"):
             connstr = "postgresql://service=%s" % cnf["DB::Service"]
-        elif cnf["DB::Host"]:
+        elif cnf.has_key("DB::Host"):
             # TCP/IP
             connstr = "postgresql://%s" % cnf["DB::Host"]
-            if cnf["DB::Port"] and cnf["DB::Port"] != "-1":
+            if cnf.has_key("DB::Port") and cnf["DB::Port"] != "-1":
                 connstr += ":%s" % cnf["DB::Port"]
             connstr += "/%s" % cnf["DB::Name"]
         else:
             # Unix Socket
             connstr = "postgresql:///%s" % cnf["DB::Name"]
-            if cnf["DB::Port"] and cnf["DB::Port"] != "-1":
+            if cnf.has_key("DB::Port") and cnf["DB::Port"] != "-1":
                 connstr += "?port=%s" % cnf["DB::Port"]

         engine_args = { 'echo': self.debug }
@@ -3210,19 +3489,15 @@ class DBConn(object):

         self.__setuptables()
         self.__setupmappers()
+        self.pid = os.getpid()

     def session(self):
+        # reinitialize DBConn in new processes
+        if self.pid != os.getpid():
+            clear_mappers()
+            self.__createconn()
         return self.db_smaker()

-    def reset(self):
-        '''
-        Resets the DBConn object. This function must be called by subprocesses
-        created by the multiprocessing module. See tests/dbtest_multiproc.py
-        for an example.
-        '''
-        clear_mappers()
-        self.__createconn()
-
 __all__.append('DBConn')
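
The pid check in session() replaces the old explicit reset(): a child forked via the multiprocessing module must not reuse the parent's PostgreSQL connection, so the connection is rebuilt lazily on first use in each new process. The same guard as a generic, self-contained sketch (names are illustrative):

    import os

    class ForkSafeResource(object):
        '''Rebuild a per-process resource lazily after fork().'''

        def __init__(self, factory):
            self.factory = factory
            self._connect()

        def _connect(self):
            self.pid = os.getpid()          # remember the owning process
            self.resource = self.factory()  # e.g. engine + sessionmaker

        def get(self):
            # A forked child sees a different pid and gets a fresh
            # resource instead of sharing the parent's socket.
            if self.pid != os.getpid():
                self._connect()
            return self.resource
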