X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=daklib%2Fdbconn.py;h=d4caf01c9176d60980c13933090950efa477aab9;hb=64763c8d5153817bf68e07f2fb34f0d1870a1dc0;hp=24f8fb17d9620f99570db40e203ac21381ec142c;hpb=4db9c79b71600a48f5323bd75a0ef4f894f02559;p=dak.git diff --git a/daklib/dbconn.py b/daklib/dbconn.py index 24f8fb17..d4caf01c 100755 --- a/daklib/dbconn.py +++ b/daklib/dbconn.py @@ -59,8 +59,10 @@ import sqlalchemy from sqlalchemy import create_engine, Table, MetaData, Column, Integer, desc, \ Text, ForeignKey from sqlalchemy.orm import sessionmaker, mapper, relation, object_session, \ - backref, MapperExtension, EXT_CONTINUE, object_mapper + backref, MapperExtension, EXT_CONTINUE, object_mapper, clear_mappers from sqlalchemy import types as sqltypes +from sqlalchemy.orm.collections import attribute_mapped_collection +from sqlalchemy.ext.associationproxy import association_proxy # Don't remove this, we re-export the exceptions to scripts which import us from sqlalchemy.exc import * @@ -77,9 +79,6 @@ import warnings warnings.filterwarnings('ignore', \ "The SQLAlchemy PostgreSQL dialect has been renamed from 'postgres' to 'postgresql'.*", \ SADeprecationWarning) -# TODO: sqlalchemy needs some extra configuration to correctly reflect -# the ind_deb_contents_* indexes - we ignore the warnings at the moment -warnings.filterwarnings("ignore", 'Predicate of partial index', SAWarning) ################################################################################ @@ -205,7 +204,9 @@ class ORMObject(object): # list value = len(value) elif hasattr(value, 'count'): - # query + # query (but not during validation) + if self.in_validation: + continue value = value.count() else: raise KeyError('Do not understand property %s.' % property) @@ -259,6 +260,8 @@ class ORMObject(object): validation_message = \ "Validation failed because property '%s' must not be empty in object\n%s" + in_validation = False + def validate(self): ''' This function validates the not NULL constraints as returned by @@ -273,8 +276,11 @@ class ORMObject(object): getattr(self, property + '_id') is not None: continue if not hasattr(self, property) or getattr(self, property) is None: - raise DBUpdateError(self.validation_message % \ - (property, str(self))) + # str() might lead to races due to a 2nd flush + self.in_validation = True + message = self.validation_message % (property, str(self)) + self.in_validation = False + raise DBUpdateError(message) @classmethod @session_wrapper @@ -493,37 +499,73 @@ class DBBinary(ORMObject): self.poolfile = poolfile self.binarytype = binarytype + @property + def pkid(self): + return self.binary_id + def properties(self): return ['package', 'version', 'maintainer', 'source', 'architecture', \ 'poolfile', 'binarytype', 'fingerprint', 'install_date', \ - 'suites_count', 'binary_id', 'contents_count'] + 'suites_count', 'binary_id', 'contents_count', 'extra_sources'] def not_null_constraints(self): return ['package', 'version', 'maintainer', 'source', 'poolfile', \ 'binarytype'] + metadata = association_proxy('key', 'value') + def get_component_name(self): return self.poolfile.location.component.component_name def scan_contents(self): ''' Yields the contents of the package. Only regular files are yielded and - the path names are normalized after converting them from either utf-8 or - iso8859-1 encoding. + the path names are normalized after converting them from either utf-8 + or iso8859-1 encoding. It yields the string ' ' if the + package does not contain any regular file. ''' fullpath = self.poolfile.fullpath - debdata = Popen(['dpkg-deb', '--fsys-tarfile', fullpath], - stdout = PIPE).stdout - tar = TarFile.open(fileobj = debdata, mode = 'r|') + dpkg = Popen(['dpkg-deb', '--fsys-tarfile', fullpath], stdout = PIPE) + tar = TarFile.open(fileobj = dpkg.stdout, mode = 'r|') for member in tar.getmembers(): - if member.isfile(): + if not member.isdir(): + name = normpath(member.name) + # enforce proper utf-8 encoding try: - name = member.name.decode('utf-8') + name.decode('utf-8') except UnicodeDecodeError: - name = member.name.decode('iso8859-1') - yield normpath(name) + name = name.decode('iso8859-1').encode('utf-8') + yield name tar.close() - debdata.close() + dpkg.stdout.close() + dpkg.wait() + + def read_control(self): + ''' + Reads the control information from a binary. + + @rtype: text + @return: stanza text of the control section. + ''' + import apt_inst + fullpath = self.poolfile.fullpath + deb_file = open(fullpath, 'r') + stanza = apt_inst.debExtractControl(deb_file) + deb_file.close() + + return stanza + + def read_control_fields(self): + ''' + Reads the control information from a binary and return + as a dictionary. + + @rtype: dict + @return: fields of the control section as a dictionary. + ''' + import apt_pkg + stanza = self.read_control() + return apt_pkg.TagSection(stanza) __all__.append('DBBinary') @@ -1945,111 +1987,6 @@ __all__.append('get_override_type') ################################################################################ -class DebContents(object): - def __init__(self, *args, **kwargs): - pass - - def __repr__(self): - return '' % (self.package.package,self.file) - -__all__.append('DebContents') - - -class UdebContents(object): - def __init__(self, *args, **kwargs): - pass - - def __repr__(self): - return '' % (self.package.package,self.file) - -__all__.append('UdebContents') - -class PendingBinContents(object): - def __init__(self, *args, **kwargs): - pass - - def __repr__(self): - return '' % self.contents_id - -__all__.append('PendingBinContents') - -def insert_pending_content_paths(package, - is_udeb, - fullpaths, - session=None): - """ - Make sure given paths are temporarily associated with given - package - - @type package: dict - @param package: the package to associate with should have been read in from the binary control file - @type fullpaths: list - @param fullpaths: the list of paths of the file being associated with the binary - @type session: SQLAlchemy session - @param session: Optional SQLAlchemy session. If this is passed, the caller - is responsible for ensuring a transaction has begun and committing the - results or rolling back based on the result code. If not passed, a commit - will be performed at the end of the function - - @return: True upon success, False if there is a problem - """ - - privatetrans = False - - if session is None: - session = DBConn().session() - privatetrans = True - - try: - arch = get_architecture(package['Architecture'], session) - arch_id = arch.arch_id - - # Remove any already existing recorded files for this package - q = session.query(PendingBinContents) - q = q.filter_by(package=package['Package']) - q = q.filter_by(version=package['Version']) - q = q.filter_by(architecture=arch_id) - q.delete() - - for fullpath in fullpaths: - - if fullpath.startswith( "./" ): - fullpath = fullpath[2:] - - pca = PendingBinContents() - pca.package = package['Package'] - pca.version = package['Version'] - pca.file = fullpath - pca.architecture = arch_id - - if isudeb: - pca.type = 8 # gross - else: - pca.type = 7 # also gross - session.add(pca) - - # Only commit if we set up the session ourself - if privatetrans: - session.commit() - session.close() - else: - session.flush() - - return True - except Exception, e: - traceback.print_exc() - - # Only rollback if we set up the session ourself - if privatetrans: - session.rollback() - session.close() - - return False - -__all__.append('insert_pending_content_paths') - -################################################################################ - class PolicyQueue(object): def __init__(self, *args, **kwargs): pass @@ -2258,6 +2195,72 @@ __all__.append('get_sections') ################################################################################ +class SrcContents(ORMObject): + def __init__(self, file = None, source = None): + self.file = file + self.source = source + + def properties(self): + return ['file', 'source'] + +__all__.append('SrcContents') + +################################################################################ + +from debian.debfile import Deb822 + +# Temporary Deb822 subclass to fix bugs with : handling; see #597249 +class Dak822(Deb822): + def _internal_parser(self, sequence, fields=None): + # The key is non-whitespace, non-colon characters before any colon. + key_part = r"^(?P[^: \t\n\r\f\v]+)\s*:\s*" + single = re.compile(key_part + r"(?P\S.*?)\s*$") + multi = re.compile(key_part + r"$") + multidata = re.compile(r"^\s(?P.+?)\s*$") + + wanted_field = lambda f: fields is None or f in fields + + if isinstance(sequence, basestring): + sequence = sequence.splitlines() + + curkey = None + content = "" + for line in self.gpg_stripped_paragraph(sequence): + m = single.match(line) + if m: + if curkey: + self[curkey] = content + + if not wanted_field(m.group('key')): + curkey = None + continue + + curkey = m.group('key') + content = m.group('data') + continue + + m = multi.match(line) + if m: + if curkey: + self[curkey] = content + + if not wanted_field(m.group('key')): + curkey = None + continue + + curkey = m.group('key') + content = "" + continue + + m = multidata.match(line) + if m: + content += '\n' + line # XXX not m.group('data')? + continue + + if curkey: + self[curkey] = content + + class DBSource(ORMObject): def __init__(self, source = None, version = None, maintainer = None, \ changedby = None, poolfile = None, install_date = None): @@ -2268,6 +2271,10 @@ class DBSource(ORMObject): self.poolfile = poolfile self.install_date = install_date + @property + def pkid(self): + return self.source_id + def properties(self): return ['source', 'source_id', 'maintainer', 'changedby', \ 'fingerprint', 'poolfile', 'version', 'suites_count', \ @@ -2277,6 +2284,38 @@ class DBSource(ORMObject): return ['source', 'version', 'install_date', 'maintainer', \ 'changedby', 'poolfile', 'install_date'] + def read_control_fields(self): + ''' + Reads the control information from a dsc + + @rtype: tuple + @return: fields is the dsc information in a dictionary form + ''' + fullpath = self.poolfile.fullpath + fields = Dak822(open(self.poolfile.fullpath, 'r')) + return fields + + metadata = association_proxy('key', 'value') + + def scan_contents(self): + ''' + Returns a set of names for non directories. The path names are + normalized after converting them from either utf-8 or iso8859-1 + encoding. + ''' + fullpath = self.poolfile.fullpath + from daklib.contents import UnpackedSource + unpacked = UnpackedSource(fullpath) + fileset = set() + for name in unpacked.get_all_filenames(): + # enforce proper utf-8 encoding + try: + name.decode('utf-8') + except UnicodeDecodeError: + name = name.decode('iso8859-1').encode('utf-8') + fileset.add(name) + return fileset + __all__.append('DBSource') @session_wrapper @@ -2420,6 +2459,34 @@ def get_source_in_suite(source, suite, session=None): __all__.append('get_source_in_suite') +@session_wrapper +def import_metadata_into_db(obj, session=None): + """ + This routine works on either DBBinary or DBSource objects and imports + their metadata into the database + """ + fields = obj.read_control_fields() + for k in fields.keys(): + try: + # Try raw ASCII + val = str(fields[k]) + except UnicodeEncodeError: + # Fall back to UTF-8 + try: + val = fields[k].encode('utf-8') + except UnicodeEncodeError: + # Finally try iso8859-1 + val = fields[k].encode('iso8859-1') + # Otherwise we allow the exception to percolate up and we cause + # a reject as someone is playing silly buggers + + obj.metadata[get_or_set_metadatakey(k, session)] = val + + session.commit_or_flush() + +__all__.append('import_metadata_into_db') + + ################################################################################ @session_wrapper @@ -2570,6 +2637,16 @@ def add_deb_to_db(u, filename, session=None): bin.source_id = bin_sources[0].source_id + if entry.has_key("built-using"): + for srcname, version in entry["built-using"]: + exsources = get_sources_from_name(srcname, version, session=session) + if len(exsources) != 1: + raise NoSourceFieldError, "Unable to find source package (%s = %s) in Built-Using for %s (%s), %s, file %s, type %s, signed by %s" % \ + (srcname, version, bin.package, bin.version, entry["architecture"], + filename, bin.binarytype, u.pkg.changes["fingerprint"]) + + bin.extra_sources.append(exsources[0]) + # Add and flush object so it has an ID session.add(bin) @@ -2586,7 +2663,7 @@ def add_deb_to_db(u, filename, session=None): # session.rollback() # raise MissingContents, "No contents stored for package %s, and couldn't determine contents of %s" % (bin.package, filename) - return poolfile + return bin, poolfile __all__.append('add_deb_to_db') @@ -2653,7 +2730,7 @@ class Suite(ORMObject): 'overrides_count'] def not_null_constraints(self): - return ['suite_name', 'version'] + return ['suite_name'] def __eq__(self, val): if isinstance(val, str): @@ -2897,6 +2974,84 @@ __all__.append('UploadBlock') ################################################################################ +class MetadataKey(ORMObject): + def __init__(self, key = None): + self.key = key + + def properties(self): + return ['key'] + + def not_null_constraints(self): + return ['key'] + +__all__.append('MetadataKey') + +@session_wrapper +def get_or_set_metadatakey(keyname, session=None): + """ + Returns MetadataKey object for given uidname. + + If no matching keyname is found, a row is inserted. + + @type uidname: string + @param uidname: The keyname to add + + @type session: SQLAlchemy + @param session: Optional SQL session object (a temporary one will be + generated if not supplied). If not passed, a commit will be performed at + the end of the function, otherwise the caller is responsible for commiting. + + @rtype: MetadataKey + @return: the metadatakey object for the given keyname + """ + + q = session.query(MetadataKey).filter_by(key=keyname) + + try: + ret = q.one() + except NoResultFound: + ret = MetadataKey(keyname) + session.add(ret) + session.commit_or_flush() + + return ret + +__all__.append('get_or_set_metadatakey') + +################################################################################ + +class BinaryMetadata(ORMObject): + def __init__(self, key = None, value = None, binary = None): + self.key = key + self.value = value + self.binary = binary + + def properties(self): + return ['binary', 'key', 'value'] + + def not_null_constraints(self): + return ['value'] + +__all__.append('BinaryMetadata') + +################################################################################ + +class SourceMetadata(ORMObject): + def __init__(self, key = None, value = None, source = None): + self.key = key + self.value = value + self.source = source + + def properties(self): + return ['source', 'key', 'value'] + + def not_null_constraints(self): + return ['value'] + +__all__.append('SourceMetadata') + +################################################################################ + class DBConn(object): """ database module init. @@ -2912,11 +3067,13 @@ class DBConn(object): self.__createconn() def __setuptables(self): - tables_with_primary = ( + tables = ( 'architecture', 'archive', 'bin_associations', + 'bin_contents', 'binaries', + 'binaries_metadata', 'binary_acl', 'binary_acl_map', 'build_queue', @@ -2928,40 +3085,38 @@ class DBConn(object): 'changes_pending_binaries', 'changes_pending_files', 'changes_pending_source', + 'changes_pending_files_map', + 'changes_pending_source_files', + 'changes_pool_files', 'dsc_files', + 'extra_src_references', 'files', 'fingerprint', 'keyrings', 'keyring_acl_map', 'location', 'maintainer', + 'metadata_keys', 'new_comments', + # TODO: the maintainer column in table override should be removed. + 'override', 'override_type', - 'pending_bin_contents', 'policy_queue', 'priority', 'section', 'source', 'source_acl', + 'source_metadata', 'src_associations', + 'src_contents', 'src_format', 'src_uploaders', 'suite', - 'uid', - 'upload_blocks', - ) - - tables_no_primary = ( - 'changes_pending_files_map', - 'changes_pending_source_files', - 'changes_pool_files', - 'deb_contents', - # TODO: the maintainer column in table override should be removed. - 'override', 'suite_architectures', - 'suite_src_formats', 'suite_build_queue_copy', - 'udeb_contents', + 'suite_src_formats', + 'uid', + 'upload_blocks', ) views = ( @@ -2988,27 +3143,11 @@ class DBConn(object): 'suite_arch_by_name', ) - # Sqlalchemy version 0.5 fails to reflect the SERIAL type - # correctly and that is why we have to use a workaround. It can - # be removed as soon as we switch to version 0.6. - for table_name in tables_with_primary: + for table_name in tables: table = Table(table_name, self.db_meta, \ - Column('id', Integer, primary_key = True), \ autoload=True, useexisting=True) setattr(self, 'tbl_%s' % table_name, table) - for table_name in tables_no_primary: - table = Table(table_name, self.db_meta, autoload=True) - setattr(self, 'tbl_%s' % table_name, table) - - # bin_contents needs special attention until update #41 has been - # applied - self.tbl_bin_contents = Table('bin_contents', self.db_meta, \ - Column('file', Text, primary_key = True), - Column('binary_id', Integer, ForeignKey('binaries.id'), \ - primary_key = True), - autoload=True, useexisting=True) - for view_name in views: view = Table(view_name, self.db_meta, autoload=True) setattr(self, 'view_%s' % view_name, view) @@ -3025,30 +3164,6 @@ class DBConn(object): properties = dict(archive_id = self.tbl_archive.c.id, archive_name = self.tbl_archive.c.name)) - mapper(PendingBinContents, self.tbl_pending_bin_contents, - properties = dict(contents_id =self.tbl_pending_bin_contents.c.id, - filename = self.tbl_pending_bin_contents.c.filename, - package = self.tbl_pending_bin_contents.c.package, - version = self.tbl_pending_bin_contents.c.version, - arch = self.tbl_pending_bin_contents.c.arch, - otype = self.tbl_pending_bin_contents.c.type)) - - mapper(DebContents, self.tbl_deb_contents, - properties = dict(binary_id=self.tbl_deb_contents.c.binary_id, - package=self.tbl_deb_contents.c.package, - suite=self.tbl_deb_contents.c.suite, - arch=self.tbl_deb_contents.c.arch, - section=self.tbl_deb_contents.c.section, - filename=self.tbl_deb_contents.c.filename)) - - mapper(UdebContents, self.tbl_udeb_contents, - properties = dict(binary_id=self.tbl_udeb_contents.c.binary_id, - package=self.tbl_udeb_contents.c.package, - suite=self.tbl_udeb_contents.c.suite, - arch=self.tbl_udeb_contents.c.arch, - section=self.tbl_udeb_contents.c.section, - filename=self.tbl_udeb_contents.c.filename)) - mapper(BuildQueue, self.tbl_build_queue, properties = dict(queue_id = self.tbl_build_queue.c.id)) @@ -3073,7 +3188,11 @@ class DBConn(object): fingerprint = relation(Fingerprint), install_date = self.tbl_binaries.c.install_date, suites = relation(Suite, secondary=self.tbl_bin_associations, - backref=backref('binaries', lazy='dynamic'))), + backref=backref('binaries', lazy='dynamic')), + extra_sources = relation(DBSource, secondary=self.tbl_extra_src_references, + backref=backref('extra_binary_references', lazy='dynamic')), + key = relation(BinaryMetadata, cascade='all', + collection_class=attribute_mapped_collection('key'))), extension = validator) mapper(BinaryACL, self.tbl_binary_acl, @@ -3243,7 +3362,9 @@ class DBConn(object): primaryjoin=(self.tbl_source.c.id==self.tbl_dsc_files.c.source)), suites = relation(Suite, secondary=self.tbl_src_associations, backref=backref('sources', lazy='dynamic')), - srcuploaders = relation(SrcUploader)), + srcuploaders = relation(SrcUploader), + key = relation(SourceMetadata, cascade='all', + collection_class=attribute_mapped_collection('key'))), extension = validator) mapper(SourceACL, self.tbl_source_acl, @@ -3288,26 +3409,78 @@ class DBConn(object): mapper(BinContents, self.tbl_bin_contents, properties = dict( binary = relation(DBBinary, - backref=backref('contents', lazy='dynamic')), + backref=backref('contents', lazy='dynamic', cascade='all')), file = self.tbl_bin_contents.c.file)) + mapper(SrcContents, self.tbl_src_contents, + properties = dict( + source = relation(DBSource, + backref=backref('contents', lazy='dynamic', cascade='all')), + file = self.tbl_src_contents.c.file)) + + mapper(MetadataKey, self.tbl_metadata_keys, + properties = dict( + key_id = self.tbl_metadata_keys.c.key_id, + key = self.tbl_metadata_keys.c.key)) + + mapper(BinaryMetadata, self.tbl_binaries_metadata, + properties = dict( + binary_id = self.tbl_binaries_metadata.c.bin_id, + binary = relation(DBBinary), + key_id = self.tbl_binaries_metadata.c.key_id, + key = relation(MetadataKey), + value = self.tbl_binaries_metadata.c.value)) + + mapper(SourceMetadata, self.tbl_source_metadata, + properties = dict( + source_id = self.tbl_source_metadata.c.src_id, + source = relation(DBSource), + key_id = self.tbl_source_metadata.c.key_id, + key = relation(MetadataKey), + value = self.tbl_source_metadata.c.value)) + ## Connection functions def __createconn(self): from config import Config cnf = Config() - if cnf["DB::Host"]: + if cnf.has_key("DB::Service"): + connstr = "postgresql://service=%s" % cnf["DB::Service"] + elif cnf.has_key("DB::Host"): # TCP/IP - connstr = "postgres://%s" % cnf["DB::Host"] - if cnf["DB::Port"] and cnf["DB::Port"] != "-1": + connstr = "postgresql://%s" % cnf["DB::Host"] + if cnf.has_key("DB::Port") and cnf["DB::Port"] != "-1": connstr += ":%s" % cnf["DB::Port"] connstr += "/%s" % cnf["DB::Name"] else: # Unix Socket - connstr = "postgres:///%s" % cnf["DB::Name"] - if cnf["DB::Port"] and cnf["DB::Port"] != "-1": + connstr = "postgresql:///%s" % cnf["DB::Name"] + if cnf.has_key("DB::Port") and cnf["DB::Port"] != "-1": connstr += "?port=%s" % cnf["DB::Port"] - self.db_pg = create_engine(connstr, echo=self.debug) + engine_args = { 'echo': self.debug } + if cnf.has_key('DB::PoolSize'): + engine_args['pool_size'] = int(cnf['DB::PoolSize']) + if cnf.has_key('DB::MaxOverflow'): + engine_args['max_overflow'] = int(cnf['DB::MaxOverflow']) + if sa_major_version == '0.6' and cnf.has_key('DB::Unicode') and \ + cnf['DB::Unicode'] == 'false': + engine_args['use_native_unicode'] = False + + # Monkey patch a new dialect in in order to support service= syntax + import sqlalchemy.dialects.postgresql + from sqlalchemy.dialects.postgresql.psycopg2 import PGDialect_psycopg2 + class PGDialect_psycopg2_dak(PGDialect_psycopg2): + def create_connect_args(self, url): + if str(url).startswith('postgresql://service='): + # Eww + servicename = str(url)[21:] + return (['service=%s' % servicename], {}) + else: + return PGDialect_psycopg2.create_connect_args(self, url) + + sqlalchemy.dialects.postgresql.base.dialect = PGDialect_psycopg2_dak + + self.db_pg = create_engine(connstr, **engine_args) self.db_meta = MetaData() self.db_meta.bind = self.db_pg self.db_smaker = sessionmaker(bind=self.db_pg, @@ -3316,8 +3489,13 @@ class DBConn(object): self.__setuptables() self.__setupmappers() + self.pid = os.getpid() def session(self): + # reinitialize DBConn in new processes + if self.pid != os.getpid(): + clear_mappers() + self.__createconn() return self.db_smaker() __all__.append('DBConn')