X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=daklib%2Fdbconn.py;h=979256e0c5c0969c24d94fd1c60df57f40baabfb;hb=e6a0eee330810ba5bdb752f67f7dc6cc2b623184;hp=e1e21d2b2b1f10a386dd42c50460287d7e6c8f4d;hpb=221c511735a22974ac5ee9e1d2d5d52bf29e263e;p=dak.git diff --git a/daklib/dbconn.py b/daklib/dbconn.py index e1e21d2b..979256e0 100755 --- a/daklib/dbconn.py +++ b/daklib/dbconn.py @@ -34,6 +34,7 @@ ################################################################################ import os +from os.path import normpath import re import psycopg2 import traceback @@ -49,13 +50,16 @@ except: from datetime import datetime, timedelta from errno import ENOENT from tempfile import mkstemp, mkdtemp +from subprocess import Popen, PIPE +from tarfile import TarFile from inspect import getargspec import sqlalchemy -from sqlalchemy import create_engine, Table, MetaData, Column, Integer +from sqlalchemy import create_engine, Table, MetaData, Column, Integer, desc, \ + Text, ForeignKey from sqlalchemy.orm import sessionmaker, mapper, relation, object_session, \ - backref, MapperExtension, EXT_CONTINUE + backref, MapperExtension, EXT_CONTINUE, object_mapper, clear_mappers from sqlalchemy import types as sqltypes # Don't remove this, we re-export the exceptions to scripts which import us @@ -73,9 +77,6 @@ import warnings warnings.filterwarnings('ignore', \ "The SQLAlchemy PostgreSQL dialect has been renamed from 'postgres' to 'postgresql'.*", \ SADeprecationWarning) -# TODO: sqlalchemy needs some extra configuration to correctly reflect -# the ind_deb_contents_* indexes - we ignore the warnings at the moment -warnings.filterwarnings("ignore", 'Predicate of partial index', SAWarning) ################################################################################ @@ -287,6 +288,50 @@ class ORMObject(object): ''' return session.query(cls).get(primary_key) + def session(self, replace = False): + ''' + Returns the current session that is associated with the object. May + return None is object is in detached state. + ''' + + return object_session(self) + + def clone(self, session = None): + ''' + Clones the current object in a new session and returns the new clone. A + fresh session is created if the optional session parameter is not + provided. The function will fail if a session is provided and has + unflushed changes. + + RATIONALE: SQLAlchemy's session is not thread safe. This method clones + an existing object to allow several threads to work with their own + instances of an ORMObject. + + WARNING: Only persistent (committed) objects can be cloned. Changes + made to the original object that are not committed yet will get lost. + The session of the new object will always be rolled back to avoid + ressource leaks. + ''' + + if self.session() is None: + raise RuntimeError( \ + 'Method clone() failed for detached object:\n%s' % self) + self.session().flush() + mapper = object_mapper(self) + primary_key = mapper.primary_key_from_instance(self) + object_class = self.__class__ + if session is None: + session = DBConn().session() + elif len(session.new) + len(session.dirty) + len(session.deleted) > 0: + raise RuntimeError( \ + 'Method clone() failed due to unflushed changes in session.') + new_object = session.query(object_class).get(primary_key) + session.rollback() + if new_object is None: + raise RuntimeError( \ + 'Method clone() failed for non-persistent object:\n%s' % self) + return new_object + __all__.append('ORMObject') ################################################################################ @@ -421,12 +466,13 @@ __all__.append('get_archive') ################################################################################ -class BinContents(object): - def __init__(self, *args, **kwargs): - pass +class BinContents(ORMObject): + def __init__(self, file = None, binary = None): + self.file = file + self.binary = binary - def __repr__(self): - return '' % (self.binary, self.filename) + def properties(self): + return ['file', 'binary'] __all__.append('BinContents') @@ -447,12 +493,38 @@ class DBBinary(ORMObject): def properties(self): return ['package', 'version', 'maintainer', 'source', 'architecture', \ 'poolfile', 'binarytype', 'fingerprint', 'install_date', \ - 'suites_count', 'binary_id'] + 'suites_count', 'binary_id', 'contents_count'] def not_null_constraints(self): return ['package', 'version', 'maintainer', 'source', 'poolfile', \ 'binarytype'] + def get_component_name(self): + return self.poolfile.location.component.component_name + + def scan_contents(self): + ''' + Yields the contents of the package. Only regular files are yielded and + the path names are normalized after converting them from either utf-8 + or iso8859-1 encoding. It yields the string ' ' if the + package does not contain any regular file. + ''' + fullpath = self.poolfile.fullpath + dpkg = Popen(['dpkg-deb', '--fsys-tarfile', fullpath], stdout = PIPE) + tar = TarFile.open(fileobj = dpkg.stdout, mode = 'r|') + for member in tar.getmembers(): + if not member.isdir(): + name = normpath(member.name) + # enforce proper utf-8 encoding + try: + name.decode('utf-8') + except UnicodeDecodeError: + name = name.decode('iso8859-1').encode('utf-8') + yield name + tar.close() + dpkg.stdout.close() + dpkg.wait() + __all__.append('DBBinary') @session_wrapper @@ -472,41 +544,37 @@ def get_suites_binary_in(package, session=None): __all__.append('get_suites_binary_in') @session_wrapper -def get_binary_from_name_suite(package, suitename, session=None): - ### For dak examine-package - ### XXX: Doesn't use object API yet - - sql = """SELECT DISTINCT(b.package), b.version, c.name, su.suite_name - FROM binaries b, files fi, location l, component c, bin_associations ba, suite su - WHERE b.package='%(package)s' - AND b.file = fi.id - AND fi.location = l.id - AND l.component = c.id - AND ba.bin=b.id - AND ba.suite = su.id - AND su.suite_name %(suitename)s - ORDER BY b.version DESC""" +def get_component_by_package_suite(package, suite_list, arch_list=[], session=None): + ''' + Returns the component name of the newest binary package in suite_list or + None if no package is found. The result can be optionally filtered by a list + of architecture names. - return session.execute(sql % {'package': package, 'suitename': suitename}) + @type package: str + @param package: DBBinary package name to search for -__all__.append('get_binary_from_name_suite') + @type suite_list: list of str + @param suite_list: list of suite_name items -@session_wrapper -def get_binary_components(package, suitename, arch, session=None): - # Check for packages that have moved from one component to another - query = """SELECT c.name FROM binaries b, bin_associations ba, suite s, location l, component c, architecture a, files f - WHERE b.package=:package AND s.suite_name=:suitename - AND (a.arch_string = :arch OR a.arch_string = 'all') - AND ba.bin = b.id AND ba.suite = s.id AND b.architecture = a.id - AND f.location = l.id - AND l.component = c.id - AND b.file = f.id""" + @type arch_list: list of str + @param arch_list: optional list of arch_string items that defaults to [] - vals = {'package': package, 'suitename': suitename, 'arch': arch} + @rtype: str or NoneType + @return: name of component or None + ''' - return session.execute(query, vals) + q = session.query(DBBinary).filter_by(package = package). \ + join(DBBinary.suites).filter(Suite.suite_name.in_(suite_list)) + if len(arch_list) > 0: + q = q.join(DBBinary.architecture). \ + filter(Architecture.arch_string.in_(arch_list)) + binary = q.order_by(desc(DBBinary.version)).first() + if binary is None: + return None + else: + return binary.get_component_name() -__all__.append('get_binary_components') +__all__.append('get_component_by_package_suite') ################################################################################ @@ -866,8 +934,8 @@ class Component(ORMObject): return NotImplemented def properties(self): - return ['component_name', 'component_id', 'description', 'location', \ - 'meets_dfsg'] + return ['component_name', 'component_id', 'description', \ + 'location_count', 'meets_dfsg', 'overrides_count'] def not_null_constraints(self): return ['component_name'] @@ -1162,8 +1230,8 @@ class PoolFile(ORMObject): def fullpath(self): return os.path.join(self.location.path, self.filename) - def is_valid(self, filesize = -1, md5sum = None):\ - return self.filesize == filesize and self.md5sum == md5sum + def is_valid(self, filesize = -1, md5sum = None): + return self.filesize == long(filesize) and self.md5sum == md5sum def properties(self): return ['filename', 'file_id', 'filesize', 'md5sum', 'sha1sum', \ @@ -1576,8 +1644,6 @@ __all__.append('get_dbchange') ################################################################################ -# TODO: Why do we have a separate Location class? Can't it be fully integrated -# into class Component? class Location(ORMObject): def __init__(self, path = None, component = None): self.path = path @@ -1586,7 +1652,8 @@ class Location(ORMObject): self.archive_type = 'pool' def properties(self): - return ['path', 'archive_type', 'component', 'files_count'] + return ['path', 'location_id', 'archive_type', 'component', \ + 'files_count'] def not_null_constraints(self): return ['path', 'archive_type'] @@ -1770,12 +1837,22 @@ __all__.append('get_new_comments') ################################################################################ -class Override(object): - def __init__(self, *args, **kwargs): - pass +class Override(ORMObject): + def __init__(self, package = None, suite = None, component = None, overridetype = None, \ + section = None, priority = None): + self.package = package + self.suite = suite + self.component = component + self.overridetype = overridetype + self.section = section + self.priority = priority - def __repr__(self): - return '' % (self.package, self.suite_id) + def properties(self): + return ['package', 'suite', 'component', 'overridetype', 'section', \ + 'priority'] + + def not_null_constraints(self): + return ['package', 'suite', 'component', 'overridetype', 'section'] __all__.append('Override') @@ -1829,12 +1906,15 @@ __all__.append('get_override') ################################################################################ -class OverrideType(object): - def __init__(self, *args, **kwargs): - pass +class OverrideType(ORMObject): + def __init__(self, overridetype = None): + self.overridetype = overridetype - def __repr__(self): - return '' % self.overridetype + def properties(self): + return ['overridetype', 'overridetype_id', 'overrides_count'] + + def not_null_constraints(self): + return ['overridetype'] __all__.append('OverrideType') @@ -1865,111 +1945,6 @@ __all__.append('get_override_type') ################################################################################ -class DebContents(object): - def __init__(self, *args, **kwargs): - pass - - def __repr__(self): - return '' % (self.package.package,self.file) - -__all__.append('DebContents') - - -class UdebContents(object): - def __init__(self, *args, **kwargs): - pass - - def __repr__(self): - return '' % (self.package.package,self.file) - -__all__.append('UdebContents') - -class PendingBinContents(object): - def __init__(self, *args, **kwargs): - pass - - def __repr__(self): - return '' % self.contents_id - -__all__.append('PendingBinContents') - -def insert_pending_content_paths(package, - is_udeb, - fullpaths, - session=None): - """ - Make sure given paths are temporarily associated with given - package - - @type package: dict - @param package: the package to associate with should have been read in from the binary control file - @type fullpaths: list - @param fullpaths: the list of paths of the file being associated with the binary - @type session: SQLAlchemy session - @param session: Optional SQLAlchemy session. If this is passed, the caller - is responsible for ensuring a transaction has begun and committing the - results or rolling back based on the result code. If not passed, a commit - will be performed at the end of the function - - @return: True upon success, False if there is a problem - """ - - privatetrans = False - - if session is None: - session = DBConn().session() - privatetrans = True - - try: - arch = get_architecture(package['Architecture'], session) - arch_id = arch.arch_id - - # Remove any already existing recorded files for this package - q = session.query(PendingBinContents) - q = q.filter_by(package=package['Package']) - q = q.filter_by(version=package['Version']) - q = q.filter_by(architecture=arch_id) - q.delete() - - for fullpath in fullpaths: - - if fullpath.startswith( "./" ): - fullpath = fullpath[2:] - - pca = PendingBinContents() - pca.package = package['Package'] - pca.version = package['Version'] - pca.file = fullpath - pca.architecture = arch_id - - if isudeb: - pca.type = 8 # gross - else: - pca.type = 7 # also gross - session.add(pca) - - # Only commit if we set up the session ourself - if privatetrans: - session.commit() - session.close() - else: - session.flush() - - return True - except Exception, e: - traceback.print_exc() - - # Only rollback if we set up the session ourself - if privatetrans: - session.rollback() - session.close() - - return False - -__all__.append('insert_pending_content_paths') - -################################################################################ - class PolicyQueue(object): def __init__(self, *args, **kwargs): pass @@ -2031,9 +2006,16 @@ __all__.append('get_policy_queue_from_path') ################################################################################ -class Priority(object): - def __init__(self, *args, **kwargs): - pass +class Priority(ORMObject): + def __init__(self, priority = None, level = None): + self.priority = priority + self.level = level + + def properties(self): + return ['priority', 'priority_id', 'level', 'overrides_count'] + + def not_null_constraints(self): + return ['priority', 'level'] def __eq__(self, val): if isinstance(val, str): @@ -2047,9 +2029,6 @@ class Priority(object): # This signals to use the normal comparison operator return NotImplemented - def __repr__(self): - return '' % (self.priority, self.priority_id) - __all__.append('Priority') @session_wrapper @@ -2101,9 +2080,15 @@ __all__.append('get_priorities') ################################################################################ -class Section(object): - def __init__(self, *args, **kwargs): - pass +class Section(ORMObject): + def __init__(self, section = None): + self.section = section + + def properties(self): + return ['section', 'section_id', 'overrides_count'] + + def not_null_constraints(self): + return ['section'] def __eq__(self, val): if isinstance(val, str): @@ -2117,9 +2102,6 @@ class Section(object): # This signals to use the normal comparison operator return NotImplemented - def __repr__(self): - return '
' % self.section - __all__.append('Section') @session_wrapper @@ -2237,9 +2219,9 @@ def source_exists(source, source_version, suites = ["any"], session=None): maps = [ (x[1], x[2]) for x in maps if x[0] == "map" or x[0] == "silent-map" ] s = [suite] - for x in maps: - if x[1] in s and x[0] not in s: - s.append(x[0]) + for (from_, to) in maps: + if from_ in s and to not in s: + s.append(to) q = q.filter(DBSource.suites.any(Suite.suite_name.in_(s))) @@ -2562,7 +2544,8 @@ class Suite(ORMObject): self.version = version def properties(self): - return ['suite_name', 'version', 'sources_count', 'binaries_count'] + return ['suite_name', 'version', 'sources_count', 'binaries_count', \ + 'overrides_count'] def not_null_constraints(self): return ['suite_name', 'version'] @@ -2832,7 +2815,9 @@ class DBConn(object): 'binary_acl', 'binary_acl_map', 'build_queue', + 'build_queue_files', 'changelogs_text', + 'changes', 'component', 'config', 'changes_pending_binaries', @@ -2847,7 +2832,6 @@ class DBConn(object): 'maintainer', 'new_comments', 'override_type', - 'pending_bin_contents', 'policy_queue', 'priority', 'section', @@ -2859,27 +2843,17 @@ class DBConn(object): 'suite', 'uid', 'upload_blocks', - # The following tables have primary keys but sqlalchemy - # version 0.5 fails to reflect them correctly with database - # versions before upgrade #41. - #'changes', - #'build_queue_files', ) tables_no_primary = ( - 'bin_contents', 'changes_pending_files_map', 'changes_pending_source_files', 'changes_pool_files', - 'deb_contents', + # TODO: the maintainer column in table override should be removed. 'override', 'suite_architectures', 'suite_src_formats', 'suite_build_queue_copy', - 'udeb_contents', - # see the comment above - 'changes', - 'build_queue_files', ) views = ( @@ -2919,6 +2893,15 @@ class DBConn(object): table = Table(table_name, self.db_meta, autoload=True) setattr(self, 'tbl_%s' % table_name, table) + # bin_contents needs special attention until the SERIAL type is + # correctly detected and the workaround has been removed; see comment + # above + self.tbl_bin_contents = Table('bin_contents', self.db_meta, \ + Column('file', Text, primary_key = True), + Column('binary_id', Integer, ForeignKey('binaries.id'), \ + primary_key = True), + autoload=True, useexisting=True) + for view_name in views: view = Table(view_name, self.db_meta, autoload=True) setattr(self, 'view_%s' % view_name, view) @@ -2935,30 +2918,6 @@ class DBConn(object): properties = dict(archive_id = self.tbl_archive.c.id, archive_name = self.tbl_archive.c.name)) - mapper(PendingBinContents, self.tbl_pending_bin_contents, - properties = dict(contents_id =self.tbl_pending_bin_contents.c.id, - filename = self.tbl_pending_bin_contents.c.filename, - package = self.tbl_pending_bin_contents.c.package, - version = self.tbl_pending_bin_contents.c.version, - arch = self.tbl_pending_bin_contents.c.arch, - otype = self.tbl_pending_bin_contents.c.type)) - - mapper(DebContents, self.tbl_deb_contents, - properties = dict(binary_id=self.tbl_deb_contents.c.binary_id, - package=self.tbl_deb_contents.c.package, - suite=self.tbl_deb_contents.c.suite, - arch=self.tbl_deb_contents.c.arch, - section=self.tbl_deb_contents.c.section, - filename=self.tbl_deb_contents.c.filename)) - - mapper(UdebContents, self.tbl_udeb_contents, - properties = dict(binary_id=self.tbl_udeb_contents.c.binary_id, - package=self.tbl_udeb_contents.c.package, - suite=self.tbl_udeb_contents.c.suite, - arch=self.tbl_udeb_contents.c.arch, - section=self.tbl_udeb_contents.c.section, - filename=self.tbl_udeb_contents.c.filename)) - mapper(BuildQueue, self.tbl_build_queue, properties = dict(queue_id = self.tbl_build_queue.c.id)) @@ -3089,8 +3048,7 @@ class DBConn(object): mapper(Location, self.tbl_location, properties = dict(location_id = self.tbl_location.c.id, component_id = self.tbl_location.c.component, - component = relation(Component, \ - backref=backref('location', uselist = False)), + component = relation(Component, backref='location'), archive_id = self.tbl_location.c.archive, archive = relation(Archive), # FIXME: the 'type' column is old cruft and @@ -3111,16 +3069,21 @@ class DBConn(object): mapper(Override, self.tbl_override, properties = dict(suite_id = self.tbl_override.c.suite, - suite = relation(Suite), + suite = relation(Suite, \ + backref=backref('overrides', lazy='dynamic')), package = self.tbl_override.c.package, component_id = self.tbl_override.c.component, - component = relation(Component), + component = relation(Component, \ + backref=backref('overrides', lazy='dynamic')), priority_id = self.tbl_override.c.priority, - priority = relation(Priority), + priority = relation(Priority, \ + backref=backref('overrides', lazy='dynamic')), section_id = self.tbl_override.c.section, - section = relation(Section), + section = relation(Section, \ + backref=backref('overrides', lazy='dynamic')), overridetype_id = self.tbl_override.c.type, - overridetype = relation(OverrideType))) + overridetype = relation(OverrideType, \ + backref=backref('overrides', lazy='dynamic')))) mapper(OverrideType, self.tbl_override_type, properties = dict(overridetype = self.tbl_override_type.c.type, @@ -3191,23 +3154,54 @@ class DBConn(object): fingerprint = relation(Fingerprint, backref="uploadblocks"), uid = relation(Uid, backref="uploadblocks"))) + mapper(BinContents, self.tbl_bin_contents, + properties = dict( + binary = relation(DBBinary, + backref=backref('contents', lazy='dynamic', cascade='all')), + file = self.tbl_bin_contents.c.file)) + ## Connection functions def __createconn(self): from config import Config cnf = Config() - if cnf["DB::Host"]: + if cnf.has_key("DB::Service"): + connstr = "postgresql://service=%s" % cnf["DB::Service"] + elif cnf.has_key("DB::Host"): # TCP/IP - connstr = "postgres://%s" % cnf["DB::Host"] - if cnf["DB::Port"] and cnf["DB::Port"] != "-1": + connstr = "postgresql://%s" % cnf["DB::Host"] + if cnf.has_key("DB::Port") and cnf["DB::Port"] != "-1": connstr += ":%s" % cnf["DB::Port"] connstr += "/%s" % cnf["DB::Name"] else: # Unix Socket - connstr = "postgres:///%s" % cnf["DB::Name"] - if cnf["DB::Port"] and cnf["DB::Port"] != "-1": + connstr = "postgresql:///%s" % cnf["DB::Name"] + if cnf.has_key("DB::Port") and cnf["DB::Port"] != "-1": connstr += "?port=%s" % cnf["DB::Port"] - self.db_pg = create_engine(connstr, echo=self.debug) + engine_args = { 'echo': self.debug } + if cnf.has_key('DB::PoolSize'): + engine_args['pool_size'] = int(cnf['DB::PoolSize']) + if cnf.has_key('DB::MaxOverflow'): + engine_args['max_overflow'] = int(cnf['DB::MaxOverflow']) + if sa_major_version == '0.6' and cnf.has_key('DB::Unicode') and \ + cnf['DB::Unicode'] == 'false': + engine_args['use_native_unicode'] = False + + # Monkey patch a new dialect in in order to support service= syntax + import sqlalchemy.dialects.postgresql + from sqlalchemy.dialects.postgresql.psycopg2 import PGDialect_psycopg2 + class PGDialect_psycopg2_dak(PGDialect_psycopg2): + def create_connect_args(self, url): + if str(url).startswith('postgresql://service='): + # Eww + servicename = str(url)[21:] + return (['service=%s' % servicename], {}) + else: + return PGDialect_psycopg2.create_connect_args(self, url) + + sqlalchemy.dialects.postgresql.base.dialect = PGDialect_psycopg2_dak + + self.db_pg = create_engine(connstr, **engine_args) self.db_meta = MetaData() self.db_meta.bind = self.db_pg self.db_smaker = sessionmaker(bind=self.db_pg, @@ -3220,6 +3214,15 @@ class DBConn(object): def session(self): return self.db_smaker() + def reset(self): + ''' + Resets the DBConn object. This function must be called by subprocesses + created by the multiprocessing module. See tests/dbtest_multiproc.py + for an example. + ''' + clear_mappers() + self.__createconn() + __all__.append('DBConn')