metadata generation work

diff --git a/daklib/dbconn.py b/daklib/dbconn.py
index 79e0621bbc302680624fa5261eadc2eab5d32e1e..98b6c7d50eb6a23affaa1f3453e485055ccb7e53 100755
--- a/daklib/dbconn.py
+++ b/daklib/dbconn.py
@@ -61,6 +61,8 @@ from sqlalchemy import create_engine, Table, MetaData, Column, Integer, desc, \
 from sqlalchemy.orm import sessionmaker, mapper, relation, object_session, \
     backref, MapperExtension, EXT_CONTINUE, object_mapper, clear_mappers
 from sqlalchemy import types as sqltypes
+from sqlalchemy.orm.collections import attribute_mapped_collection
+from sqlalchemy.ext.associationproxy import association_proxy
 
 # Don't remove this, we re-export the exceptions to scripts which import us
 from sqlalchemy.exc import *
@@ -490,6 +492,10 @@ class DBBinary(ORMObject):
         self.poolfile = poolfile
         self.binarytype = binarytype
 
+    @property
+    def pkid(self):
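+        # Generic alias for the table-specific primary key (binary_id here);
+        # DBSource gains a matching property returning source_id below.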
+        return self.binary_id
+
     def properties(self):
         return ['package', 'version', 'maintainer', 'source', 'architecture', \
             'poolfile', 'binarytype', 'fingerprint', 'install_date', \
@@ -499,6 +505,8 @@ class DBBinary(ORMObject):
         return ['package', 'version', 'maintainer', 'source',  'poolfile', \
             'binarytype']
 
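+    # Dict-like view of this binary's metadata (MetadataKey object -> text value),
+    # proxied through the 'key' relation configured on the DBBinary mapper below.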
+    metadata = association_proxy('key', 'value')
+
     def get_component_name(self):
         return self.poolfile.location.component.component_name
 
@@ -529,20 +537,28 @@ class DBBinary(ORMObject):
         '''
         Reads the control information from a binary.
 
-        @rtype: tuple
-        @return: (stanza, controldict)  stanza is the text of the control
-                 section.  controldict is the information in a dictionary
-                 form
+        @rtype: string
+        @return: the stanza text of the control section.
         '''
-        import apt_inst, apt_pkg
+        import apt_inst
         fullpath = self.poolfile.fullpath
         deb_file = open(fullpath, 'r')
-        stanza = apt_inst.debExtractControl(deb_file).rstrip()
-        control = dict(apt_pkg.TagSection(stanza))
+        stanza = apt_inst.debExtractControl(deb_file)
         deb_file.close()
 
-        return stanza, control
+        return stanza
 
+    def read_control_fields(self):
+        '''
+        Reads the control information from a binary and returns it
+        as a dictionary.
+
+        @rtype: dict
+        @return: the fields of the control section in dictionary form.
+        '''
+        import apt_pkg
+        stanza = self.read_control()
+        return apt_pkg.TagSection(stanza)
 
 __all__.append('DBBinary')
 
@@ -2172,6 +2188,60 @@ __all__.append('get_sections')
 
 ################################################################################
 
+from debian.debfile import Deb822
+
+# Temporary Deb822 subclass to fix bugs with : handling; see #597249
+class Dak822(Deb822):
+    def _internal_parser(self, sequence, fields=None):
+        # The key is non-whitespace, non-colon characters before any colon.
+        key_part = r"^(?P<key>[^: \t\n\r\f\v]+)\s*:\s*"
+        single = re.compile(key_part + r"(?P<data>\S.*?)\s*$")
+        multi = re.compile(key_part + r"$")
+        multidata = re.compile(r"^\s(?P<data>.+?)\s*$")
+
+        wanted_field = lambda f: fields is None or f in fields
+
+        if isinstance(sequence, basestring):
+            sequence = sequence.splitlines()
+
+        curkey = None
+        content = ""
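+        # Walk the stanza line by line: a "Key: value" or bare "Key:" line
+        # finishes the previous field and starts a new one; continuation
+        # lines are appended to the current field's content.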
+        for line in self.gpg_stripped_paragraph(sequence):
+            m = single.match(line)
+            if m:
+                if curkey:
+                    self[curkey] = content
+
+                if not wanted_field(m.group('key')):
+                    curkey = None
+                    continue
+
+                curkey = m.group('key')
+                content = m.group('data')
+                continue
+
+            m = multi.match(line)
+            if m:
+                if curkey:
+                    self[curkey] = content
+
+                if not wanted_field(m.group('key')):
+                    curkey = None
+                    continue
+
+                curkey = m.group('key')
+                content = ""
+                continue
+
+            m = multidata.match(line)
+            if m:
+                content += '\n' + line # XXX not m.group('data')?
+                continue
+
+        if curkey:
+            self[curkey] = content
+
+
 class DBSource(ORMObject):
     def __init__(self, source = None, version = None, maintainer = None, \
         changedby = None, poolfile = None, install_date = None):
@@ -2182,6 +2252,10 @@ class DBSource(ORMObject):
         self.poolfile = poolfile
         self.install_date = install_date
 
+    @property
+    def pkid(self):
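+        # Generic alias for the table-specific primary key (source_id here),
+        # mirroring DBBinary.pkid above.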
+        return self.source_id
+
     def properties(self):
         return ['source', 'source_id', 'maintainer', 'changedby', \
             'fingerprint', 'poolfile', 'version', 'suites_count', \
@@ -2191,20 +2265,19 @@ class DBSource(ORMObject):
         return ['source', 'version', 'install_date', 'maintainer', \
             'changedby', 'poolfile', 'install_date']
 
-    def read_control(self):
+    def read_control_fields(self):
         '''
         Reads the control information from a dsc
 
-        @rtype: tuple
-        @return: (stanza, controldict)  stanza is the text of the control
-                 section.  controldict is the information in a dictionary
-                 form
+        @rtype: dict
+        @return: the fields of the dsc in dictionary form
         '''
-        from debian.debfile import Deb822
         fullpath = self.poolfile.fullpath
-        fields = Deb822(open(self.poolfile.fullpath, 'r'))
+        fields = Dak822(open(self.poolfile.fullpath, 'r'))
         return fields
 
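+    # Dict-like view of this source's metadata (MetadataKey object -> text value),
+    # proxied through the 'key' relation configured on the DBSource mapper below.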
+    metadata = association_proxy('key', 'value')
+
 __all__.append('DBSource')
 
 @session_wrapper
@@ -2348,6 +2421,34 @@ def get_source_in_suite(source, suite, session=None):
 
 __all__.append('get_source_in_suite')
 
+@session_wrapper
+def import_metadata_into_db(obj, session=None):
+    """
+    This routine works on either DBBinary or DBSource objects and imports
+    their metadata into the database.
+    """
+    fields = obj.read_control_fields()
+    for k in fields.keys():
+        try:
+            # Try raw ASCII
+            val = str(fields[k])
+        except UnicodeEncodeError:
+            # Fall back to UTF-8
+            try:
+                val = fields[k].encode('utf-8')
+            except UnicodeEncodeError:
+                # Finally try iso8859-1
+                val = fields[k].encode('iso8859-1')
+                # Otherwise we allow the exception to percolate up and we cause
+                # a reject as someone is playing silly buggers
+
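+        # Assigning through the 'metadata' association proxy creates the
+        # corresponding BinaryMetadata/SourceMetadata row for this key.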
+        obj.metadata[get_or_set_metadatakey(k, session)] = val
+
+    session.commit_or_flush()
+
+__all__.append('import_metadata_into_db')
+
+
 ################################################################################
 
 @session_wrapper
@@ -2524,7 +2625,7 @@ def add_deb_to_db(u, filename, session=None):
     #    session.rollback()
     #    raise MissingContents, "No contents stored for package %s, and couldn't determine contents of %s" % (bin.package, filename)
 
-    return poolfile
+    return bin, poolfile
 
 __all__.append('add_deb_to_db')
 
@@ -2847,13 +2948,45 @@ class MetadataKey(ORMObject):
 
 __all__.append('MetadataKey')
 
+@session_wrapper
+def get_or_set_metadatakey(keyname, session=None):
+    """
+    Returns a MetadataKey object for the given keyname.
+
+    If no matching keyname is found, a row is inserted.
+
+    @type keyname: string
+    @param keyname: The keyname to add
+
+    @type session: SQLAlchemy
+    @param session: Optional SQL session object (a temporary one will be
+    generated if not supplied).  If not passed, a commit will be performed at
+    the end of the function, otherwise the caller is responsible for committing.
+
+    @rtype: MetadataKey
+    @return: the metadatakey object for the given keyname
+    """
+
+    q = session.query(MetadataKey).filter_by(key=keyname)
+
+    try:
+        ret = q.one()
+    except NoResultFound:
+        ret = MetadataKey(keyname)
+        session.add(ret)
+        session.commit_or_flush()
+
+    return ret
+
+__all__.append('get_or_set_metadatakey')
+
 ################################################################################
 
 class BinaryMetadata(ORMObject):
-    def __init__(self, binary = None, key = None, value = None):
-        self.binary = binary
+    def __init__(self, key = None, value = None, binary = None):
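+        # key and value lead the signature so the 'metadata' association
+        # proxy's default dict creator can call BinaryMetadata(key, value).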
         self.key = key
         self.value = value
+        self.binary = binary
 
     def properties(self):
         return ['binary', 'key', 'value']
@@ -2866,10 +2999,10 @@ __all__.append('BinaryMetadata')
 ################################################################################
 
 class SourceMetadata(ORMObject):
-    def __init__(self, source = None, key = None, value = None):
-        self.source = source
+    def __init__(self, key = None, value = None, source = None):
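+        # key and value lead the signature so the 'metadata' association
+        # proxy's default dict creator can call SourceMetadata(key, value).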
         self.key = key
         self.value = value
+        self.source = source
 
     def properties(self):
         return ['source', 'key', 'value']
@@ -3018,7 +3151,9 @@ class DBConn(object):
                                  suites = relation(Suite, secondary=self.tbl_bin_associations,
                                      backref=backref('binaries', lazy='dynamic')),
                                  extra_sources = relation(DBSource, secondary=self.tbl_extra_src_references,
-                                     backref=backref('extra_binary_references', lazy='dynamic'))),
+                                     backref=backref('extra_binary_references', lazy='dynamic')),
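+                                 # metadata rows keyed by MetadataKey, backing the
+                                 # DBBinary.metadata association proxy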
+                                 key = relation(BinaryMetadata, cascade='all',
+                                     collection_class=attribute_mapped_collection('key'))),
                 extension = validator)
 
         mapper(BinaryACL, self.tbl_binary_acl,
@@ -3188,7 +3323,9 @@ class DBConn(object):
                                                      primaryjoin=(self.tbl_source.c.id==self.tbl_dsc_files.c.source)),
                                  suites = relation(Suite, secondary=self.tbl_src_associations,
                                      backref=backref('sources', lazy='dynamic')),
-                                 srcuploaders = relation(SrcUploader)),
+                                 srcuploaders = relation(SrcUploader),
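+                                 # metadata rows keyed by MetadataKey, backing the
+                                 # DBSource.metadata association proxy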
+                                 key = relation(SourceMetadata, cascade='all',
+                                     collection_class=attribute_mapped_collection('key'))),
                extension = validator)
 
         mapper(SourceACL, self.tbl_source_acl,