git.decadent.org.uk Git - dak.git/commitdiff
Merge branch 'dbtests' into merge
author Torsten Werner <twerner@debian.org>
Tue, 1 Mar 2011 21:16:17 +0000 (22:16 +0100)
committer Torsten Werner <twerner@debian.org>
Tue, 1 Mar 2011 21:16:17 +0000 (22:16 +0100)
Add classes ContentsWriter and ContentsScanner as a preparation for the new
contents command.

Signed-off-by: Torsten Werner <twerner@debian.org>
daklib/contents.py [new file with mode: 0755]
daklib/dbconn.py
tests/db_test.py
tests/dbtest_contents.py
tests/dbtest_packages.py
tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1_i386.deb [new file with mode: 0644]

diff --git a/daklib/contents.py b/daklib/contents.py
new file mode 100755 (executable)
index 0000000..740c0b2
--- /dev/null
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+"""
+Helper code for contents generation.
+
+@contact: Debian FTPMaster <ftpmaster@debian.org>
+@copyright: 2011 Torsten Werner <twerner@debian.org>
+@license: GNU General Public License version 2 or later
+"""
+
+################################################################################
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+################################################################################
+
+from daklib.dbconn import *
+from daklib.config import Config
+from daklib.threadpool import ThreadPool
+
+from sqlalchemy import desc, or_
+from subprocess import Popen, PIPE
+
+class ContentsWriter(object):
+    '''
+    ContentsWriter writes the Contents-$arch.gz files.
+    '''
+    def __init__(self, suite, architecture, overridetype, component = None):
+        '''
+        The constructor clones its arguments into a new session object to make
+        sure that the new ContentsWriter object can be executed in a different
+        thread.
+        '''
+        self.suite = suite.clone()
+        self.session = self.suite.session()
+        self.architecture = architecture.clone(self.session)
+        self.overridetype = overridetype.clone(self.session)
+        if component is not None:
+            self.component = component.clone(self.session)
+        else:
+            self.component = None
+
+    def query(self):
+        '''
+        Returns a query object that is doing most of the work.
+        '''
+        params = {
+            'suite':    self.suite.suite_id,
+            'arch_all': get_architecture('all', self.session).arch_id,
+            'arch':     self.architecture.arch_id,
+            'type_id':  self.overridetype.overridetype_id,
+            'type':     self.overridetype.overridetype,
+        }
+
+        if self.component is not None:
+            params['component'] = component.component_id
+            sql = '''
+create temp table newest_binaries (
+    id integer primary key,
+    package text);
+
+create index newest_binaries_by_package on newest_binaries (package);
+
+insert into newest_binaries (id, package)
+    select distinct on (package) id, package from binaries
+        where type = :type and
+            (architecture = :arch_all or architecture = :arch) and
+            id in (select bin from bin_associations where suite = :suite)
+        order by package, version desc;
+
+with
+
+unique_override as
+    (select o.package, s.section
+        from override o, section s
+        where o.suite = :suite and o.type = :type_id and o.section = s.id and
+        o.component = :component)
+
+select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
+    from newest_binaries b, bin_contents bc, unique_override o
+    where b.id = bc.binary_id and o.package = b.package
+    order by bc.file, b.package'''
+
+        else:
+            sql = '''
+create temp table newest_binaries (
+    id integer primary key,
+    package text);
+
+create index newest_binaries_by_package on newest_binaries (package);
+
+insert into newest_binaries (id, package)
+    select distinct on (package) id, package from binaries
+        where type = :type and
+            (architecture = :arch_all or architecture = :arch) and
+            id in (select bin from bin_associations where suite = :suite)
+        order by package, version desc;
+
+with
+
+unique_override as
+    (select distinct on (o.package, s.section) o.package, s.section
+        from override o, section s
+        where o.suite = :suite and o.type = :type_id and o.section = s.id
+        order by o.package, s.section, o.modified desc)
+
+select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
+    from newest_binaries b, bin_contents bc, unique_override o
+    where b.id = bc.binary_id and o.package = b.package
+    order by bc.file, b.package'''
+
+        return self.session.query("file", "package").from_statement(sql). \
+            params(params)
+
+    def formatline(self, filename, package_list):
+        '''
+        Returns a formatted string for the filename argument.
+        '''
+        package_list = ','.join(package_list)
+        return "%-60s%s\n" % (filename, package_list)
+
+    def fetch(self):
+        '''
+        Yields a new line of the Contents-$arch.gz file in filename order.
+        '''
+        last_filename = None
+        package_list = []
+        for filename, package in self.query().yield_per(100):
+            if filename != last_filename:
+                if last_filename is not None:
+                    yield self.formatline(last_filename, package_list)
+                last_filename = filename
+                package_list = []
+            package_list.append(package)
+        yield self.formatline(last_filename, package_list)
+        # end transaction to return connection to pool
+        self.session.rollback()
+
+    def get_list(self):
+        '''
+        Returns a list of lines for the Contents-$arch.gz file.
+        '''
+        return [item for item in self.fetch()]
+
+    def output_filename(self):
+        '''
+        Returns the name of the output file.
+        '''
+        values = {
+            'root': Config()['Dir::Root'],
+            'suite': self.suite.suite_name,
+            'architecture': self.architecture.arch_string
+        }
+        if self.component is None:
+            return "%(root)s%(suite)s/Contents-%(architecture)s.gz" % values
+        values['component'] = self.component.component_name
+        return "%(root)s%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values
+
+    def write_file(self):
+        '''
+        Write the output file.
+        '''
+        command = ['gzip', '--rsyncable']
+        output_file = open(self.output_filename(), 'w')
+        pipe = Popen(command, stdin = PIPE, stdout = output_file).stdin
+        for item in self.fetch():
+            pipe.write(item)
+        pipe.close()
+        output_file.close()
+
+
+class ContentsScanner(object):
+    '''
+    ContentsScanner provides a threadsafe method scan() to scan the contents of
+    a DBBinary object.
+    '''
+    def __init__(self, binary):
+        '''
+        The argument binary is the actual DBBinary object that should be
+        scanned.
+        '''
+        self.binary_id = binary.binary_id
+
+    def scan(self, dummy_arg = None):
+        '''
+        This method does the actual scan and fills in the associated BinContents
+        property. It commits any changes to the database. The argument dummy_arg
+        is ignored but needed by our threadpool implementation.
+        '''
+        session = DBConn().session()
+        binary = session.query(DBBinary).get(self.binary_id)
+        for filename in binary.scan_contents():
+            binary.contents.append(BinContents(file = filename))
+        session.commit()
+        session.close()
+
+    @classmethod
+    def scan_all(class_, limit = None):
+        '''
+        The class method scan_all() scans all binaries using multiple threads.
+        The number of binaries to be scanned can be limited with the limit
+        argument.
+        '''
+        session = DBConn().session()
+        query = session.query(DBBinary).filter(DBBinary.contents == None)
+        if limit is not None:
+            query = query.limit(limit)
+        threadpool = ThreadPool()
+        for binary in query.yield_per(100):
+            threadpool.queueTask(ContentsScanner(binary).scan)
+        threadpool.joinAll()
+        session.close()
index 3b60ae65808d9aba8587647d0d85b32702ce2576..24f8fb17d9620f99570db40e203ac21381ec142c 100755 (executable)
@@ -34,6 +34,7 @@
 ################################################################################
 
 import os
+from os.path import normpath
 import re
 import psycopg2
 import traceback
@@ -49,6 +50,8 @@ except:
 from datetime import datetime, timedelta
 from errno import ENOENT
 from tempfile import mkstemp, mkdtemp
+from subprocess import Popen, PIPE
+from tarfile import TarFile
 
 from inspect import getargspec
 
@@ -502,6 +505,26 @@ class DBBinary(ORMObject):
     def get_component_name(self):
         return self.poolfile.location.component.component_name
 
+    def scan_contents(self):
+        '''
+        Yields the contents of the package. Only regular files are yielded and
+        the path names are normalized after converting them from either utf-8 or
+        iso8859-1 encoding.
+        '''
+        fullpath = self.poolfile.fullpath
+        debdata = Popen(['dpkg-deb', '--fsys-tarfile', fullpath],
+            stdout = PIPE).stdout
+        tar = TarFile.open(fileobj = debdata, mode = 'r|')
+        for member in tar.getmembers():
+            if member.isfile():
+                try:
+                    name = member.name.decode('utf-8')
+                except UnicodeDecodeError:
+                    name = member.name.decode('iso8859-1')
+                yield normpath(name)
+        tar.close()
+        debdata.close()
+
 __all__.append('DBBinary')
 
 @session_wrapper
index 2ce786ab685b2cc2418a21cfe9d12dd7fd5c0678..9ae23017be550ce74ee0bdfc0f36c2d034375ff0 100644 (file)
@@ -101,8 +101,8 @@ class DBDakTestCase(DakTestCase):
         if 'comp' in self.__dict__:
             return
         self.comp = {}
-        self.comp['main'] = Component(component_name = 'main')
-        self.comp['contrib'] = Component(component_name = 'contrib')
+        for name in ('main', 'contrib', 'non-free'):
+            self.comp[name] = Component(component_name = name)
         self.session.add_all(self.comp.values())
 
     def setup_locations(self):
@@ -113,11 +113,9 @@ class DBDakTestCase(DakTestCase):
         self.setup_components()
         self.loc = {}
         self.loc['main'] = Location( \
-            path = '/srv/ftp-master.debian.org/ftp/pool/', \
-            component = self.comp['main'])
+            path = fixture('ftp/pool/'), component = self.comp['main'])
         self.loc['contrib'] = Location( \
-            path = '/srv/ftp-master.debian.org/ftp/pool/', \
-            component = self.comp['contrib'])
+            path = fixture('ftp/pool/'), component = self.comp['contrib'])
         self.session.add_all(self.loc.values())
 
     def setup_poolfiles(self):
@@ -322,7 +320,8 @@ class DBDakTestCase(DakTestCase):
     def tearDown(self):
         self.session.rollback()
         for class_ in self.classes_to_clean():
-            self.session.query(class_).delete()
+            for object_ in self.session.query(class_):
+                self.session.delete(object_)
         self.session.commit()
         # usually there is no need to drop all tables here
         #self.metadata.drop_all()
index f7be2f268fb66fc26dc4764b9eed6a13c590d65c..7402601925178ddbfd1d3fa90796733461a7cabb 100755 (executable)
@@ -2,9 +2,8 @@
 
 from db_test import DBDakTestCase
 
-from daklib.dbconn import DBConn, BinContents, OverrideType, get_override_type, \
-    Section, get_section, get_sections, Priority, get_priority, get_priorities, \
-    Override, get_override
+from daklib.dbconn import *
+from daklib.contents import ContentsWriter, ContentsScanner
 
 from sqlalchemy.exc import FlushError, IntegrityError
 import unittest
@@ -129,5 +128,48 @@ class ContentsTestCase(DBDakTestCase):
         self.assertEqual(self.override['hello_sid_main_udeb'], \
             self.otype['udeb'].overrides.one())
 
+    def test_contentswriter(self):
+        '''
+        Test the ContentsWriter class.
+
+        Covers get_list(), formatline() and output_filename(), the latter both
+        with and without a component.
+        '''
+        self.setup_suites()
+        self.setup_architectures()
+        self.setup_overridetypes()
+        self.setup_binaries()
+        self.setup_overrides()
+        self.binary['hello_2.2-1_i386'].contents.append(BinContents(file = '/usr/bin/hello'))
+        # commit before creating the writer: it works on clones of its
+        # arguments in a session of its own
+        self.session.commit()
+        cw = ContentsWriter(self.suite['squeeze'], self.arch['i386'], self.otype['deb'])
+        # the filename column is padded to 60 characters
+        self.assertEqual(['/usr/bin/hello                                              python/hello\n'], \
+            cw.get_list())
+        # test formatline and sort order
+        self.assertEqual('/usr/bin/hello                                              python/hello\n', \
+            cw.formatline('/usr/bin/hello', ['python/hello']))
+        self.assertEqual('/usr/bin/hello                                              editors/emacs,python/hello,utils/sl\n', \
+            cw.formatline('/usr/bin/hello', ['editors/emacs', 'python/hello', 'utils/sl']))
+        # test output_filename
+        self.assertEqual('tests/fixtures/ftp/squeeze/Contents-i386.gz', \
+            cw.output_filename())
+        # with a component the component name becomes part of the path
+        cw = ContentsWriter(self.suite['squeeze'], self.arch['i386'], \
+            self.otype['udeb'], self.comp['main'])
+        self.assertEqual('tests/fixtures/ftp/squeeze/main/Contents-i386.gz', \
+            cw.output_filename())
+
+    def test_scan_contents(self):
+        self.setup_binaries()
+        filelist = [f for f in self.binary['hello_2.2-1_i386'].scan_contents()]
+        self.assertEqual(['usr/bin/hello', 'usr/share/doc/hello/copyright'],
+            filelist)
+        self.session.commit()
+        ContentsScanner(self.binary['hello_2.2-1_i386']).scan()
+        bin_contents_list = self.binary['hello_2.2-1_i386'].contents.all()
+        self.assertEqual(2, len(bin_contents_list))
+        self.assertEqual('usr/bin/hello', bin_contents_list[0].file)
+        self.assertEqual('usr/share/doc/hello/copyright', bin_contents_list[1].file)
+
+    def classes_to_clean(self):
+        return [Override, Suite, BinContents, DBBinary, DBSource, Architecture, Section, \
+            OverrideType, Maintainer, Component, Priority, PoolFile]
+
 if __name__ == '__main__':
     unittest.main()
index 32c1b8c5c72a9fb3428e32c289261e860e327115..2b17905305c8df7314df27207ddd9e5f2175216c 100755 (executable)
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 from db_test import DBDakTestCase
+from base_test import fixture
 
 from daklib.dbconn import *
 from daklib.queue_install import package_to_suite
@@ -84,7 +85,7 @@ class PackageTestCase(DBDakTestCase):
 
         main = self.loc['main']
         contrib = self.loc['contrib']
-        self.assertEqual('/srv/ftp-master.debian.org/ftp/pool/', main.path)
+        self.assertEqual(fixture('ftp/pool/'), main.path)
         count = len(self.file.keys()) - 2
         self.assertEqual(count, main.files.count())
         self.assertEqual(2, contrib.files.count())
@@ -103,7 +104,7 @@ class PackageTestCase(DBDakTestCase):
         self.assertEqual(count - 1, main.files.count())
         self.assertEqual(3, contrib.files.count())
         # test fullpath
-        self.assertEqual('/srv/ftp-master.debian.org/ftp/pool/main/s/sl/sl_3.03-16.dsc', \
+        self.assertEqual(fixture('ftp/pool/main/s/sl/sl_3.03-16.dsc'), \
             self.file['sl_3.03-16.dsc'].fullpath)
         # test check_poolfile()
         self.assertEqual((True, self.file['sl_3.03-16.dsc']), \
diff --git a/tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1_i386.deb b/tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1_i386.deb
new file mode 100644 (file)
index 0000000..f40802b
Binary files /dev/null and b/tests/fixtures/ftp/pool/main/h/hello/hello_2.2-1_i386.deb differ