git.decadent.org.uk Git - dak.git/blobdiff - daklib/contents.py
Contents: use the new string_agg() function of Pg9.
[dak.git] / daklib / contents.py
index 6b3b81534d3862a296e58a00201abc431188654b..5bf94d9a252da7c7bdd5a6c10d70b408294b5dab 100755 (executable)
@@ -28,9 +28,11 @@ Helper code for contents generation.
 from daklib.dbconn import *
 from daklib.config import Config
 from daklib.threadpool import ThreadPool
+from multiprocessing import Pool
 
 from sqlalchemy import desc, or_
-from subprocess import Popen, PIPE
+from sqlalchemy.exc import IntegrityError
+from subprocess import Popen, PIPE, call
 
 import os.path
 
@@ -44,25 +46,26 @@ class ContentsWriter(object):
         sure that the new ContentsWriter object can be executed in a different
         thread.
         '''
-        self.suite = suite.clone()
-        self.session = self.suite.session()
-        self.architecture = architecture.clone(self.session)
-        self.overridetype = overridetype.clone(self.session)
-        if component is not None:
-            self.component = component.clone(self.session)
-        else:
-            self.component = None
+        self.suite = suite
+        self.architecture = architecture
+        self.overridetype = overridetype
+        self.component = component
+        self.session = suite.session()
 
     def query(self):
         '''
         Returns a query object that is doing most of the work.
         '''
+        overridesuite = self.suite
+        if self.suite.overridesuite is not None:
+            overridesuite = get_suite(self.suite.overridesuite, self.session)
         params = {
-            'suite':    self.suite.suite_id,
-            'arch_all': get_architecture('all', self.session).arch_id,
-            'arch':     self.architecture.arch_id,
-            'type_id':  self.overridetype.overridetype_id,
-            'type':     self.overridetype.overridetype,
+            'suite':         self.suite.suite_id,
+            'overridesuite': overridesuite.suite_id,
+            'arch_all':      get_architecture('all', self.session).arch_id,
+            'arch':          self.architecture.arch_id,
+            'type_id':       self.overridetype.overridetype_id,
+            'type':          self.overridetype.overridetype,
         }
 
         if self.component is not None:
@@ -86,13 +89,13 @@ with
 unique_override as
     (select o.package, s.section
         from override o, section s
-        where o.suite = :suite and o.type = :type_id and o.section = s.id and
+        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
         o.component = :component)
 
-select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
+select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
     from newest_binaries b, bin_contents bc, unique_override o
     where b.id = bc.binary_id and o.package = b.package
-    order by bc.file, b.package'''
+    group by bc.file'''
 
         else:
             sql = '''
@@ -114,39 +117,29 @@ with
 unique_override as
     (select distinct on (o.package, s.section) o.package, s.section
         from override o, section s
-        where o.suite = :suite and o.type = :type_id and o.section = s.id
+        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id
         order by o.package, s.section, o.modified desc)
 
-select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
+select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
     from newest_binaries b, bin_contents bc, unique_override o
     where b.id = bc.binary_id and o.package = b.package
-    order by bc.file, b.package'''
+    group by bc.file'''
 
-        return self.session.query("file", "package").from_statement(sql). \
+        return self.session.query("file", "pkglist").from_statement(sql). \
             params(params)
 
     def formatline(self, filename, package_list):
         '''
         Returns a formatted string for the filename argument.
         '''
-        package_list = ','.join(package_list)
         return "%-55s %s\n" % (filename, package_list)
 
     def fetch(self):
         '''
         Yields a new line of the Contents-$arch.gz file in filename order.
         '''
-        last_filename = None
-        package_list = []
-        for filename, package in self.query().yield_per(100):
-            if filename != last_filename:
-                if last_filename is not None:
-                    yield self.formatline(last_filename, package_list)
-                last_filename = filename
-                package_list = []
-            package_list.append(package)
-        if last_filename is not None:
-            yield self.formatline(last_filename, package_list)
+        for filename, package_list in self.query().yield_per(100):
+            yield self.formatline(filename, package_list)
         # end transaction to return connection to pool
         self.session.rollback()
 
@@ -183,19 +176,24 @@ select bc.file, substring(o.section from position('/' in o.section) + 1) || '/'
             if header_file:
                 header_file.close()
 
-    def write_file(self, dummy_arg = None):
+    def write_file(self):
         '''
-        Write the output file. The argument dummy_arg is ignored but needed by
-        our threadpool implementation.
+        Write the gzipped output to a '.new' file, then rename it into place.
         '''
         command = ['gzip', '--rsyncable']
-        output_file = open(self.output_filename(), 'w')
-        pipe = Popen(command, stdin = PIPE, stdout = output_file).stdin
-        pipe.write(self.get_header())
+        final_filename = self.output_filename()
+        temp_filename = final_filename + '.new'
+        output_file = open(temp_filename, 'w')
+        gzip = Popen(command, stdin = PIPE, stdout = output_file)
+        gzip.stdin.write(self.get_header())
         for item in self.fetch():
-            pipe.write(item)
-        pipe.close()
+            gzip.stdin.write(item)
+        gzip.stdin.close()
         output_file.close()
+        gzip.wait()
+        # rename() overwrites atomically; remove() raised on a missing file
+        os.chmod(temp_filename, 0664)
+        os.rename(temp_filename, final_filename)
 
     @classmethod
     def write_all(class_, suite_names = [], force = False):
@@ -210,22 +208,22 @@ select bc.file, substring(o.section from position('/' in o.section) + 1) || '/'
             suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
         if not force:
             suite_query = suite_query.filter_by(untouchable = False)
-        main = get_component('main', session)
-        non_free = get_component('non-free', session)
-        deb = get_override_type('deb', session)
-        udeb = get_override_type('udeb', session)
-        threadpool = ThreadPool()
+        pool = Pool()
         for suite in suite_query:
             for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                 # handle 'deb' packages
-                writer = ContentsWriter(suite, architecture, deb)
-                threadpool.queueTask(writer.write_file)
+                command = ['dak', 'contents', '-s', suite.suite_name, \
+                    'generate_helper', architecture.arch_string, 'deb']
+                pool.apply_async(call, (command, ))
                 # handle 'udeb' packages for 'main' and 'non-free'
-                writer = ContentsWriter(suite, architecture, udeb, component = main)
-                threadpool.queueTask(writer.write_file)
-                writer = ContentsWriter(suite, architecture, udeb, component = non_free)
-                threadpool.queueTask(writer.write_file)
-        threadpool.joinAll()
+                command = ['dak', 'contents', '-s', suite.suite_name, \
+                    'generate_helper', architecture.arch_string, 'udeb', 'main']
+                pool.apply_async(call, (command, ))
+                command = ['dak', 'contents', '-s', suite.suite_name, \
+                    'generate_helper', architecture.arch_string, 'udeb', 'non-free']
+                pool.apply_async(call, (command, ))
+        pool.close()
+        pool.join()
         session.close()
 
 
@@ -249,7 +247,10 @@ class ContentsScanner(object):
         '''
         session = DBConn().session()
         binary = session.query(DBBinary).get(self.binary_id)
-        for filename in binary.scan_contents():
+        fileset = set(binary.scan_contents())
+        if len(fileset) == 0:
+            fileset.add('EMPTY_PACKAGE')
+        for filename in fileset:
             binary.contents.append(BinContents(file = filename))
         session.commit()
         session.close()