]> git.decadent.org.uk Git - dak.git/blobdiff - daklib/contents.py
Switch to multiprocessing module and reap child gzips.
[dak.git] / daklib / contents.py
index 740c0b2942951a5ff6f38df9581e9dd376698222..730d21488b27916a08f0e0771c7533904fe635b5 100755 (executable)
@@ -28,10 +28,13 @@ Helper code for contents generation.
 from daklib.dbconn import *
 from daklib.config import Config
 from daklib.threadpool import ThreadPool
+from multiprocessing import Pool
 
 from sqlalchemy import desc, or_
 from subprocess import Popen, PIPE
 
+import os.path
+
 class ContentsWriter(object):
     '''
     ContentsWriter writes the Contents-$arch.gz files.
@@ -64,7 +67,7 @@ class ContentsWriter(object):
         }
 
         if self.component is not None:
-            params['component'] = component.component_id
+            params['component'] = self.component.component_id
             sql = '''
 create temp table newest_binaries (
     id integer primary key,
@@ -128,7 +131,7 @@ select bc.file, substring(o.section from position('/' in o.section) + 1) || '/'
         Returns a formatted string for the filename argument.
         '''
         package_list = ','.join(package_list)
-        return "%-60s%s\n" % (filename, package_list)
+        return "%-55s %s\n" % (filename, package_list)
 
     def fetch(self):
         '''
@@ -143,7 +146,8 @@ select bc.file, substring(o.section from position('/' in o.section) + 1) || '/'
                 last_filename = filename
                 package_list = []
             package_list.append(package)
-        yield self.formatline(last_filename, package_list)
+        if last_filename is not None:
+            yield self.formatline(last_filename, package_list)
         # end transaction to return connection to pool
         self.session.rollback()
 
@@ -163,9 +167,22 @@ select bc.file, substring(o.section from position('/' in o.section) + 1) || '/'
             'architecture': self.architecture.arch_string
         }
         if self.component is None:
-            return "%(root)s%(suite)s/Contents-%(architecture)s.gz" % values
+            return "%(root)s/dists/%(suite)s/Contents-%(architecture)s.gz" % values
         values['component'] = self.component.component_name
-        return "%(root)s%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values
+        return "%(root)s/dists/%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values
+
+    def get_header(self):
+        '''
+        Returns the header for the Contents files as a string.
+        '''
+        header_file = None
+        try:
+            filename = os.path.join(Config()['Dir::Templates'], 'contents')
+            header_file = open(filename)
+            return header_file.read()
+        finally:
+            if header_file:
+                header_file.close()
 
     def write_file(self):
         '''
@@ -173,11 +190,45 @@ select bc.file, substring(o.section from position('/' in o.section) + 1) || '/'
         '''
         command = ['gzip', '--rsyncable']
         output_file = open(self.output_filename(), 'w')
-        pipe = Popen(command, stdin = PIPE, stdout = output_file).stdin
+        gzip = Popen(command, stdin = PIPE, stdout = output_file)
+        gzip.stdin.write(self.get_header())
         for item in self.fetch():
-            pipe.write(item)
-        pipe.close()
+            gzip.stdin.write(item)
+        gzip.stdin.close()
         output_file.close()
+        gzip.wait()
+
+    @classmethod
+    def write_all(class_, suite_names = [], force = False):
+        '''
+        Writes all Contents files for suites in list suite_names which defaults
+        to all 'touchable' suites if not specified explicitely. Untouchable
+        suites will be included if the force argument is set to True.
+        '''
+        session = DBConn().session()
+        suite_query = session.query(Suite)
+        if len(suite_names) > 0:
+            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
+        if not force:
+            suite_query = suite_query.filter_by(untouchable = False)
+        main = get_component('main', session)
+        non_free = get_component('non-free', session)
+        deb = get_override_type('deb', session)
+        udeb = get_override_type('udeb', session)
+        pool = Pool()
+        for suite in suite_query:
+            for architecture in suite.get_architectures(skipsrc = True, skipall = True):
+                # handle 'deb' packages
+                writer = ContentsWriter(suite, architecture, deb)
+                pool.apply(writer.write_file)
+                # handle 'udeb' packages for 'main' and 'non-free'
+                writer = ContentsWriter(suite, architecture, udeb, component = main)
+                pool.apply(writer.write_file)
+                writer = ContentsWriter(suite, architecture, udeb, component = non_free)
+                pool.apply(writer.write_file)
+        pool.close()
+        pool.join()
+        session.close()
 
 
 class ContentsScanner(object):
@@ -210,14 +261,19 @@ class ContentsScanner(object):
         '''
         The class method scan_all() scans all binaries using multiple threads.
         The number of binaries to be scanned can be limited with the limit
-        argument.
+        argument. Returns the number of processed and remaining packages as a
+        dict.
         '''
         session = DBConn().session()
         query = session.query(DBBinary).filter(DBBinary.contents == None)
+        remaining = query.count
         if limit is not None:
             query = query.limit(limit)
+        processed = query.count()
         threadpool = ThreadPool()
         for binary in query.yield_per(100):
             threadpool.queueTask(ContentsScanner(binary).scan)
         threadpool.joinAll()
+        remaining = remaining()
         session.close()
+        return { 'processed': processed, 'remaining': remaining }