Add header to Contents files.

[dak.git] / daklib / contents.py
diff --git a/daklib/contents.py b/daklib/contents.py

index 7914c20e5e87c47df07c6f464f10fff28d3cd27f..6aadb66e38f6972b27d3826dc12b06e539eb000d 100755 (executable)
--- a/daklib/contents.py
+++ b/daklib/contents.py
@@ -27,10 +27,13 @@ Helper code for contents generation.
  
  from daklib.dbconn import *
  from daklib.config import Config
+from daklib.threadpool import ThreadPool
  
  from sqlalchemy import desc, or_
  from subprocess import Popen, PIPE
  
+import os
+
  class ContentsWriter(object):
      '''
      ContentsWriter writes the Contents-$arch.gz files.
@@ -166,6 +169,18 @@ select bc.file, substring(o.section from position('/' in o.section) + 1) || '/'
          values['component'] = self.component.component_name
          return "%(root)s%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values
  
+    def get_header(self):
+        '''
+        Returns the header for the Contents files as a string. The header is
+        read from the file 'contents' in the directory configured as
+        Dir::Templates. Errors raised while opening or reading that file are
+        passed on to the caller.
+        '''
+        filename = os.path.join(Config()['Dir::Templates'], 'contents')
+        # Open outside of the try block: if open() fails there is nothing to
+        # close and the finally clause must not touch an unbound name.
+        header_file = open(filename)
+        try:
+            return header_file.read()
+        finally:
+            header_file.close()
+
      def write_file(self):
          '''
          Write the output file.
@@ -173,7 +188,56 @@ select bc.file, substring(o.section from position('/' in o.section) + 1) || '/'
          command = ['gzip', '--rsyncable']
          output_file = open(self.output_filename(), 'w')
          pipe = Popen(command, stdin = PIPE, stdout = output_file).stdin
+        pipe.write(self.get_header())
          for item in self.fetch():
              pipe.write(item)
          pipe.close()
          output_file.close()
+
+
+class ContentsScanner(object):
+    '''
+    ContentsScanner provides a threadsafe method scan() to scan the contents of
+    a DBBinary object.
+    '''
+    def __init__(self, binary):
+        '''
+        The argument binary is the actual DBBinary object that should be
+        scanned.
+        '''
+        # Keep only the id: scan() reloads the object in a session of its own.
+        self.binary_id = binary.binary_id
+
+    def scan(self, dummy_arg = None):
+        '''
+        This method does the actual scan and fills in the associated BinContents
+        property. It commits any changes to the database. The argument dummy_arg
+        is ignored but needed by our threadpool implementation.
+        '''
+        session = DBConn().session()
+        try:
+            binary = session.query(DBBinary).get(self.binary_id)
+            for filename in binary.scan_contents():
+                binary.contents.append(BinContents(file = filename))
+            session.commit()
+        finally:
+            # Always release the session, even if the scan or the commit
+            # fails, so that a failing task does not leak it.
+            session.close()
+
+    @classmethod
+    def scan_all(class_, limit = None):
+        '''
+        The class method scan_all() scans all binaries using multiple threads.
+        The number of binaries to be scanned can be limited with the limit
+        argument. Returns the number of processed and remaining packages as a
+        dict with the keys 'processed' and 'remaining'.
+        '''
+        session = DBConn().session()
+        try:
+            # "== None" is intentional: it is an SQLAlchemy filter expression
+            # and not a Python identity test.
+            unscanned = session.query(DBBinary).filter(DBBinary.contents == None)
+            query = unscanned
+            if limit is not None:
+                query = query.limit(limit)
+            processed = query.count()
+            threadpool = ThreadPool()
+            for binary in query.yield_per(100):
+                threadpool.queueTask(ContentsScanner(binary).scan)
+            threadpool.joinAll()
+            # Count the unscanned binaries (without the limit) only after
+            # joinAll() has returned.
+            remaining = unscanned.count()
+        finally:
+            session.close()
+        return { 'processed': processed, 'remaining': remaining }