before I rip out pending_*

[dak.git] / dak / contents.py
diff --git a/dak/contents.py b/dak/contents.py

index be1e6d05ceef672389b364949c87ceeedb4a33ed..c0d00c85abec80249c6b4f6a8726a4b87503b8d1 100755 (executable)
--- a/dak/contents.py
+++ b/dak/contents.py
@@ -37,7 +37,6 @@ Create all the contents files
  import sys
  import os
  import logging
  import sys
  import os
  import logging
-import math
  import gzip
  import threading
  import Queue
  import gzip
  import threading
  import Queue
@@ -45,7 +44,8 @@ import apt_pkg
  from daklib import utils
  from daklib.binary import Binary
  from daklib.config import Config
  from daklib import utils
  from daklib.binary import Binary
  from daklib.config import Config
-from daklib.dbconn import DBConn
+from daklib.dbconn import *
+
  ################################################################################
  
  def usage (exit_code=0):
  ################################################################################
  
  def usage (exit_code=0):
@@ -55,8 +55,8 @@ COMMANDS
      generate
          generate Contents-$arch.gz files
  
      generate
          generate Contents-$arch.gz files
  
-    bootstrap
-        scan the debs in the existing pool and load contents in the the database
+    bootstrap_bin
+        scan the debs in the existing pool and load contents into the bin_contents table
  
      cruft
          remove files/paths which are no longer referenced by a binary
  
      cruft
          remove files/paths which are no longer referenced by a binary
@@ -87,199 +87,250 @@ log = logging.getLogger()
  
  ################################################################################
  
  
  ################################################################################
  
-# get all the arches delivered for a given suite
-# this should probably exist somehere common
-arches_q = """PREPARE arches_q(int) as
-              SELECT s.architecture, a.arch_string
-              FROM suite_architectures s
-              JOIN architecture a ON (s.architecture=a.id)
-                  WHERE suite = $1"""
-
-# find me the .deb for a given binary id
-debs_q = """PREPARE debs_q(int, int) as
-              SELECT b.id, f.filename FROM bin_assoc_by_arch baa
-              JOIN binaries b ON baa.bin=b.id
-              JOIN files f ON b.file=f.id
-              WHERE suite = $1
-                  AND arch = $2"""
-
-# ask if we already have contents associated with this binary
-olddeb_q = """PREPARE olddeb_q(int) as
-              SELECT 1 FROM content_associations
-              WHERE binary_pkg = $1
-              LIMIT 1"""
-
-# find me all of the contents for a given .deb
-contents_q = """PREPARE contents_q(int,int) as
-                SELECT (p.path||'/'||n.file) AS fn,
-                        s.section,
-                        b.package,
-                        b.architecture
-               FROM content_associations c join content_file_paths p ON (c.filepath=p.id)
-               JOIN content_file_names n ON (c.filename=n.id)
-               JOIN binaries b ON (b.id=c.binary_pkg)
-               JOIN override o ON (o.package=b.package)
-               JOIN section s ON (s.id=o.section)
-               WHERE o.suite = $1 AND o.type = $2
-               AND b.type='deb'
-               ORDER BY fn"""
-
-# find me all of the contents for a given .udeb
-udeb_contents_q = """PREPARE udeb_contents_q(int,int,int) as
-              SELECT (p.path||'/'||n.file) AS fn,
-                        s.section,
-                        b.package,
-                        b.architecture
-               FROM content_associations c join content_file_paths p ON (c.filepath=p.id)
-               JOIN content_file_names n ON (c.filename=n.id)
-               JOIN binaries b ON (b.id=c.binary_pkg)
-               JOIN override o ON (o.package=b.package)
-               JOIN section s ON (s.id=o.section)
-               WHERE o.suite = $1 AND o.type = $2
-               AND s.id = $3
-               AND b.type='udeb'
-               ORDER BY fn"""
-
-#               FROM content_file_paths p join content_associations c ON (c.filepath=p.id)
-#               JOIN content_file_names n ON (c.filename=n.id)
-#               JOIN binaries b ON (b.id=c.binary_pkg)
-#               JOIN override o ON (o.package=b.package)
-#               JOIN section s ON (s.id=o.section)
-#               WHERE o.suite = $1 AND o.type = $2
-#               AND s.id = $3
-#               AND b.id in (SELECT ba.bin from bin_associations ba join binaries b on b.id=ba.bin where (b.architecture=$3 or b.architecture=$4)and ba.suite=$1 and b.type='udeb')
-#               GROUP BY fn
-#               ORDER BY fn;"""
-
-
-
-# clear out all of the temporarily stored content associations
-# this should be run only after p-a has run.  after a p-a
-# run we should have either accepted or rejected every package
-# so there should no longer be anything in the queue
-remove_pending_contents_cruft_q = """DELETE FROM pending_content_associations"""
-
-# delete any filenames we are storing which have no binary associated with them
-remove_filename_cruft_q = """DELETE FROM content_file_names
-                             WHERE id IN (SELECT cfn.id FROM content_file_names cfn
-                                          LEFT JOIN content_associations ca
-                                            ON ca.filename=cfn.id
-                                          WHERE ca.id IS NULL)"""
-
-# delete any paths we are storing which have no binary associated with them
-remove_filepath_cruft_q = """DELETE FROM content_file_paths
-                             WHERE id IN (SELECT cfn.id FROM content_file_paths cfn
-                                          LEFT JOIN content_associations ca
-                                             ON ca.filepath=cfn.id
-                                          WHERE ca.id IS NULL)"""
-
  class EndOfContents(object):
      """
      A sentry object for the end of the filename stream
      """
      pass
  
  class EndOfContents(object):
      """
      A sentry object for the end of the filename stream
      """
      pass
  
-class GzippedContentWriter(object):
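+class OneAtATime(object):
+    """
+    a single-slot hand-off between threads: enqueue() blocks while the slot
+    is occupied and dequeue() blocks while it is empty
+    """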
+    def __init__(self):
+        self.next_in_line = None
+        self.next_lock = threading.Condition()
+
+    def enqueue(self, next):
+        self.next_lock.acquire()
+        while self.next_in_line:
+            self.next_lock.wait()
+            
+        assert( not self.next_in_line )
+        self.next_in_line = next
+        self.next_lock.notify()
+        self.next_lock.release()
+
+    def dequeue(self):
+        self.next_lock.acquire()
+        while not self.next_in_line:
+            self.next_lock.wait()
+        result = self.next_in_line
+        self.next_in_line = None
+        self.next_lock.notify()
+        self.next_lock.release()
+        return result
+
+class ContentsWorkThread(threading.Thread):
      """
      """
-    An object which will write contents out to a Contents-$arch.gz
-    file on a separate thread
      """
      """
+    def __init__(self, upstream, downstream):
+        threading.Thread.__init__(self)
+        self.upstream = upstream
+        self.downstream = downstream
  
  
-    header = None # a class object holding the header section of contents file
+    def run(self):
+        while True:
+            try:
+                contents_file = self.upstream.dequeue()
+                if isinstance(contents_file,EndOfContents):
+                    if self.downstream:
+                        self.downstream.enqueue(contents_file)
+                    break
+
+                s = datetime.datetime.now()
+                print("%s start: %s" % (self,contents_file) )
+                self._run(contents_file)
+                print("%s finished: %s in %d seconds" % (self, contents_file, (datetime.datetime.now()-s).seconds ))
+                if self.downstream:
+                    self.downstream.enqueue(contents_file)
+            except:
+                traceback.print_exc()
+
+class QueryThread(ContentsWorkThread):
+    def __init__(self, upstream, downstream):
+        ContentsWorkThread.__init__(self, upstream, downstream)
+
+    def __str__(self):
+        return "QueryThread"
+    __repr__ = __str__
+
+    def _run(self, contents_file):
+        contents_file.query()
+
+class IngestThread(ContentsWorkThread):
+    def __init__(self, upstream, downstream):
+        ContentsWorkThread.__init__(self, upstream, downstream)
+
+    def __str__(self):
+        return "IngestThread"
+    __repr__ = __str__
+
+    def _run(self, contents_file):
+        contents_file.ingest()
+
+class SortThread(ContentsWorkThread):
+    def __init__(self, upstream, downstream):
+        ContentsWorkThread.__init__(self, upstream, downstream)
+
+    def __str__(self):
+        return "SortThread"
+    __repr__ = __str__
+
+    def _run(self, contents_file):
+        contents_file.sorted_keys = sorted(contents_file.filenames.keys())
+
+class OutputThread(ContentsWorkThread):
+    def __init__(self, upstream, downstream):
+        ContentsWorkThread.__init__(self, upstream, downstream)
+
+    def __str__(self):
+        return "OutputThread"
+    __repr__ = __str__
+
+    def _run(self, contents_file):
+        contents_file.open_file()
+        for fname in contents_file.sorted_keys:
+            contents_file.filehandle.write("%s\t%s\n" % (fname,contents_file.filenames[fname]))
+        contents_file.sorted_keys = None
+        contents_file.filenames.clear()
+    
+class GzipThread(ContentsWorkThread):
+    def __init__(self, upstream, downstream):
+        ContentsWorkThread.__init__(self, upstream, downstream)
+
+    def __str__(self):
+        return "GzipThread"
+    __repr__ = __str__
+
+    def _run(self, contents_file):
+        os.system("gzip -f %s" % contents_file.filename)
+
+class ContentFile(object):
+    def __init__(self,
+                 filename,
+                 suite_str,
+                 suite_id)
+
+        self.filename = filename
+        self.filenames = {}
+        self.sorted_keys = None
+        self.suite_str = suite_str
+        self.suite_id = suite_id
+        self.cursor = None
+        self.filehandle = None
+
+    def __str__(self):
+        return self.filename
+    __repr__ = __str__
+
+
+    def cleanup(self):
+        self.filenames = None
+        self.sortedkeys = None
+        self.filehandle.close()
+        self.cursor.close()
+
+    def ingest(self):
+        while True:
+            r = self.cursor.fetchone()
+            if not r:
+                break
+            filename, package = r
+            if self.filenames.has_key(filename):
+                self.filenames[filename] += ",%s" % (package)
+            else:
+                self.filenames[filename] = "%s" % (package)
+        self.cursor.close()
  
  
-    def __init__(self, filename):
-        """
-        @ptype filename: string
-        @param filename: the name of the file to write to
-        """
-        self.queue = Queue.Queue()
-        self.current_file = None
-        self.first_package = True
-        self.output = self.open_file(filename)
-        self.thread = threading.Thread(target=self.write_thread,
-                                       name='Contents writer')
-        self.thread.start()
-
-    def open_file(self, filename):
+    def open_file(self):
          """
          opens a gzip stream to the contents file
          """
          """
          opens a gzip stream to the contents file
          """
-        filepath = Config()["Contents::Root"] + filename
-        filedir = os.path.dirname(filepath)
+#        filepath = Config()["Contents::Root"] + self.filename
+        self.filename = "/home/stew/contents/" + self.filename
+        filedir = os.path.dirname(self.filename)
          if not os.path.isdir(filedir):
              os.makedirs(filedir)
          if not os.path.isdir(filedir):
              os.makedirs(filedir)
-        return gzip.open(filepath, "w")
-
-    def write(self, filename, section, package):
-        """
-        enqueue content to be written to the file on a separate thread
-        """
-        self.queue.put((filename,section,package))
-
-    def write_thread(self):
-        """
-        the target of a Thread which will do the actual writing
-        """
-        while True:
-            next = self.queue.get()
-            if isinstance(next, EndOfContents):
-                self.output.write('\n')
-                self.output.close()
-                break
-
-            (filename,section,package)=next
-            if next != self.current_file:
-                # this is the first file, so write the header first
-                if not self.current_file:
-                    self.output.write(self._getHeader())
-
-                self.output.write('\n%s\t' % filename)
-                self.first_package = True
+#        self.filehandle = gzip.open(self.filename, "w")
+        self.filehandle = open(self.filename, "w")
+        self._write_header()
  
  
-            self.current_file=filename
+    def _write_header(self):
+        self._get_header();
+        self.filehandle.write(ContentFile.header)
  
  
-            if not self.first_package:
-                self.output.write(',')
-            else:
-                self.first_package=False
-            self.output.write('%s/%s' % (section,package))
-
-    def finish(self):
-        """
-        enqueue the sentry object so that writers will know to terminate
-        """
-        self.queue.put(EndOfContents())
+    header=None
  
      @classmethod
  
      @classmethod
-    def _getHeader(self):
+    def _get_header(self):
          """
          Internal method to return the header for Contents.gz files
  
          This is boilerplate which explains the contents of the file and how
          it can be used.
          """
          """
          Internal method to return the header for Contents.gz files
  
          This is boilerplate which explains the contents of the file and how
          it can be used.
          """
-        if not GzippedContentWriter.header:
+        if not ContentFile.header:
              if Config().has_key("Contents::Header"):
                  try:
                      h = open(os.path.join( Config()["Dir::Templates"],
                                             Config()["Contents::Header"] ), "r")
              if Config().has_key("Contents::Header"):
                  try:
                      h = open(os.path.join( Config()["Dir::Templates"],
                                             Config()["Contents::Header"] ), "r")
-                    GzippedContentWriter.header = h.read()
+                    ContentFile.header = h.read()
                      h.close()
                  except:
                      log.error( "error opening header file: %d\n%s" % (Config()["Contents::Header"],
                                                                        traceback.format_exc() ))
                      h.close()
                  except:
                      log.error( "error opening header file: %d\n%s" % (Config()["Contents::Header"],
                                                                        traceback.format_exc() ))
-                    GzippedContentWriter.header = None
+                    ContentFile.header = None
              else:
              else:
-                GzippedContentWriter.header = None
-
-        return GzippedContentWriter.header
-
+                ContentFile.header = None
+
+        return ContentFile.header
+
+
+class DebContentFile(ContentFile):
+    def __init__(self,
+                 filename,
+                 suite_str,
+                 suite_id,
+                 arch_str,
+                 arch_id):
+        ContentFile.__init__(self,
+                             filename,
+                             suite_str,
+                             suite_id )
+        self.arch_str = arch_str
+        self.arch_id = arch_id
+
+    def query(self):
+        self.cursor = DBConn().session();
+
+        self.cursor.execute("""SELECT file, component || section || '/' || package
+        FROM deb_contents
+        WHERE ( arch=2 or arch = :arch) AND suite = :suite
+        """, { 'arch':self.arch_id, 'suite':self.suite_id }
+
+class UdebContentFile(ContentFile):
+    def __init__(self,
+                 filename,
+                 suite_str,
+                 suite_id,
+                 section_name,
+                 section_id)
+        ContentFile.__init__(self,
+                             filename,
+                             suite_str,
+                             suite_id )
+
+    def query(self):
+        self.cursor = DBConn().session();
+
+        self.cursor.execute("""SELECT file, component || section || '/' || package
+        FROM udeb_contents
+        WHERE suite = :suite
+        """ , { 'suite': self.suite_id } )
  
  class Contents(object):
      """
      Class capable of generating Contents-$arch.gz files
  
  class Contents(object):
      """
      Class capable of generating Contents-$arch.gz files
-
-    Usage GenerateContents().generateContents( ["main","contrib","non-free"] )
      """
  
      def __init__(self):
      """
  
      def __init__(self):
@@ -288,159 +339,310 @@ class Contents(object):
      def reject(self, message):
          log.error("E: %s" % message)
  
      def reject(self, message):
          log.error("E: %s" % message)
  
-    # goal column for section column
-    _goal_column = 54
-
      def cruft(self):
          """
          remove files/paths from the DB which are no longer referenced
          by binaries and clean the temporary table
          """
      def cruft(self):
          """
          remove files/paths from the DB which are no longer referenced
          by binaries and clean the temporary table
          """
-        cursor = DBConn().cursor();
-        cursor.execute( "BEGIN WORK" )
-        cursor.execute( remove_pending_contents_cruft_q )
-        cursor.execute( remove_filename_cruft_q )
-        cursor.execute( remove_filepath_cruft_q )
-        cursor.execute( "COMMIT" )
+        s = DBConn().session()
  
  
+        # clear out all of the temporarily stored content associations
+        # this should be run only after p-a has run.  after a p-a
+        # run we should have either accepted or rejected every package
+        # so there should no longer be anything in the queue
+        s.query(PendingContentAssociation).delete()
  
  
-    def bootstrap(self):
+        # delete any filenames we are storing which have no binary associated
+        # with them
+        cafq = s.query(ContentAssociation.filename_id).distinct()
+        cfq = s.query(ContentFilename)
+        cfq = cfq.filter(~ContentFilename.cafilename_id.in_(cafq))
+        cfq.delete()
+
+        # delete any paths we are storing which have no binary associated with
+        # them
+        capq = s.query(ContentAssociation.filepath_id).distinct()
+        cpq = s.query(ContentFilepath)
+        cpq = cpq.filter(~ContentFilepath.cafilepath_id.in_(capq))
+        cpq.delete()
+
+        s.commit()
+
+
+    def bootstrap_bin(self):
          """
          """
-        scan the existing debs in the pool to populate the contents database tables
+        scan the existing debs in the pool to populate the bin_contents table
          """
          pooldir = Config()[ 'Dir::Pool' ]
  
          """
          pooldir = Config()[ 'Dir::Pool' ]
  
-        cursor = DBConn().cursor();
-        DBConn().prepare("debs_q",debs_q)
-        DBConn().prepare("olddeb_q",olddeb_q)
-        DBConn().prepare("arches_q",arches_q)
+        s = DBConn().session()
+
+        print( "bootstrap_bin" )
+        for binary in s.query(DBBinary).yield_per(1000):
+            print( "binary: %s" % binary.package )
+            filename = binary.poolfile.filename
+             # Check for existing contents
+            existingq = s.execute( "select 1 from bin_contents where binary_id=:id", {'id':binary.binary_id} );
+            if existingq.fetchone():
+                log.debug( "already imported: %s" % (filename))
+            else:
+                # We don't have existing contents so import them
+                log.debug( "scanning: %s" % (filename) )
+
+                debfile = os.path.join(pooldir, filename)
+                if os.path.exists(debfile):
+                    Binary(debfile, self.reject).scan_package(binary.binary_id, True)
+                else:
+                    log.error("missing .deb: %s" % filename)
  
  
-        suites = self._suites()
-        for suite in [i.lower() for i in suites]:
-            suite_id = DBConn().get_suite_id(suite)
  
  
-            arch_list = self._arches(cursor, suite_id)
-            arch_all_id = DBConn().get_architecture_id("all")
-            for arch_id in arch_list:
-                cursor.execute( "EXECUTE debs_q(%d, %d)" % ( suite_id, arch_id[0] ) )
  
  
-                count = 0
+    def bootstrap(self):
+        """
+        for each override, copy the bin_contents file lists of its binaries into deb_contents
+        """
+        s = DBConn().session()
+
+        for override in s.query(Override).all():
+            binaries = s.execute("""SELECT b.binary_id, ba.arch
+                                    FROM binaries b
+                                    JOIN bin_associations ba ON ba.binary_id=b.binary_id
+                                    WHERE ba.suite=:suite
+                                    AND b.package=override.package""", {'suite':override.suite})
+            while True:
+                binary = binaries.fetchone()
+                if not binary:
+                    break
+
+                filenames = s.execute( """SELECT file from bin_contents where binary_id=:id""", { 'id': binary.binary_id } )
                  while True:
                  while True:
-                    deb = cursor.fetchone()
-                    if not deb:
+                    filename = filenames.fetchone()
+                    if not binary:
                          break
                          break
-                    count += 1
-                    cursor1 = DBConn().cursor();
-                    cursor1.execute( "EXECUTE olddeb_q(%d)" % (deb[0] ) )
-                    old = cursor1.fetchone()
-                    if old:
-                        log.debug( "already imported: %s" % (deb[1]) )
-                    else:
-                        log.debug( "scanning: %s" % (deb[1]) )
-                        debfile = os.path.join( pooldir, deb[1] )
-                        if os.path.exists( debfile ):
-                            Binary(debfile, self.reject).scan_package(deb[0],True)
-                        else:
-                            log.error("missing .deb: %s" % deb[1])
  
  
+                
+
+                    if override.type == 7:
+                        s.execute( """INSERT INTO deb_contents (file,section,package,binary_id,arch,suite,component)
+                                      VALUES (:filename, :section, :package, :binary_id, :arch, :suite, :component);""",
+                                   { 'filename' : filename,
+                                     'section' : override.section,
+                                     'package' : override.package,
+                                     'binary_id' : binary.binary_id,
+                                     'arch' : binary.arch,
+                                     'suite' : override.suite,
+                                     'component' : override.component } )
+
+                    
+                    elif override.type == 9:
+                        s.execute( """INSERT INTO deb_contents (file,section,package,binary_id,arch,suite,component)
+                                      VALUES (:filename, :section, :package, :binary_id, :arch, :suite, :component);""",
+                                   { 'filename' : filename,
+                                     'section' : override.section,
+                                     'package' : override.package,
+                                     'binary_id' : binary.binary_id,
+                                     'arch' : binary.arch,
+                                     'suite' : override.suite,
+                                     'component' : override.component } )
+
+#     def bootstrap(self):
+#         """
+#         scan the existing debs in the pool to populate the contents database tables
+#         """
+#         pooldir = Config()[ 'Dir::Pool' ]
+
+#         s = DBConn().session()
+
+#         for suite in s.query(Suite).all():
+#             for arch in get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=s):
+#                 q = s.query(BinAssociation).join(Suite)
+#                 q = q.join(Suite).filter_by(suite_name=suite.suite_name)
+#                 q = q.join(DBBinary).join(Architecture).filter_by(arch.arch_string)
+#                 for ba in q:
+#                     filename = ba.binary.poolfile.filename
+#                     # Check for existing contents
+#                     existingq = s.query(ContentAssociations).filter_by(binary_pkg=ba.binary_id).limit(1)
+#                     if existingq.count() > 0:
+#                         log.debug( "already imported: %s" % (filename))
+#                     else:
+#                         # We don't have existing contents so import them
+#                         log.debug( "scanning: %s" % (filename) )
+#                         debfile = os.path.join(pooldir, filename)
+#                         if os.path.exists(debfile):
+#                             Binary(debfile, self.reject).scan_package(ba.binary_id, True)
+#                         else:
+#                             log.error("missing .deb: %s" % filename)
      def generate(self):
          """
      def generate(self):
          """
-        Generate Contents-$arch.gz files for every available arch in each given suite.
+        Generate contents files for both deb and udeb
          """
          """
-        cursor = DBConn().cursor()
-
          DBConn().prepare("arches_q", arches_q)
          DBConn().prepare("arches_q", arches_q)
-        DBConn().prepare("contents_q", contents_q)
-        DBConn().prepare("udeb_contents_q", udeb_contents_q)
-
-        debtype_id=DBConn().get_override_type_id("deb")
-        udebtype_id=DBConn().get_override_type_id("udeb")
+        self.deb_generate()
+#        self.udeb_generate()
  
  
-        arch_all_id = DBConn().get_architecture_id("all")
+    def deb_generate(self):
+        """
+        Generate Contents-$arch.gz files for every available arch in each given suite.
+        """
+        cursor = DBConn().session()
+        debtype_id = DBConn().get_override_type_id("deb")
          suites = self._suites()
  
          suites = self._suites()
  
-
+        inputtoquery = OneAtATime()
+        querytoingest = OneAtATime()
+        ingesttosort = OneAtATime()
+        sorttooutput = OneAtATime()
+        outputtogzip = OneAtATime()
+
+        qt = QueryThread(inputtoquery,querytoingest)
+        it = IngestThread(querytoingest,ingesttosort)
+# running extra ingest threads actually makes things worse
+#        it2 = IngestThread(querytoingest,ingesttosort)
+#        it3 = IngestThread(querytoingest,ingesttosort)
+#        it4 = IngestThread(querytoingest,ingesttosort)
+        st = SortThread(ingesttosort,sorttooutput)
+        ot = OutputThread(sorttooutput,outputtogzip)
+        gt = GzipThread(outputtogzip, None)
+
+        qt.start()
+        it.start()
+#        it2.start()
+#        it3.start()
+#        it2.start()
+        st.start()
+        ot.start()
+        gt.start()
+        
          # Get our suites, and the architectures
          for suite in [i.lower() for i in suites]:
              suite_id = DBConn().get_suite_id(suite)
              arch_list = self._arches(cursor, suite_id)
  
          # Get our suites, and the architectures
          for suite in [i.lower() for i in suites]:
              suite_id = DBConn().get_suite_id(suite)
              arch_list = self._arches(cursor, suite_id)
  
-            file_writers = {}
+            for (arch_id,arch_str) in arch_list:
+                print( "suite: %s, arch: %s time: %s" %(suite_id, arch_id, datetime.datetime.now().isoformat()) )
  
  
-            try:
-                for arch_id in arch_list:
-                    file_writers[arch_id[0]] = GzippedContentWriter("dists/%s/Contents-%s.gz" % (suite, arch_id[1]))
+#                filename = "dists/%s/Contents-%s.gz" % (suite, arch_str)
+                filename = "dists/%s/Contents-%s" % (suite, arch_str)
+                cf = ContentFile(filename, suite, suite_id, arch_str, arch_id)
+                inputtoquery.enqueue( cf )
  
  
-                cursor.execute("EXECUTE contents_q(%d,%d);" % (suite_id, debtype_id))
-
-                while True:
-                    r = cursor.fetchone()
-                    if not r:
-                        break
-
-                    filename, section, package, arch = r
-
-                    if arch == arch_all_id:
-                        ## its arch all, so all contents files get it
-                        for writer in file_writers.values():
-                            writer.write(filename, section, package)
-
-                    else:
-                        file_writers[arch].write(filename, section, package)
-
-            finally:
-                # close all the files
-                for writer in file_writers.values():
-                    writer.finish()
+        inputtoquery.enqueue( EndOfContents() )
+        gt.join()
  
  
+    def udeb_generate(self):
+        """
+        Generate Contents-udeb-$arch and Contents-udeb-nf-$arch files for each given suite.
+        """
+        cursor = DBConn().session()
+        udebtype_id=DBConn().get_override_type_id("udeb")
+        suites = self._suites()
  
  
-            # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free)
-            # This is HORRIBLY debian specific :-/
-        for section, fn_pattern in [("debian-installer","dists/%s/Contents-udeb-%s.gz"),
-                                    ("non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]:
+        inputtoquery = OneAtATime()
+        querytoingest = OneAtATime()
+        ingesttosort = OneAtATime()
+        sorttooutput = OneAtATime()
+        outputtogzip = OneAtATime()
+
+        qt = QueryThread(inputtoquery,querytoingest)
+        it = IngestThread(querytoingest,ingesttosort)
+# running extra ingest threads actually makes things worse
+#        it2 = IngestThread(querytoingest,ingesttosort)
+#        it3 = IngestThread(querytoingest,ingesttosort)
+#        it4 = IngestThread(querytoingest,ingesttosort)
+        st = SortThread(ingesttosort,sorttooutput)
+        ot = OutputThread(sorttooutput,outputtogzip)
+        gt = GzipThread(outputtogzip, None)
+
+        qt.start()
+        it.start()
+#        it2.start()
+#        it3.start()
+#        it2.start()
+        st.start()
+        ot.start()
+        gt.start()
+        
+        for section, fn_pattern in [("debian-installer","dists/%s/Contents-udeb-%s"),
+                                    ("non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s")]:
  
              section_id = DBConn().get_section_id(section) # all udebs should be here
              if section_id != -1:
  
  
              section_id = DBConn().get_section_id(section) # all udebs should be here
              if section_id != -1:
  
+                
+
                  # Get our suites, and the architectures
                  for suite in [i.lower() for i in suites]:
                      suite_id = DBConn().get_suite_id(suite)
                      arch_list = self._arches(cursor, suite_id)
  
                  # Get our suites, and the architectures
                  for suite in [i.lower() for i in suites]:
                      suite_id = DBConn().get_suite_id(suite)
                      arch_list = self._arches(cursor, suite_id)
  
-                    file_writers = {}
+                    for arch_id in arch_list:
  
  
-                    try:
-                        for arch_id in arch_list:
-                            file_writers[arch_id[0]] = GzippedContentWriter(fn_pattern % (suite, arch_id[1]))
+                        writer = GzippedContentWriter(fn_pattern % (suite, arch_id[1]))
+                        try:
  
  
-                        cursor.execute("EXECUTE udeb_contents_q(%d,%d,%d)" % (suite_id, udebtype_id, section_id))
+                            cursor.execute("EXECUTE udeb_contents_q(%d,%d,%d)" % (suite_id, udebtype_id, section_id, arch_id))
  
  
-                        while True:
-                            r = cursor.fetchone()
-                            if not r:
-                                break
+                            while True:
+                                r = cursor.fetchone()
+                                if not r:
+                                    break
  
  
-                            filename, section, package, arch = r
+                                filename, section, package, arch = r
+                                writer.write(filename, section, package)
+                        finally:
+                            writer.close()
  
  
-                            if not file_writers.has_key( arch ):
-                                continue
  
  
-                            if arch == arch_all_id:
-                                ## its arch all, so all contents files get it
-                                for writer in file_writers.values():
-                                    writer.write(filename, section, package)
  
  
-                            else:
-                                file_writers[arch].write(filename, section, package)
-                    finally:
-                        # close all the files
-                        for writer in file_writers.values():
-                            writer.finish()
  
  
+    def generate(self):
+        """
+        Generate Contents-$arch.gz files for every available arch in each given suite.
+
+        Three families of files are written per suite: the regular deb
+        contents, plus the udeb contents of the "debian-installer" and
+        "non-free/debian-installer" sections.
+        """
+        session = DBConn().session()
  
  
+        arch_all_id = get_architecture("all", session).arch_id
  
  
-################################################################################
+        # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free)
+        # This is HORRIBLY debian specific :-/
+        for dtype, section_name, fn_pattern in \
+              [('deb',  None,                        "dists/%s/Contents-%s.gz"),
+               ('udeb', "debian-installer",          "dists/%s/Contents-udeb-%s.gz"),
+               ('udeb', "non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]:
+
+            overridetype = get_override_type(dtype, session)
+
+            # For udebs, we only look in certain sections (see the for loop above);
+            # section stays None for the plain deb pass.
+            section = None
+            if section_name is not None:
+                section = get_section(section_name, session)
+
+            # Get our suites; which_suites() needs the session to look them up
+            for suite in which_suites(session):
+                # Which architectures do we need to work on
+                arch_list = get_suite_architectures(suite.suite_name, skipsrc=True, skipall=True, session=session)
+
+                # Set up our file writer dictionary
+                file_writers = {}
+                try:
+                    # One file writer per arch; the path is built from the suite
+                    # name, not from the suite object itself
+                    for arch in arch_list:
+                        file_writers[arch.arch_id] = GzippedContentWriter(fn_pattern % (suite.suite_name, arch.arch_string))
+
+                    for r in get_suite_contents(suite, overridetype, section, session=session).fetchall():
+                        # Unpack into row_section so the section used as query
+                        # filter is not clobbered for the next suite
+                        filename, row_section, package, arch_id = r
+
+                        if arch_id == arch_all_id:
+                            # It's arch all, so all contents files get it
+                            for writer in file_writers.values():
+                                writer.write(filename, row_section, package)
+                        elif arch_id in file_writers:
+                            # Rows for arches we have no writer for are dropped
+                            file_writers[arch_id].write(filename, row_section, package)
  
  
+                finally:
+                    # close all the files
+                    for writer in file_writers.values():
+                        writer.finish()
      def _suites(self):
          """
          return a list of suites to operate on
      def _suites(self):
          """
          return a list of suites to operate on
@@ -448,7 +650,8 @@ class Contents(object):
          if Config().has_key( "%s::%s" %(options_prefix,"Suite")):
              suites = utils.split_args(Config()[ "%s::%s" %(options_prefix,"Suite")])
          else:
          if Config().has_key( "%s::%s" %(options_prefix,"Suite")):
              suites = utils.split_args(Config()[ "%s::%s" %(options_prefix,"Suite")])
          else:
-            suites = Config().SubTree("Suite").List()
+            suites = [ 'unstable', 'testing' ]
+#            suites = Config().SubTree("Suite").List()
  
          return suites
  
  
          return suites
  
@@ -468,8 +671,8 @@ class Contents(object):
  
          return arch_list
  
  
          return arch_list
  
-################################################################################
  
  
+################################################################################
  
  def main():
      cnf = Config()
  
  def main():
      cnf = Config()
@@ -481,7 +684,7 @@ def main():
                  ]
  
      commands = {'generate' : Contents.generate,
                  ]
  
      commands = {'generate' : Contents.generate,
-                'bootstrap' : Contents.bootstrap,
+                'bootstrap_bin' : Contents.bootstrap_bin,
                  'cruft' : Contents.cruft,
                  }
  
                  'cruft' : Contents.cruft,
                  }
  
@@ -507,5 +710,17 @@ def main():
  
      commands[args[0]](Contents())
  
  
      commands[args[0]](Contents())
  
+def which_suites(session):
+    """
+    Work out which suites this run should operate on.
+
+    The "<options_prefix>::Suite" configuration entry is used when it is
+    set; otherwise every entry of the "Suite" configuration subtree is taken.
+    Each name is lower-cased and looked up with get_suite() using the given
+    session; the list of those lookup results is returned.
+    """
+    suite_key = "%s::%s" % (options_prefix, "Suite")
+
+    if not Config().has_key(suite_key):
+        # Nothing selected explicitly: fall back to every configured suite
+        suite_names = Config().SubTree("Suite").List()
+    else:
+        suite_names = utils.split_args(Config()[suite_key])
+
+    return [get_suite(name.lower(), session) for name in suite_names]
+
+
  if __name__ == '__main__':
      main()
  if __name__ == '__main__':
      main()