]> git.decadent.org.uk Git - dak.git/blobdiff - dak/contents.py
i think contents are actually working on separate thread now?
[dak.git] / dak / contents.py
old mode 100644 (file)
new mode 100755 (executable)
index c7d47bd..be1e6d0
@@ -36,12 +36,14 @@ Create all the contents files
 
 import sys
 import os
 
 import sys
 import os
-import tempfile
 import logging
 import math
 import gzip
 import logging
 import math
 import gzip
+import threading
+import Queue
 import apt_pkg
 from daklib import utils
 import apt_pkg
 from daklib import utils
+from daklib.binary import Binary
 from daklib.config import Config
 from daklib.dbconn import DBConn
 ################################################################################
 from daklib.config import Config
 from daklib.dbconn import DBConn
 ################################################################################
@@ -71,9 +73,6 @@ OPTIONS
 
      -s, --suite={stable,testing,unstable,...}
         only operate on a single suite
 
      -s, --suite={stable,testing,unstable,...}
         only operate on a single suite
-
-     -a, --arch={i386,amd64}
-        only operate on a single architecture
 """
     sys.exit(exit_code)
 
 """
     sys.exit(exit_code)
 
@@ -88,19 +87,16 @@ log = logging.getLogger()
 
 ################################################################################
 
 
 ################################################################################
 
-# we unfortunately still have broken stuff in headers
-latin1_q = """SET CLIENT_ENCODING TO 'LATIN1'"""
-
 # get all the arches delivered for a given suite
 # this should probably exist somehere common
 # get all the arches delivered for a given suite
 # this should probably exist somehere common
-arches_q = """PREPARE arches_q as
+arches_q = """PREPARE arches_q(int) as
               SELECT s.architecture, a.arch_string
               FROM suite_architectures s
               JOIN architecture a ON (s.architecture=a.id)
                   WHERE suite = $1"""
 
 # find me the .deb for a given binary id
               SELECT s.architecture, a.arch_string
               FROM suite_architectures s
               JOIN architecture a ON (s.architecture=a.id)
                   WHERE suite = $1"""
 
 # find me the .deb for a given binary id
-debs_q = """PREPARE debs_q as
+debs_q = """PREPARE debs_q(int, int) as
               SELECT b.id, f.filename FROM bin_assoc_by_arch baa
               JOIN binaries b ON baa.bin=b.id
               JOIN files f ON b.file=f.id
               SELECT b.id, f.filename FROM bin_assoc_by_arch baa
               JOIN binaries b ON baa.bin=b.id
               JOIN files f ON b.file=f.id
@@ -108,61 +104,67 @@ debs_q = """PREPARE debs_q as
                   AND arch = $2"""
 
 # ask if we already have contents associated with this binary
                   AND arch = $2"""
 
 # ask if we already have contents associated with this binary
-olddeb_q = """PREPARE olddeb_q as
+olddeb_q = """PREPARE olddeb_q(int) as
               SELECT 1 FROM content_associations
               WHERE binary_pkg = $1
               LIMIT 1"""
 
 # find me all of the contents for a given .deb
               SELECT 1 FROM content_associations
               WHERE binary_pkg = $1
               LIMIT 1"""
 
 # find me all of the contents for a given .deb
-contents_q = """PREPARE contents_q as
-              SELECT (p.path||'/'||n.file) AS fn,
-                      comma_separated_list(s.section||'/'||b.package)
-              FROM content_associations c
-              JOIN content_file_paths p ON (c.filepath=p.id)
-              JOIN content_file_names n ON (c.filename=n.id)
-              JOIN binaries b ON (b.id=c.binary_pkg)
-              JOIN bin_associations ba ON (b.id=ba.bin)
-              JOIN override o ON (o.package=b.package)
-              JOIN section s ON (s.id=o.section)
-              WHERE (b.architecture = $1 OR b.architecture = $2)
-                  AND ba.suite = $3
-                  AND o.suite = $4
-                  AND b.type = 'deb'
-                  AND o.type = '7'
-              GROUP BY fn
-              ORDER BY fn"""
+contents_q = """PREPARE contents_q(int,int) as
+                SELECT (p.path||'/'||n.file) AS fn,
+                        s.section,
+                        b.package,
+                        b.architecture
+               FROM content_associations c join content_file_paths p ON (c.filepath=p.id)
+               JOIN content_file_names n ON (c.filename=n.id)
+               JOIN binaries b ON (b.id=c.binary_pkg)
+               JOIN override o ON (o.package=b.package)
+               JOIN section s ON (s.id=o.section)
+               WHERE o.suite = $1 AND o.type = $2
+               AND b.type='deb'
+               ORDER BY fn"""
 
 # find me all of the contents for a given .udeb
 
 # find me all of the contents for a given .udeb
-udeb_contents_q = """PREPARE udeb_contents_q as
-              SELECT (p.path||'/'||n.file) as fn,
-                      comma_separated_list(s.section||'/'||b.package)
-              FROM content_associations c
-              JOIN content_file_paths p ON (c.filepath=p.id)
-              JOIN content_file_names n ON (c.filename=n.id)
-              JOIN binaries b ON (b.id=c.binary_pkg)
-              JOIN bin_associations ba ON (b.id=ba.bin)
-              JOIN override o ON (o.package=b.package)
-              JOIN section s ON (s.id=o.section)
-              WHERE s.id = $1
-                  AND ba.suite = $2
-                  AND o.suite = $3
-                  AND b.type = 'udeb'
-                  AND o.type = '8'
-              GROUP BY fn
-              ORDER BY fn"""
+udeb_contents_q = """PREPARE udeb_contents_q(int,int,int) as
+              SELECT (p.path||'/'||n.file) AS fn,
+                        s.section,
+                        b.package,
+                        b.architecture
+               FROM content_associations c join content_file_paths p ON (c.filepath=p.id)
+               JOIN content_file_names n ON (c.filename=n.id)
+               JOIN binaries b ON (b.id=c.binary_pkg)
+               JOIN override o ON (o.package=b.package)
+               JOIN section s ON (s.id=o.section)
+               WHERE o.suite = $1 AND o.type = $2
+               AND s.id = $3
+               AND b.type='udeb'
+               ORDER BY fn"""
+
+#               FROM content_file_paths p join content_associations c ON (c.filepath=p.id)
+#               JOIN content_file_names n ON (c.filename=n.id)
+#               JOIN binaries b ON (b.id=c.binary_pkg)
+#               JOIN override o ON (o.package=b.package)
+#               JOIN section s ON (s.id=o.section)
+#               WHERE o.suite = $1 AND o.type = $2
+#               AND s.id = $3
+#               AND b.id in (SELECT ba.bin from bin_associations ba join binaries b on b.id=ba.bin where (b.architecture=$3 or b.architecture=$4)and ba.suite=$1 and b.type='udeb')
+#               GROUP BY fn
+#               ORDER BY fn;"""
+
+
 
 # clear out all of the temporarily stored content associations
 # this should be run only after p-a has run.  after a p-a
 # run we should have either accepted or rejected every package
 # so there should no longer be anything in the queue
 
 # clear out all of the temporarily stored content associations
 # this should be run only after p-a has run.  after a p-a
 # run we should have either accepted or rejected every package
 # so there should no longer be anything in the queue
-remove_temp_contents_cruft_q = """DELETE FROM temp_content_associations"""
+remove_pending_contents_cruft_q = """DELETE FROM pending_content_associations"""
 
 # delete any filenames we are storing which have no binary associated with them
 remove_filename_cruft_q = """DELETE FROM content_file_names
                              WHERE id IN (SELECT cfn.id FROM content_file_names cfn
                                           LEFT JOIN content_associations ca
                                             ON ca.filename=cfn.id
 
 # delete any filenames we are storing which have no binary associated with them
 remove_filename_cruft_q = """DELETE FROM content_file_names
                              WHERE id IN (SELECT cfn.id FROM content_file_names cfn
                                           LEFT JOIN content_associations ca
                                             ON ca.filename=cfn.id
-                                          WHERE ca.id IS NULL)""" );
+                                          WHERE ca.id IS NULL)"""
 
 # delete any paths we are storing which have no binary associated with them
 remove_filepath_cruft_q = """DELETE FROM content_file_paths
 
 # delete any paths we are storing which have no binary associated with them
 remove_filepath_cruft_q = """DELETE FROM content_file_paths
@@ -170,16 +172,85 @@ remove_filepath_cruft_q = """DELETE FROM content_file_paths
                                           LEFT JOIN content_associations ca
                                              ON ca.filepath=cfn.id
                                           WHERE ca.id IS NULL)"""
                                           LEFT JOIN content_associations ca
                                              ON ca.filepath=cfn.id
                                           WHERE ca.id IS NULL)"""
-class Contents(object):
+
+class EndOfContents(object):
     """
     """
-    Class capable of generating Contents-$arch.gz files
+    A sentry object for the end of the filename stream
+    """
+    pass
 
 
-    Usage GenerateContents().generateContents( ["main","contrib","non-free"] )
+class GzippedContentWriter(object):
+    """
+    An object which will write contents out to a Contents-$arch.gz
+    file on a separate thread
     """
 
     """
 
-    def __init__(self):
-        self.header = None
+    header = None # a class object holding the header section of contents file
+
+    def __init__(self, filename):
+        """
+        @ptype filename: string
+        @param filename: the name of the file to write to
+        """
+        self.queue = Queue.Queue()
+        self.current_file = None
+        self.first_package = True
+        self.output = self.open_file(filename)
+        self.thread = threading.Thread(target=self.write_thread,
+                                       name='Contents writer')
+        self.thread.start()
+
+    def open_file(self, filename):
+        """
+        opens a gzip stream to the contents file
+        """
+        filepath = Config()["Contents::Root"] + filename
+        filedir = os.path.dirname(filepath)
+        if not os.path.isdir(filedir):
+            os.makedirs(filedir)
+        return gzip.open(filepath, "w")
+
+    def write(self, filename, section, package):
+        """
+        enqueue content to be written to the file on a separate thread
+        """
+        self.queue.put((filename,section,package))
+
+    def write_thread(self):
+        """
+        the target of a Thread which will do the actual writing
+        """
+        while True:
+            next = self.queue.get()
+            if isinstance(next, EndOfContents):
+                self.output.write('\n')
+                self.output.close()
+                break
+
+            (filename,section,package)=next
+            if next != self.current_file:
+                # this is the first file, so write the header first
+                if not self.current_file:
+                    self.output.write(self._getHeader())
+
+                self.output.write('\n%s\t' % filename)
+                self.first_package = True
+
+            self.current_file=filename
+
+            if not self.first_package:
+                self.output.write(',')
+            else:
+                self.first_package=False
+            self.output.write('%s/%s' % (section,package))
 
 
+    def finish(self):
+        """
+        enqueue the sentry object so that writers will know to terminate
+        """
+        self.queue.put(EndOfContents())
+
+    @classmethod
     def _getHeader(self):
         """
         Internal method to return the header for Contents.gz files
     def _getHeader(self):
         """
         Internal method to return the header for Contents.gz files
@@ -187,50 +258,38 @@ class Contents(object):
         This is boilerplate which explains the contents of the file and how
         it can be used.
         """
         This is boilerplate which explains the contents of the file and how
         it can be used.
         """
-        if self.header == None:
+        if not GzippedContentWriter.header:
             if Config().has_key("Contents::Header"):
                 try:
                     h = open(os.path.join( Config()["Dir::Templates"],
                                            Config()["Contents::Header"] ), "r")
             if Config().has_key("Contents::Header"):
                 try:
                     h = open(os.path.join( Config()["Dir::Templates"],
                                            Config()["Contents::Header"] ), "r")
-                    self.header = h.read()
-                    print( "header: %s" % self.header )
+                    GzippedContentWriter.header = h.read()
                     h.close()
                 except:
                     log.error( "error opening header file: %d\n%s" % (Config()["Contents::Header"],
                                                                       traceback.format_exc() ))
                     h.close()
                 except:
                     log.error( "error opening header file: %d\n%s" % (Config()["Contents::Header"],
                                                                       traceback.format_exc() ))
-                    self.header = False
+                    GzippedContentWriter.header = None
             else:
             else:
-                print( "no header" )
-                self.header = False
+                GzippedContentWriter.header = None
 
 
-        return self.header
+        return GzippedContentWriter.header
 
 
-    # goal column for section column
-    _goal_column = 54
 
 
-    def _write_content_file(self, cursor, filename):
-        """
-        Internal method for writing all the results to a given file.
-        The cursor should have a result set generated from a query already.
-        """
-        f = gzip.open(Config()["Dir::Root"] + filename, "w")
-        try:
-            header = self._getHeader()
+class Contents(object):
+    """
+    Class capable of generating Contents-$arch.gz files
 
 
-            if header:
-                f.write(header)
+    Usage GenerateContents().generateContents( ["main","contrib","non-free"] )
+    """
 
 
-            while True:
-                contents = cursor.fetchone()
-                if not contents:
-                    return
+    def __init__(self):
+        self.header = None
 
 
-                num_tabs = max(1,
-                               int( math.ceil( (self._goal_column - len(contents[0])) / 8) ) )
-                f.write(contents[0] + ( '\t' * num_tabs ) + contents[-1] + "\n")
+    def reject(self, message):
+        log.error("E: %s" % message)
 
 
-        finally:
-            f.close()
+    # goal column for section column
+    _goal_column = 54
 
     def cruft(self):
         """
 
     def cruft(self):
         """
@@ -239,7 +298,7 @@ class Contents(object):
         """
         cursor = DBConn().cursor();
         cursor.execute( "BEGIN WORK" )
         """
         cursor = DBConn().cursor();
         cursor.execute( "BEGIN WORK" )
-        cursor.execute( remove_temp_contents_cruft_q )
+        cursor.execute( remove_pending_contents_cruft_q )
         cursor.execute( remove_filename_cruft_q )
         cursor.execute( remove_filepath_cruft_q )
         cursor.execute( "COMMIT" )
         cursor.execute( remove_filename_cruft_q )
         cursor.execute( remove_filepath_cruft_q )
         cursor.execute( "COMMIT" )
@@ -252,10 +311,9 @@ class Contents(object):
         pooldir = Config()[ 'Dir::Pool' ]
 
         cursor = DBConn().cursor();
         pooldir = Config()[ 'Dir::Pool' ]
 
         cursor = DBConn().cursor();
-        cursor.execute( latin1_q )
-        cursor.execute( debs_q )
-        cursor.execute( olddeb_q )
-        cursor.execute( arches_q )
+        DBConn().prepare("debs_q",debs_q)
+        DBConn().prepare("olddeb_q",olddeb_q)
+        DBConn().prepare("arches_q",arches_q)
 
         suites = self._suites()
         for suite in [i.lower() for i in suites]:
 
         suites = self._suites()
         for suite in [i.lower() for i in suites]:
@@ -266,60 +324,119 @@ class Contents(object):
             for arch_id in arch_list:
                 cursor.execute( "EXECUTE debs_q(%d, %d)" % ( suite_id, arch_id[0] ) )
 
             for arch_id in arch_list:
                 cursor.execute( "EXECUTE debs_q(%d, %d)" % ( suite_id, arch_id[0] ) )
 
-                debs = cursor.fetchall()
                 count = 0
                 count = 0
-                for deb in debs:
+                while True:
+                    deb = cursor.fetchone()
+                    if not deb:
+                        break
                     count += 1
                     count += 1
-                    cursor.execute( "EXECUTE olddeb_q(%d)" % (deb[0] ) )
-                    old = cursor.fetchone()
+                    cursor1 = DBConn().cursor();
+                    cursor1.execute( "EXECUTE olddeb_q(%d)" % (deb[0] ) )
+                    old = cursor1.fetchone()
                     if old:
                     if old:
-                        log.debug( "already imported: %s" % deb[1] )
+                        log.debug( "already imported: %s" % (deb[1]) )
                     else:
                     else:
+                        log.debug( "scanning: %s" % (deb[1]) )
                         debfile = os.path.join( pooldir, deb[1] )
                         if os.path.exists( debfile ):
                         debfile = os.path.join( pooldir, deb[1] )
                         if os.path.exists( debfile ):
-                            contents = utils.generate_contents_information( debfile )
-                            DBConn().insert_content_paths(deb[0], contents)
-                            log.info( "imported (%d/%d): %s" % (count,len(debs),deb[1] ) )
+                            Binary(debfile, self.reject).scan_package(deb[0],True)
                         else:
                         else:
-                            log.error( "missing .deb: %s" % deb[1] )
+                            log.error("missing .deb: %s" % deb[1])
 
     def generate(self):
         """
 
     def generate(self):
         """
-        Generate Contents-$arch.gz files for every aviailable arch in each given suite.
+        Generate Contents-$arch.gz files for every available arch in each given suite.
         """
         """
-        cursor = DBConn().cursor();
+        cursor = DBConn().cursor()
+
+        DBConn().prepare("arches_q", arches_q)
+        DBConn().prepare("contents_q", contents_q)
+        DBConn().prepare("udeb_contents_q", udeb_contents_q)
 
 
-        cursor.execute( arches_q )
-        cursor.execute( contents_q )
-        cursor.execute( udeb_contents_q )
+        debtype_id=DBConn().get_override_type_id("deb")
+        udebtype_id=DBConn().get_override_type_id("udeb")
 
 
+        arch_all_id = DBConn().get_architecture_id("all")
         suites = self._suites()
 
         suites = self._suites()
 
+
         # Get our suites, and the architectures
         for suite in [i.lower() for i in suites]:
             suite_id = DBConn().get_suite_id(suite)
             arch_list = self._arches(cursor, suite_id)
 
         # Get our suites, and the architectures
         for suite in [i.lower() for i in suites]:
             suite_id = DBConn().get_suite_id(suite)
             arch_list = self._arches(cursor, suite_id)
 
-            arch_all_id = DBConn().get_architecture_id("all")
+            file_writers = {}
+
+            try:
+                for arch_id in arch_list:
+                    file_writers[arch_id[0]] = GzippedContentWriter("dists/%s/Contents-%s.gz" % (suite, arch_id[1]))
+
+                cursor.execute("EXECUTE contents_q(%d,%d);" % (suite_id, debtype_id))
+
+                while True:
+                    r = cursor.fetchone()
+                    if not r:
+                        break
+
+                    filename, section, package, arch = r
+
+                    if arch == arch_all_id:
+                        ## its arch all, so all contents files get it
+                        for writer in file_writers.values():
+                            writer.write(filename, section, package)
+
+                    else:
+                        file_writers[arch].write(filename, section, package)
+
+            finally:
+                # close all the files
+                for writer in file_writers.values():
+                    writer.finish()
 
 
-            for arch_id in arch_list:
-                cursor.execute( "EXECUTE contents_q(%d,%d,%d,%d)" % (arch_id[0], arch_all_id, suite_id, suite_id))
-                self._write_content_file(cursor, "dists/%s/Contents-%s.gz" % (suite, arch_id[1]))
 
             # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free)
             # This is HORRIBLY debian specific :-/
 
             # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free)
             # This is HORRIBLY debian specific :-/
-            # First off, udeb
-            section_id = DBConn().get_section_id('debian-installer') # all udebs should be here)
+        for section, fn_pattern in [("debian-installer","dists/%s/Contents-udeb-%s.gz"),
+                                    ("non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]:
+
+            section_id = DBConn().get_section_id(section) # all udebs should be here)
             if section_id != -1:
             if section_id != -1:
-                cursor.execute("EXECUTE udeb_contents_q(%d,%d,%d)" % (section_id, suite_id, suite_id))
-                self._write_content_file(cursor, "dists/%s/Contents-udeb.gz" % suite)
 
 
-            # Once more, with non-free
-            section_id = DBConn().get_section_id('non-free/debian-installer') # all udebs should be here)
+                # Get our suites, and the architectures
+                for suite in [i.lower() for i in suites]:
+                    suite_id = DBConn().get_suite_id(suite)
+                    arch_list = self._arches(cursor, suite_id)
+
+                    file_writers = {}
+
+                    try:
+                        for arch_id in arch_list:
+                            file_writers[arch_id[0]] = GzippedContentWriter(fn_pattern % (suite, arch_id[1]))
+
+                        cursor.execute("EXECUTE udeb_contents_q(%d,%d,%d)" % (suite_id, udebtype_id, section_id))
+
+                        while True:
+                            r = cursor.fetchone()
+                            if not r:
+                                break
+
+                            filename, section, package, arch = r
+
+                            if not file_writers.has_key( arch ):
+                                continue
+
+                            if arch == arch_all_id:
+                                ## its arch all, so all contents files get it
+                                for writer in file_writers.values():
+                                    writer.write(filename, section, package)
+
+                            else:
+                                file_writers[arch].write(filename, section, package)
+                    finally:
+                        # close all the files
+                        for writer in file_writers.values():
+                            writer.finish()
 
 
-            if section_id != -1:
-                cursor.execute("EXECUTE udeb_contents_q(%d,%d,%d)" % (section_id, suite_id, suite_id))
-                self._write_content_file(cursor, "dists/%s/Contents-udeb-nf.gz" % suite)
 
 
 ################################################################################
 
 
 ################################################################################
@@ -339,25 +456,21 @@ class Contents(object):
         """
         return a list of archs to operate on
         """
         """
         return a list of archs to operate on
         """
-        arch_list = [ ]
-        if Config().has_key( "%s::%s" %(options_prefix,"Arch")):
-            archs = utils.split_args(Config()[ "%s::%s" %(options_prefix,"Arch")])
-            for arch_name in archs:
-                arch_list.append((DBConn().get_architecture_id(arch_name), arch_name))
-        else:
-            cursor.execute("EXECUTE arches_q(%d)" % (suite))
-            while True:
-                r = cursor.fetchone()
-                if not r:
-                    break
+        arch_list = []
+        cursor.execute("EXECUTE arches_q(%d)" % (suite))
+        while True:
+            r = cursor.fetchone()
+            if not r:
+                break
 
 
-                if r[1] != "source" and r[1] != "all":
-                    arch_list.append((r[0], r[1]))
+            if r[1] != "source" and r[1] != "all":
+                arch_list.append((r[0], r[1]))
 
         return arch_list
 
 ################################################################################
 
 
         return arch_list
 
 ################################################################################
 
+
 def main():
     cnf = Config()
 
 def main():
     cnf = Config()
 
@@ -365,7 +478,6 @@ def main():
                  ('s',"suite", "%s::%s" % (options_prefix,"Suite"),"HasArg"),
                  ('q',"quiet", "%s::%s" % (options_prefix,"Quiet")),
                  ('v',"verbose", "%s::%s" % (options_prefix,"Verbose")),
                  ('s',"suite", "%s::%s" % (options_prefix,"Suite"),"HasArg"),
                  ('q',"quiet", "%s::%s" % (options_prefix,"Quiet")),
                  ('v',"verbose", "%s::%s" % (options_prefix,"Verbose")),
-                 ('a',"arch", "%s::%s" % (options_prefix,"Arch"),"HasArg"),
                 ]
 
     commands = {'generate' : Contents.generate,
                 ]
 
     commands = {'generate' : Contents.generate,
@@ -373,6 +485,14 @@ def main():
                 'cruft' : Contents.cruft,
                 }
 
                 'cruft' : Contents.cruft,
                 }
 
+    args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments,sys.argv)
+
+    if (len(args) < 1) or not commands.has_key(args[0]):
+        usage()
+
+    if cnf.has_key("%s::%s" % (options_prefix,"Help")):
+        usage()
+
     level=logging.INFO
     if cnf.has_key("%s::%s" % (options_prefix,"Quiet")):
         level=logging.ERROR
     level=logging.INFO
     if cnf.has_key("%s::%s" % (options_prefix,"Quiet")):
         level=logging.ERROR
@@ -385,14 +505,6 @@ def main():
                          format='%(asctime)s %(levelname)s %(message)s',
                          stream = sys.stderr )
 
                          format='%(asctime)s %(levelname)s %(message)s',
                          stream = sys.stderr )
 
-    args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments,sys.argv)
-
-    if (len(args) < 1) or not commands.has_key(args[0]):
-        usage()
-
-    if cnf.has_key("%s::%s" % (options_prefix,"Help")):
-        usage()
-
     commands[args[0]](Contents())
 
 if __name__ == '__main__':
     commands[args[0]](Contents())
 
 if __name__ == '__main__':