git.decadent.org.uk Git - dak.git/commitdiff
generate_releases: reduce number of sql queries for by-hash files
author: Julien Cristau <jcristau@debian.org>
Wed, 18 May 2016 13:45:57 +0000 (15:45 +0200)
committer: Julien Cristau <jcristau@debian.org>
Wed, 18 May 2016 13:55:15 +0000 (15:55 +0200)
Instead of doing two queries per filename and per hash function, store a
set of known hashfiles up front and group the UPDATEs in a single query.

Suggested by Colin Watson.

Signed-off-by: Julien Cristau <jcristau@debian.org>
dak/generate_releases.py

index ae3577f3db998d17e7cad6837e58673aa113bcc8..dc46b24b54a09ce235d7531b5bf9e65b3badfec1 100755 (executable)
@@ -362,37 +362,49 @@ class ReleaseWriter(object):
         out.close()
         os.rename(outfile + '.new', outfile)
 
+        # Mark all by-hash files as obsolete.  We will undo that for the ones
+        # we still reference later.
         query = """
             UPDATE hashfile SET unreferenced = CURRENT_TIMESTAMP
             WHERE suite_id = :id AND unreferenced IS NULL"""
         session.execute(query, {'id': suite.suite_id})
 
         if suite.byhash:
+            query = "SELECT path FROM hashfile WHERE suite_id = :id"
+            q = session.execute(query, {'id': suite.suite_id})
+            known_hashfiles = set(row[0] for row in q)
+            updated = []
+            new = []
+
+            # Update the hashfile table with new or updated files
             for filename in fileinfo:
                 if not os.path.exists(filename):
                     # probably an uncompressed index we didn't generate
                     continue
-
+                byhashdir = os.path.join(os.path.dirname(filename), 'by-hash')
                 for h in hashes:
                     field = h.release_field
-                    hashfile = os.path.join(os.path.dirname(filename), 'by-hash', field, fileinfo[filename][field])
-                    query = "SELECT 1 FROM hashfile WHERE path = :p AND suite_id = :id"
-                    q = session.execute(
-                            query,
-                            {'p': hashfile, 'id': suite.suite_id})
-                    if q.rowcount:
-                        session.execute('''
-                            UPDATE hashfile SET unreferenced = NULL
-                            WHERE path = :p and suite_id = :id''',
-                            {'p': hashfile, 'id': suite.suite_id})
+                    hashfile = os.path.join(byhashdir, field, fileinfo[filename][field])
+                    if hashfile in known_hashfiles:
+                        updated.append(hashfile)
                     else:
-                        session.execute('''
-                            INSERT INTO hashfile (path, suite_id)
-                            VALUES (:p, :id)''',
-                            {'p': hashfile, 'id': suite.suite_id})
+                        new.append(hashfile)
+
+            if updated:
+                session.execute("""
+                    UPDATE hashfile SET unreferenced = NULL
+                    WHERE path = ANY(:p) AND suite_id = :id""",
+                    {'p': updated, 'id': suite.suite_id})
+            if new:
+                session.execute("""
+                    INSERT INTO hashfile (path, suite_id)
+                    VALUES (:p, :id)""",
+                    [{'p': hashfile, 'id': suite.suite_id} for hashfile in new])
+
         session.commit()
 
         if suite.byhash:
+            # Create hardlinks in by-hash directories
             for filename in fileinfo:
                 if not os.path.exists(filename):
                     # probably an uncompressed index we didn't generate