git.decadent.org.uk Git - dak.git/commitdiff
Rewrite auto-decruft to group removals
author Niels Thykier <niels@thykier.net>
Mon, 8 Jun 2015 06:24:13 +0000 (08:24 +0200)
committer Niels Thykier <niels@thykier.net>
Mon, 8 Jun 2015 18:38:24 +0000 (20:38 +0200)
Signed-off-by: Niels Thykier <niels@thykier.net>
diff --git a/dak/auto_decruft.py b/dak/auto_decruft.py
index a97982058dccb8438f49c04bcd0b0480ef1491e0..78bc5a283c92e66180e318615d330e8f11764d50 100644
--- a/dak/auto_decruft.py
+++ b/dak/auto_decruft.py
@@ -35,6 +35,8 @@ Check for obsolete binary packages
 
 import sys
 import apt_pkg
+from itertools import chain, product
+from collections import defaultdict
 
 from daklib.config import Config
 from daklib.dbconn import *
@@ -44,6 +46,7 @@ from daklib.rm import remove, ReverseDependencyChecker
 
 ################################################################################
 
+
 def usage(exit_code=0):
     print """Usage: dak cruft-report
 Check for obsolete or duplicated packages.
@@ -55,8 +58,87 @@ Check for obsolete or duplicated packages.
 
 ################################################################################
 
-def remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug):
-    """Remove binaries without a source
+
+def compute_sourceless_groups(suite_id, session):
+    """Find binaries without a source
+
+    @type suite_id: int
+    @param suite_id: The id of the suite donated by suite_name
+
+    @type session: SQLA Session
+    @param session: The database session in use
+    """""
+    rows = query_without_source(suite_id, session)
+    message = '[auto-cruft] no longer built from source, no reverse dependencies'
+    arch_all_id_tuple = tuple([get_architecture('all', session=session)])
+    arch_all_list = ["all"]
+    for row in rows:
+        package = row[0]
+        group_info = {
+            "name": "sourceless:%s" % package,
+            "packages": tuple([package]),
+            "architectures": arch_all_list,
+            "architecture_ids": arch_all_id_tuple,
+            "message": message,
+            "removal_request": {
+                package: arch_all_list,
+            },
+        }
+        yield group_info
+
+
+def compute_nbs_groups(suite_id, suite_name, session):
+    """Find binaries no longer built
+
+    @type suite_id: int
+    @param suite_id: The id of the suite donated by suite_name
+
+    @type suite_name: string
+    @param suite_name: The name of the suite to remove from
+
+    @type session: SQLA Session
+    @param session: The database session in use
+    """""
+    rows = queryNBS(suite_id, session)
+    arch2ids = dict((a.arch_string, a.arch_id) for a in get_suite_architectures(suite_name))
+
+    for row in rows:
+        (pkg_list, arch_list, source, _) = row
+        message = '[auto-cruft] NBS (no longer built by %s, no reverse dependencies)' % source
+        removal_request = dict((pkg, arch_list) for pkg in pkg_list)
+        group_info = {
+            "name": "NBS:%s" % source,
+            "packages": tuple(sorted(pkg_list)),
+            "architectures": sorted(arch_list, cmp=utils.arch_compare_sw),
+            "architecture_ids": tuple(arch2ids[arch] for arch in arch_list),
+            "message": message,
+            "removal_request": removal_request,
+        }
+        yield group_info
+
+
+def remove_groups(groups, suite_id, suite_name, session):
+    for group in groups:
+        message = group["message"]
+        params = {
+            "architecture_ids": group["architecture_ids"],
+            "packages": group["packages"],
+            "suite_id": suite_id
+        }
+        q = session.execute("""
+            SELECT b.package, b.version, a.arch_string, b.id
+            FROM binaries b
+                JOIN bin_associations ba ON b.id = ba.bin
+                JOIN architecture a ON b.architecture = a.id
+                JOIN suite su ON ba.suite = su.id
+            WHERE a.id IN :architecture_ids AND b.package IN :packages AND su.id = :suite_id
+            """, params)
+
+        remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter")
+
+
+def auto_decruft_suite(suite_name, suite_id, session, dryrun, debug):
+    """Run the auto-decrufter on a given suite
 
     @type suite_name: string
     @param suite_name: The name of the suite to remove from
@@ -72,27 +154,51 @@ def remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug):
 
     @type debug: bool
     @param debug: If True, print some extra information
-    """""
-    global Options
-    rows = query_without_source(suite_id, session)
-    arch_all_id = get_architecture('all', session=session)
-    discarded_removal = set()
-
-    message = '[auto-cruft] no longer built from source, no reverse dependencies'
-    all_packages = dict((row[0], None) for row in rows)
-    if not all_packages:
+    """
+    all_architectures = [a.arch_string for a in get_suite_architectures(suite_name)]
+    pkg_arch2groups = defaultdict(set)
+    group_order = []
+    groups = {}
+    full_removal_request = []
+    group_generator = chain(
+        compute_sourceless_groups(suite_id, session),
+        compute_nbs_groups(suite_id, suite_name, session)
+    )
+    for group in group_generator:
+        group_name = group["name"]
+        pkgs = group["packages"]
+        affected_archs = group["architectures"]
+        removal_request = group["removal_request"]
+        # If we remove an arch:all package, then the breakage can occur on any
+        # of the architectures.
+        if "all" in affected_archs:
+            affected_archs = all_architectures
+        for pkg_arch in product(pkgs, affected_archs):
+            pkg_arch2groups[pkg_arch].add(group_name)
+        groups[group_name] = group
+        group_order.append(group_name)
+
+        full_removal_request.extend(removal_request.iteritems())
+
+    if not groups:
         if debug:
             print "N: Found no candidates"
         return
     if debug:
-        print "N: Considering to remove %s" % str(sorted(all_packages.iterkeys()))
+        print "N: Considering to remove the following packages:"
+        for group_name in sorted(groups):
+            group_info = groups[group_name]
+            pkgs = group_info["packages"]
+            archs = group_info["architectures"]
+            print "N: * %s: %s [%s]" % (group_name, ", ".join(pkgs), " ".join(archs))
+
     if debug:
         print "N: Compiling ReverseDependencyChecker (RDC) - please hold ..."
-
     rdc = ReverseDependencyChecker(session, suite_name)
     if debug:
         print "N: Computing initial breakage..."
-    breakage = rdc.check_reverse_depends(all_packages)
+
+    breakage = rdc.check_reverse_depends(full_removal_request)
     while breakage:
         by_breakers = [(len(breakage[x]), x, breakage[x]) for x in breakage]
         by_breakers.sort(reverse=True)
@@ -104,112 +210,74 @@ def remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug):
                 broken_str = ", ".join("%s/%s" % b for b in sorted(broken))
                 print "N:    * %s => %s" % (bname, broken_str)
 
-        _, worst_package_arch, worst_breakage = by_breakers.pop(0)
-        averted_breakage = set(worst_breakage)
-        del all_packages[worst_package_arch[0]]
-        discarded_removal.add(worst_package_arch[0])
-        if debug:
-            print "N: - skip removal of %s (due to %s)" % (worst_package_arch[0], sorted(averted_breakage))
+        averted_breakage = set()
+
         for _, package_arch, breakage in by_breakers:
-            package = package_arch[0]
             if breakage <= averted_breakage:
                 # We already avoided this break
                 continue
-            if package in discarded_removal:
-                averted_breakage |= breakage
-                continue
+            guilty_groups = pkg_arch2groups[package_arch]
+
+            if not guilty_groups:
+                utils.fubar("Cannot figure what group provided %s" % str(package_arch))
+
             if debug:
-                print "N: - skip removal of %s (due to %s)" % (
-                    package, str(sorted(breakage - averted_breakage)))
-            discarded_removal.add(package)
+                # Only output it, if it truly a new group being discarded
+                # - a group can reach this part multiple times, if it breaks things on
+                #   more than one architecture.  This being rather common in fact.
+                already_discard = True
+                if any(group_name for group_name in guilty_groups if group_name in groups):
+                    already_discard = False
+
+                if not already_discard:
+                    avoided = sorted(breakage - averted_breakage)
+                    print "N: - skipping removal of %s (breakage: %s)" % (", ".join(sorted(guilty_groups)), str(avoided))
+
             averted_breakage |= breakage
-            del all_packages[package]
+            for group_name in guilty_groups:
+                if group_name in groups:
+                    del groups[group_name]
 
-        if not all_packages:
+        if not groups:
             if debug:
                 print "N: Nothing left to remove"
             return
 
         if debug:
-            print "N: Now considering to remove %s" % str(sorted(all_packages.iterkeys()))
-        breakage = rdc.check_reverse_depends(all_packages)
+            print "N: Now considering to remove: %s" % str(", ".join(sorted(groups.iterkeys())))
+
+        # Rebuild the removal request with the remaining groups and off
+        # we go to (not) break the world once more time
+        full_removal_request =  []
+        for group_info in groups.itervalues():
+            full_removal_request.extend(group_info["removal_request"].iteritems())
+        breakage = rdc.check_reverse_depends(full_removal_request)
 
     if debug:
         print "N: Removal looks good"
 
     if dryrun:
-        # Embed the -R just in case someone wants to run it manually later
-        print 'Would do:    dak rm -m "%s" -s %s -a all -p -R -b %s' % \
-              (message, suite_name, " ".join(sorted(all_packages)))
-    else:
-        params = {
-            arch_all_id: arch_all_id,
-            all_packages: tuple(all_packages),
-            suite_id: suite_id
-        }
-        q = session.execute("""
-        SELECT b.package, b.version, a.arch_string, b.id
-        FROM binaries b
-            JOIN bin_associations ba ON b.id = ba.bin
-            JOIN architecture a ON b.architecture = a.id
-            JOIN suite su ON ba.suite = su.id
-        WHERE a.id = :arch_all_id AND b.package IN :all_packages AND su.id = :suite_id
-        """, params)
-        remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter")
-
-
-
-
-
-def removeNBS(suite_name, suite_id, session, dryrun):
-    """Remove binaries no longer built
-
-    @type suite_name: string
-    @param suite_name: The name of the suite to remove from
-
-    @type suite_id: int
-    @param suite_id: The id of the suite donated by suite_name
-
-    @type session: SQLA Session
-    @param session: The database session in use
-
-    @type dryrun: bool
-    @param dryrun: If True, just print the actions rather than actually doing them
-    """""
-    global Options
-    rows = queryNBS(suite_id, session)
-    arch2ids = {}
-    for row in rows:
-        (pkg_list, arch_list, source, _) = row
-        if utils.check_reverse_depends(pkg_list, suite_name, arch_list, session, cruft=True, quiet=True):
-            continue
-        arch_string = ','.join(arch_list)
-        message = '[auto-cruft] NBS (no longer built by %s, no reverse dependencies)' % source
+        print "Would remove the equivalent of:"
+        for group_name in group_order:
+            if group_name not in groups:
+                continue
+            group_info = groups[group_name]
+            pkgs = group_info["packages"]
+            archs = group_info["architectures"]
+            message = group_info["message"]
 
-        if dryrun:
             # Embed the -R just in case someone wants to run it manually later
-            pkg_string = ' '.join(pkg_list)
-            print 'Would do:    dak rm -m "%s" -s %s -a %s -p -R -b %s' % \
-                  (message, suite_name, arch_string, pkg_string)
-        else:
-            for architecture in arch_list:
-                if architecture in arch2ids:
-                    arch2ids[architecture] = utils.get_architecture(architecture, session=session)
-            arch_ids = tuple(arch2ids[architecture] for architecture in arch_list)
-            params = {
-                suite_id: suite_id,
-                arch_ids: arch2ids,
-                pkg_list: tuple(pkg_list),
-            }
-            q = session.execute("""
-            SELECT b.package, b.version, a.arch_string, b.id
-            FROM binaries b
-                JOIN bin_associations ba ON b.id = ba.bin
-                JOIN architecture a ON b.architecture = a.id
-                JOIN suite su ON ba.suite = su.id
-            WHERE a.id IN :arch_ids AND b.package IN :pkg_db_set AND su.id = :suite_id
-            """, params)
-            remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter")
+            print '    dak rm -m "{message}" -s {suite} -a {architectures} -p -R -b {packages}'.format(
+                message=message, suite=suite_name,
+                architectures=",".join(archs), packages=" ".join(pkgs),
+            )
+
+        print
+        print "Note: The removals may be interdependent.  A non-breaking result may require the execution of all"
+        print "of the removals"
+    else:
+        remove_groups(groups.itervalues(), suite_id, suite_name, session)
+
 
 ################################################################################
 
@@ -249,8 +317,7 @@ def main ():
     suite_id = suite.suite_id
     suite_name = suite.suite_name.lower()
 
-    remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug)
-    #removeNBS(suite_name, suite_id, session, dryrun)
+    auto_decruft_suite(suite_name, suite_id, session, dryrun, debug)
 
 ################################################################################