From 7d548bdf9a43db4d42bf5623f2acc6a7c2cc727d Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Mon, 8 Jun 2015 08:24:13 +0200 Subject: [PATCH] Rewrite auto-decruft to group removals Signed-off-by: Niels Thykier --- dak/auto_decruft.py | 273 +++++++++++++++++++++++++++----------------- 1 file changed, 170 insertions(+), 103 deletions(-) diff --git a/dak/auto_decruft.py b/dak/auto_decruft.py index a9798205..78bc5a28 100644 --- a/dak/auto_decruft.py +++ b/dak/auto_decruft.py @@ -35,6 +35,8 @@ Check for obsolete binary packages import sys import apt_pkg +from itertools import chain, product +from collections import defaultdict from daklib.config import Config from daklib.dbconn import * @@ -44,6 +46,7 @@ from daklib.rm import remove, ReverseDependencyChecker ################################################################################ + def usage(exit_code=0): print """Usage: dak cruft-report Check for obsolete or duplicated packages. @@ -55,8 +58,87 @@ Check for obsolete or duplicated packages. 
################################################################################ -def remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug): - """Remove binaries without a source + +def compute_sourceless_groups(suite_id, session): + """Find binaries without a source + + @type suite_id: int + @param suite_id: The id of the suite denoted by suite_name + + @type session: SQLA Session + @param session: The database session in use + """"" + rows = query_without_source(suite_id, session) + message = '[auto-cruft] no longer built from source, no reverse dependencies' + arch_all_id_tuple = tuple([get_architecture('all', session=session)]) + arch_all_list = ["all"] + for row in rows: + package = row[0] + group_info = { + "name": "sourceless:%s" % package, + "packages": tuple([package]), + "architectures": arch_all_list, + "architecture_ids": arch_all_id_tuple, + "message": message, + "removal_request": { + package: arch_all_list, + }, + } + yield group_info + + +def compute_nbs_groups(suite_id, suite_name, session): + """Find binaries no longer built + + @type suite_id: int + @param suite_id: The id of the suite denoted by suite_name + + @type suite_name: string + @param suite_name: The name of the suite to remove from + + @type session: SQLA Session + @param session: The database session in use + """"" + rows = queryNBS(suite_id, session) + arch2ids = dict((a.arch_string, a.arch_id) for a in get_suite_architectures(suite_name)) + + for row in rows: + (pkg_list, arch_list, source, _) = row + message = '[auto-cruft] NBS (no longer built by %s, no reverse dependencies)' % source + removal_request = dict((pkg, arch_list) for pkg in pkg_list) + group_info = { + "name": "NBS:%s" % source, + "packages": tuple(sorted(pkg_list)), + "architectures": sorted(arch_list, cmp=utils.arch_compare_sw), + "architecture_ids": tuple(arch2ids[arch] for arch in arch_list), + "message": message, + "removal_request": removal_request, + } + yield group_info + + +def 
remove_groups(groups, suite_id, suite_name, session): + for group in groups: + message = group["message"] + params = { + "architecture_ids": group["architecture_ids"], + "packages": group["packages"], + "suite_id": suite_id + } + q = session.execute(""" + SELECT b.package, b.version, a.arch_string, b.id + FROM binaries b + JOIN bin_associations ba ON b.id = ba.bin + JOIN architecture a ON b.architecture = a.id + JOIN suite su ON ba.suite = su.id + WHERE a.id IN :architecture_ids AND b.package IN :packages AND su.id = :suite_id + """, params) + + remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter") + + +def auto_decruft_suite(suite_name, suite_id, session, dryrun, debug): + """Run the auto-decrufter on a given suite @type suite_name: string @param suite_name: The name of the suite to remove from @@ -72,27 +154,51 @@ def remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug): @type debug: bool @param debug: If True, print some extra information - """"" - global Options - rows = query_without_source(suite_id, session) - arch_all_id = get_architecture('all', session=session) - discarded_removal = set() - - message = '[auto-cruft] no longer built from source, no reverse dependencies' - all_packages = dict((row[0], None) for row in rows) - if not all_packages: + """ + all_architectures = [a.arch_string for a in get_suite_architectures(suite_name)] + pkg_arch2groups = defaultdict(set) + group_order = [] + groups = {} + full_removal_request = [] + group_generator = chain( + compute_sourceless_groups(suite_id, session), + compute_nbs_groups(suite_id, suite_name, session) + ) + for group in group_generator: + group_name = group["name"] + pkgs = group["packages"] + affected_archs = group["architectures"] + removal_request = group["removal_request"] + # If we remove an arch:all package, then the breakage can occur on any + # of the architectures. 
+ if "all" in affected_archs: + affected_archs = all_architectures + for pkg_arch in product(pkgs, affected_archs): + pkg_arch2groups[pkg_arch].add(group_name) + groups[group_name] = group + group_order.append(group_name) + + full_removal_request.extend(removal_request.iteritems()) + + if not groups: if debug: print "N: Found no candidates" return if debug: - print "N: Considering to remove %s" % str(sorted(all_packages.iterkeys())) + print "N: Considering to remove the following packages:" + for group_name in sorted(groups): + group_info = groups[group_name] + pkgs = group_info["packages"] + archs = group_info["architectures"] + print "N: * %s: %s [%s]" % (group_name, ", ".join(pkgs), " ".join(archs)) + if debug: print "N: Compiling ReverseDependencyChecker (RDC) - please hold ..." - rdc = ReverseDependencyChecker(session, suite_name) if debug: print "N: Computing initial breakage..." - breakage = rdc.check_reverse_depends(all_packages) + + breakage = rdc.check_reverse_depends(full_removal_request) while breakage: by_breakers = [(len(breakage[x]), x, breakage[x]) for x in breakage] by_breakers.sort(reverse=True) @@ -104,112 +210,74 @@ def remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug): broken_str = ", ".join("%s/%s" % b for b in sorted(broken)) print "N: * %s => %s" % (bname, broken_str) - _, worst_package_arch, worst_breakage = by_breakers.pop(0) - averted_breakage = set(worst_breakage) - del all_packages[worst_package_arch[0]] - discarded_removal.add(worst_package_arch[0]) - if debug: - print "N: - skip removal of %s (due to %s)" % (worst_package_arch[0], sorted(averted_breakage)) + averted_breakage = set() + for _, package_arch, breakage in by_breakers: - package = package_arch[0] if breakage <= averted_breakage: # We already avoided this break continue - if package in discarded_removal: - averted_breakage |= breakage - continue + guilty_groups = pkg_arch2groups[package_arch] + + if not guilty_groups: + utils.fubar("Cannot figure what 
group provided %s" % str(package_arch)) + if debug: - print "N: - skip removal of %s (due to %s)" % ( - package, str(sorted(breakage - averted_breakage))) - discarded_removal.add(package) + # Only output it, if it is truly a new group being discarded + # - a group can reach this part multiple times, if it breaks things on + # more than one architecture. This is rather common, in fact. + already_discard = True + if any(group_name for group_name in guilty_groups if group_name in groups): + already_discard = False + + if not already_discard: + avoided = sorted(breakage - averted_breakage) + print "N: - skipping removal of %s (breakage: %s)" % (", ".join(sorted(guilty_groups)), str(avoided)) + averted_breakage |= breakage - del all_packages[package] + for group_name in guilty_groups: + if group_name in groups: + del groups[group_name] - if not all_packages: + if not groups: if debug: print "N: Nothing left to remove" return if debug: - print "N: Now considering to remove %s" % str(sorted(all_packages.iterkeys())) - breakage = rdc.check_reverse_depends(all_packages) + print "N: Now considering to remove: %s" % str(", ".join(sorted(groups.iterkeys()))) + + # Rebuild the removal request with the remaining groups and off + # we go to (not) break the world one more time + full_removal_request = [] + for group_info in groups.itervalues(): + full_removal_request.extend(group_info["removal_request"].iteritems()) + breakage = rdc.check_reverse_depends(full_removal_request) if debug: print "N: Removal looks good" if dryrun: - # Embed the -R just in case someone wants to run it manually later - print 'Would do: dak rm -m "%s" -s %s -a all -p -R -b %s' % \ - (message, suite_name, " ".join(sorted(all_packages))) - else: - params = { - arch_all_id: arch_all_id, - all_packages: tuple(all_packages), - suite_id: suite_id - } - q = session.execute(""" - SELECT b.package, b.version, a.arch_string, b.id - FROM binaries b - JOIN bin_associations ba ON b.id = ba.bin - JOIN architecture a ON 
b.architecture = a.id - JOIN suite su ON ba.suite = su.id - WHERE a.id = :arch_all_id AND b.package IN :all_packages AND su.id = :suite_id - """, params) - remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter") - - - - - -def removeNBS(suite_name, suite_id, session, dryrun): - """Remove binaries no longer built - - @type suite_name: string - @param suite_name: The name of the suite to remove from - - @type suite_id: int - @param suite_id: The id of the suite donated by suite_name - - @type session: SQLA Session - @param session: The database session in use - - @type dryrun: bool - @param dryrun: If True, just print the actions rather than actually doing them - """"" - global Options - rows = queryNBS(suite_id, session) - arch2ids = {} - for row in rows: - (pkg_list, arch_list, source, _) = row - if utils.check_reverse_depends(pkg_list, suite_name, arch_list, session, cruft=True, quiet=True): - continue - arch_string = ','.join(arch_list) - message = '[auto-cruft] NBS (no longer built by %s, no reverse dependencies)' % source + print "Would remove the equivalent of:" + for group_name in group_order: + if group_name not in groups: + continue + group_info = groups[group_name] + pkgs = group_info["packages"] + archs = group_info["architectures"] + message = group_info["message"] - if dryrun: # Embed the -R just in case someone wants to run it manually later - pkg_string = ' '.join(pkg_list) - print 'Would do: dak rm -m "%s" -s %s -a %s -p -R -b %s' % \ - (message, suite_name, arch_string, pkg_string) - else: - for architecture in arch_list: - if architecture in arch2ids: - arch2ids[architecture] = utils.get_architecture(architecture, session=session) - arch_ids = tuple(arch2ids[architecture] for architecture in arch_list) - params = { - suite_id: suite_id, - arch_ids: arch2ids, - pkg_list: tuple(pkg_list), - } - q = session.execute(""" - SELECT b.package, b.version, a.arch_string, b.id - FROM binaries b - JOIN bin_associations ba 
ON b.id = ba.bin - JOIN architecture a ON b.architecture = a.id - JOIN suite su ON ba.suite = su.id - WHERE a.id IN :arch_ids AND b.package IN :pkg_db_set AND su.id = :suite_id - """, params) - remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter") + print ' dak rm -m "{message}" -s {suite} -a {architectures} -p -R -b {packages}'.format( + message=message, suite=suite_name, + architectures=",".join(archs), packages=" ".join(pkgs), + ) + + print + print "Note: The removals may be interdependent. A non-breaking result may require the execution of all" + print "of the removals" + else: + remove_groups(groups.itervalues(), suite_id, suite_name, session) + ################################################################################ @@ -249,8 +317,7 @@ def main (): suite_id = suite.suite_id suite_name = suite.suite_name.lower() - remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug) - #removeNBS(suite_name, suite_id, session, dryrun) + auto_decruft_suite(suite_name, suite_id, session, dryrun, debug) ################################################################################ -- 2.39.5