From: Niels Thykier Date: Sun, 7 Jun 2015 10:46:09 +0000 (+0200) Subject: auto-decruft: Batch check source-less cruft X-Git-Url: https://git.decadent.org.uk/gitweb/?p=dak.git;a=commitdiff_plain;h=b18683b50d8b822811bef9f03c307309cbf821ee auto-decruft: Batch check source-less cruft Add a ReverseDependencyChecker class for bulk testing breakage in reverse dependencies and use it in the auto-decrufter. At this point, disable the NBS removal - it will be re-added in the next commit. Signed-off-by: Niels Thykier --- diff --git a/dak/auto_decruft.py b/dak/auto_decruft.py index 5de87821..a9798205 100644 --- a/dak/auto_decruft.py +++ b/dak/auto_decruft.py @@ -40,7 +40,7 @@ from daklib.config import Config from daklib.dbconn import * from daklib import utils from daklib.cruft import * -from daklib.rm import remove +from daklib.rm import remove, ReverseDependencyChecker ################################################################################ @@ -55,7 +55,7 @@ Check for obsolete or duplicated packages. ################################################################################ -def remove_sourceless_cruft(suite_name, suite_id, session, dryrun): +def remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug): """Remove binaries without a source @type suite_name: string @@ -69,31 +69,96 @@ def remove_sourceless_cruft(suite_name, suite_id, session, dryrun): @type dryrun: bool @param dryrun: If True, just print the actions rather than actually doing them + + @type debug: bool + @param debug: If True, print some extra information """"" global Options rows = query_without_source(suite_id, session) arch_all_id = get_architecture('all', session=session) + discarded_removal = set() message = '[auto-cruft] no longer built from source, no reverse dependencies' - for row in rows: - package = row[0] - if utils.check_reverse_depends([package], suite_name, [], session, cruft=True, quiet=True): - continue + all_packages = dict((row[0], None) for row in rows) + if not all_packages: + if debug: + print "N: Found no candidates" + return + if debug: + print "N: Considering to remove %s" % str(sorted(all_packages.iterkeys())) + if debug: + print "N: Compiling ReverseDependencyChecker (RDC) - please hold ..." + + rdc = ReverseDependencyChecker(session, suite_name) + if debug: + print "N: Computing initial breakage..." + breakage = rdc.check_reverse_depends(all_packages) + while breakage: + by_breakers = [(len(breakage[x]), x, breakage[x]) for x in breakage] + by_breakers.sort(reverse=True) + if debug: + print "N: - Removal would break %s (package, architecture)-pairs" % (len(breakage)) + print "N: - full breakage:" + for _, breaker, broken in by_breakers: + bname = "%s/%s" % breaker + broken_str = ", ".join("%s/%s" % b for b in sorted(broken)) + print "N: * %s => %s" % (bname, broken_str) + + _, worst_package_arch, worst_breakage = by_breakers.pop(0) + averted_breakage = set(worst_breakage) + del all_packages[worst_package_arch[0]] + discarded_removal.add(worst_package_arch[0]) + if debug: + print "N: - skip removal of %s (due to %s)" % (worst_package_arch[0], sorted(averted_breakage)) + for _, package_arch, breakage in by_breakers: + package = package_arch[0] + if breakage <= averted_breakage: + # We already avoided this break + continue + if package in discarded_removal: + averted_breakage |= breakage + continue + if debug: + print "N: - skip removal of %s (due to %s)" % ( + package, str(sorted(breakage - averted_breakage))) + discarded_removal.add(package) + averted_breakage |= breakage + del all_packages[package] + + if not all_packages: + if debug: + print "N: Nothing left to remove" + return + + if debug: + print "N: Now considering to remove %s" % str(sorted(all_packages.iterkeys())) + breakage = rdc.check_reverse_depends(all_packages) + + if debug: + print "N: Removal looks good" + + if dryrun: + # Embed the -R just in case someone wants to run it manually later + print 'Would do: dak rm -m "%s" -s %s -a all -p -R -b %s' % \ + (message, suite_name, " ".join(sorted(all_packages))) + else: + params = { + arch_all_id: arch_all_id, + all_packages: tuple(all_packages), + suite_id: suite_id + } + q = session.execute(""" + SELECT b.package, b.version, a.arch_string, b.id + FROM binaries b + JOIN bin_associations ba ON b.id = ba.bin + JOIN architecture a ON b.architecture = a.id + JOIN suite su ON ba.suite = su.id + WHERE a.id = :arch_all_id AND b.package IN :all_packages AND su.id = :suite_id + """, params) + remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter") + + - if dryrun: - # Embed the -R just in case someone wants to run it manually later - print 'Would do: dak rm -m "%s" -s %s -a all -p -R -b %s' % \ - (message, suite_name, package) - else: - q = session.execute(""" - SELECT b.package, b.version, a.arch_string, b.id - FROM binaries b - JOIN bin_associations ba ON b.id = ba.bin - JOIN architecture a ON b.architecture = a.id - JOIN suite su ON ba.suite = su.id - WHERE a.id = :arch_all_id AND b.package = :package AND su.id = :suite_id - """, {arch_all_id: arch_all_id, package: package, suite_id: suite_id}) - remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter") def removeNBS(suite_name, suite_id, session, dryrun): @@ -154,8 +219,9 @@ def main (): Arguments = [('h',"help","Auto-Decruft::Options::Help"), ('n',"dry-run","Auto-Decruft::Options::Dry-Run"), + ('d',"debug","Auto-Decruft::Options::Debug"), ('s',"suite","Auto-Decruft::Options::Suite","HasArg")] - for i in ["help", "Dry-Run"]: + for i in ["help", "Dry-Run", "Debug"]: if not cnf.has_key("Auto-Decruft::Options::%s" % (i)): cnf["Auto-Decruft::Options::%s" % (i)] = "" @@ -167,9 +233,12 @@ def main (): if Options["Help"]: usage() + debug = False dryrun = False if Options["Dry-Run"]: dryrun = True + if Options["Debug"]: + debug = True session = DBConn().session() @@ -180,8 +249,8 @@ def main (): suite_id = suite.suite_id suite_name = suite.suite_name.lower() - remove_sourceless_cruft(suite_name, suite_id, session, dryrun) - removeNBS(suite_name, suite_id, session, dryrun) + remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug) + #removeNBS(suite_name, suite_id, session, dryrun) ################################################################################ diff --git a/daklib/rm.py b/daklib/rm.py index 892872f0..7dda5239 100644 --- a/daklib/rm.py +++ b/daklib/rm.py @@ -32,6 +32,8 @@ import commands import apt_pkg from re import sub +from collections import defaultdict +from regexes import re_build_dep_arch from daklib.dbconn import * from daklib import utils @@ -41,6 +43,247 @@ import debianbts as bts ################################################################################ +class ReverseDependencyChecker(object): + """A bulk tester for reverse dependency checks + + This class is similar to the check_reverse_depends method from "utils". However, + it is primarily focused on facilitating bulk testing of reverse dependencies. + It caches the state of the suite and then uses that as basis for answering queries. + This saves a significant amount of time if multiple reverse dependency checks are + required. + """ + + def __init__(self, session, suite): + """Creates a new ReverseDependencyChecker instance + + This will spend a significant amount of time caching data. + + @type session: SQLA Session + @param session: The database session in use + + @type suite: str + @param suite: The name of the suite that is used as basis for removal tests. + """ + self._session = session + dbsuite = get_suite(suite, session) + suite_archs2id = dict((x.arch_string, x.arch_id) for x in get_suite_architectures(suite)) + package_dependencies, arch_providors_of, arch_provided_by = self._load_package_information(session, + dbsuite.suite_id, + suite_archs2id) + self._package_dependencies = package_dependencies + self._arch_providors_of = arch_providors_of + self._arch_provided_by = arch_provided_by + self._archs_in_suite = set(suite_archs2id) + + @staticmethod + def _load_package_information(session, suite_id, suite_archs2id): + package_dependencies = defaultdict(lambda: defaultdict(set)) + arch_providors_of = defaultdict(lambda: defaultdict(set)) + arch_provided_by = defaultdict(lambda: defaultdict(set)) + source_deps = defaultdict(set) + metakey_d = get_or_set_metadatakey("Depends", session) + metakey_p = get_or_set_metadatakey("Provides", session) + params = { + 'suite_id': suite_id, + 'arch_all_id': suite_archs2id['all'], + 'metakey_d_id': metakey_d.key_id, + 'metakey_p_id': metakey_p.key_id, + } + all_arches = set(suite_archs2id) + all_arches.discard('source') + + package_dependencies['source'] = source_deps + + for architecture in all_arches: + deps = defaultdict(set) + providers_of = defaultdict(set) + provided_by = defaultdict(set) + arch_providors_of[architecture] = providers_of + arch_provided_by[architecture] = provided_by + package_dependencies[architecture] = deps + + params['arch_id'] = suite_archs2id[architecture] + + statement = ''' + SELECT b.package, + (SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends, + (SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides + FROM binaries b + JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id + WHERE b.architecture = :arch_id OR b.architecture = :arch_all_id''' + query = session.query('package', 'depends', 'provides'). \ + from_statement(statement).params(params) + for package, depends, provides in query: + + if depends is not None: + try: + parsed_dep = [] + for dep in apt_pkg.parse_depends(depends): + parsed_dep.append(frozenset(d[0] for d in dep)) + deps[package].update(parsed_dep) + except ValueError as e: + print "Error for package %s: %s" % (package, e) + # Maintain a counter for each virtual package. If a + # Provides: exists, set the counter to 0 and count all + # provides by a package not in the list for removal. + # If the counter stays 0 at the end, we know that only + # the to-be-removed packages provided this virtual + # package. + if provides is not None: + for virtual_pkg in provides.split(","): + virtual_pkg = virtual_pkg.strip() + if virtual_pkg == package: + continue + provided_by[virtual_pkg].add(package) + providers_of[package].add(virtual_pkg) + + # Check source dependencies (Build-Depends and Build-Depends-Indep) + metakey_bd = get_or_set_metadatakey("Build-Depends", session) + metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session) + params = { + 'suite_id': suite_id, + 'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id), + } + statement = ''' + SELECT s.source, string_agg(sm.value, ', ') as build_dep + FROM source s + JOIN source_metadata sm ON s.id = sm.src_id + WHERE s.id in + (SELECT source FROM src_associations + WHERE suite = :suite_id) + AND sm.key_id in :metakey_ids + GROUP BY s.id, s.source''' + query = session.query('source', 'build_dep').from_statement(statement). \ + params(params) + for source, build_dep in query: + if build_dep is not None: + # Remove [arch] information since we want to see breakage on all arches + build_dep = re_build_dep_arch.sub("", build_dep) + try: + parsed_dep = [] + for dep in apt_pkg.parse_src_depends(build_dep): + parsed_dep.append(frozenset(d[0] for d in dep)) + source_deps[source].update(parsed_dep) + except ValueError as e: + print "Error for package %s: %s" % (source, e) + + return package_dependencies, arch_providors_of, arch_provided_by + + def check_reverse_depends(self, removal_requests): + """Bulk check reverse dependencies + + Example: + removal_request = { + "eclipse-rcp": None, # means ALL architectures (incl. source) + "eclipse": None, # means ALL architectures (incl. source) + "lintian": ["source", "all"], # Only these two "architectures". + } + obj.check_reverse_depends(removal_request) + + @type removal_requests: dict (or a list of tuples) + @param removal_requests: A dictionary mapping a package name to a list of architectures. The list of + architectures decides from which the package will be removed - if the list is empty the package will + be removed on ALL architectures in the suite (including "source"). + + @rtype: dict + @return: A mapping of "removed package" (as a "(pkg, arch)"-tuple) to a set of broken + broken packages (also as "(pkg, arch)"-tuple). Note that the architecture values + in these tuples /can/ be "source" to reflect a breakage in build-dependencies. + """ + + archs_in_suite = self._archs_in_suite + removals_by_arch = defaultdict(set) + affected_virtual_by_arch = defaultdict(set) + package_dependencies = self._package_dependencies + arch_providors_of = self._arch_providors_of + arch_provided_by = self._arch_provided_by + arch_provides2removal = defaultdict(lambda: defaultdict(set)) + dep_problems = defaultdict(set) + src_deps = package_dependencies['source'] + src_removals = set() + arch_all_removals = set() + + if isinstance(removal_requests, dict): + removal_requests = removal_requests.iteritems() + + for pkg, arch_list in removal_requests: + if not arch_list: + arch_list = archs_in_suite + for arch in arch_list: + if arch == 'source': + src_removals.add(pkg) + continue + if arch == 'all': + arch_all_removals.add(pkg) + continue + removals_by_arch[arch].add(pkg) + if pkg in arch_providors_of[arch]: + affected_virtual_by_arch[arch].add(pkg) + + if arch_all_removals: + for arch in archs_in_suite: + if arch in ('all', 'source'): + continue + removals_by_arch[arch].update(arch_all_removals) + for pkg in arch_all_removals: + if pkg in arch_providors_of[arch]: + affected_virtual_by_arch[arch].add(pkg) + + if not removals_by_arch: + # Nothing to remove => no problems + return dep_problems + + for arch, removed_providers in affected_virtual_by_arch.iteritems(): + provides2removal = arch_provides2removal[arch] + removals = removals_by_arch[arch] + for virtual_pkg, virtual_providers in arch_provided_by[arch].iteritems(): + v = virtual_providers & removed_providers + if len(v) == len(virtual_providers): + # We removed all the providers of virtual_pkg + removals.add(virtual_pkg) + # Pick one to take the blame for the removal + # - we sort for determinism, optimally we would prefer to blame the same package + # to minimise the number of blamed packages. + provides2removal[virtual_pkg] = sorted(v)[0] + + for arch, removals in removals_by_arch.iteritems(): + deps = package_dependencies[arch] + provides2removal = arch_provides2removal[arch] + + # Check binary dependencies (Depends) + for package, dependencies in deps.iteritems(): + if package in removals: + continue + for clause in dependencies: + if not (clause <= removals): + # Something probably still satisfies this relation + continue + # whoops, we seemed to have removed all packages that could possibly satisfy + # this relation. Lets blame something for it + for dep_package in clause: + removal = dep_package + if dep_package in provides2removal: + removal = provides2removal[dep_package] + dep_problems[(removal, arch)].add((package, arch)) + + for source, build_dependencies in src_deps.iteritems(): + if source in src_removals: + continue + for clause in build_dependencies: + if not (clause <= removals): + # Something probably still satisfies this relation + continue + # whoops, we seemed to have removed all packages that could possibly satisfy + # this relation. Lets blame something for it + for dep_package in clause: + removal = dep_package + if dep_package in provides2removal: + removal = provides2removal[dep_package] + dep_problems[(removal, arch)].add((source, 'source')) + + return dep_problems + + def remove(session, reason, suites, removals, whoami=None, partial=False, components=None, done_bugs=None, date=None, carbon_copy=None, close_related_bugs=False):