X-Git-Url: https://git.decadent.org.uk/gitweb/?p=dak.git;a=blobdiff_plain;f=dak%2Fclean_suites.py;h=d5b0fc4e4eada96a973a7c5c418307cae0e73780;hp=8f68907a0f5c9be83440fd2ec34021d9df5e8e37;hb=98d086ddda3772fb58c1bfa97478e8044596b569;hpb=1a4282b11d64ee88f5cbc49a0394c0b5f6767853 diff --git a/dak/clean_suites.py b/dak/clean_suites.py index 8f68907a..d5b0fc4e 100755 --- a/dak/clean_suites.py +++ b/dak/clean_suites.py @@ -1,7 +1,13 @@ #!/usr/bin/env python -""" Cleans up unassociated binary and source packages """ -# Copyright (C) 2000, 2001, 2002, 2003, 2006 James Troup +""" Cleans up unassociated binary and source packages + +@contact: Debian FTPMaster +@copyright: 2000, 2001, 2002, 2003, 2006 James Troup +@copyright: 2009 Mark Hymers +@copyright: 2010 Joerg Jaspert +@license: GNU General Public License version 2 or later +""" # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -28,7 +34,10 @@ ################################################################################ -import os, stat, sys, time +import os +import stat +import sys +import time import apt_pkg from datetime import datetime, timedelta @@ -55,102 +64,108 @@ Clean old packages from suites. ################################################################################ -def check_binaries(now_date, delete_date, max_delete, session): - print "Checking for orphaned binary packages..." +def check_binaries(now_date, session): + Logger.log(["Checking for orphaned binary packages..."]) # Get the list of binary packages not in a suite and mark them for # deletion. - - q = session.execute(""" -SELECT b.file, f.filename FROM binaries b, files f - WHERE f.last_used IS NULL AND b.file = f.id - AND NOT EXISTS (SELECT 1 FROM bin_associations ba WHERE ba.bin = b.id)""") - - for i in q.fetchall(): - Logger.log(["set lastused", i[1]]) - session.execute("UPDATE files SET last_used = :lastused WHERE id = :fileid AND last_used IS NULL", - {'lastused': now_date, 'fileid': i[0]}) - session.commit() - # Check for any binaries which are marked for eventual deletion # but are now used again. - - q = session.execute(""" -SELECT b.file, f.filename FROM binaries b, files f - WHERE f.last_used IS NOT NULL AND f.id = b.file - AND EXISTS (SELECT 1 FROM bin_associations ba WHERE ba.bin = b.id)""") - for i in q.fetchall(): - Logger.log(["unset lastused", i[1]]) - session.execute("UPDATE files SET last_used = NULL WHERE id = :fileid", {'fileid': i[0]}) - session.commit() + query = """ + WITH usage AS ( + SELECT + af.archive_id AS archive_id, + af.file_id AS file_id, + af.component_id AS component_id, + BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba + JOIN suite s ON ba.suite = s.id + WHERE ba.bin = b.id + AND s.archive_id = af.archive_id)) + AS in_use + FROM files_archive_map af + JOIN binaries b ON af.file_id = b.file + GROUP BY af.archive_id, af.file_id, af.component_id + ) + + UPDATE files_archive_map af + SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END + FROM usage, files f, archive + WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id + AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use)) + AND af.file_id = f.id + AND af.archive_id = archive.id + RETURNING archive.name, f.filename, af.last_used IS NULL""" + + res = session.execute(query, {'last_used': now_date}) + for i in res: + op = "set lastused" + if i[2]: + op = "unset lastused" + Logger.log([op, i[0], i[1]]) ######################################## - -def check_sources(now_date, delete_date, max_delete, session): - print "Checking for orphaned source packages..." + +def check_sources(now_date, session): + Logger.log(["Checking for orphaned source packages..."]) # Get the list of source packages not in a suite and not used by # any binaries. - q = session.execute(""" -SELECT s.id, s.file, f.filename FROM source s, files f - WHERE f.last_used IS NULL AND s.file = f.id - AND NOT EXISTS (SELECT 1 FROM src_associations sa WHERE sa.source = s.id) - AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.source = s.id)""") - - #### XXX: this should ignore cases where the files for the binary b - #### have been marked for deletion (so the delay between bins go - #### byebye and sources go byebye is 0 instead of StayOfExecution) - - for i in q.fetchall(): - source_id = i[0] - dsc_file_id = i[1] - dsc_fname = i[2] - - # Mark the .dsc file for deletion - Logger.log(["set lastused", dsc_fname]) - session.execute("""UPDATE files SET last_used = :last_used - WHERE id = :dscfileid AND last_used IS NULL""", - {'last_used': now_date, 'dscfileid': dsc_file_id}) - - # Mark all other files references by .dsc too if they're not used by anyone else - x = session.execute("""SELECT f.id, f.filename FROM files f, dsc_files d - WHERE d.source = :sourceid AND d.file = f.id""", - {'sourceid': source_id}) - for j in x.fetchall(): - file_id = j[0] - file_name = j[1] - y = session.execute("SELECT id FROM dsc_files d WHERE d.file = :fileid", {'fileid': file_id}) - if len(y.fetchall()) == 1: - Logger.log(["set lastused", file_name]) - session.execute("""UPDATE files SET last_used = :lastused - WHERE id = :fileid AND last_used IS NULL""", - {'lastused': now_date, 'fileid': file_id}) - - session.commit() # Check for any sources which are marked for deletion but which # are now used again. - q = session.execute(""" -SELECT f.id, f.filename FROM source s, files f, dsc_files df - WHERE f.last_used IS NOT NULL AND s.id = df.source AND df.file = f.id - AND ((EXISTS (SELECT 1 FROM src_associations sa WHERE sa.source = s.id)) - OR (EXISTS (SELECT 1 FROM binaries b WHERE b.source = s.id)))""") - - #### XXX: this should also handle deleted binaries specially (ie, not - #### reinstate sources because of them - - for i in q.fetchall(): - Logger.log(["unset lastused", i[1]]) - session.execute("UPDATE files SET last_used = NULL WHERE id = :fileid", - {'fileid': i[0]}) - - session.commit() + # TODO: the UPDATE part is the same as in check_binaries. Merge? + + query = """ + WITH usage AS ( + SELECT + af.archive_id AS archive_id, + af.file_id AS file_id, + af.component_id AS component_id, + BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa + JOIN suite s ON sa.suite = s.id + WHERE sa.source = df.source + AND s.archive_id = af.archive_id) + OR EXISTS (SELECT 1 FROM files_archive_map af_bin + JOIN binaries b ON af_bin.file_id = b.file + WHERE b.source = df.source + AND af_bin.archive_id = af.archive_id + AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date)) + OR EXISTS (SELECT 1 FROM extra_src_references esr + JOIN bin_associations ba ON esr.bin_id = ba.bin + JOIN binaries b ON ba.bin = b.id + JOIN suite s ON ba.suite = s.id + WHERE esr.src_id = df.source + AND s.archive_id = af.archive_id)) + AS in_use + FROM files_archive_map af + JOIN dsc_files df ON af.file_id = df.file + JOIN archive_delete_date ad ON af.archive_id = ad.archive_id + GROUP BY af.archive_id, af.file_id, af.component_id + ) + + UPDATE files_archive_map af + SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END + FROM usage, files f, archive + WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id + AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use)) + AND af.file_id = f.id + AND af.archive_id = archive.id + + RETURNING archive.name, f.filename, af.last_used IS NULL + """ + + res = session.execute(query, {'last_used': now_date}) + for i in res: + op = "set lastused" + if i[2]: + op = "unset lastused" + Logger.log([op, i[0], i[1]]) ######################################## -def check_files(now_date, delete_date, max_delete, session): +def check_files(now_date, session): # FIXME: this is evil; nothing should ever be in this state. if # they are, it's a bug. @@ -158,81 +173,117 @@ def check_files(now_date, delete_date, max_delete, session): # and then mark the file for deletion. This probably masks a bug somwhere # else but is better than collecting cruft forever - print "Checking for unused files..." + Logger.log(["Checking for unused files..."]) q = session.execute(""" -SELECT id, filename FROM files f - WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = f.id) - AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = f.id) - ORDER BY filename""") - - ql = q.fetchall() - if len(ql) > 0: - utils.warn("check_files found something it shouldn't") - for x in ql: - utils.warn("orphaned file: %s" % x) - Logger.log(["set lastused", x[1], "ORPHANED FILE"]) - session.execute("UPDATE files SET last_used = :lastused WHERE id = :fileid", - {'lastused': now_date, 'fileid': x[0]}) + UPDATE files_archive_map af + SET last_used = :last_used + FROM files f, archive + WHERE af.file_id = f.id + AND af.archive_id = archive.id + AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id) + AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id) + AND af.last_used IS NULL + RETURNING archive.name, f.filename""", {'last_used': now_date}) + + for x in q: + utils.warn("orphaned file: {0}".format(x)) + Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"]) + if not Options["No-Action"]: session.commit() -def clean_binaries(now_date, delete_date, max_delete, session): +def clean_binaries(now_date, session): # We do this here so that the binaries we remove will have their # source also removed (if possible). # XXX: why doesn't this remove the files here as well? I don't think it # buys anything keeping this separate - print "Cleaning binaries from the DB..." - print "Deleting from binaries table... " - for bin in session.query(DBBinary).join(DBBinary.poolfile).filter(PoolFile.last_used <= delete_date): - Logger.log(["delete binary", bin.poolfile.filename]) - if not Options["No-Action"]: - session.delete(bin) - if not Options["No-Action"]: - session.commit() + + Logger.log(["Deleting from binaries table... "]) + q = session.execute(""" + DELETE FROM binaries b + USING files f + WHERE f.id = b.file + AND NOT EXISTS (SELECT 1 FROM files_archive_map af + JOIN archive_delete_date ad ON af.archive_id = ad.archive_id + WHERE af.file_id = b.file + AND (af.last_used IS NULL OR af.last_used > ad.delete_date)) + RETURNING f.filename + """) + for b in q: + Logger.log(["delete binary", b[0]]) ######################################## -def clean(now_date, delete_date, max_delete, session): +def clean(now_date, archives, max_delete, session): cnf = Config() count = 0 size = 0 - print "Cleaning out packages..." + Logger.log(["Cleaning out packages..."]) + + morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", 'morgue')) + morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", 'pool') + + # Build directory as morguedir/morguesubdir/year/month/day + dest = os.path.join(morguedir, + morguesubdir, + str(now_date.year), + '%.2d' % now_date.month, + '%.2d' % now_date.day) - cur_date = now_date.strftime("%Y-%m-%d") - dest = os.path.join(cnf["Dir::Morgue"], cnf["Clean-Suites::MorgueSubDir"], cur_date) - if not os.path.exists(dest): - os.mkdir(dest) + if not Options["No-Action"] and not os.path.exists(dest): + os.makedirs(dest) # Delete from source - print "Deleting from source table... " + Logger.log(["Deleting from source table..."]) q = session.execute(""" -SELECT s.id, f.filename FROM source s, files f - WHERE f.last_used <= :deletedate - AND s.file = f.id""", {'deletedate': delete_date}) - for s in q.fetchall(): - Logger.log(["delete source", s[1], s[0]]) - if not Options["No-Action"]: - session.execute("DELETE FROM dsc_files WHERE source = :s_id", {"s_id":s[0]}) - session.execute("DELETE FROM source WHERE id = :s_id", {"s_id":s[0]}) + WITH + deleted_sources AS ( + DELETE FROM source + USING files f + WHERE source.file = f.id + AND NOT EXISTS (SELECT 1 FROM files_archive_map af + JOIN archive_delete_date ad ON af.archive_id = ad.archive_id + WHERE af.file_id = source.file + AND (af.last_used IS NULL OR af.last_used > ad.delete_date)) + RETURNING source.id AS id, f.filename AS filename + ), + deleted_dsc_files AS ( + DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources) + RETURNING df.file AS file_id + ), + now_unused_source_files AS ( + UPDATE files_archive_map af + SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc. + WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files) + AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id) + ) + SELECT filename FROM deleted_sources""") + for s in q: + Logger.log(["delete source", s[0]]) if not Options["No-Action"]: session.commit() # Delete files from the pool - old_files = session.query(PoolFile).filter(PoolFile.last_used <= delete_date) + old_files = session.query(ArchiveFile).filter('files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)').join(Archive) if max_delete is not None: old_files = old_files.limit(max_delete) - print "Limiting removals to %d" % max_delete + Logger.log(["Limiting removals to %d" % max_delete]) + + if archives is not None: + archive_ids = [ a.archive_id for a in archives ] + old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids)) - for pf in old_files: - filename = os.path.join(pf.location.path, pf.filename) + for af in old_files: + filename = af.path if not os.path.exists(filename): - utils.warn("can not find '%s'." % (filename)) + Logger.log(["database referred to non-existing file", af.path]) + session.delete(af) continue - Logger.log(["delete pool file", filename]) + Logger.log(["delete archive file", filename]) if os.path.isfile(filename): if os.path.islink(filename): count += 1 @@ -245,29 +296,41 @@ SELECT s.id, f.filename FROM source s, files f dest_filename = dest + '/' + os.path.basename(filename) # If the destination file exists; try to find another filename to use - if os.path.exists(dest_filename): + if os.path.lexists(dest_filename): dest_filename = utils.find_next_free(dest_filename) - Logger.log(["move to morgue", filename, dest_filename]) if not Options["No-Action"]: - utils.move(filename, dest_filename) + if af.archive.use_morgue: + Logger.log(["move to morgue", filename, dest_filename]) + utils.move(filename, dest_filename) + else: + Logger.log(["removed file", filename]) + os.unlink(filename) if not Options["No-Action"]: - session.delete(pf) - + session.delete(af) + session.commit() + else: utils.fubar("%s is neither symlink nor file?!" % (filename)) + if count > 0: + Logger.log(["total", count, utils.size_type(size)]) + + # Delete entries in files no longer referenced by any archive + query = """ + DELETE FROM files f + WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id) + """ + session.execute(query) + if not Options["No-Action"]: session.commit() - if count > 0: - print "Cleaned %d files, %s." % (count, utils.size_type(size)) - ################################################################################ -def clean_maintainers(now_date, delete_date, max_delete, session): - print "Cleaning out unused Maintainer entries..." +def clean_maintainers(now_date, session): + Logger.log(["Cleaning out unused Maintainer entries..."]) # TODO Replace this whole thing with one SQL statement q = session.execute(""" @@ -289,19 +352,20 @@ SELECT m.id, m.name FROM maintainer m session.commit() if count > 0: - print "Cleared out %d maintainer entries." % (count) + Logger.log(["total", count]) ################################################################################ -def clean_fingerprints(now_date, delete_date, max_delete, session): - print "Cleaning out unused fingerprint entries..." +def clean_fingerprints(now_date, session): + Logger.log(["Cleaning out unused fingerprint entries..."]) # TODO Replace this whole thing with one SQL statement q = session.execute(""" SELECT f.id, f.fingerprint FROM fingerprint f WHERE f.keyring IS NULL AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id) - AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id)""") + AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id) + AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)""") count = 0 @@ -316,41 +380,53 @@ SELECT f.id, f.fingerprint FROM fingerprint f session.commit() if count > 0: - print "Cleared out %d fingerprint entries." % (count) + Logger.log(["total", count]) ################################################################################ -def clean_queue_build(now_date, delete_date, max_delete, session): +def clean_empty_directories(session): + """ + Removes empty directories from pool directories. + """ - cnf = Config() + Logger.log(["Cleaning out empty directories..."]) - if not cnf.ValueList("Dinstall::QueueBuildSuites") or Options["No-Action"]: - return + count = 0 - print "Cleaning out queue build symlinks..." + cursor = session.execute( + """SELECT DISTINCT(path) FROM archive""" + ) + bases = [x[0] for x in cursor.fetchall()] - our_delete_date = now_date - timedelta(seconds = int(cnf["Clean-Suites::QueueBuildStayOfExecution"])) - count = 0 + for base in bases: + for dirpath, dirnames, filenames in os.walk(base, topdown=False): + if not filenames and not dirnames: + to_remove = os.path.join(base, dirpath) + if not Options["No-Action"]: + Logger.log(["removing directory", to_remove]) + os.removedirs(to_remove) + count += 1 - for qf in session.query(QueueBuild).filter(QueueBuild.last_used <= our_delete_date): - if not os.path.exists(qf.filename): - utils.warn("%s (from queue_build) doesn't exist." % (qf.filename)) - continue + if count: + Logger.log(["total removed directories", count]) - if not cnf.FindB("Dinstall::SecurityQueueBuild") and not os.path.islink(qf.filename): - utils.fubar("%s (from queue_build) should be a symlink but isn't." % (qf.filename)) +################################################################################ - Logger.log(["delete queue build", qf.filename]) - if not Options["No-Action"]: - os.unlink(qf.filename) - session.delete(qf) - count += 1 +def set_archive_delete_dates(now_date, session): + session.execute(""" + CREATE TEMPORARY TABLE archive_delete_date ( + archive_id INT NOT NULL, + delete_date TIMESTAMP NOT NULL + )""") - if not Options["No-Action"]: - session.commit() + session.execute(""" + INSERT INTO archive_delete_date + (archive_id, delete_date) + SELECT + archive.id, :now_date - archive.stayofexecution + FROM archive""", {'now_date': now_date}) - if count: - print "Cleaned %d queue_build files." % (count) + session.flush() ################################################################################ @@ -364,11 +440,12 @@ def main(): cnf["Clean-Suites::Options::%s" % (i)] = "" Arguments = [('h',"help","Clean-Suites::Options::Help"), + ('a','archive','Clean-Suites::Options::Archive','HasArg'), ('n',"no-action","Clean-Suites::Options::No-Action"), ('m',"maximum","Clean-Suites::Options::Maximum", "HasArg")] - apt_pkg.ParseCommandLine(cnf.Cnf, Arguments, sys.argv) - Options = cnf.SubTree("Clean-Suites::Options") + apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv) + Options = cnf.subtree("Clean-Suites::Options") if cnf["Clean-Suites::Options::Maximum"] != "": try: @@ -376,7 +453,7 @@ def main(): max_delete = int(cnf["Clean-Suites::Options::Maximum"]) if max_delete < 1: utils.fubar("If given, Maximum must be at least 1") - except ValueError, e: + except ValueError as e: utils.fubar("If given, Maximum must be an integer") else: max_delete = None @@ -384,21 +461,34 @@ def main(): if Options["Help"]: usage() - Logger = daklog.Logger(cnf, "clean-suites", debug=Options["No-Action"]) + program = "clean-suites" + if Options['No-Action']: + program = "clean-suites (no action)" + Logger = daklog.Logger(program, debug=Options["No-Action"]) session = DBConn().session() + archives = None + if 'Archive' in Options: + archive_names = Options['Archive'].split(',') + archives = session.query(Archive).filter(Archive.archive_name.in_(archive_names)).all() + if len(archives) == 0: + utils.fubar('Unknown archive.') + now_date = datetime.now() - delete_date = now_date - timedelta(seconds=int(cnf['Clean-Suites::StayOfExecution'])) - - check_binaries(now_date, delete_date, max_delete, session) - clean_binaries(now_date, delete_date, max_delete, session) - check_sources(now_date, delete_date, max_delete, session) - check_files(now_date, delete_date, max_delete, session) - clean(now_date, delete_date, max_delete, session) - clean_maintainers(now_date, delete_date, max_delete, session) - clean_fingerprints(now_date, delete_date, max_delete, session) - clean_queue_build(now_date, delete_date, max_delete, session) + + set_archive_delete_dates(now_date, session) + + check_binaries(now_date, session) + clean_binaries(now_date, session) + check_sources(now_date, session) + check_files(now_date, session) + clean(now_date, archives, max_delete, session) + clean_maintainers(now_date, session) + clean_fingerprints(now_date, session) + clean_empty_directories(session) + + session.rollback() Logger.close()