X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=dak%2Fclean_suites.py;h=806549e3895a001a5be8cb7dab5f195c380298d9;hb=27e00376e81d1c37ff327ee0d39670b266418869;hp=f459cdda154eb7565b237ed6eee405a6bf8668b7;hpb=5819288346c6ea5d9b23125586969cbec3bc6d44;p=dak.git diff --git a/dak/clean_suites.py b/dak/clean_suites.py index f459cdda..806549e3 100755 --- a/dak/clean_suites.py +++ b/dak/clean_suites.py @@ -1,7 +1,13 @@ #!/usr/bin/env python -# Cleans up unassociated binary and source packages -# Copyright (C) 2000, 2001, 2002, 2003, 2006 James Troup +""" Cleans up unassociated binary and source packages + +@contact: Debian FTPMaster +@copyright: 2000, 2001, 2002, 2003, 2006 James Troup +@copyright: 2009 Mark Hymers +@copyright: 2010 Joerg Jaspert +@license: GNU General Public License version 2 or later +""" # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -28,17 +34,22 @@ ################################################################################ -import os, pg, stat, sys, time +import os +import stat +import sys +import time import apt_pkg -import daklib.utils as utils +from datetime import datetime, timedelta + +from daklib.config import Config +from daklib.dbconn import * +from daklib import utils +from daklib import daklog ################################################################################ -projectB = None -Cnf = None Options = None -now_date = None; # mark newly "deleted" things as deleted "now" -delete_date = None; # delete things marked "deleted" earler than this +Logger = None ################################################################################ @@ -47,175 +58,238 @@ def usage (exit_code=0): Clean old packages from suites. -n, --no-action don't do anything - -h, --help show this help and exit""" + -h, --help show this help and exit + -m, --maximum maximum number of files to remove""" sys.exit(exit_code) ################################################################################ -def check_binaries(): - global delete_date, now_date - +def check_binaries(now_date, delete_date, max_delete, session): print "Checking for orphaned binary packages..." # Get the list of binary packages not in a suite and mark them for # deletion. - q = projectB.query(""" -SELECT b.file FROM binaries b, files f - WHERE f.last_used IS NULL AND b.file = f.id - AND NOT EXISTS (SELECT 1 FROM bin_associations ba WHERE ba.bin = b.id)""") - ql = q.getresult() - - projectB.query("BEGIN WORK") - for i in ql: - file_id = i[0] - projectB.query("UPDATE files SET last_used = '%s' WHERE id = %s AND last_used IS NULL" % (now_date, file_id)) - projectB.query("COMMIT WORK") + + q = session.execute(""" +SELECT b.file, f.filename + FROM binaries b + LEFT JOIN files f + ON (b.file = f.id) + WHERE f.last_used IS NULL + AND b.id NOT IN + (SELECT ba.bin FROM bin_associations ba) + AND f.id NOT IN + (SELECT bqf.fileid FROM build_queue_files bqf)""") + for i in q.fetchall(): + Logger.log(["set lastused", i[1]]) + if not Options["No-Action"]: + session.execute("UPDATE files SET last_used = :lastused WHERE id = :fileid AND last_used IS NULL", + {'lastused': now_date, 'fileid': i[0]}) + + if not Options["No-Action"]: + session.commit() # Check for any binaries which are marked for eventual deletion # but are now used again. 
- q = projectB.query(""" -SELECT b.file FROM binaries b, files f - WHERE f.last_used IS NOT NULL AND f.id = b.file - AND EXISTS (SELECT 1 FROM bin_associations ba WHERE ba.bin = b.id)""") - ql = q.getresult() - - projectB.query("BEGIN WORK") - for i in ql: - file_id = i[0] - projectB.query("UPDATE files SET last_used = NULL WHERE id = %s" % (file_id)) - projectB.query("COMMIT WORK") -######################################## + q = session.execute(""" +SELECT b.file, f.filename + FROM binaries b + LEFT JOIN files f + ON (b.file = f.id) + WHERE f.last_used IS NOT NULL + AND (b.id IN + (SELECT ba.bin FROM bin_associations ba) + OR f.id IN + (SELECT bqf.fileid FROM build_queue_files bqf))""") + + for i in q.fetchall(): + Logger.log(["unset lastused", i[1]]) + if not Options["No-Action"]: + session.execute("UPDATE files SET last_used = NULL WHERE id = :fileid", {'fileid': i[0]}) -def check_sources(): - global delete_date, now_date + if not Options["No-Action"]: + session.commit() +######################################## + +def check_sources(now_date, delete_date, max_delete, session): print "Checking for orphaned source packages..." # Get the list of source packages not in a suite and not used by # any binaries. - q = projectB.query(""" -SELECT s.id, s.file FROM source s, files f - WHERE f.last_used IS NULL AND s.file = f.id - AND NOT EXISTS (SELECT 1 FROM src_associations sa WHERE sa.source = s.id) - AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.source = s.id)""") + q = session.execute(""" +SELECT s.id, s.file, f.filename + FROM source s + LEFT JOIN files f + ON (s.file = f.id) + WHERE f.last_used IS NULL + AND s.id NOT IN + (SELECT sa.source FROM src_associations sa) + AND s.id NOT IN + (SELECT b.source FROM binaries b) + AND f.id NOT IN + (SELECT bqf.fileid FROM build_queue_files bqf)""") #### XXX: this should ignore cases where the files for the binary b #### have been marked for deletion (so the delay between bins go #### byebye and sources go byebye is 0 instead of StayOfExecution) - ql = q.getresult() - - projectB.query("BEGIN WORK") - for i in ql: + for i in q.fetchall(): source_id = i[0] dsc_file_id = i[1] + dsc_fname = i[2] # Mark the .dsc file for deletion - projectB.query("UPDATE files SET last_used = '%s' WHERE id = %s AND last_used IS NULL" % (now_date, dsc_file_id)) + Logger.log(["set lastused", dsc_fname]) + if not Options["No-Action"]: + session.execute("""UPDATE files SET last_used = :last_used + WHERE id = :dscfileid AND last_used IS NULL""", + {'last_used': now_date, 'dscfileid': dsc_file_id}) + # Mark all other files references by .dsc too if they're not used by anyone else - x = projectB.query("SELECT f.id FROM files f, dsc_files d WHERE d.source = %s AND d.file = f.id" % (source_id)) - for j in x.getresult(): + x = session.execute("""SELECT f.id, f.filename FROM files f, dsc_files d + WHERE d.source = :sourceid AND d.file = f.id""", + {'sourceid': source_id}) + for j in x.fetchall(): file_id = j[0] - y = projectB.query("SELECT id FROM dsc_files d WHERE d.file = %s" % (file_id)) - if len(y.getresult()) == 1: - projectB.query("UPDATE files SET last_used = '%s' WHERE id = %s AND last_used IS NULL" % (now_date, file_id)) - projectB.query("COMMIT WORK") + file_name = j[1] + y = session.execute("SELECT id FROM dsc_files d WHERE d.file = :fileid", {'fileid': file_id}) + if len(y.fetchall()) == 1: + Logger.log(["set lastused", file_name]) + if not Options["No-Action"]: + session.execute("""UPDATE files SET last_used = :lastused + WHERE id = :fileid AND last_used IS NULL""", + 
{'lastused': now_date, 'fileid': file_id}) + + if not Options["No-Action"]: + session.commit() # Check for any sources which are marked for deletion but which # are now used again. - - q = projectB.query(""" -SELECT f.id FROM source s, files f, dsc_files df + q = session.execute(""" +SELECT f.id, f.filename FROM source s, files f, dsc_files df WHERE f.last_used IS NOT NULL AND s.id = df.source AND df.file = f.id AND ((EXISTS (SELECT 1 FROM src_associations sa WHERE sa.source = s.id)) - OR (EXISTS (SELECT 1 FROM binaries b WHERE b.source = s.id)))""") + OR (EXISTS (SELECT 1 FROM binaries b WHERE b.source = s.id)) + OR (EXISTS (SELECT 1 FROM build_queue_files bqf WHERE bqf.fileid = s.file)))""") #### XXX: this should also handle deleted binaries specially (ie, not #### reinstate sources because of them - ql = q.getresult() - # Could be done in SQL; but left this way for hysterical raisins - # [and freedom to innovate don'cha know?] - projectB.query("BEGIN WORK") - for i in ql: - file_id = i[0] - projectB.query("UPDATE files SET last_used = NULL WHERE id = %s" % (file_id)) - projectB.query("COMMIT WORK") + for i in q.fetchall(): + Logger.log(["unset lastused", i[1]]) + if not Options["No-Action"]: + session.execute("UPDATE files SET last_used = NULL WHERE id = :fileid", + {'fileid': i[0]}) -######################################## + if not Options["No-Action"]: + session.commit() -def check_files(): - global delete_date, now_date +######################################## +def check_files(now_date, delete_date, max_delete, session): # FIXME: this is evil; nothing should ever be in this state. if - # they are, it's a bug and the files should not be auto-deleted. + # they are, it's a bug. - return + # However, we've discovered it happens sometimes so we print a huge warning + # and then mark the file for deletion. This probably masks a bug somwhere + # else but is better than collecting cruft forever print "Checking for unused files..." - q = projectB.query(""" -SELECT id FROM files f + q = session.execute(""" +SELECT id, filename FROM files f WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = f.id) - AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = f.id)""") - - projectB.query("BEGIN WORK") - for i in q.getresult(): - file_id = i[0] - projectB.query("UPDATE files SET last_used = '%s' WHERE id = %s" % (now_date, file_id)) - projectB.query("COMMIT WORK") + AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = f.id) + AND NOT EXISTS (SELECT 1 FROM changes_pool_files cpf WHERE cpf.fileid = f.id) + AND NOT EXISTS (SELECT 1 FROM build_queue_files qf WHERE qf.fileid = f.id) + AND last_used IS NULL + ORDER BY filename""") + + ql = q.fetchall() + if len(ql) > 0: + utils.warn("check_files found something it shouldn't") + for x in ql: + utils.warn("orphaned file: %s" % x) + Logger.log(["set lastused", x[1], "ORPHANED FILE"]) + if not Options["No-Action"]: + session.execute("UPDATE files SET last_used = :lastused WHERE id = :fileid", + {'lastused': now_date, 'fileid': x[0]}) -def clean_binaries(): - global delete_date, now_date + if not Options["No-Action"]: + session.commit() +def clean_binaries(now_date, delete_date, max_delete, session): # We do this here so that the binaries we remove will have their # source also removed (if possible). # XXX: why doesn't this remove the files here as well? I don't think it # buys anything keeping this separate print "Cleaning binaries from the DB..." + print "Deleting from binaries table... 
" + for bin in session.query(DBBinary).join(DBBinary.poolfile).filter(PoolFile.last_used <= delete_date): + Logger.log(["delete binary", bin.poolfile.filename]) + if not Options["No-Action"]: + session.delete(bin) if not Options["No-Action"]: - before = time.time() - sys.stdout.write("[Deleting from binaries table... ") - projectB.query("DELETE FROM binaries WHERE EXISTS (SELECT 1 FROM files WHERE binaries.file = files.id AND files.last_used <= '%s')" % (delete_date)) - sys.stdout.write("done. (%d seconds)]\n" % (int(time.time()-before))) + session.commit() ######################################## -def clean(): - global delete_date, now_date +def clean(now_date, delete_date, max_delete, session): + cnf = Config() + count = 0 size = 0 print "Cleaning out packages..." - date = time.strftime("%Y-%m-%d") - dest = Cnf["Dir::Morgue"] + '/' + Cnf["Clean-Suites::MorgueSubDir"] + '/' + date - if not os.path.exists(dest): - os.mkdir(dest) + morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", 'morgue')) + morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", 'pool') + + # Build directory as morguedir/morguesubdir/year/month/day + dest = os.path.join(morguedir, + morguesubdir, + str(now_date.year), + '%.2d' % now_date.month, + '%.2d' % now_date.day) + + if not Options["No-Action"] and not os.path.exists(dest): + os.makedirs(dest) # Delete from source + print "Deleting from source table... " + q = session.execute(""" +SELECT s.id, f.filename FROM source s, files f + WHERE f.last_used <= :deletedate + AND s.file = f.id + AND s.id NOT IN (SELECT src_id FROM extra_src_references)""", {'deletedate': delete_date}) + for s in q.fetchall(): + Logger.log(["delete source", s[1], s[0]]) + if not Options["No-Action"]: + session.execute("DELETE FROM dsc_files WHERE source = :s_id", {"s_id":s[0]}) + session.execute("DELETE FROM source WHERE id = :s_id", {"s_id":s[0]}) + if not Options["No-Action"]: - before = time.time() - sys.stdout.write("[Deleting from source table... ") - projectB.query("DELETE FROM dsc_files WHERE EXISTS (SELECT 1 FROM source s, files f, dsc_files df WHERE f.last_used <= '%s' AND s.file = f.id AND s.id = df.source AND df.id = dsc_files.id)" % (delete_date)) - projectB.query("DELETE FROM src_uploaders WHERE EXISTS (SELECT 1 FROM source s, files f WHERE f.last_used <= '%s' AND s.file = f.id AND s.id = src_uploaders.source)" % (delete_date)) - projectB.query("DELETE FROM source WHERE EXISTS (SELECT 1 FROM files WHERE source.file = files.id AND files.last_used <= '%s')" % (delete_date)) - sys.stdout.write("done. (%d seconds)]\n" % (int(time.time()-before))) + session.commit() # Delete files from the pool - q = projectB.query("SELECT l.path, f.filename FROM location l, files f WHERE f.last_used <= '%s' AND l.id = f.location" % (delete_date)) - for i in q.getresult(): - filename = i[0] + i[1] + old_files = session.query(PoolFile).filter(PoolFile.last_used <= delete_date) + if max_delete is not None: + old_files = old_files.limit(max_delete) + print "Limiting removals to %d" % max_delete + + for pf in old_files: + filename = os.path.join(pf.location.path, pf.filename) if not os.path.exists(filename): utils.warn("can not find '%s'." % (filename)) continue + Logger.log(["delete pool file", filename]) if os.path.isfile(filename): if os.path.islink(filename): count += 1 - if Options["No-Action"]: - print "Removing symlink %s..." 
% (filename) - else: + Logger.log(["delete symlink", filename]) + if not Options["No-Action"]: os.unlink(filename) else: size += os.stat(filename)[stat.ST_SIZE] @@ -226,130 +300,159 @@ def clean(): if os.path.exists(dest_filename): dest_filename = utils.find_next_free(dest_filename) - if Options["No-Action"]: - print "Cleaning %s -> %s ..." % (filename, dest_filename) - else: + Logger.log(["move to morgue", filename, dest_filename]) + if not Options["No-Action"]: utils.move(filename, dest_filename) + + if not Options["No-Action"]: + session.delete(pf) + session.commit() + else: utils.fubar("%s is neither symlink nor file?!" % (filename)) - # Delete from the 'files' table - if not Options["No-Action"]: - before = time.time() - sys.stdout.write("[Deleting from files table... ") - projectB.query("DELETE FROM files WHERE last_used <= '%s'" % (delete_date)) - sys.stdout.write("done. (%d seconds)]\n" % (int(time.time()-before))) if count > 0: - sys.stderr.write("Cleaned %d files, %s.\n" % (count, utils.size_type(size))) + Logger.log(["total", count, utils.size_type(size)]) + print "Cleaned %d files, %s." % (count, utils.size_type(size)) ################################################################################ -def clean_maintainers(): +def clean_maintainers(now_date, delete_date, max_delete, session): print "Cleaning out unused Maintainer entries..." - q = projectB.query(""" -SELECT m.id FROM maintainer m + # TODO Replace this whole thing with one SQL statement + q = session.execute(""" +SELECT m.id, m.name FROM maintainer m WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id) AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id) AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)""") - ql = q.getresult() count = 0 - projectB.query("BEGIN WORK") - for i in ql: + + for i in q.fetchall(): maintainer_id = i[0] + Logger.log(["delete maintainer", i[1]]) if not Options["No-Action"]: - projectB.query("DELETE FROM maintainer WHERE id = %s" % (maintainer_id)) - count += 1 - projectB.query("COMMIT WORK") + session.execute("DELETE FROM maintainer WHERE id = :maint", {'maint': maintainer_id}) + count += 1 + + if not Options["No-Action"]: + session.commit() if count > 0: - sys.stderr.write("Cleared out %d maintainer entries.\n" % (count)) + Logger.log(["total", count]) + print "Cleared out %d maintainer entries." % (count) ################################################################################ -def clean_fingerprints(): +def clean_fingerprints(now_date, delete_date, max_delete, session): print "Cleaning out unused fingerprint entries..." 
- q = projectB.query(""" -SELECT f.id FROM fingerprint f + # TODO Replace this whole thing with one SQL statement + q = session.execute(""" +SELECT f.id, f.fingerprint FROM fingerprint f WHERE f.keyring IS NULL AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id) AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id)""") - ql = q.getresult() count = 0 - projectB.query("BEGIN WORK") - for i in ql: + + for i in q.fetchall(): fingerprint_id = i[0] + Logger.log(["delete fingerprint", i[1]]) if not Options["No-Action"]: - projectB.query("DELETE FROM fingerprint WHERE id = %s" % (fingerprint_id)) - count += 1 - projectB.query("COMMIT WORK") + session.execute("DELETE FROM fingerprint WHERE id = :fpr", {'fpr': fingerprint_id}) + count += 1 + + if not Options["No-Action"]: + session.commit() if count > 0: - sys.stderr.write("Cleared out %d fingerprint entries.\n" % (count)) + Logger.log(["total", count]) + print "Cleared out %d fingerprint entries." % (count) ################################################################################ -def clean_queue_build(): - global now_date +def clean_empty_directories(session): + """ + Removes empty directories from pool directories. + """ - if not Cnf.ValueList("Dinstall::QueueBuildSuites") or Options["No-Action"]: - return + print "Cleaning out empty directories..." - print "Cleaning out queue build symlinks..." - - our_delete_date = time.strftime("%Y-%m-%d %H:%M", time.localtime(time.time()-int(Cnf["Clean-Suites::QueueBuildStayOfExecution"]))) count = 0 - q = projectB.query("SELECT filename FROM queue_build WHERE last_used <= '%s'" % (our_delete_date)) - for i in q.getresult(): - filename = i[0] - if not os.path.exists(filename): - utils.warn("%s (from queue_build) doesn't exist." % (filename)) - continue - if not Cnf.FindB("Dinstall::SecurityQueueBuild") and not os.path.islink(filename): - utils.fubar("%s (from queue_build) should be a symlink but isn't." 
% (filename)) - os.unlink(filename) - count += 1 - projectB.query("DELETE FROM queue_build WHERE last_used <= '%s'" % (our_delete_date)) + cursor = session.execute( + "SELECT DISTINCT(path) FROM location WHERE type = :type", + {'type': 'pool'}, + ) + bases = [x[0] for x in cursor.fetchall()] + + for base in bases: + for dirpath, dirnames, filenames in os.walk(base, topdown=False): + if not filenames and not dirnames: + to_remove = os.path.join(base, dirpath) + if not Options["No-Action"]: + Logger.log(["removing directory", to_remove]) + os.removedirs(to_remove) + count += 1 if count: - sys.stderr.write("Cleaned %d queue_build files.\n" % (count)) + Logger.log(["total removed directories", count]) ################################################################################ def main(): - global Cnf, Options, projectB, delete_date, now_date + global Options, Logger - Cnf = utils.get_conf() - for i in ["Help", "No-Action" ]: - if not Cnf.has_key("Clean-Suites::Options::%s" % (i)): - Cnf["Clean-Suites::Options::%s" % (i)] = "" + cnf = Config() - Arguments = [('h',"help","Clean-Suites::Options::Help"), - ('n',"no-action","Clean-Suites::Options::No-Action")] + for i in ["Help", "No-Action", "Maximum" ]: + if not cnf.has_key("Clean-Suites::Options::%s" % (i)): + cnf["Clean-Suites::Options::%s" % (i)] = "" - apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv) - Options = Cnf.SubTree("Clean-Suites::Options") + Arguments = [('h',"help","Clean-Suites::Options::Help"), + ('n',"no-action","Clean-Suites::Options::No-Action"), + ('m',"maximum","Clean-Suites::Options::Maximum", "HasArg")] + + apt_pkg.ParseCommandLine(cnf.Cnf, Arguments, sys.argv) + Options = cnf.SubTree("Clean-Suites::Options") + + if cnf["Clean-Suites::Options::Maximum"] != "": + try: + # Only use Maximum if it's an integer + max_delete = int(cnf["Clean-Suites::Options::Maximum"]) + if max_delete < 1: + utils.fubar("If given, Maximum must be at least 1") + except ValueError as e: + utils.fubar("If given, Maximum must be an integer") + else: + max_delete = None if Options["Help"]: usage() - projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"])) + Logger = daklog.Logger("clean-suites", debug=Options["No-Action"]) + + session = DBConn().session() + + now_date = datetime.now() + + # Stay of execution; default to 1.5 days + soe = int(cnf.get('Clean-Suites::StayOfExecution', '129600')) + + delete_date = now_date - timedelta(seconds=soe) - now_date = time.strftime("%Y-%m-%d %H:%M") - delete_date = time.strftime("%Y-%m-%d %H:%M", time.localtime(time.time()-int(Cnf["Clean-Suites::StayOfExecution"]))) + check_binaries(now_date, delete_date, max_delete, session) + clean_binaries(now_date, delete_date, max_delete, session) + check_sources(now_date, delete_date, max_delete, session) + check_files(now_date, delete_date, max_delete, session) + clean(now_date, delete_date, max_delete, session) + clean_maintainers(now_date, delete_date, max_delete, session) + clean_fingerprints(now_date, delete_date, max_delete, session) + clean_empty_directories(session) - check_binaries() - clean_binaries() - check_sources() - check_files() - clean() - clean_maintainers() - clean_fingerprints() - clean_queue_build() + Logger.close() ################################################################################
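
The substance of this patch is a port of dak/clean_suites.py from the old pygresql interface (projectB.query() with string-interpolated SQL and explicit BEGIN/COMMIT WORK) to daklib.dbconn sessions with bound parameters, per-file logging via daklib.daklog, a --maximum cap on pool removals, and a dry-run guard ("--no-action") around every write. The fragment below is a minimal sketch of that pattern, condensed from the check_binaries() hunk above; the file id and the 129600-second stay of execution are placeholder/default values used purely for illustration, and a configured daklib database is assumed.

    # Sketch only: mirrors the query style introduced by this patch.
    from datetime import datetime, timedelta
    from daklib.dbconn import DBConn

    now_date = datetime.now()
    delete_date = now_date - timedelta(seconds=129600)  # default stay of execution: 1.5 days

    session = DBConn().session()

    # Old style (removed by the patch): SQL assembled with string interpolation.
    #   projectB.query("UPDATE files SET last_used = '%s' WHERE id = %s" % (now_date, file_id))

    # New style (added by the patch): bound parameters passed as a dict.
    session.execute("UPDATE files SET last_used = :lastused WHERE id = :fileid AND last_used IS NULL",
                    {'lastused': now_date, 'fileid': 1})  # 1 = placeholder file id
    session.rollback()  # the real code only commits when --no-action is not given

The same dry-run pattern recurs in every function of the new code: each write is preceded by a Logger.log() call, executed only when Options["No-Action"] is unset, and committed once per function rather than per statement.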