3 """ Cleans up unassociated binary and source packages
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2000, 2001, 2002, 2003, 2006 James Troup <james@nocrew.org>
7 @copyright: 2009 Mark Hymers <mhy@debian.org>
8 @copyright: 2010 Joerg Jaspert <joerg@debian.org>
9 @license: GNU General Public License version 2 or later
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
28 # 07:05|<elmo> well.. *shrug*.. no, probably not.. but to fix it,
29 # | we're going to have to implement reference counting
30 # | through dependencies.. do we really want to go down
33 # 07:05|<Culus> elmo: Augh! <brain jumps out of skull>
35 ################################################################################
42 from datetime import datetime, timedelta
44 from daklib.config import Config
45 from daklib.dbconn import *
46 from daklib import utils
47 from daklib import daklog
49 ################################################################################
54 ################################################################################
56 def usage (exit_code=0):
57 print """Usage: dak clean-suites [OPTIONS]
58 Clean old packages from suites.
60 -n, --no-action don't do anything
61 -h, --help show this help and exit
62 -m, --maximum maximum number of files to remove"""
65 ################################################################################
67 def check_binaries(now_date, delete_date, max_delete, session):
68 print "Checking for orphaned binary packages..."
70 # Get the list of binary packages not in a suite and mark them for
72 # Check for any binaries which are marked for eventual deletion
73 # but are now used again.
78 af.archive_id AS archive_id,
79 af.file_id AS file_id,
80 af.component_id AS component_id,
81 BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba
82 JOIN suite s ON ba.suite = s.id
84 AND s.archive_id = af.archive_id))
86 FROM files_archive_map af
87 JOIN binaries b ON af.file_id = b.file
88 GROUP BY af.archive_id, af.file_id, af.component_id
91 UPDATE files_archive_map af
92 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
93 FROM usage, files f, archive
94 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
95 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
97 AND af.archive_id = archive.id
98 RETURNING archive.name, f.filename, af.last_used IS NULL"""
100 res = session.execute(query, {'last_used': now_date})
104 op = "unset lastused"
105 Logger.log([op, i[0], i[1]])
107 ########################################
109 def check_sources(now_date, delete_date, max_delete, session):
110 print "Checking for orphaned source packages..."
112 # Get the list of source packages not in a suite and not used by
115 #### XXX: this should ignore cases where the files for the binary b
116 #### have been marked for deletion (so the delay between bins go
117 #### byebye and sources go byebye is 0 instead of StayOfExecution)
119 # Check for any sources which are marked for deletion but which
120 # are now used again.
122 #### XXX: this should also handle deleted binaries specially (ie, not
123 #### reinstate sources because of them)
125 # TODO: the UPDATE part is the same as in check_binaries. Merge?
130 af.archive_id AS archive_id,
131 af.file_id AS file_id,
132 af.component_id AS component_id,
133 BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa
134 JOIN suite s ON sa.suite = s.id
135 WHERE sa.source = df.source
136 AND s.archive_id = af.archive_id)
137 OR EXISTS (SELECT 1 FROM files_archive_map af_bin
138 JOIN binaries b ON af_bin.file_id = b.file
139 WHERE b.source = df.source
140 AND af_bin.archive_id = af.archive_id)
141 OR EXISTS (SELECT 1 FROM extra_src_references esr
142 JOIN bin_associations ba ON esr.bin_id = ba.bin
143 JOIN binaries b ON ba.bin = b.id
144 JOIN suite s ON ba.suite = s.id
145 WHERE esr.src_id = df.source
146 AND s.archive_id = af.archive_id))
148 FROM files_archive_map af
149 JOIN dsc_files df ON af.file_id = df.file
150 GROUP BY af.archive_id, af.file_id, af.component_id
153 UPDATE files_archive_map af
154 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
155 FROM usage, files f, archive
156 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
157 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
158 AND af.file_id = f.id
159 AND af.archive_id = archive.id
161 RETURNING archive.name, f.filename, af.last_used IS NULL
164 res = session.execute(query, {'last_used': now_date})
168 op = "unset lastused"
169 Logger.log([op, i[0], i[1]])
171 ########################################
173 def check_files(now_date, delete_date, max_delete, session):
174 # FIXME: this is evil; nothing should ever be in this state. If
175 # anything is, it's a bug.
177 # However, we've discovered it happens sometimes, so we print a huge warning
178 # and then mark the file for deletion. This probably masks a bug somewhere
179 # else, but is better than collecting cruft forever.
181 print "Checking for unused files..."
182 q = session.execute("""
183 UPDATE files_archive_map af
184 SET last_used = :last_used
185 FROM files f, archive
186 WHERE af.file_id = f.id
187 AND af.archive_id = archive.id
188 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id)
189 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
190 AND af.last_used IS NULL
191 RETURNING archive.name, f.filename""", {'last_used': now_date})
194 utils.warn("orphaned file: {0}".format(x))
195 Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"])
197 if not Options["No-Action"]:
200 def clean_binaries(now_date, delete_date, max_delete, session):
201 # We do this here so that the binaries we remove will have their
202 # source also removed (if possible).
204 # XXX: why doesn't this remove the files here as well? I don't think it
205 # buys anything keeping this separate
207 print "Deleting from binaries table... "
208 q = session.execute("""
209 DELETE FROM binaries b
212 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
213 WHERE af.file_id = b.file
214 AND (af.last_used IS NULL OR af.last_used >= :delete_date))
216 """, {'delete_date': delete_date})
218 Logger.log(["delete binary", b[0]])
220 ########################################
222 def clean(now_date, delete_date, max_delete, session):
228 print "Cleaning out packages..."
230 morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", 'morgue'))
231 morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", 'pool')
233 # Build directory as morguedir/morguesubdir/year/month/day
234 dest = os.path.join(morguedir,
237 '%.2d' % now_date.month,
238 '%.2d' % now_date.day)
240 if not Options["No-Action"] and not os.path.exists(dest):
244 print "Deleting from source table... "
245 q = session.execute("""
250 WHERE source.file = f.id
251 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
252 WHERE af.file_id = source.file
253 AND (af.last_used IS NULL OR af.last_used >= :delete_date))
254 RETURNING source.id AS id, f.filename AS filename
256 deleted_dsc_files AS (
257 DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources)
258 RETURNING df.file AS file_id
260 now_unused_source_files AS (
261 UPDATE files_archive_map af
262 SET last_used = :delete_date -- Kill it now. We waited long enough before removing the .dsc.
263 WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files)
264 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
266 SELECT filename FROM deleted_sources""",
267 {'delete_date': delete_date})
269 Logger.log(["delete source", s[0]])
271 if not Options["No-Action"]:
274 # Delete files from the pool
275 old_files = session.query(ArchiveFile).filter(ArchiveFile.last_used <= delete_date).join(Archive)
276 if max_delete is not None:
277 old_files = old_files.limit(max_delete)
278 print "Limiting removals to %d" % max_delete
282 if not os.path.exists(filename):
283 Logger.log(["database referred to non-existing file", af.path])
286 Logger.log(["delete archive file", filename])
287 if os.path.isfile(filename):
288 if os.path.islink(filename):
290 Logger.log(["delete symlink", filename])
291 if not Options["No-Action"]:
294 size += os.stat(filename)[stat.ST_SIZE]
297 dest_filename = dest + '/' + os.path.basename(filename)
298 # If the destination file exists, try to find another filename to use
299 if os.path.exists(dest_filename):
300 dest_filename = utils.find_next_free(dest_filename)
302 if not Options["No-Action"]:
303 if af.archive.use_morgue:
304 Logger.log(["move to morgue", filename, dest_filename])
305 utils.move(filename, dest_filename)
307 Logger.log(["removed file", filename])
310 if not Options["No-Action"]:
315 utils.fubar("%s is neither symlink nor file?!" % (filename))
318 Logger.log(["total", count, utils.size_type(size)])
319 print "Cleaned %d files, %s." % (count, utils.size_type(size))
321 # Delete entries in files no longer referenced by any archive
324 WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id)
326 session.execute(query)
328 if not Options["No-Action"]:
331 ################################################################################
333 def clean_maintainers(now_date, delete_date, max_delete, session):
334 print "Cleaning out unused Maintainer entries..."
336 # TODO Replace this whole thing with one SQL statement
337 q = session.execute("""
338 SELECT m.id, m.name FROM maintainer m
339 WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id)
340 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id)
341 AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)""")
345 for i in q.fetchall():
347 Logger.log(["delete maintainer", i[1]])
348 if not Options["No-Action"]:
349 session.execute("DELETE FROM maintainer WHERE id = :maint", {'maint': maintainer_id})
352 if not Options["No-Action"]:
356 Logger.log(["total", count])
357 print "Cleared out %d maintainer entries." % (count)
359 ################################################################################
361 def clean_fingerprints(now_date, delete_date, max_delete, session):
362 print "Cleaning out unused fingerprint entries..."
364 # TODO Replace this whole thing with one SQL statement
365 q = session.execute("""
366 SELECT f.id, f.fingerprint FROM fingerprint f
367 WHERE f.keyring IS NULL
368 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id)
369 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id)""")
373 for i in q.fetchall():
374 fingerprint_id = i[0]
375 Logger.log(["delete fingerprint", i[1]])
376 if not Options["No-Action"]:
377 session.execute("DELETE FROM fingerprint WHERE id = :fpr", {'fpr': fingerprint_id})
380 if not Options["No-Action"]:
384 Logger.log(["total", count])
385 print "Cleared out %d fingerprint entries." % (count)
387 ################################################################################
389 def clean_empty_directories(session):
391 Removes empty directories from pool directories.
394 print "Cleaning out empty directories..."
398 cursor = session.execute(
399 """SELECT DISTINCT(path) FROM archive"""
401 bases = [x[0] for x in cursor.fetchall()]
404 for dirpath, dirnames, filenames in os.walk(base, topdown=False):
405 if not filenames and not dirnames:
406 to_remove = os.path.join(base, dirpath)
407 if not Options["No-Action"]:
408 Logger.log(["removing directory", to_remove])
409 os.removedirs(to_remove)
413 Logger.log(["total removed directories", count])
415 ################################################################################
418 global Options, Logger
422 for i in ["Help", "No-Action", "Maximum" ]:
423 if not cnf.has_key("Clean-Suites::Options::%s" % (i)):
424 cnf["Clean-Suites::Options::%s" % (i)] = ""
426 Arguments = [('h',"help","Clean-Suites::Options::Help"),
427 ('n',"no-action","Clean-Suites::Options::No-Action"),
428 ('m',"maximum","Clean-Suites::Options::Maximum", "HasArg")]
430 apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)
431 Options = cnf.subtree("Clean-Suites::Options")
433 if cnf["Clean-Suites::Options::Maximum"] != "":
435 # Only use Maximum if it's an integer
436 max_delete = int(cnf["Clean-Suites::Options::Maximum"])
438 utils.fubar("If given, Maximum must be at least 1")
439 except ValueError as e:
440 utils.fubar("If given, Maximum must be an integer")
447 Logger = daklog.Logger("clean-suites", debug=Options["No-Action"])
449 session = DBConn().session()
451 now_date = datetime.now()
453 # Stay of execution; default to 1.5 days
454 soe = int(cnf.get('Clean-Suites::StayOfExecution', '129600'))
456 delete_date = now_date - timedelta(seconds=soe)
458 check_binaries(now_date, delete_date, max_delete, session)
459 clean_binaries(now_date, delete_date, max_delete, session)
460 check_sources(now_date, delete_date, max_delete, session)
461 check_files(now_date, delete_date, max_delete, session)
462 clean(now_date, delete_date, max_delete, session)
463 clean_maintainers(now_date, delete_date, max_delete, session)
464 clean_fingerprints(now_date, delete_date, max_delete, session)
465 clean_empty_directories(session)
471 ################################################################################
473 if __name__ == '__main__':