3 """ Cleans up unassociated binary and source packages
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2000, 2001, 2002, 2003, 2006 James Troup <james@nocrew.org>
7 @copyright: 2009 Mark Hymers <mhy@debian.org>
8 @copyright: 2010 Joerg Jaspert <joerg@debian.org>
9 @license: GNU General Public License version 2 or later
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
28 # 07:05|<elmo> well.. *shrug*.. no, probably not.. but to fix it,
29 # | we're going to have to implement reference counting
30 # | through dependencies.. do we really want to go down
33 # 07:05|<Culus> elmo: Augh! <brain jumps out of skull>
35 ################################################################################
43 from datetime import datetime, timedelta
45 from daklib.config import Config
46 from daklib.dbconn import *
47 from daklib import utils
48 from daklib import daklog
50 ################################################################################
55 ################################################################################
57 def usage (exit_code=0):
58 print """Usage: dak clean-suites [OPTIONS]
59 Clean old packages from suites.
61 -n, --no-action don't do anything
62 -h, --help show this help and exit
63 -m, --maximum maximum number of files to remove"""
66 ################################################################################
68 def check_binaries(now_date, session):
69 Logger.log(["Checking for orphaned binary packages..."])
71 # Get the list of binary packages not in a suite and mark them for
73 # Check for any binaries which are marked for eventual deletion
74 # but are now used again.
79 af.archive_id AS archive_id,
80 af.file_id AS file_id,
81 af.component_id AS component_id,
82 BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba
83 JOIN suite s ON ba.suite = s.id
85 AND s.archive_id = af.archive_id))
87 FROM files_archive_map af
88 JOIN binaries b ON af.file_id = b.file
89 GROUP BY af.archive_id, af.file_id, af.component_id
92 UPDATE files_archive_map af
93 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
94 FROM usage, files f, archive
95 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
96 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
98 AND af.archive_id = archive.id
99 RETURNING archive.name, f.filename, af.last_used IS NULL"""
101 res = session.execute(query, {'last_used': now_date})
105 op = "unset lastused"
106 Logger.log([op, i[0], i[1]])
108 ########################################
110 def check_sources(now_date, session):
111 Logger.log(["Checking for orphaned source packages..."])
113 # Get the list of source packages not in a suite and not used by
116 # Check for any sources which are marked for deletion but which
117 # are now used again.
119 # TODO: the UPDATE part is the same as in check_binaries. Merge?
124 af.archive_id AS archive_id,
125 af.file_id AS file_id,
126 af.component_id AS component_id,
127 BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa
128 JOIN suite s ON sa.suite = s.id
129 WHERE sa.source = df.source
130 AND s.archive_id = af.archive_id)
131 OR EXISTS (SELECT 1 FROM files_archive_map af_bin
132 JOIN binaries b ON af_bin.file_id = b.file
133 WHERE b.source = df.source
134 AND af_bin.archive_id = af.archive_id
135 AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date))
136 OR EXISTS (SELECT 1 FROM extra_src_references esr
137 JOIN bin_associations ba ON esr.bin_id = ba.bin
138 JOIN binaries b ON ba.bin = b.id
139 JOIN suite s ON ba.suite = s.id
140 WHERE esr.src_id = df.source
141 AND s.archive_id = af.archive_id))
143 FROM files_archive_map af
144 JOIN dsc_files df ON af.file_id = df.file
145 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
146 GROUP BY af.archive_id, af.file_id, af.component_id
149 UPDATE files_archive_map af
150 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
151 FROM usage, files f, archive
152 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
153 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
154 AND af.file_id = f.id
155 AND af.archive_id = archive.id
157 RETURNING archive.name, f.filename, af.last_used IS NULL
160 res = session.execute(query, {'last_used': now_date})
164 op = "unset lastused"
165 Logger.log([op, i[0], i[1]])
167 ########################################
169 def check_files(now_date, session):
170 # FIXME: this is evil; nothing should ever be in this state. If
171 # anything is, it's a bug.
173 # However, we've discovered it happens sometimes so we print a huge warning
174 # and then mark the file for deletion. This probably masks a bug somewhere
175 # else but is better than collecting cruft forever
177 Logger.log(["Checking for unused files..."])
178 q = session.execute("""
179 UPDATE files_archive_map af
180 SET last_used = :last_used
181 FROM files f, archive
182 WHERE af.file_id = f.id
183 AND af.archive_id = archive.id
184 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id)
185 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
186 AND af.last_used IS NULL
187 RETURNING archive.name, f.filename""", {'last_used': now_date})
190 utils.warn("orphaned file: {0}".format(x))
191 Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"])
193 if not Options["No-Action"]:
196 def clean_binaries(now_date, session):
197 # We do this here so that the binaries we remove will have their
198 # source also removed (if possible).
200 # XXX: why doesn't this remove the files here as well? I don't think it
201 # buys anything keeping this separate
203 Logger.log(["Deleting from binaries table... "])
204 q = session.execute("""
205 DELETE FROM binaries b
208 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
209 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
210 WHERE af.file_id = b.file
211 AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
215 Logger.log(["delete binary", b[0]])
217 ########################################
219 def clean(now_date, archives, max_delete, session):
225 Logger.log(["Cleaning out packages..."])
227 morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", 'morgue'))
228 morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", 'pool')
230 # Build directory as morguedir/morguesubdir/year/month/day
231 dest = os.path.join(morguedir,
234 '%.2d' % now_date.month,
235 '%.2d' % now_date.day)
237 if not Options["No-Action"] and not os.path.exists(dest):
241 Logger.log(["Deleting from source table..."])
242 q = session.execute("""
247 WHERE source.file = f.id
248 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
249 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
250 WHERE af.file_id = source.file
251 AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
252 RETURNING source.id AS id, f.filename AS filename
254 deleted_dsc_files AS (
255 DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources)
256 RETURNING df.file AS file_id
258 now_unused_source_files AS (
259 UPDATE files_archive_map af
260 SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc.
261 WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files)
262 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
264 SELECT filename FROM deleted_sources""")
266 Logger.log(["delete source", s[0]])
268 if not Options["No-Action"]:
271 # Delete files from the pool
272 old_files = session.query(ArchiveFile).filter('files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)').join(Archive)
273 if max_delete is not None:
274 old_files = old_files.limit(max_delete)
275 Logger.log(["Limiting removals to %d" % max_delete])
277 if archives is not None:
278 archive_ids = [ a.archive_id for a in archives ]
279 old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids))
283 if not os.path.exists(filename):
284 Logger.log(["database referred to non-existing file", af.path])
287 Logger.log(["delete archive file", filename])
288 if os.path.isfile(filename):
289 if os.path.islink(filename):
291 Logger.log(["delete symlink", filename])
292 if not Options["No-Action"]:
295 size += os.stat(filename)[stat.ST_SIZE]
298 dest_filename = dest + '/' + os.path.basename(filename)
299 # If the destination file exists, try to find another filename to use
300 if os.path.lexists(dest_filename):
301 dest_filename = utils.find_next_free(dest_filename)
303 if not Options["No-Action"]:
304 if af.archive.use_morgue:
305 Logger.log(["move to morgue", filename, dest_filename])
306 utils.move(filename, dest_filename)
308 Logger.log(["removed file", filename])
311 if not Options["No-Action"]:
316 utils.fubar("%s is neither symlink nor file?!" % (filename))
319 Logger.log(["total", count, utils.size_type(size)])
321 # Delete entries in files no longer referenced by any archive
324 WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id)
326 session.execute(query)
328 if not Options["No-Action"]:
331 ################################################################################
333 def clean_maintainers(now_date, session):
334 Logger.log(["Cleaning out unused Maintainer entries..."])
336 # TODO Replace this whole thing with one SQL statement
337 q = session.execute("""
338 SELECT m.id, m.name FROM maintainer m
339 WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id)
340 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id)
341 AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)""")
345 for i in q.fetchall():
347 Logger.log(["delete maintainer", i[1]])
348 if not Options["No-Action"]:
349 session.execute("DELETE FROM maintainer WHERE id = :maint", {'maint': maintainer_id})
352 if not Options["No-Action"]:
356 Logger.log(["total", count])
358 ################################################################################
360 def clean_fingerprints(now_date, session):
361 Logger.log(["Cleaning out unused fingerprint entries..."])
363 # TODO Replace this whole thing with one SQL statement
364 q = session.execute("""
365 SELECT f.id, f.fingerprint FROM fingerprint f
366 WHERE f.keyring IS NULL
367 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id)
368 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id)
369 AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)""")
373 for i in q.fetchall():
374 fingerprint_id = i[0]
375 Logger.log(["delete fingerprint", i[1]])
376 if not Options["No-Action"]:
377 session.execute("DELETE FROM fingerprint WHERE id = :fpr", {'fpr': fingerprint_id})
380 if not Options["No-Action"]:
384 Logger.log(["total", count])
386 ################################################################################
388 def clean_byhash(now_date, session):
389 Logger.log(["Cleaning out unused by-hash files..."])
391 q = session.execute("""
392 DELETE FROM hashfile h
393 USING suite s, archive a
394 WHERE s.id = h.suite_id
395 AND a.id = s.archive_id
396 AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
397 RETURNING a.path, s.suite_name, h.path""")
400 if not Options["No-Action"]:
401 for base, suite, path in q:
402 filename = os.path.join(base, 'dists', suite, path)
405 except OSError as exc:
406 if exc.errno != errno.ENOENT:
408 Logger.log(['database referred to non-existing file', filename])
410 Logger.log(['delete hashfile', suite, path])
414 Logger.log(["total", count])
416 ################################################################################
418 def clean_empty_directories(session):
420 Removes empty directories from pool directories.
423 Logger.log(["Cleaning out empty directories..."])
427 cursor = session.execute(
428 """SELECT DISTINCT(path) FROM archive"""
430 bases = [x[0] for x in cursor.fetchall()]
433 for dirpath, dirnames, filenames in os.walk(base, topdown=False):
434 if not filenames and not dirnames:
435 to_remove = os.path.join(base, dirpath)
436 if not Options["No-Action"]:
437 Logger.log(["removing directory", to_remove])
438 os.removedirs(to_remove)
442 Logger.log(["total removed directories", count])
444 ################################################################################
446 def set_archive_delete_dates(now_date, session):
448 CREATE TEMPORARY TABLE archive_delete_date (
449 archive_id INT NOT NULL,
450 delete_date TIMESTAMP NOT NULL
454 INSERT INTO archive_delete_date
455 (archive_id, delete_date)
457 archive.id, :now_date - archive.stayofexecution
458 FROM archive""", {'now_date': now_date})
462 ################################################################################
465 global Options, Logger
469 for i in ["Help", "No-Action", "Maximum" ]:
470 if not cnf.has_key("Clean-Suites::Options::%s" % (i)):
471 cnf["Clean-Suites::Options::%s" % (i)] = ""
473 Arguments = [('h',"help","Clean-Suites::Options::Help"),
474 ('a','archive','Clean-Suites::Options::Archive','HasArg'),
475 ('n',"no-action","Clean-Suites::Options::No-Action"),
476 ('m',"maximum","Clean-Suites::Options::Maximum", "HasArg")]
478 apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)
479 Options = cnf.subtree("Clean-Suites::Options")
481 if cnf["Clean-Suites::Options::Maximum"] != "":
483 # Only use Maximum if it's an integer
484 max_delete = int(cnf["Clean-Suites::Options::Maximum"])
486 utils.fubar("If given, Maximum must be at least 1")
487 except ValueError as e:
488 utils.fubar("If given, Maximum must be an integer")
495 program = "clean-suites"
496 if Options['No-Action']:
497 program = "clean-suites (no action)"
498 Logger = daklog.Logger(program, debug=Options["No-Action"])
500 session = DBConn().session()
503 if 'Archive' in Options:
504 archive_names = Options['Archive'].split(',')
505 archives = session.query(Archive).filter(Archive.archive_name.in_(archive_names)).all()
506 if len(archives) == 0:
507 utils.fubar('Unknown archive.')
509 now_date = datetime.now()
511 set_archive_delete_dates(now_date, session)
513 check_binaries(now_date, session)
514 clean_binaries(now_date, session)
515 check_sources(now_date, session)
516 check_files(now_date, session)
517 clean(now_date, archives, max_delete, session)
518 clean_maintainers(now_date, session)
519 clean_fingerprints(now_date, session)
520 clean_byhash(now_date, session)
521 clean_empty_directories(session)
527 ################################################################################
529 if __name__ == '__main__':