4 Check for obsolete binary packages
6 @contact: Debian FTP Master <ftpmaster@debian.org>
7 @copyright: 2000-2006 James Troup <james@nocrew.org>
8 @copyright: 2009 Torsten Werner <twerner@debian.org>
9 @license: GNU General Public License version 2 or later
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
28 # ``If you're claiming that's a "problem" that needs to be "fixed",
29 # you might as well write some letters to God about how unfair entropy
30 # is while you're at it.'' -- 20020802143104.GA5628@azure.humbug.org.au
32 ## TODO: fix NBS looping for version, implement Dubious NBS, fix up output of
33 ## duplicate source package stuff, improve experimental ?, add overrides,
34 ## avoid ANAIS for duplicated packages
36 ################################################################################
38 import commands, os, sys, re
41 from daklib.config import Config
42 from daklib.dbconn import *
43 from daklib import utils
44 from daklib.regexes import re_extract_src_version
46 ################################################################################
48 no_longer_in_suite = {}; # Really should be static to add_nbs, but I'm lazy
53 ################################################################################
55 def usage(exit_code=0):
56 print """Usage: dak cruft-report
57 Check for obsolete or duplicated packages.
59 -h, --help show this help and exit.
60 -m, --mode=MODE chose the MODE to run in (full or daily).
61 -s, --suite=SUITE check suite SUITE.
62 -w, --wanna-build-dump where to find the copies of http://buildd.debian.org/stats/*.txt"""
65 ################################################################################
67 def add_nbs(nbs_d, source, version, package, suite_id, session):
68 # Ensure the package is still in the suite (someone may have already removed it)
69 if no_longer_in_suite.has_key(package):
72 q = session.execute("""SELECT b.id FROM binaries b, bin_associations ba
73 WHERE ba.bin = b.id AND ba.suite = :suite_id
74 AND b.package = :package LIMIT 1""", {'suite_id': suite_id,
77 no_longer_in_suite[package] = ""
80 nbs_d.setdefault(source, {})
81 nbs_d[source].setdefault(version, {})
82 nbs_d[source][version][package] = ""
84 ################################################################################
86 # Check for packages built on architectures they shouldn't be.
87 def do_anais(architecture, binaries_list, source, session):
88 if architecture == "any" or architecture == "all":
93 for arch in architecture.split():
94 architectures[arch.strip()] = ""
95 for binary in binaries_list:
96 q = session.execute("""SELECT a.arch_string, b.version
97 FROM binaries b, bin_associations ba, architecture a
98 WHERE ba.suite = :suiteid AND ba.bin = b.id
99 AND b.architecture = a.id AND b.package = :package""",
100 {'suiteid': suite_id, 'package': binary})
106 if architectures.has_key(arch):
107 versions.append(version)
108 versions.sort(apt_pkg.VersionCompare)
110 latest_version = versions.pop()
112 latest_version = None
113 # Check for 'invalid' architectures
118 if not architectures.has_key(arch):
119 versions_d.setdefault(version, [])
120 versions_d[version].append(arch)
123 anais_output += "\n (*) %s_%s [%s]: %s\n" % (binary, latest_version, source, architecture)
124 versions = versions_d.keys()
125 versions.sort(apt_pkg.VersionCompare)
126 for version in versions:
127 arches = versions_d[version]
129 anais_output += " o %s: %s\n" % (version, ", ".join(arches))
133 ################################################################################
135 # Check for out-of-date binaries on architectures that do not want to build that
136 # package any more, and have them listed as Not-For-Us
137 def do_nfu(nfu_packages):
142 for architecture in nfu_packages:
143 a2p[architecture] = []
144 for (package,bver,sver) in nfu_packages[architecture]:
145 output += " * [%s] does not want %s (binary %s, source %s)\n" % (architecture, package, bver, sver)
146 a2p[architecture].append(package)
150 print "Obsolete by Not-For-Us"
151 print "----------------------"
155 print "Suggested commands:"
156 for architecture in a2p:
157 if a2p[architecture]:
158 print (" dak rm -m \"[auto-cruft] NFU\" -s %s -a %s -b %s" %
159 (suite, architecture, " ".join(a2p[architecture])))
162 def parse_nfu(architecture):
164 # utils/hpodder_1.1.5.0: Not-For-Us [optional:out-of-date]
165 r = re.compile("^\w+/([^_]+)_.*: Not-For-Us")
169 filename = "%s/%s-all.txt" % (cnf["Cruft-Report::Options::Wanna-Build-Dump"], architecture)
171 # Not all architectures may have a wanna-build dump, so we want to ignore missing files
173 if os.path.exists(filename):
174 f = utils.open_file(filename)
185 utils.warn("No wanna-build dump file for architecture %s" % architecture)
188 ################################################################################
190 def do_newer_version(lowersuite_name, highersuite_name, code, session):
191 lowersuite = get_suite(lowersuite_name, session)
195 highersuite = get_suite(highersuite_name, session)
199 # Check for packages in $highersuite obsoleted by versions in $lowersuite
200 q = session.execute("""
201 WITH highersuite_maxversion AS (SELECT s.source AS source, max(s.version) AS version
202 FROM src_associations sa, source s
203 WHERE sa.suite = :highersuite_id AND sa.source = s.id group by s.source)
204 SELECT s.source, s.version AS lower, s2.version AS higher
205 FROM src_associations sa, source s, source s2, src_associations sa2, highersuite_maxversion hm
206 WHERE sa.suite = :highersuite_id AND sa2.suite = :lowersuite_id AND sa.source = s.id
207 AND sa2.source = s2.id AND s.source = s2.source
208 AND hm.source = s.source AND hm.version < s2.version
209 AND s.version < s2.version""", {'lowersuite_id': lowersuite.suite_id,
210 'highersuite_id': highersuite.suite_id})
214 print "Newer version in %s" % lowersuite.suite_name
215 print "-----------------" + "-" * len(lowersuite.suite_name)
218 (source, higher_version, lower_version) = i
219 print " o %s (%s, %s)" % (source, higher_version, lower_version)
220 nv_to_remove.append(source)
222 print "Suggested command:"
223 print " dak rm -m \"[auto-cruft] %s\" -s %s %s" % (code, highersuite.suite_name,
224 " ".join(nv_to_remove))
227 ################################################################################
229 def queryWithoutSource(suite_id, session):
230 """searches for arch: all packages from suite that no longer
231 reference a source package in the same suite
233 subquery unique_binaries: selects all packages with only 1 version
234 in suite since 'dak rm' does not allow to specify version numbers"""
237 with unique_binaries as
238 (select package, max(version) as version, max(source) as source
239 from bin_associations_binaries
240 where architecture = 2 and suite = :suite_id
241 group by package having count(*) = 1)
242 select ub.package, ub.version
243 from unique_binaries ub
244 left join src_associations_src sas
245 on ub.source = sas.src and sas.suite = :suite_id
247 order by ub.package"""
248 return session.execute(query, { 'suite_id': suite_id })
250 def reportWithoutSource(suite_name, suite_id, session):
251 rows = queryWithoutSource(suite_id, session)
252 title = 'packages without source in suite %s' % suite_name
253 if rows.rowcount > 0:
254 print '%s\n%s\n' % (title, '-' * len(title))
255 message = '"[auto-cruft] no longer built from source"'
257 (package, version) = row
258 print "* package %s in version %s is no longer built from source" % \
260 print " - suggested command:"
261 print " dak rm -m %s -s %s -a all -p -R -b %s\n" % \
262 (message, suite_name, package)
264 def queryNewerAll(suite_name, session):
265 """searches for arch != all packages that have an arch == all
266 package with a higher version in the same suite"""
269 select bab1.package, bab1.version as oldver,
270 array_to_string(array_agg(a.arch_string), ',') as oldarch,
271 bab2.version as newver
272 from bin_associations_binaries bab1
273 join bin_associations_binaries bab2
274 on bab1.package = bab2.package and bab1.version < bab2.version and
275 bab1.suite = bab2.suite and bab1.architecture > 2 and
276 bab2.architecture = 2
277 join architecture a on bab1.architecture = a.id
278 join suite s on bab1.suite = s.id
279 where s.suite_name = :suite_name
280 group by bab1.package, oldver, bab1.suite, newver"""
281 return session.execute(query, { 'suite_name': suite_name })
283 def reportNewerAll(suite_name, session):
284 rows = queryNewerAll(suite_name, session)
285 title = 'obsolete arch any packages in suite %s' % suite_name
286 if rows.rowcount > 0:
287 print '%s\n%s\n' % (title, '-' * len(title))
288 message = '"[auto-cruft] obsolete arch any package"'
290 (package, oldver, oldarch, newver) = row
291 print "* package %s is arch any in version %s but arch all in version %s" % \
292 (package, oldver, newver)
293 print " - suggested command:"
294 print " dak rm -m %s -s %s -a %s -p -b %s\n" % \
295 (message, suite_name, oldarch, package)
297 def queryNBS(suite_id, session):
298 """This one is really complex. It searches arch != all packages that
299 are no longer built from current source packages in suite.
301 temp table unique_binaries: will be populated with packages that
302 have only one version in suite because 'dak rm' does not allow
303 specifying version numbers
305 temp table newest_binaries: will be populated with packages that are
306 built from current sources
308 subquery uptodate_arch: returns all architectures built from current sources
311 subquery unique_binaries_uptodate_arch: returns all packages in
312 architectures from uptodate_arch
314 subquery unique_binaries_uptodate_arch_agg: same as
315 unique_binaries_uptodate_arch but with column architecture aggregated into an array (arch_list)
318 subquery uptodate_packages: similar to uptodate_arch but returns all
319 packages built from current sources
321 subquery outdated_packages: returns all packages with architectures
322 no longer built from current source
326 create temp table unique_binaries (
327 package text not null,
328 architecture integer not null,
329 source integer not null);
331 insert into unique_binaries
332 select bab.package, bab.architecture, max(bab.source)
333 from bin_associations_binaries bab
334 where bab.suite = :suite_id and bab.architecture > 2
335 group by package, architecture having count(*) = 1;
337 create temp table newest_binaries (
338 package text not null,
339 architecture integer not null,
340 source text not null,
341 version debversion not null);
343 insert into newest_binaries
344 select ub.package, ub.architecture, nsa.source, nsa.version
345 from unique_binaries ub
346 join newest_src_association nsa
347 on ub.source = nsa.src and nsa.suite = :suite_id;
349 with uptodate_arch as
350 (select architecture, source, version
352 group by architecture, source, version),
353 unique_binaries_uptodate_arch as
354 (select ub.package, ub.architecture, ua.source, ua.version
355 from unique_binaries ub
358 join uptodate_arch ua
359 on ub.architecture = ua.architecture and s.source = ua.source),
360 unique_binaries_uptodate_arch_agg as
361 (select ubua.package,
362 array(select unnest(array_agg(a.arch_string)) order by 1) as arch_list,
363 ubua.source, ubua.version
364 from unique_binaries_uptodate_arch ubua
366 on ubua.architecture = a.id
367 group by ubua.source, ubua.version, ubua.package),
369 (select package, source, version
371 group by package, source, version),
373 (select array(select unnest(array_agg(package)) order by 1) as pkg_list,
374 arch_list, source, version
375 from unique_binaries_uptodate_arch_agg
377 (select package from uptodate_packages)
378 group by arch_list, source, version)
379 select * from outdated_packages order by source"""
380 return session.execute(query, { 'suite_id': suite_id })
382 def reportNBS(suite_name, suite_id):
383 session = DBConn().session()
384 nbsRows = queryNBS(suite_id, session)
385 title = 'NBS packages in suite %s' % suite_name
386 if nbsRows.rowcount > 0:
387 print '%s\n%s\n' % (title, '-' * len(title))
389 (pkg_list, arch_list, source, version) = row
390 pkg_string = ' '.join(pkg_list)
391 arch_string = ','.join(arch_list)
392 print "* source package %s version %s no longer builds" % \
394 print " binary package(s): %s" % pkg_string
395 print " on %s" % arch_string
396 print " - suggested command:"
397 message = '"[auto-cruft] NBS (no longer built by %s)"' % source
398 print " dak rm -m %s -s %s -a %s -p -R -b %s\n" % \
399 (message, suite_name, arch_string, pkg_string)
def reportAllNBS(suite_name, suite_id, session):
    """runs the three NBS-style reports for one suite, in a fixed order:

    1. reportWithoutSource: 'packages without source in suite ...'
    2. reportNewerAll: 'obsolete arch any packages in suite ...'
    3. reportNBS: 'NBS packages in suite ...'

    session is handed to the first two reports only; reportNBS takes no
    session argument and obtains its own through DBConn().session()"""
    reportWithoutSource(suite_name=suite_name, suite_id=suite_id,
                        session=session)
    reportNewerAll(suite_name=suite_name, session=session)
    # reportNBS has no session parameter, so none can be passed along here
    reportNBS(suite_name=suite_name, suite_id=suite_id)
407 ################################################################################
409 def do_dubious_nbs(dubious_nbs):
414 dubious_nbs_keys = dubious_nbs.keys()
415 dubious_nbs_keys.sort()
416 for source in dubious_nbs_keys:
417 print " * %s_%s builds: %s" % (source,
418 source_versions.get(source, "??"),
419 source_binaries.get(source, "(source does not exist)"))
420 print " won't admit to building:"
421 versions = dubious_nbs[source].keys()
422 versions.sort(apt_pkg.VersionCompare)
423 for version in versions:
424 packages = dubious_nbs[source][version].keys()
426 print " o %s: %s" % (version, ", ".join(packages))
430 ################################################################################
432 def obsolete_source(suite_name, session):
433 """returns obsolete source packages for suite_name without binaries
434 in the same suite sorted by install_date; install_date should help
435 detecting source only (or binary throw away) uploads; duplicates in
436 the suite are skipped
438 subquery 'source_suite_unique' returns source package names from
439 suite without duplicates; the rationale behind this is that neither
440 cruft-report nor rm can handle duplicates (yet)"""
443 WITH source_suite_unique AS
444 (SELECT source, suite
445 FROM source_suite GROUP BY source, suite HAVING count(*) = 1)
446 SELECT ss.src, ss.source, ss.version,
447 to_char(ss.install_date, 'YYYY-MM-DD') AS install_date
449 JOIN source_suite_unique ssu
450 ON ss.source = ssu.source AND ss.suite = ssu.suite
451 JOIN suite s ON s.id = ss.suite
452 LEFT JOIN bin_associations_binaries bab
453 ON ss.src = bab.source AND ss.suite = bab.suite
454 WHERE s.suite_name = :suite_name AND bab.id IS NULL
455 ORDER BY install_date"""
456 args = { 'suite_name': suite_name }
457 return session.execute(query, args)
459 def source_bin(source, session):
460 """returns binaries built by source for all or no suite grouped and
461 ordered by package name"""
466 JOIN src_associations_src sas ON b.source = sas.src
467 WHERE sas.source = :source
469 ORDER BY b.package"""
470 args = { 'source': source }
471 return session.execute(query, args)
473 def newest_source_bab(suite_name, package, session):
474 """returns newest source that builds binary package in suite grouped
475 and sorted by source and package name"""
478 SELECT sas.source, MAX(sas.version) AS srcver
479 FROM src_associations_src sas
480 JOIN bin_associations_binaries bab ON sas.src = bab.source
481 JOIN suite s on s.id = bab.suite
482 WHERE s.suite_name = :suite_name AND bab.package = :package
483 GROUP BY sas.source, bab.package
484 ORDER BY sas.source, bab.package"""
485 args = { 'suite_name': suite_name, 'package': package }
486 return session.execute(query, args)
488 def report_obsolete_source(suite_name, session):
489 rows = obsolete_source(suite_name, session)
490 if rows.rowcount == 0:
493 """Obsolete source packages in suite %s
494 ----------------------------------%s\n""" % \
495 (suite_name, '-' * len(suite_name))
496 for os_row in rows.fetchall():
497 (src, old_source, version, install_date) = os_row
498 print " * obsolete source %s version %s installed at %s" % \
499 (old_source, version, install_date)
500 for sb_row in source_bin(old_source, session):
502 print " - has built binary %s" % package
503 for nsb_row in newest_source_bab(suite_name, package, session):
504 (new_source, srcver) = nsb_row
505 print " currently built by source %s version %s" % \
507 print " - suggested command:"
508 rm_opts = "-S -p -m \"[auto-cruft] obsolete source package\""
509 print " dak rm -s %s %s %s\n" % (suite_name, rm_opts, old_source)
511 def get_suite_binaries(suite, session):
512 # Initialize a large hash table of all binary packages
515 print "Getting a list of binary packages in %s..." % suite.suite_name
516 q = session.execute("""SELECT distinct b.package
517 FROM binaries b, bin_associations ba
518 WHERE ba.suite = :suiteid AND ba.bin = b.id""",
519 {'suiteid': suite.suite_id})
520 for i in q.fetchall():
525 ################################################################################
528 global suite, suite_id, source_binaries, source_versions
532 Arguments = [('h',"help","Cruft-Report::Options::Help"),
533 ('m',"mode","Cruft-Report::Options::Mode", "HasArg"),
534 ('s',"suite","Cruft-Report::Options::Suite","HasArg"),
535 ('w',"wanna-build-dump","Cruft-Report::Options::Wanna-Build-Dump","HasArg")]
537 if not cnf.has_key("Cruft-Report::Options::%s" % (i)):
538 cnf["Cruft-Report::Options::%s" % (i)] = ""
539 cnf["Cruft-Report::Options::Suite"] = cnf["Dinstall::DefaultSuite"]
541 if not cnf.has_key("Cruft-Report::Options::Mode"):
542 cnf["Cruft-Report::Options::Mode"] = "daily"
544 if not cnf.has_key("Cruft-Report::Options::Wanna-Build-Dump"):
545 cnf["Cruft-Report::Options::Wanna-Build-Dump"] = "/srv/ftp.debian.org/scripts/nfu"
547 apt_pkg.ParseCommandLine(cnf.Cnf, Arguments, sys.argv)
549 Options = cnf.SubTree("Cruft-Report::Options")
553 # Set up checks based on mode
554 if Options["Mode"] == "daily":
555 checks = [ "nbs", "nviu", "nvit", "obsolete source" ]
556 elif Options["Mode"] == "full":
557 checks = [ "nbs", "nviu", "nvit", "obsolete source", "nfu", "dubious nbs", "bnb", "bms", "anais" ]
559 utils.warn("%s is not a recognised mode - only 'full' or 'daily' are understood." % (Options["Mode"]))
562 session = DBConn().session()
576 suite = get_suite(Options["Suite"].lower(), session)
577 suite_id = suite.suite_id
578 suite_name = suite.suite_name.lower()
580 if "obsolete source" in checks:
581 report_obsolete_source(suite_name, session)
584 reportAllNBS(suite_name, suite_id, session)
589 bins_in_suite = get_suite_binaries(suite, session)
591 # Checks based on the Sources files
592 components = cnf.ValueList("Suite::%s::Components" % (suite_name))
593 for component in components:
594 filename = "%s/dists/%s/%s/source/Sources.gz" % (cnf["Dir::Root"], suite_name, component)
595 # apt_pkg.ParseTagFile needs a real file handle and can't handle a GzipFile instance...
596 (fd, temp_filename) = utils.temp_filename()
597 (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_filename))
599 sys.stderr.write("Gunzip invocation failed!\n%s\n" % (output))
601 sources = utils.open_file(temp_filename)
602 Sources = apt_pkg.ParseTagFile(sources)
603 while Sources.Step():
604 source = Sources.Section.Find('Package')
605 source_version = Sources.Section.Find('Version')
606 architecture = Sources.Section.Find('Architecture')
607 binaries = Sources.Section.Find('Binary')
608 binaries_list = [ i.strip() for i in binaries.split(',') ]
611 # Check for binaries not built on any architecture.
612 for binary in binaries_list:
613 if not bins_in_suite.has_key(binary):
614 bin_not_built.setdefault(source, {})
615 bin_not_built[source][binary] = ""
617 if "anais" in checks:
618 anais_output += do_anais(architecture, binaries_list, source, session)
620 # Check for duplicated packages and build indices for checking "no source" later
621 source_index = component + '/' + source
622 #if src_pkgs.has_key(source):
623 # print " %s is a duplicated source package (%s and %s)" % (source, source_index, src_pkgs[source])
624 src_pkgs[source] = source_index
625 for binary in binaries_list:
626 if bin_pkgs.has_key(binary):
627 key_list = [ source, bin_pkgs[binary] ]
629 key = '_'.join(key_list)
630 duplicate_bins.setdefault(key, [])
631 duplicate_bins[key].append(binary)
632 bin_pkgs[binary] = source
633 source_binaries[source] = binaries
634 source_versions[source] = source_version
637 os.unlink(temp_filename)
639 # Checks based on the Packages files
640 check_components = components[:]
641 if suite_name != "experimental":
642 check_components.append('main/debian-installer');
644 for component in check_components:
645 architectures = [ a.arch_string for a in get_suite_architectures(suite_name,
646 skipsrc=True, skipall=True,
648 for architecture in architectures:
649 if component == 'main/debian-installer' and re.match("kfreebsd", architecture):
651 filename = "%s/dists/%s/%s/binary-%s/Packages.gz" % (cnf["Dir::Root"], suite_name, component, architecture)
652 # apt_pkg.ParseTagFile needs a real file handle
653 (fd, temp_filename) = utils.temp_filename()
654 (result, output) = commands.getstatusoutput("gunzip -c %s > %s" % (filename, temp_filename))
656 sys.stderr.write("Gunzip invocation failed!\n%s\n" % (output))
660 nfu_packages.setdefault(architecture,[])
661 nfu_entries = parse_nfu(architecture)
663 packages = utils.open_file(temp_filename)
664 Packages = apt_pkg.ParseTagFile(packages)
665 while Packages.Step():
666 package = Packages.Section.Find('Package')
667 source = Packages.Section.Find('Source', "")
668 version = Packages.Section.Find('Version')
671 if bin2source.has_key(package) and \
672 apt_pkg.VersionCompare(version, bin2source[package]["version"]) > 0:
673 bin2source[package]["version"] = version
674 bin2source[package]["source"] = source
676 bin2source[package] = {}
677 bin2source[package]["version"] = version
678 bin2source[package]["source"] = source
679 if source.find("(") != -1:
680 m = re_extract_src_version.match(source)
683 if not bin_pkgs.has_key(package):
684 nbs.setdefault(source,{})
685 nbs[source].setdefault(package, {})
686 nbs[source][package][version] = ""
688 previous_source = bin_pkgs[package]
689 if previous_source != source:
690 key_list = [ source, previous_source ]
692 key = '_'.join(key_list)
693 duplicate_bins.setdefault(key, [])
694 if package not in duplicate_bins[key]:
695 duplicate_bins[key].append(package)
697 if package in nfu_entries and \
698 version != source_versions[source]: # only suggest to remove out-of-date packages
699 nfu_packages[architecture].append((package,version,source_versions[source]))
702 os.unlink(temp_filename)
704 # Distinguish dubious (version numbers match) and 'real' NBS (they don't)
706 for source in nbs.keys():
707 for package in nbs[source].keys():
708 versions = nbs[source][package].keys()
709 versions.sort(apt_pkg.VersionCompare)
710 latest_version = versions.pop()
711 source_version = source_versions.get(source,"0")
712 if apt_pkg.VersionCompare(latest_version, source_version) == 0:
713 add_nbs(dubious_nbs, source, latest_version, package, suite_id, session)
716 do_newer_version('unstable', 'experimental', 'NVIU', session)
719 do_newer_version('testing', 'testing-proposed-updates', 'NVIT', session)
723 if Options["Mode"] == "full":
731 print "Unbuilt binary packages"
732 print "-----------------------"
734 keys = bin_not_built.keys()
737 binaries = bin_not_built[source].keys()
739 print " o %s: %s" % (source, ", ".join(binaries))
743 print "Built from multiple source packages"
744 print "-----------------------------------"
746 keys = duplicate_bins.keys()
749 (source_a, source_b) = key.split("_")
750 print " o %s & %s => %s" % (source_a, source_b, ", ".join(duplicate_bins[key]))
753 if "anais" in checks:
754 print "Architecture Not Allowed In Source"
755 print "----------------------------------"
759 if "dubious nbs" in checks:
760 do_dubious_nbs(dubious_nbs)
763 ################################################################################
765 if __name__ == '__main__':