X-Git-Url: https://git.decadent.org.uk/gitweb/?p=dak.git;a=blobdiff_plain;f=dak%2Fcontents.py;h=407b3c06475dfae80229d676175bbec415d7fd35;hp=b2e9816f874a019af77a4fe1a7ec02252da6fae1;hb=17c5cab4eb8d5181ec7a81267a4e2e6b43c0fc65;hpb=0e1df5636799153ae78c13d299b772b0654106c8 diff --git a/dak/contents.py b/dak/contents.py index b2e9816f..407b3c06 100755 --- a/dak/contents.py +++ b/dak/contents.py @@ -5,6 +5,7 @@ Create all the contents files @contact: Debian FTPMaster @copyright: 2008, 2009 Michael Casadevall @copyright: 2009 Mike O'Connor +@copyright: 2011 Torsten Werner @license: GNU General Public License version 2 or later """ @@ -35,360 +36,128 @@ Create all the contents files ################################################################################ import sys -import os -import logging -import math -import gzip import apt_pkg -from daklib import utils -from daklib.binary import Binary + from daklib.config import Config -from daklib.dbconn import DBConn +from daklib.dbconn import * +from daklib.contents import BinaryContentsScanner, ContentsWriter, \ + SourceContentsScanner +from daklib import daklog +from daklib import utils + ################################################################################ def usage (exit_code=0): - print """Usage: dak contents [options] command [arguments] + print """Usage: dak contents [options] subcommand -COMMANDS +SUBCOMMANDS generate generate Contents-$arch.gz files - bootstrap - scan the debs in the existing pool and load contents in the the database + scan-source + scan the source packages in the existing pool and load contents into + the src_contents table - cruft - remove files/paths which are no longer referenced by a binary + scan-binary + scan the (u)debs in the existing pool and load contents into the + bin_contents table OPTIONS -h, --help show this help and exit - -v, --verbose - show verbose information messages - - -q, --quiet - supress all output but errors +OPTIONS for generate + -a, --archive=ARCHIVE + only operate on suites in the specified archive -s, --suite={stable,testing,unstable,...} - only operate on a single suite + only operate on specified suite names + + -c, --component={main,contrib,non-free} + only operate on specified components - -a, --arch={i386,amd64} - only operate on a single architecture + -f, --force + write Contents files for suites marked as untouchable, too + +OPTIONS for scan-source and scan-binary + -l, --limit=NUMBER + maximum number of packages to scan """ sys.exit(exit_code) ################################################################################ -# where in dak.conf all of our configuration will be stowed - -options_prefix = "Contents" -options_prefix = "%s::Options" % options_prefix - -log = logging.getLogger() +def write_all(cnf, archive_names = [], suite_names = [], component_names = [], force = None): + Logger = daklog.Logger('contents generate') + ContentsWriter.write_all(Logger, archive_names, suite_names, component_names, force) + Logger.close() ################################################################################ -# get all the arches delivered for a given suite -# this should probably exist somehere common -arches_q = """PREPARE arches_q(int) as - SELECT s.architecture, a.arch_string - FROM suite_architectures s - JOIN architecture a ON (s.architecture=a.id) - WHERE suite = $1""" - -# find me the .deb for a given binary id -debs_q = """PREPARE debs_q(int, int) as - SELECT b.id, f.filename FROM bin_assoc_by_arch baa - JOIN binaries b ON baa.bin=b.id - JOIN files f ON b.file=f.id - WHERE suite = $1 - AND arch = $2""" - -# ask if we already have contents associated with this binary -olddeb_q = """PREPARE olddeb_q(int) as - SELECT 1 FROM content_associations - WHERE binary_pkg = $1 - LIMIT 1""" - -# find me all of the contents for a given .deb -contents_q = """PREPARE contents_q(int,int,int,int) as - SELECT (p.path||'/'||n.file) AS fn, - comma_separated_list(s.section||'/'||b.package) - from content_associations c join content_file_paths p ON (c.filepath=p.id) - JOIN content_file_names n ON (c.filename=n.id) - JOIN binaries b ON (b.id=c.binary_pkg) - JOIN override o ON (o.package=b.package) - JOIN section s ON (s.id=o.section) - WHERE o.suite = $1 AND o.type = $2 - AND b.id in (SELECT ba.bin from bin_associations ba join binaries b on b.id=ba.bin where (b.architecture=$3 or b.architecture=$4)and ba.suite=$1 and b.type='deb') - GROUP BY fn - ORDER BY fn;""" - -# find me all of the contents for a given .udeb -udeb_contents_q = """PREPARE udeb_contents_q(int,int,int,int,int) as - SELECT (p.path||'/'||n.file) AS fn, - comma_separated_list(s.section||'/'||b.package) - FROM content_file_paths p join content_associations c ON (c.filepath=p.id) - JOIN content_file_names n ON (c.filename=n.id) - JOIN binaries b ON (b.id=c.binary_pkg) - JOIN override o ON (o.package=b.package) - JOIN section s ON (s.id=o.section) - WHERE o.suite = $1 AND o.type = $2 - AND s.id = $3 - AND b.id in (SELECT ba.bin from bin_associations ba join binaries b on b.id=ba.bin where (b.architecture=$3 or b.architecture=$4)and ba.suite=$1 and b.type='udeb') - GROUP BY fn - ORDER BY fn;""" - - - -# clear out all of the temporarily stored content associations -# this should be run only after p-a has run. after a p-a -# run we should have either accepted or rejected every package -# so there should no longer be anything in the queue -remove_pending_contents_cruft_q = """DELETE FROM pending_content_associations""" - -# delete any filenames we are storing which have no binary associated with them -remove_filename_cruft_q = """DELETE FROM content_file_names - WHERE id IN (SELECT cfn.id FROM content_file_names cfn - LEFT JOIN content_associations ca - ON ca.filename=cfn.id - WHERE ca.id IS NULL)""" - -# delete any paths we are storing which have no binary associated with them -remove_filepath_cruft_q = """DELETE FROM content_file_paths - WHERE id IN (SELECT cfn.id FROM content_file_paths cfn - LEFT JOIN content_associations ca - ON ca.filepath=cfn.id - WHERE ca.id IS NULL)""" -class Contents(object): - """ - Class capable of generating Contents-$arch.gz files - - Usage GenerateContents().generateContents( ["main","contrib","non-free"] ) - """ - - def __init__(self): - self.header = None - - def reject(self, message): - log.error("E: %s" % message) - - def _getHeader(self): - """ - Internal method to return the header for Contents.gz files - - This is boilerplate which explains the contents of the file and how - it can be used. - """ - if self.header == None: - if Config().has_key("Contents::Header"): - try: - h = open(os.path.join( Config()["Dir::Templates"], - Config()["Contents::Header"] ), "r") - self.header = h.read() - h.close() - except: - log.error( "error opening header file: %d\n%s" % (Config()["Contents::Header"], - traceback.format_exc() )) - self.header = False - else: - self.header = False - - return self.header - - # goal column for section column - _goal_column = 54 - - def _write_content_file(self, cursor, filename): - """ - Internal method for writing all the results to a given file. - The cursor should have a result set generated from a query already. - """ - filepath = Config()["Contents::Root"] + filename - filedir = os.path.dirname(filepath) - if not os.path.isdir(filedir): - os.makedirs(filedir) - f = gzip.open(filepath, "w") - try: - header = self._getHeader() - - if header: - f.write(header) - - while True: - contents = cursor.fetchone() - if not contents: - return - - f.write("%s\t%s\n" % contents ) - - finally: - f.close() - - def cruft(self): - """ - remove files/paths from the DB which are no longer referenced - by binaries and clean the temporary table - """ - cursor = DBConn().cursor(); - cursor.execute( "BEGIN WORK" ) - cursor.execute( remove_pending_contents_cruft_q ) - cursor.execute( remove_filename_cruft_q ) - cursor.execute( remove_filepath_cruft_q ) - cursor.execute( "COMMIT" ) - - - def bootstrap(self): - """ - scan the existing debs in the pool to populate the contents database tables - """ - pooldir = Config()[ 'Dir::Pool' ] - - cursor = DBConn().cursor(); - DBConn().prepare("debs_q",debs_q) - DBConn().prepare("olddeb_q",olddeb_q) - DBConn().prepare("arches_q",arches_q) - - suites = self._suites() - for suite in [i.lower() for i in suites]: - suite_id = DBConn().get_suite_id(suite) - - arch_list = self._arches(cursor, suite_id) - arch_all_id = DBConn().get_architecture_id("all") - for arch_id in arch_list: - cursor.execute( "EXECUTE debs_q(%d, %d)" % ( suite_id, arch_id[0] ) ) - - count = 0 - while True: - deb = cursor.fetchone() - if not deb: - break - count += 1 - cursor1 = DBConn().cursor(); - cursor1.execute( "EXECUTE olddeb_q(%d)" % (deb[0] ) ) - old = cursor1.fetchone() - if old: - log.debug( "already imported: %s" % (deb[1]) ) - else: - log.debug( "scanning: %s" % (deb[1]) ) - debfile = os.path.join( pooldir, deb[1] ) - if os.path.exists( debfile ): - Binary(debfile, self.reject).scan_package(deb[0],True) - else: - log.error("missing .deb: %s" % deb[1]) - - def generate(self): - """ - Generate Contents-$arch.gz files for every available arch in each given suite. - """ - cursor = DBConn().cursor() - - DBConn().prepare("arches_q", arches_q) - DBConn().prepare("contents_q", contents_q) - DBConn().prepare("udeb_contents_q", udeb_contents_q) - - debtype_id=DBConn().get_override_type_id("deb") - udebtype_id=DBConn().get_override_type_id("udeb") - - suites = self._suites() - - # Get our suites, and the architectures - for suite in [i.lower() for i in suites]: - suite_id = DBConn().get_suite_id(suite) - arch_list = self._arches(cursor, suite_id) - - arch_all_id = DBConn().get_architecture_id("all") - - for arch_id in arch_list: - cursor.execute("EXECUTE contents_q(%d,%d,%d,%d)" % (suite_id, debtype_id, arch_all_id, arch_id[0] )) - self._write_content_file(cursor, "dists/%s/Contents-%s.gz" % (suite, arch_id[1])) - - # The MORE fun part. Ok, udebs need their own contents files, udeb, and udeb-nf (not-free) - # This is HORRIBLY debian specific :-/ - for section, fn_pattern in [("debian-installer","dists/%s/Contents-udeb-%s.gz"), - ("non-free/debian-installer", "dists/%s/Contents-udeb-nf-%s.gz")]: - - for arch_id in arch_list: - section_id = DBConn().get_section_id(section) # all udebs should be here) - if section_id != -1: - cursor.execute("EXECUTE udeb_contents_q(%d,%d,%d,%d,%d)" % (suite_id, udebtype_id, section_id, arch_id[0], arch_all_id)) - - self._write_content_file(cursor, fn_pattern % (suite, arch_id[1])) - +def binary_scan_all(cnf, limit): + Logger = daklog.Logger('contents scan-binary') + result = BinaryContentsScanner.scan_all(limit) + processed = '%(processed)d packages processed' % result + remaining = '%(remaining)d packages remaining' % result + Logger.log([processed, remaining]) + Logger.close() ################################################################################ - def _suites(self): - """ - return a list of suites to operate on - """ - if Config().has_key( "%s::%s" %(options_prefix,"Suite")): - suites = utils.split_args(Config()[ "%s::%s" %(options_prefix,"Suite")]) - else: - suites = Config().SubTree("Suite").List() - - return suites - - def _arches(self, cursor, suite): - """ - return a list of archs to operate on - """ - arch_list = [ ] - if Config().has_key( "%s::%s" %(options_prefix,"Arch")): - archs = utils.split_args(Config()[ "%s::%s" %(options_prefix,"Arch")]) - for arch_name in archs: - arch_list.append((DBConn().get_architecture_id(arch_name), arch_name)) - else: - cursor.execute("EXECUTE arches_q(%d)" % (suite)) - while True: - r = cursor.fetchone() - if not r: - break - - if r[1] != "source" and r[1] != "all": - arch_list.append((r[0], r[1])) - - return arch_list +def source_scan_all(cnf, limit): + Logger = daklog.Logger('contents scan-source') + result = SourceContentsScanner.scan_all(limit) + processed = '%(processed)d packages processed' % result + remaining = '%(remaining)d packages remaining' % result + Logger.log([processed, remaining]) + Logger.close() ################################################################################ - def main(): cnf = Config() - - arguments = [('h',"help", "%s::%s" % (options_prefix,"Help")), - ('s',"suite", "%s::%s" % (options_prefix,"Suite"),"HasArg"), - ('q',"quiet", "%s::%s" % (options_prefix,"Quiet")), - ('v',"verbose", "%s::%s" % (options_prefix,"Verbose")), - ('a',"arch", "%s::%s" % (options_prefix,"Arch"),"HasArg"), + cnf['Contents::Options::Help'] = '' + cnf['Contents::Options::Suite'] = '' + cnf['Contents::Options::Component'] = '' + cnf['Contents::Options::Limit'] = '' + cnf['Contents::Options::Force'] = '' + arguments = [('h', "help", 'Contents::Options::Help'), + ('a', 'archive', 'Contents::Options::Archive', 'HasArg'), + ('s', "suite", 'Contents::Options::Suite', "HasArg"), + ('c', "component", 'Contents::Options::Component', "HasArg"), + ('l', "limit", 'Contents::Options::Limit', "HasArg"), + ('f', "force", 'Contents::Options::Force'), ] + args = apt_pkg.parse_commandline(cnf.Cnf, arguments, sys.argv) + options = cnf.subtree('Contents::Options') - commands = {'generate' : Contents.generate, - 'bootstrap' : Contents.bootstrap, - 'cruft' : Contents.cruft, - } + if (len(args) != 1) or options['Help']: + usage() - args = apt_pkg.ParseCommandLine(cnf.Cnf, arguments,sys.argv) + limit = None + if len(options['Limit']) > 0: + limit = int(options['Limit']) - if (len(args) < 1) or not commands.has_key(args[0]): - usage() + if args[0] == 'scan-source': + source_scan_all(cnf, limit) + return - if cnf.has_key("%s::%s" % (options_prefix,"Help")): - usage() + if args[0] == 'scan-binary': + binary_scan_all(cnf, limit) + return - level=logging.INFO - if cnf.has_key("%s::%s" % (options_prefix,"Quiet")): - level=logging.ERROR + archive_names = utils.split_args(options['Archive']) + suite_names = utils.split_args(options['Suite']) + component_names = utils.split_args(options['Component']) - elif cnf.has_key("%s::%s" % (options_prefix,"Verbose")): - level=logging.DEBUG + force = bool(options['Force']) + if args[0] == 'generate': + write_all(cnf, archive_names, suite_names, component_names, force) + return - logging.basicConfig( level=level, - format='%(asctime)s %(levelname)s %(message)s', - stream = sys.stderr ) + usage() - commands[args[0]](Contents()) if __name__ == '__main__': main()