From 1a2f4f83246eb79711bb8f31d20712f120bac249 Mon Sep 17 00:00:00 2001 From: Ansgar Burchardt Date: Tue, 11 Aug 2015 21:39:27 +0200 Subject: [PATCH] Initial work on import-repository The new subcommand allows importing packages from an existing repository. It is currently intended to be used on Debian Ports to import source and arch:all packages from the main archive. There are still several places that need work. --- dak/import_repository.py | 182 +++++++++++++++++++++++++++++ daklib/import_repository.py | 225 ++++++++++++++++++++++++++++++++++++ 2 files changed, 407 insertions(+) create mode 100644 dak/import_repository.py create mode 100644 daklib/import_repository.py diff --git a/dak/import_repository.py b/dak/import_repository.py new file mode 100644 index 00000000..5522be0f --- /dev/null +++ b/dak/import_repository.py @@ -0,0 +1,182 @@ +#! /usr/bin/env python +# +# Copyright (C) 2015, Ansgar Burchardt +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
def usage(status=0):
    """Print the command line help and terminate the process.

    'status' is passed to sys.exit() as the exit status.
    """
    print("""
dak import-repository
  --keyring=/usr/share/keyring/debian-archive-keyring.gpg
  [--key=${fingerprint}]
  [--architectures=a,b,c (default: architectures in origin suite)]
  [--components=main,contrib (default: components in origin suite)]
  [--target-suite=${suite} (default: origin suite name)]
  [--add-overrides]
  http://httpredir.debian.org/debian unstable

Things to think about:
 - Import Built-Using sources
   - all / only referenced
 - Remove old packages:
   - by-source: remove source X_v, if no X exists upstream
   - by-version: remove source X_v, if no X_v exists upstream
   (X denotes package name, v version, X_v package at a specific version)
 - Import all or only newest?
 - Expire binary packages?
""")
    sys.exit(status)


def entry_is_newer(entry, packages):
    """Check whether 'entry' is newer than every known version of its package.

    'entry' is a dict-like index stanza providing 'Package' and 'Version';
    'packages' maps package names to lists of objects that have a 'version'
    attribute (see get_packages_in_suite).

    Returns False as soon as one known version is equal to or newer than the
    version of 'entry', and True otherwise (also when no version is known).
    """
    version = entry['Version']
    # .get() instead of indexing: looking up an unknown name must not insert
    # an empty list into a defaultdict (that would make entry_in_packages()
    # report the package as present afterwards) nor fail on a plain dict.
    for p in packages.get(entry['Package'], ()):
        if apt_pkg.version_compare(version, p.version) <= 0:
            return False
    return True


def entry_in_packages(entry, packages):
    """Return True if the package named by entry['Package'] is a key of 'packages'."""
    return entry['Package'] in packages


def get_packages_in_suite(suite):
    """Index the contents of 'suite' by name.

    Returns a tuple (sources, packages) of defaultdict(list): 'sources' maps
    the 'source' attribute of each item in suite.sources to the list of those
    items, 'packages' does the same for suite.binaries keyed by 'package'.
    """
    sources = defaultdict(list)
    for s in suite.sources:
        sources[s.source].append(s)

    packages = defaultdict(list)
    for b in suite.binaries:
        packages[b.package].append(b)

    return sources, packages


def pick_extra_component(name, component, extra_components):
    """Choose the component an extra (Built-Using) source is imported into.

    'name' is the name of the component currently being processed,
    'component' the object belonging to that name and 'extra_components' the
    collection of component names under which the origin repository lists
    the extra source.

    Returns 'component' when the source is listed in the current component;
    raises Exception("Not implemented.") otherwise.
    """
    if name in extra_components:
        return component
    # TODO: Take preferred components from those listed...
    # e.g. a contrib binary package Built-Using a main source
    raise Exception("Not implemented.")


def main(argv=None):
    """Entry point of 'dak import-repository'.

    Parses the command line ('argv' defaults to sys.argv), reads the signed
    release information of the origin suite and imports source packages,
    binary packages and the sources referenced via Built-Using that are
    missing from, or newer than, what the target suite already has.

    NOTE: work in progress -- the commit calls are still commented out and
    the transaction is rolled back at the end.
    """
    if argv is None:
        argv = sys.argv

    arguments = [
        ('h', 'help', 'Import-Repository::Help'),
        ('k', 'keyring', 'Import-Repository::Keyring', 'HasArg'),
        ('K', 'key', 'Import-Repository::Key', 'HasArg'),
        ('a', 'architectures', 'Import-Repository::Architectures', 'HasArg'),
        ('c', 'components', 'Import-Repository::Components', 'HasArg'),
        ('t', 'target-suite', 'Import-Repository::Target-Suite', 'HasArg'),
        ('A', 'add-overrides', 'Import-Repository::AddOverrides'),
    ]

    cnf = daklib.config.Config()
    argv = apt_pkg.parse_commandline(cnf.Cnf, arguments, argv)
    options = cnf.subtree('Import-Repository')

    if 'Help' in options:
        usage(0)
    if len(argv) < 2:
        # Missing positional arguments are an error, not a help request.
        usage(1)

    keyring = options.find('Keyring') or None
    if keyring is None:
        print("Error: No keyring specified")
        print()
        # Do not go on without a keyring: it is handed to obtain_release()
        # together with the signed InRelease data.
        usage(1)

    if 'Key' in options:
        raise Exception('Not implemented.')

    if 'AddOverrides' in options:
        raise Exception('Not implemented.')

    base, suite = argv[0:2]

    target_suite_name = options.find('Target-Suite') or suite

    print("Importing packages from {0}/dists/{1} to {2}".format(base, suite, target_suite_name))
    with daklib.archive.ArchiveTransaction() as transaction:
        target_suite = daklib.dbconn.get_suite(target_suite_name, transaction.session)
        if target_suite is None:
            daklib.utils.fubar("Target suite '{0}' is unknown.".format(target_suite_name))

        release = daklib.import_repository.obtain_release(base, suite, keyring)
        target_sources, target_binaries = get_packages_in_suite(target_suite)

        if 'Architectures' in options:
            architectures = options['Architectures'].split(',')
        else:
            architectures = ['all'] + release.architectures()

        if 'Components' in options:
            components = options['Components'].split(',')
        else:
            components = release.components()

        # TODO: Clean this up...

        # (package, version) -> Sources entry marked Extra-Source-Only, and
        # (package, version) -> names of the components listing that entry.
        extra_sources = dict()
        extra_sources_comp = defaultdict(set)
        for c in components:
            component = daklib.dbconn.get_component(c, transaction.session)
            print("Processing {0}/source...".format(c))
            sources = release.sources(c)
            for entry in sources:
                if entry.get('Extra-Source-Only', 'no') == 'yes':
                    # Remember package, we might need to import it later.
                    key = (entry['Package'], entry['Version'])
                    extra_sources[key] = entry
                    extra_sources_comp[key].add(c)
                    continue
                if not entry_in_packages(entry, target_sources) or entry_is_newer(entry, target_sources):
                    print("Importing {0}={1}".format(entry['Package'], entry['Version']))
                    daklib.import_repository.import_source_to_suite(base, entry, transaction, target_suite, component)
                    #transaction.commit()

        for c in components:
            component = daklib.dbconn.get_component(c, transaction.session)
            for architecture in architectures:
                print("Processing {0}/{1}...".format(c, architecture))
                packages = release.packages(c, architecture)
                for entry in packages:
                    if not entry_in_packages(entry, target_binaries) or entry_is_newer(entry, target_binaries):
                        print("Importing {0}={1} ({2})".format(entry['Package'], entry['Version'], architecture))
                        # Import Built-Using sources:
                        for bu_source, bu_version in daklib.utils.parse_built_using(entry):
                            if not daklib.import_repository.source_in_archive(bu_source, bu_version, target_suite.archive):
                                print("Importing extra source {0}={1}".format(bu_source, bu_version))
                                key = (bu_source, bu_version)
                                extra_entry = extra_sources.get(key)
                                if extra_entry is None:
                                    raise Exception("Extra source {0}={1} referenced by {2}={3} ({4}) not found in source suite.".format(bu_source, bu_version, entry['Package'], entry['Version'], architecture))
                                # The decision has to be based on the
                                # components listing the extra source; 'c'
                                # is always a member of 'components' itself.
                                extra_component = pick_extra_component(c, component, extra_sources_comp[key])
                                daklib.import_repository.import_source_to_suite(base, extra_entry, transaction, target_suite, extra_component)
                        # Import binary:
                        daklib.import_repository.import_package_to_suite(base, entry, transaction, target_suite, component)
                        #transaction.commit()

        transaction.rollback()


if __name__ == '__main__':
    main()
# Checksum fields of a Release file that are handed to parse_file_list().
_release_hashes_fields = ('MD5Sum', 'SHA1', 'SHA256')


class Release(object):
    """Release information of one suite of an origin repository.

    Wraps the parsed Release data together with the repository base and the
    suite name, and gives access to the index files referenced from it.
    """

    def __init__(self, base, suite_name, data):
        """Parse 'data' (the contents of a Release file).

        'base' is the repository location and 'suite_name' the directory
        below dists/ the Release file was obtained from.
        """
        self._base = base
        self._suite_name = suite_name
        self._dict = apt_pkg.TagSection(data)
        self._hashes = daklib.upload.parse_file_list(self._dict, False, daklib.regexes.re_file_safe_slash, _release_hashes_fields)

    def architectures(self):
        """Return the whitespace-separated 'Architectures' field as a list."""
        return self._dict['Architectures'].split()

    def components(self):
        """Return the whitespace-separated 'Components' field as a list."""
        return self._dict['Components'].split()

    def packages(self, component, architecture):
        """Return an apt_pkg.TagFile over the Packages index of 'component'/'architecture'."""
        fn = '{0}/binary-{1}/Packages'.format(component, architecture)
        tmp = obtain_release_file(self, fn)
        return apt_pkg.TagFile(tmp.fh())

    def sources(self, component):
        """Return an apt_pkg.TagFile over the Sources index of 'component'."""
        fn = '{0}/source/Sources'.format(component)
        tmp = obtain_release_file(self, fn)
        return apt_pkg.TagFile(tmp.fh())

    def suite(self):
        """Return the 'Suite' field."""
        return self._dict['Suite']

    def codename(self):
        """Return the 'Codename' field."""
        return self._dict['Codename']

    # TODO: Handle Date/Valid-Until to make sure we import
    #       a newer version than before


class File(object):
    """A temporary file created below the configured Dir::TempPath.

    The underlying tempfile.NamedTemporaryFile is removed once it is closed,
    so the data only lives as long as this object is referenced.
    """

    def __init__(self):
        config = daklib.config.Config()
        self._tmp = tempfile.NamedTemporaryFile(dir=config['Dir::TempPath'])

    def fh(self):
        """Return the file object, rewound to the start of the file."""
        self._tmp.seek(0)
        return self._tmp

    def hashes(self):
        """Return apt_pkg.Hashes computed over the file."""
        return apt_pkg.Hashes(self.fh())


# URL prefixes that obtain_file() fetches with urllib2 instead of open().
_remote_schemes = ('http://', 'https://')


def is_remote_path(path):
    """Return True if 'path' starts with one of the supported URL schemes."""
    return path.startswith(_remote_schemes)


def obtain_file(base, path):
    """Obtain a file 'path' located below 'base'

    'base' is either an http(s) URL or a local directory.

    Returns: daklib.import_repository.File

    Note: return type can still change
    """
    fn = '{0}/{1}'.format(base, path)
    tmp = File()
    if is_remote_path(fn):
        fh = urllib2.urlopen(fn, timeout=300)
        try:
            shutil.copyfileobj(fh, tmp._tmp)
        finally:
            # Release the connection even if copying fails.
            fh.close()
    else:
        # Index files and packages are binary data: never open in text mode.
        with open(fn, 'rb') as fh:
            shutil.copyfileobj(fh, tmp._tmp)
    return tmp


def obtain_release(base, suite_name, keyring, fingerprint=None):
    """Obtain release information

    Downloads dists/<suite_name>/InRelease below 'base' and passes it with
    'keyring' to daklib.gpg.SignedFile (presumably that is where the
    signature gets verified -- confirm in daklib.gpg).

    'fingerprint' is accepted but not used yet.

    Raises Exception if 'suite_name' matches neither the Suite nor the
    Codename field of the Release data.

    Returns: daklib.import_repository.Release
    """
    tmp = obtain_file(base, 'dists/{0}/InRelease'.format(suite_name))
    data = tmp.fh().read()
    f = daklib.gpg.SignedFile(data, [keyring])
    r = Release(base, suite_name, f.contents)
    if r.suite() != suite_name and r.codename() != suite_name:
        raise Exception("Suite {0} doesn't match suite or codename from Release file.".format(suite_name))
    return r


# Extensions tried, in this order, when looking for a compressed index.
_compressions = ('.xz', '.gz', '.bz2')


def obtain_release_file(release, filename):
    """Obtain file referenced from Release

    A compressed version is automatically selected and decompressed if it exists.

    Raises IOError if 'filename' itself is not referenced in the Release file.

    Returns: daklib.import_repository.File
    """
    if filename not in release._hashes:
        raise IOError("File {0} not referenced in Release".format(filename))

    compressed = False
    for ext in _compressions:
        compressed_file = filename + ext
        if compressed_file in release._hashes:
            compressed = True
            filename = compressed_file
            break

    # Obtain file and check hashes
    tmp = obtain_file(release._base, 'dists/{0}/{1}'.format(release._suite_name, filename))
    hashedfile = release._hashes[filename]
    hashedfile.check_fh(tmp.fh())

    if compressed:
        tmp2 = File()
        daklib.compress.decompress(tmp.fh(), tmp2.fh(), filename)
        tmp = tmp2

    return tmp


def import_source_to_archive(base, entry, transaction, archive, component):
    """Import source package described by 'entry' into the given 'archive' and 'component'

    'entry' needs to be a dict-like object with at least the following
    keys as used in a Sources index: Directory, Files, Checksums-Sha1,
    Checksums-Sha256

    Raises Exception if the Directory field is not a safe path or if
    'entry' does not list any files.

    Return: daklib.dbconn.DBSource

    """
    # Obtain and verify files
    if not daklib.regexes.re_file_safe_slash.match(entry['Directory']):
        raise Exception("Unsafe path in Directory field")
    hashed_files = daklib.upload.parse_file_list(entry, False)
    # Keep a reference to every File: the temporary files must stay around
    # until they have been installed into the archive below.
    files = []
    directory = None
    for f in hashed_files.values():
        path = os.path.join(entry['Directory'], f.filename)
        tmp = obtain_file(base, path)
        f.check_fh(tmp.fh())
        files.append(tmp)
        directory, f.input_filename = os.path.split(tmp.fh().name)
    if directory is None:
        # Without this check an empty file list fails later with a NameError.
        raise Exception("Source entry does not list any files")

    # Inject files into archive
    source = daklib.upload.Source(directory, hashed_files.values(), [], require_signature=False)
    # TODO: ugly hack!
    for f in hashed_files.keys():
        if f.endswith('.dsc'):
            continue
        source.files[f].input_filename = hashed_files[f].input_filename

    # TODO: allow changed_by to be NULL
    changed_by = source.dsc['Maintainer']
    db_changed_by = daklib.dbconn.get_or_set_maintainer(changed_by, transaction.session)
    db_source = transaction.install_source_to_archive(directory, source, archive, component, db_changed_by)

    return db_source


def import_package_to_suite(base, entry, transaction, suite, component):
    """Import binary package described by 'entry' into the given 'suite' and 'component'

    'entry' needs to be a dict-like object with at least the following
    keys as used in a Packages index: Filename, Size, MD5sum, SHA1,
    SHA256

    Returns: daklib.dbconn.DBBinary
    """
    # Obtain and verify file
    filename = entry['Filename']
    tmp = obtain_file(base, filename)
    directory, fn = os.path.split(tmp.fh().name)
    # int() yields an arbitrary precision integer where needed, so it is
    # equivalent to long() on Python 2 and keeps working without it.
    hashedfile = daklib.upload.HashedFile(os.path.basename(filename), int(entry['Size']), entry['MD5sum'], entry['SHA1'], entry['SHA256'], input_filename=fn)
    hashedfile.check_fh(tmp.fh())

    # Inject file into archive
    binary = daklib.upload.Binary(directory, hashedfile)
    db_binary = transaction.install_binary(directory, binary, suite, component)
    transaction.flush()

    return db_binary


def import_source_to_suite(base, entry, transaction, suite, component):
    """Import source package described by 'entry' into the given 'suite' and 'component'

    'entry' needs to be a dict-like object with at least the following
    keys as used in a Sources index: Directory, Files, Checksums-Sha1,
    Checksums-Sha256

    The source is imported into the archive of 'suite' and then added to
    the suite itself.

    Returns: daklib.dbconn.DBSource
    """
    source = import_source_to_archive(base, entry, transaction, suite.archive, component)
    source.suites.append(suite)
    transaction.flush()
    return source


def source_in_archive(source, version, archive, component=None):
    """Check that source package 'source' with version 'version' exists in 'archive',
    with an optional check for the given component 'component'.

    @type source: str
    @type version: str
    @type archive: daklib.dbconn.Archive
    @type component: daklib.dbconn.Component or None
    @rtype: boolean

    Note: This should probably be moved somewhere else
    """
    # Use the database session the 'archive' object is attached to.
    session = object_session(archive)
    query = session.query(DBSource).filter_by(source=source, version=version) \
        .join(DBSource.poolfile).join(PoolFile.archives).filter_by(archive=archive)
    if component is not None:
        query = query.filter_by(component=component)
    return session.query(query.exists()).scalar()