]> git.decadent.org.uk Git - dak.git/commitdiff
Initial work on import-repository
author: Ansgar Burchardt <ansgar@debian.org>
Tue, 11 Aug 2015 19:39:27 +0000 (21:39 +0200)
committer: Ansgar Burchardt <ansgar@debian.org>
Fri, 14 Aug 2015 11:10:09 +0000 (13:10 +0200)
The new subcommand allows importing packages from an existing
repository. It is currently intended to be used on Debian Ports to
import source and arch:all packages from the main archive.

There are still several places that need work.

dak/import_repository.py [new file with mode: 0644]
daklib/import_repository.py [new file with mode: 0644]

diff --git a/dak/import_repository.py b/dak/import_repository.py
new file mode 100644 (file)
index 0000000..5522be0
--- /dev/null
@@ -0,0 +1,182 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2015, Ansgar Burchardt <ansgar@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from __future__ import print_function
+
+import daklib.archive
+import daklib.config
+import daklib.dbconn
+import daklib.import_repository
+import daklib.utils
+
+import apt_pkg
+import sys
+
+from collections import defaultdict
+
+def usage(status=0):
+    """Print the command-line help for "dak import-repository" and exit.
+
+    'status' is handed to sys.exit() and becomes the exit code of the
+    process; this function never returns.
+    """
+    # The example keyring path uses /usr/share/keyrings/ (plural), which is
+    # where the debian-archive-keyring package installs its keyring.
+    print("""
+dak import-repository
+  --keyring=/usr/share/keyrings/debian-archive-keyring.gpg
+  [--key=${fingerprint}]
+  [--architectures=a,b,c (default: architectures in origin suite)]
+  [--components=main,contrib (default: components in origin suite)]
+  [--target-suite=${suite} (default: origin suite name)]
+  [--add-overrides]
+  http://httpredir.debian.org/debian unstable
+
+Things to think about:
+ - Import Built-Using sources
+   - all / only referenced
+ - Remove old packages:
+   - by-source: remove source X_v, if no X exists upstream
+   - by-version: remove source X_v, if no X_v exists upstream
+   (X denotes package name, v version, X_v package at a specific version)
+ - Import all or only newest?
+ - Expire binary packages?
+""")
+    sys.exit(status)
+
+def entry_is_newer(entry, packages):
+    """Tell whether 'entry' is strictly newer than every known version
+
+    'entry' is a dict-like index stanza with 'Package' and 'Version' keys;
+    'packages' maps a package name to a list of objects that have a
+    'version' attribute.
+
+    Returns: True if entry's version compares greater than all versions
+    listed for its package name (also when none are listed), else False
+    """
+    candidate = entry['Version']
+    known = packages[entry['Package']]
+    return all(apt_pkg.version_compare(candidate, existing.version) > 0
+               for existing in known)
+
+def entry_in_packages(entry, packages):
+    """Tell whether the package named by entry['Package'] is a key of 'packages'"""
+    name = entry['Package']
+    return name in packages
+
+def get_packages_in_suite(suite):
+    """Group the sources and binaries of 'suite' by name
+
+    Returns: tuple (sources, packages) of two defaultdict(list); the first
+    maps each 'source' attribute value to the objects from suite.sources,
+    the second maps each 'package' attribute value to the objects from
+    suite.binaries
+    """
+    by_source = defaultdict(list)
+    by_package = defaultdict(list)
+
+    for src in suite.sources:
+        by_source[src.source].append(src)
+    for binary in suite.binaries:
+        by_package[binary.package].append(binary)
+
+    return by_source, by_package
+
+def main(argv=None):
+    """Entry point of "dak import-repository"
+
+    Parses the command line ('argv', defaulting to sys.argv), fetches the
+    Release information for the origin suite and then walks the Sources
+    and Packages indices of every selected component/architecture.  A
+    source or binary is imported when its name is not yet present in the
+    target suite or when its version is newer than all versions present.
+    Sources marked "Extra-Source-Only: yes" are only remembered and are
+    imported on demand when a binary references them via Built-Using.
+
+    Note: the commit calls are still commented out and the transaction is
+    rolled back at the end, so nothing is committed by this function.
+    """
+    if argv is None:
+        argv = sys.argv
+
+    arguments = [
+        ('h', 'help', 'Import-Repository::Help'),
+        ('k', 'keyring', 'Import-Repository::Keyring', 'HasArg'),
+        ('K', 'key', 'Import-Repository::Key', 'HasArg'),
+        ('a', 'architectures', 'Import-Repository::Architectures', 'HasArg'),
+        ('c', 'components', 'Import-Repository::Components', 'HasArg'),
+        ('t', 'target-suite', 'Import-Repository::Target-Suite', 'HasArg'),
+        ('A', 'add-overrides', 'Import-Repository::AddOverrides'),
+        ]
+
+    cnf = daklib.config.Config()
+    argv = apt_pkg.parse_commandline(cnf.Cnf, arguments, argv)
+    options = cnf.subtree('Import-Repository')
+
+    if 'Help' in options or len(argv) < 2:
+        usage(0)
+
+    keyring = options.find('Keyring') or None
+    if keyring is None:
+        print("Error: No keyring specified")
+        print()
+        # Without a keyring the signature on InRelease cannot be checked, so
+        # stop here (non-zero exit) instead of continuing with keyring=None.
+        usage(1)
+
+    if 'Key' in options:
+        raise Exception('Not implemented.')
+
+    if 'AddOverrides' in options:
+        raise Exception('Not implemented.')
+
+    # Positional arguments: repository base location and origin suite name.
+    base, suite = argv[0:2]
+
+    target_suite_name = options.find('Target-Suite') or suite
+
+    print("Importing packages from {0}/dists/{1} to {2}".format(base, suite, target_suite_name))
+    with daklib.archive.ArchiveTransaction() as transaction:
+        target_suite = daklib.dbconn.get_suite(target_suite_name, transaction.session)
+        if target_suite is None:
+            daklib.utils.fubar("Target suite '{0}' is unknown.".format(target_suite_name))
+
+        release = daklib.import_repository.obtain_release(base, suite, keyring)
+        target_sources, target_binaries = get_packages_in_suite(target_suite)
+
+        if 'Architectures' in options:
+            architectures = options['Architectures'].split(',')
+        else:
+            architectures = ['all'] + release.architectures()
+
+        if 'Components' in options:
+            components = options['Components'].split(',')
+        else:
+            components = release.components()
+
+        # TODO: Clean this up...
+
+        # (source, version) -> Sources entry of Extra-Source-Only packages
+        extra_sources = dict()
+        # (source, version) -> set of component names listing that entry
+        extra_sources_comp = defaultdict(set)
+        for c in components:
+            component = daklib.dbconn.get_component(c, transaction.session)
+            print("Processing {0}/source...".format(c))
+            sources = release.sources(c)
+            for entry in sources:
+                if entry.get('Extra-Source-Only', 'no') == 'yes':
+                    # Remember package, we might need to import it later.
+                    key = (entry['Package'], entry['Version'])
+                    extra_sources[key] = entry
+                    extra_sources_comp[key].add(c)
+                    continue
+                if not entry_in_packages(entry, target_sources) or entry_is_newer(entry, target_sources):
+                    print("Importing {0}={1}".format(entry['Package'], entry['Version']))
+                    daklib.import_repository.import_source_to_suite(base, entry, transaction, target_suite, component)
+                    #transaction.commit()
+
+        for c in components:
+            component = daklib.dbconn.get_component(c, transaction.session)
+            for architecture in architectures:
+                print("Processing {0}/{1}...".format(c, architecture))
+                packages = release.packages(c, architecture)
+                for entry in packages:
+                    if not entry_in_packages(entry, target_binaries) or entry_is_newer(entry, target_binaries):
+                        print("Importing {0}={1} ({2})".format(entry['Package'], entry['Version'], architecture))
+                        # Import Built-Using sources:
+                        for bu_source, bu_version in daklib.utils.parse_built_using(entry):
+                            if not daklib.import_repository.source_in_archive(bu_source, bu_version, target_suite.archive):
+                                print("Importing extra source {0}={1}".format(bu_source, bu_version))
+                                key = (bu_source, bu_version)
+                                extra_entry = extra_sources.get(key)
+                                if extra_entry is None:
+                                    raise Exception("Extra source {0}={1} referenced by {2}={3} ({4}) not found in source suite.".format(bu_source, bu_version, entry['Package'], entry['Version'], architecture))
+                                extra_components = extra_sources_comp[key]
+                                # Reuse the binary's component only if the
+                                # extra source is actually listed there.
+                                # (Testing against 'components' would always
+                                # be true, as 'c' is taken from that list.)
+                                if c in extra_components:
+                                    extra_component = component
+                                else:
+                                    # TODO: Take preferred components from those listed...
+                                    # e.g. a contrib binary package Built-Using a main source
+                                    raise Exception("Not implemented.")
+                                daklib.import_repository.import_source_to_suite(base, extra_entry, transaction, target_suite, extra_component)
+                        # Import binary:
+                        daklib.import_repository.import_package_to_suite(base, entry, transaction, target_suite, component)
+                        #transaction.commit()
+
+        # Nothing is committed yet (see the commented-out commit calls above).
+        transaction.rollback()
+
+# Run main() only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
diff --git a/daklib/import_repository.py b/daklib/import_repository.py
new file mode 100644 (file)
index 0000000..7880197
--- /dev/null
@@ -0,0 +1,225 @@
+# Copyright (C) 2015, Ansgar Burchardt <ansgar@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import daklib.compress
+import daklib.config
+import daklib.dbconn
+import daklib.gpg
+import daklib.upload
+import daklib.regexes
+
+import apt_pkg
+import os
+import shutil
+import tempfile
+import urllib2
+
+from daklib.dbconn import DBSource, PoolFile
+from sqlalchemy.orm import object_session
+
+# Hmm, maybe use APT directly for all of this?
+
+# Names of the checksum fields handed to daklib.upload.parse_file_list()
+# when the file list of a Release file is parsed (see Release.__init__).
+_release_hashes_fields = ('MD5Sum', 'SHA1', 'SHA256')
+
+class Release(object):
+    """Accessors for the fields and index files of one suite's Release data
+
+    'base' and 'suite_name' are stored so that obtain_release_file() can
+    locate index files; 'data' is the text of the Release file and is
+    parsed with apt_pkg.TagSection.
+    """
+
+    def __init__(self, base, suite_name, data):
+        self._base = base
+        self._suite_name = suite_name
+        fields = apt_pkg.TagSection(data)
+        self._dict = fields
+        # Checksums of all files listed in the Release file, keyed by name.
+        self._hashes = daklib.upload.parse_file_list(
+            fields, False, daklib.regexes.re_file_safe_slash, _release_hashes_fields)
+
+    def _index(self, relative_name):
+        """Fetch index file 'relative_name' and return it as apt_pkg.TagFile"""
+        fetched = obtain_release_file(self, relative_name)
+        return apt_pkg.TagFile(fetched.fh())
+
+    def architectures(self):
+        """Return the whitespace-separated Architectures field as a list"""
+        return self._dict['Architectures'].split()
+
+    def components(self):
+        """Return the whitespace-separated Components field as a list"""
+        return self._dict['Components'].split()
+
+    def packages(self, component, architecture):
+        """Return the Packages index for 'component' and 'architecture'"""
+        return self._index('{0}/binary-{1}/Packages'.format(component, architecture))
+
+    def sources(self, component):
+        """Return the Sources index for 'component'"""
+        return self._index('{0}/source/Sources'.format(component))
+
+    def suite(self):
+        """Return the Suite field"""
+        return self._dict['Suite']
+
+    def codename(self):
+        """Return the Codename field"""
+        return self._dict['Codename']
+
+    # TODO: Handle Date/Valid-Until to make sure we import
+    # a newer version than before
+
+class File(object):
+    """Holder for a named temporary file created below Dir::TempPath"""
+
+    def __init__(self):
+        temp_dir = daklib.config.Config()['Dir::TempPath']
+        self._tmp = tempfile.NamedTemporaryFile(dir=temp_dir)
+
+    def fh(self):
+        """Return the underlying file object, rewound to the beginning"""
+        handle = self._tmp
+        handle.seek(0)
+        return handle
+
+    def hashes(self):
+        """Return apt_pkg.Hashes computed over the file's content"""
+        rewound = self.fh()
+        return apt_pkg.Hashes(rewound)
+
+def obtain_file(base, path):
+    """Obtain a file 'path' located below 'base'
+
+    'base' is either a http:// or https:// URL, which is fetched with
+    urllib2 (300 second timeout), or a local directory.  The content is
+    copied unmodified into a new temporary file.
+
+    Returns: daklib.import_repository.File
+
+    Note: return type can still change
+    """
+    fn = '{0}/{1}'.format(base, path)
+    tmp = File()
+    if fn.startswith(('http://', 'https://')):
+        fh = urllib2.urlopen(fn, timeout=300)
+        # Close the connection even if copying fails half-way.
+        try:
+            shutil.copyfileobj(fh, tmp._tmp)
+        finally:
+            fh.close()
+    else:
+        # Index files and packages are binary data: do not open in text mode.
+        with open(fn, 'rb') as fh:
+            shutil.copyfileobj(fh, tmp._tmp)
+    return tmp
+
+def obtain_release(base, suite_name, keyring, fingerprint=None):
+    """Obtain release information
+
+    Downloads dists/<suite_name>/InRelease below 'base', passes it together
+    with 'keyring' to daklib.gpg.SignedFile and builds a Release object
+    from the signed contents.  'fingerprint' is accepted but not used yet.
+
+    Raises an Exception if 'suite_name' matches neither the Suite nor the
+    Codename field.
+
+    Returns: daklib.import_repository.Release
+    """
+    inrelease = obtain_file(base, 'dists/{0}/InRelease'.format(suite_name))
+    raw = inrelease.fh().read()
+    signed = daklib.gpg.SignedFile(raw, [keyring])
+    release = Release(base, suite_name, signed.contents)
+
+    # Accept the suite when either its name or its codename was requested.
+    if release.suite() == suite_name or release.codename() == suite_name:
+        return release
+    raise Exception("Suite {0} doesn't match suite or codename from Release file.".format(suite_name))
+
+# Filename extensions tried by obtain_release_file(), in this order; the
+# first variant that is listed in the Release file is the one downloaded.
+_compressions = ('.xz', '.gz', '.bz2')
+
+def obtain_release_file(release, filename):
+    """Obtain file referenced from Release
+
+    'filename' itself must be listed in the Release file, otherwise an
+    IOError is raised.  If a compressed variant is listed as well, that one
+    is downloaded, checked against the checksums from the Release file and
+    decompressed into a second temporary file.
+
+    Returns: daklib.import_repository.File
+    """
+    listed = release._hashes
+    if filename not in listed:
+        raise IOError("File {0} not referenced in Release".format(filename))
+
+    # First listed compressed variant, in the order given by _compressions.
+    variant = next((filename + ext for ext in _compressions
+                    if filename + ext in listed), None)
+    compressed = variant is not None
+    if compressed:
+        filename = variant
+
+    # Obtain file and check hashes
+    location = 'dists/{0}/{1}'.format(release._suite_name, filename)
+    downloaded = obtain_file(release._base, location)
+    listed[filename].check_fh(downloaded.fh())
+
+    if not compressed:
+        return downloaded
+
+    unpacked = File()
+    daklib.compress.decompress(downloaded.fh(), unpacked.fh(), filename)
+    return unpacked
+
+def import_source_to_archive(base, entry, transaction, archive, component):
+    """Import source package described by 'entry' into the given 'archive' and 'component'
+
+    'entry' needs to be a dict-like object with at least the following
+    keys as used in a Sources index: Directory, Files, Checksums-Sha1,
+    Checksums-Sha256
+
+    Every file listed in 'entry' is fetched from below 'base' into a
+    temporary file and checked against the listed checksums before the
+    source package is installed via 'transaction'.
+
+    Raises an Exception if the Directory field does not match
+    daklib.regexes.re_file_safe_slash.
+
+    Return: daklib.dbconn.DBSource
+
+    """
+    # Obtain and verify files
+    if not daklib.regexes.re_file_safe_slash.match(entry['Directory']):
+        raise Exception("Unsafe path in Directory field")
+    hashed_files = daklib.upload.parse_file_list(entry, False)
+    files = []
+    for f in hashed_files.values():
+        path = os.path.join(entry['Directory'], f.filename)
+        tmp = obtain_file(base, path)
+        f.check_fh(tmp.fh())
+        # Keep a reference to the File object until this function returns.
+        # NOTE(review): presumably needed so that the NamedTemporaryFile
+        # behind it is not removed before the install below - confirm.
+        files.append(tmp)
+        # The file is stored under a temporary name: remember that name as
+        # input_filename, and the temporary directory it is located in.
+        directory, f.input_filename = os.path.split(tmp.fh().name)
+
+    # Inject files into archive
+    # NOTE(review): 'directory' is only bound inside the loop above, so an
+    # entry without any listed file would fail here with a NameError.
+    source = daklib.upload.Source(directory, hashed_files.values(), [], require_signature=False)
+    # TODO: ugly hack!
+    # Copy the temporary input filenames onto the entries in source.files;
+    # the .dsc entry is skipped.
+    for f in hashed_files.keys():
+        if f.endswith('.dsc'):
+            continue
+        source.files[f].input_filename = hashed_files[f].input_filename
+
+    # TODO: allow changed_by to be NULL
+    changed_by = source.dsc['Maintainer']
+    db_changed_by = daklib.dbconn.get_or_set_maintainer(changed_by, transaction.session)
+    db_source = transaction.install_source_to_archive(directory, source, archive, component, db_changed_by)
+
+    return db_source
+
+def import_package_to_suite(base, entry, transaction, suite, component):
+    """Import binary package described by 'entry' into the given 'suite' and 'component'
+
+    'entry' needs to be a dict-like object with at least the following
+    keys as used in a Packages index: Filename, Size, MD5sum, SHA1,
+    SHA256
+
+    The package is fetched from below 'base' into a temporary file and
+    checked against the size and checksums from 'entry' before it is
+    installed; the transaction is flushed afterwards.
+
+    Returns: daklib.dbconn.DBBinary
+    """
+    # Obtain and verify file
+    remote_path = entry['Filename']
+    download = obtain_file(base, remote_path)
+    temp_dir, temp_name = os.path.split(download.fh().name)
+    expected = daklib.upload.HashedFile(
+        os.path.basename(remote_path),
+        long(entry['Size']),
+        entry['MD5sum'],
+        entry['SHA1'],
+        entry['SHA256'],
+        input_filename=temp_name)
+    expected.check_fh(download.fh())
+
+    # Inject file into archive
+    package = daklib.upload.Binary(temp_dir, expected)
+    installed = transaction.install_binary(temp_dir, package, suite, component)
+    transaction.flush()
+
+    return installed
+
+def import_source_to_suite(base, entry, transaction, suite, component):
+    """Import source package described by 'entry' into the given 'suite' and 'component'
+
+    'entry' needs to be a dict-like object with at least the following
+    keys as used in a Sources index: Directory, Files, Checksums-Sha1,
+    Checksums-Sha256
+
+    The source is first imported into the archive of 'suite' (see
+    import_source_to_archive), then added to the suite itself, and the
+    transaction is flushed.
+
+    Returns: daklib.dbconn.DBSource
+    """
+    source = import_source_to_archive(base, entry, transaction, suite.archive, component)
+    source.suites.append(suite)
+    transaction.flush()
+
+    # Hand the imported source back to the caller, as documented and as
+    # import_package_to_suite does for binaries.
+    return source
+
+def source_in_archive(source, version, archive, component=None):
+    """Check that source package 'source' with version 'version' exists in 'archive',
+    with an optional check for the given component 'component'.
+
+    The database session is taken from 'archive' itself.
+
+    @type source: str
+    @type version: str
+    @type archive: daklib.dbconn.Archive
+    @type component: daklib.dbconn.Component or None
+    @rtype: boolean
+
+    Note: This should probably be moved somewhere else
+    """
+    session = object_session(archive)
+
+    # Build the query step by step: match name and version, join the pool
+    # file and its archive locations, then restrict to the given archive.
+    query = session.query(DBSource).filter_by(source=source, version=version)
+    query = query.join(DBSource.poolfile).join(PoolFile.archives)
+    query = query.filter_by(archive=archive)
+    if component is not None:
+        query = query.filter_by(component=component)
+
+    return session.query(query.exists()).scalar()