From: Mark Hymers Date: Thu, 24 Mar 2011 18:26:33 +0000 (+0000) Subject: Merge remote branch 'mhy/g-r' into merge X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=b4022bd29f8d8b8b6923c943071f7e9849c9d244;hp=c70c74dc4e298a7dd2cef8d11d82e1a9d281546d;p=dak.git Merge remote branch 'mhy/g-r' into merge Signed-off-by: Mark Hymers --- diff --git a/config/debian/cron.unchecked b/config/debian/cron.unchecked index 10bd930c..9a77fc4d 100755 --- a/config/debian/cron.unchecked +++ b/config/debian/cron.unchecked @@ -106,5 +106,6 @@ if [ ! -z "$changes" ]; then do_dists fi -dak contents -l 10000 binary-scan +dak contents -l 10000 scan-binary +dak contents -l 1000 scan-source pg_timestamp postunchecked diff --git a/dak/dakdb/update51.py b/dak/dakdb/update51.py new file mode 100755 index 00000000..2aea858e --- /dev/null +++ b/dak/dakdb/update51.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# coding=utf8 + +""" +Fix table for source contents. + +@contact: Debian FTP Master +@copyright: 2011 Torsten Werner +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +import psycopg2 +from daklib.dak_exceptions import DBUpdateError +from socket import gethostname; + +################################################################################ +def do_update(self): + """ + Fix table for source contents. + """ + print __doc__ + try: + c = self.db.cursor() + + c.execute(""" + CREATE INDEX ON src_contents (source_id)""") + c.execute(""" + ALTER TABLE src_contents ADD COLUMN created TIMESTAMP WITH TIME ZONE + NOT NULL DEFAULT now()""") + c.execute(""" + ALTER TABLE src_contents ADD COLUMN modified TIMESTAMP WITH TIME ZONE + NOT NULL DEFAULT now()""") + c.execute(""" + CREATE TRIGGER modified_src_contents BEFORE UPDATE ON src_contents + FOR EACH ROW EXECUTE PROCEDURE tfunc_set_modified()""") + + c.execute("UPDATE config SET value = '51' WHERE name = 'db_revision'") + self.db.commit() + + except psycopg2.ProgrammingError, msg: + self.db.rollback() + raise DBUpdateError, 'Unable to apply sick update 51, rollback issued. Error message : %s' % (str(msg)) diff --git a/dak/update_db.py b/dak/update_db.py index 86f5c2e5..97248202 100755 --- a/dak/update_db.py +++ b/dak/update_db.py @@ -46,7 +46,7 @@ from daklib.daklog import Logger ################################################################################ Cnf = None -required_database_schema = 50 +required_database_schema = 51 ################################################################################ diff --git a/daklib/contents.py b/daklib/contents.py index 2a29b2e5..a5950524 100755 --- a/daklib/contents.py +++ b/daklib/contents.py @@ -34,17 +34,13 @@ from subprocess import Popen, PIPE, check_call from tempfile import mkdtemp import os.path +import signal -class ContentsWriter(object): +class BinaryContentsWriter(object): ''' - ContentsWriter writes the Contents-$arch.gz files. + BinaryContentsWriter writes the Contents-$arch.gz files. ''' def __init__(self, suite, architecture, overridetype, component = None): - ''' - The constructor clones its arguments into a new session object to make - sure that the new ContentsWriter object can be executed in a different - thread. - ''' self.suite = suite self.architecture = architecture self.overridetype = overridetype @@ -193,6 +189,134 @@ select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package os.chmod(temp_filename, 0664) os.rename(temp_filename, final_filename) + +class SourceContentsWriter(object): + ''' + SourceContentsWriter writes the Contents-source.gz files. + ''' + def __init__(self, suite, component): + self.suite = suite + self.component = component + self.session = suite.session() + + def query(self): + ''' + Returns a query object that is doing most of the work. + ''' + params = { + 'suite_id': self.suite.suite_id, + 'component_id': self.component.component_id, + } + + sql = ''' +create temp table newest_sources ( + id integer primary key, + source text); + +create index sources_binaries_by_source on newest_sources (source); + +insert into newest_sources (id, source) + select distinct on (source) s.id, s.source from source s + join files f on f.id = s.file + join location l on l.id = f.location + where s.id in (select source from src_associations where suite = :suite_id) + and l.component = :component_id + order by source, version desc; + +select sc.file, string_agg(s.source, ',' order by s.source) as pkglist + from newest_sources s, src_contents sc + where s.id = sc.source_id group by sc.file''' + + return self.session.query("file", "pkglist").from_statement(sql). \ + params(params) + + def formatline(self, filename, package_list): + ''' + Returns a formatted string for the filename argument. + ''' + return "%s\t%s\n" % (filename, package_list) + + def fetch(self): + ''' + Yields a new line of the Contents-source.gz file in filename order. + ''' + for filename, package_list in self.query().yield_per(100): + yield self.formatline(filename, package_list) + # end transaction to return connection to pool + self.session.rollback() + + def get_list(self): + ''' + Returns a list of lines for the Contents-source.gz file. + ''' + return [item for item in self.fetch()] + + def output_filename(self): + ''' + Returns the name of the output file. + ''' + values = { + 'root': Config()['Dir::Root'], + 'suite': self.suite.suite_name, + 'component': self.component.component_name + } + return "%(root)s/dists/%(suite)s/%(component)s/Contents-source.gz" % values + + def write_file(self): + ''' + Write the output file. + ''' + command = ['gzip', '--rsyncable'] + final_filename = self.output_filename() + temp_filename = final_filename + '.new' + output_file = open(temp_filename, 'w') + gzip = Popen(command, stdin = PIPE, stdout = output_file) + for item in self.fetch(): + gzip.stdin.write(item) + gzip.stdin.close() + output_file.close() + gzip.wait() + os.chmod(temp_filename, 0664) + os.rename(temp_filename, final_filename) + + +def binary_helper(suite_id, arch_id, overridetype_id, component_id = None): + ''' + This function is called in a new subprocess and multiprocessing wants a top + level function. + ''' + session = DBConn().session() + suite = Suite.get(suite_id, session) + architecture = Architecture.get(arch_id, session) + overridetype = OverrideType.get(overridetype_id, session) + log_message = [suite.suite_name, architecture.arch_string, overridetype.overridetype] + if component_id is None: + component = None + else: + component = Component.get(component_id, session) + log_message.append(component.component_name) + contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component) + contents_writer.write_file() + return log_message + +def source_helper(suite_id, component_id): + ''' + This function is called in a new subprocess and multiprocessing wants a top + level function. + ''' + session = DBConn().session() + suite = Suite.get(suite_id, session) + component = Component.get(component_id, session) + log_message = [suite.suite_name, 'source', component.component_name] + contents_writer = SourceContentsWriter(suite, component) + contents_writer.write_file() + return log_message + +class ContentsWriter(object): + ''' + Loop over all suites, architectures, overridetypes, and components to write + all contents files. + ''' @classmethod def log_result(class_, result): ''' @@ -217,42 +341,32 @@ select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package deb_id = get_override_type('deb', session).overridetype_id udeb_id = get_override_type('udeb', session).overridetype_id main_id = get_component('main', session).component_id + contrib_id = get_component('contrib', session).component_id non_free_id = get_component('non-free', session).component_id pool = Pool() for suite in suite_query: suite_id = suite.suite_id + # handle source packages + pool.apply_async(source_helper, (suite_id, main_id), + callback = class_.log_result) + pool.apply_async(source_helper, (suite_id, contrib_id), + callback = class_.log_result) + pool.apply_async(source_helper, (suite_id, non_free_id), + callback = class_.log_result) for architecture in suite.get_architectures(skipsrc = True, skipall = True): arch_id = architecture.arch_id # handle 'deb' packages - pool.apply_async(generate_helper, (suite_id, arch_id, deb_id), \ + pool.apply_async(binary_helper, (suite_id, arch_id, deb_id), \ callback = class_.log_result) # handle 'udeb' packages for 'main' and 'non-free' - pool.apply_async(generate_helper, (suite_id, arch_id, udeb_id, main_id), \ + pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, main_id), \ callback = class_.log_result) - pool.apply_async(generate_helper, (suite_id, arch_id, udeb_id, non_free_id), \ + pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, non_free_id), \ callback = class_.log_result) pool.close() pool.join() session.close() -def generate_helper(suite_id, arch_id, overridetype_id, component_id = None): - ''' - This function is called in a new subprocess. - ''' - session = DBConn().session() - suite = Suite.get(suite_id, session) - architecture = Architecture.get(arch_id, session) - overridetype = OverrideType.get(overridetype_id, session) - log_message = [suite.suite_name, architecture.arch_string, overridetype.overridetype] - if component_id is None: - component = None - else: - component = Component.get(component_id, session) - log_message.append(component.component_name) - contents_writer = ContentsWriter(suite, architecture, overridetype, component) - contents_writer.write_file() - return log_message - class BinaryContentsScanner(object): ''' @@ -313,6 +427,11 @@ def binary_scan_helper(binary_id): scanner.scan() +def subprocess_setup(): + # Python installs a SIGPIPE handler by default. This is usually not what + # non-Python subprocesses expect. + signal.signal(signal.SIGPIPE, signal.SIG_DFL) + class UnpackedSource(object): ''' UnpackedSource extracts a source package into a temporary location and @@ -323,12 +442,9 @@ class UnpackedSource(object): The dscfilename is a name of a DSC file that will be extracted. ''' self.root_directory = os.path.join(mkdtemp(), 'root') - command = ('dpkg-source', '--no-copy', '--no-check', '-x', dscfilename, - self.root_directory) - # dpkg-source does not have a --quiet option - devnull = open(os.devnull, 'w') - check_call(command, stdout = devnull, stderr = devnull) - devnull.close() + command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x', + dscfilename, self.root_directory) + check_call(command, preexec_fn = subprocess_setup) def get_root_directory(self): ''' diff --git a/daklib/lists.py b/daklib/lists.py index 13555e2b..a8d3cedb 100755 --- a/daklib/lists.py +++ b/daklib/lists.py @@ -24,6 +24,8 @@ Helper functions for list generating commands (Packages, Sources). ################################################################################ +from dbconn import get_architecture + def fetch(query, args, session): for (id, path, filename) in session.execute(query, args).fetchall(): yield (id, path + filename) @@ -77,7 +79,7 @@ INSERT INTO b_candidates (id, source, file, architecture) FROM binaries b JOIN bin_associations ba ON b.id = ba.bin WHERE b.type = :type AND ba.suite = :suite AND - b.architecture IN (2, :architecture) %s; + b.architecture IN (:arch_all, :architecture) %s; CREATE TEMP TABLE gf_candidates ( id integer, @@ -98,17 +100,17 @@ INSERT INTO gf_candidates (id, filename, path, architecture, src, source) WITH arch_any AS (SELECT id, path, filename FROM gf_candidates - WHERE architecture > 2), + WHERE architecture <> :arch_all), arch_all_with_any AS (SELECT id, path, filename FROM gf_candidates - WHERE architecture = 2 AND - src IN (SELECT src FROM gf_candidates WHERE architecture > 2)), + WHERE architecture = :arch_all AND + src IN (SELECT src FROM gf_candidates WHERE architecture <> :arch_all)), arch_all_without_any AS (SELECT id, path, filename FROM gf_candidates - WHERE architecture = 2 AND - source NOT IN (SELECT DISTINCT source FROM gf_candidates WHERE architecture > 2)), + WHERE architecture = :arch_all AND + source NOT IN (SELECT DISTINCT source FROM gf_candidates WHERE architecture <> :arch_all)), filelist AS (SELECT * FROM arch_any @@ -122,6 +124,7 @@ WITH arch_any AS args = { 'suite': suite.suite_id, 'component': component.component_id, 'architecture': architecture.arch_id, + 'arch_all': get_architecture('all', session).arch_id, 'type': type } return fetch(query, args, session) diff --git a/tests/dbtest_contents.py b/tests/dbtest_contents.py index e3128161..0f23053c 100755 --- a/tests/dbtest_contents.py +++ b/tests/dbtest_contents.py @@ -3,8 +3,8 @@ from db_test import DBDakTestCase, fixture from daklib.dbconn import * -from daklib.contents import ContentsWriter, BinaryContentsScanner, \ - UnpackedSource, SourceContentsScanner +from daklib.contents import BinaryContentsWriter, BinaryContentsScanner, \ + UnpackedSource, SourceContentsScanner, SourceContentsWriter from os.path import normpath from sqlalchemy.exc import FlushError, IntegrityError @@ -131,9 +131,9 @@ class ContentsTestCase(DBDakTestCase): self.assertEqual(self.override['hello_sid_main_udeb'], \ self.otype['udeb'].overrides.one()) - def test_contentswriter(self): + def test_binarycontentswriter(self): ''' - Test the ContentsWriter class. + Test the BinaryContentsWriter class. ''' self.setup_suites() self.setup_architectures() @@ -142,7 +142,7 @@ class ContentsTestCase(DBDakTestCase): self.setup_overrides() self.binary['hello_2.2-1_i386'].contents.append(BinContents(file = '/usr/bin/hello')) self.session.commit() - cw = ContentsWriter(self.suite['squeeze'], self.arch['i386'], self.otype['deb']) + cw = BinaryContentsWriter(self.suite['squeeze'], self.arch['i386'], self.otype['deb']) self.assertEqual(['/usr/bin/hello python/hello\n'], \ cw.get_list()) # test formatline and sort order @@ -151,7 +151,7 @@ class ContentsTestCase(DBDakTestCase): # test output_filename self.assertEqual('tests/fixtures/ftp/dists/squeeze/Contents-i386.gz', \ normpath(cw.output_filename())) - cw = ContentsWriter(self.suite['squeeze'], self.arch['i386'], \ + cw = BinaryContentsWriter(self.suite['squeeze'], self.arch['i386'], \ self.otype['udeb'], self.comp['main']) self.assertEqual('tests/fixtures/ftp/dists/squeeze/main/Contents-i386.gz', \ normpath(cw.output_filename())) @@ -201,6 +201,22 @@ class ContentsTestCase(DBDakTestCase): SourceContentsScanner(source.source_id).scan() self.assertTrue(source.contents.count() > 0) + def test_sourcecontentswriter(self): + ''' + Test the SourceContentsWriter class. + ''' + self.setup_sources() + self.session.flush() + # remove newer package from sid because it disturbs the test + self.source['hello_2.2-2'].suites = [] + self.session.commit() + source = self.source['hello_2.2-1'] + SourceContentsScanner(source.source_id).scan() + cw = SourceContentsWriter(source.suites[0], source.poolfile.location.component) + result = cw.get_list() + self.assertEqual(8, len(result)) + self.assertTrue('debian/changelog\thello\n' in result) + def classes_to_clean(self): return [Override, Suite, BinContents, DBBinary, DBSource, Architecture, Section, \ OverrideType, Maintainer, Component, Priority, PoolFile]