From: Joerg Jaspert Date: Tue, 27 Oct 2009 14:41:19 +0000 (+0100) Subject: Merge commit 'lamby/deb-src-3.0-sqla' into merge X-Git-Url: https://git.decadent.org.uk/gitweb/?a=commitdiff_plain;h=28f3e6f0659504214d482d682f4b2b4791872cc6;hp=760a440bb41fcf153c0c0a80aab6e0b71fab03b7;p=dak.git Merge commit 'lamby/deb-src-3.0-sqla' into merge * commit 'lamby/deb-src-3.0-sqla': (29 commits) Drop lzma support. Only enable not-"1.0" formats on t-p-u and unstable and experimental. Reject uploads that contain a native tarball and a diff. dak heresy Fix reference to cls.requires. Add unittest.main() to test_regexes. Add shebang to regex tests. Simplify rejecting logic Avoid if/elif/elif.. with lookup table. Use loop to check for illegal duplication of file types in .dsc Use a collections.defaultdict to avoid boilerplate definitions. Correct grammar when rejecting a package with insufficient files. Require source formats to have a name and interpolate it when returning msgs Compare with None using "is", not "==" Remove has_ prefix inside srcformats. Don't pass dsc_filename to reject_msgs; prepend it in check_dsc_files. Move rejection message generation into srcformats Add regex to match .dsc "Format: lala" value and compile it on creation. Add SourceFormat class and track definitions with metaclass Make dbconn.get_suite_src_formats use @session_wrapper. ... Signed-off-by: Joerg Jaspert --- diff --git a/dak/check_archive.py b/dak/check_archive.py index 6ca84c69..6e3c795b 100755 --- a/dak/check_archive.py +++ b/dak/check_archive.py @@ -68,7 +68,7 @@ The following MODEs are available: missing-overrides - check for missing overrides source-in-one-dir - ensure the source for each package is in one directory timestamps - check for future timestamps in .deb's - tar-gz-in-dsc - ensure each .dsc lists a .tar.gz file + files-in-dsc - ensure each .dsc references appropriate Files validate-indices - ensure files mentioned in Packages & Sources exist files-not-symlinks - check files in the database aren't symlinks validate-builddeps - validate build-dependencies of .dsc files in the archive @@ -320,9 +320,10 @@ def check_timestamps(): ################################################################################ -def check_missing_tar_gz_in_dsc(): +def check_files_in_dsc(): """ - Ensure each .dsc lists a .tar.gz file + Ensure each .dsc lists appropriate files in its Files field (according + to the format announced in its Format field). """ count = 0 @@ -343,19 +344,11 @@ def check_missing_tar_gz_in_dsc(): except: utils.fubar("error parsing .dsc file '%s'." % (filename)) - dsc_files = utils.build_file_list(dsc, is_a_dsc=1) - has_tar = 0 + reasons = utils.check_dsc_files(filename, dsc) + for r in reasons: + utils.warn(r) - for f in dsc_files.keys(): - m = re_issource.match(f) - if not m: - utils.fubar("%s not recognised as source." % (f)) - ftype = m.group(3) - if ftype == "orig.tar.gz" or ftype == "tar.gz": - has_tar = 1 - - if not has_tar: - utils.warn("%s has no .tar.gz in the .dsc file." % (f)) + if len(reasons) > 0: count += 1 if count: @@ -526,8 +519,8 @@ def main (): check_source_in_one_dir() elif mode == "timestamps": check_timestamps() - elif mode == "tar-gz-in-dsc": - check_missing_tar_gz_in_dsc() + elif mode == "files-in-dsc": + check_files_in_dsc() elif mode == "validate-indices": check_indices_files_exist() elif mode == "files-not-symlinks": diff --git a/dak/dakdb/update15.py b/dak/dakdb/update15.py new file mode 100644 index 00000000..84f1f27b --- /dev/null +++ b/dak/dakdb/update15.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +# coding=utf8 + +""" +Adding table for allowed source formats + +@contact: Debian FTP Master +@copyright: 2009 Raphael Hertzog +@license: GNU General Public License version 2 or later +""" + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + + +################################################################################ + +import psycopg2 +import time +from daklib.dak_exceptions import DBUpdateError + +################################################################################ + +def do_update(self): + print "Adding tables listing allowed source formats" + + try: + c = self.db.cursor() + c.execute(""" + CREATE TABLE src_format ( + id SERIAL PRIMARY KEY, + format_name TEXT NOT NULL, + unique (format_name) + ) + """) + c.execute("INSERT INTO src_format (format_name) VALUES('1.0')") + c.execute("INSERT INTO src_format (format_name) VALUES('3.0 (quilt)')") + c.execute("INSERT INTO src_format (format_name) VALUES('3.0 (native)')") + + c.execute(""" + CREATE TABLE suite_src_formats ( + suite INT4 NOT NULL, + src_format INT4 NOT NULL, + unique (suite, src_format) + ) + """) + + print "Authorize format 1.0 on all suites by default" + suites = c.fetchall() + for s in suites: + c.execute("INSERT INTO suite_src_formats (suite, src_format) VALUES('%s', '%s')" % (s[0], '1.0')) + + print "Authorize all other formats on tpu, unstable & experimental by default" + c.execute("SELECT id FROM suite WHERE suite_name IN ('testing-proposed-updates', 'unstable', 'experimental')") + suites = c.fetchall() + c.execute("SELECT id FROM src_format WHERE format_name != '1.0'") + formats = c.fetchall() + for s in suites: + for f in formats: + c.execute("INSERT INTO suite_src_formats (suite, src_format) VALUES('%s', '%s')" % (s[0], f[0])) + + c.execute("UPDATE config SET value = '15' WHERE name = 'db_revision'") + self.db.commit() + + except psycopg2.ProgrammingError, msg: + self.db.rollback() + raise DBUpdateError, "Unable to apply source format update 15, rollback issued. Error message : %s" % (str(msg)) diff --git a/dak/process_accepted.py b/dak/process_accepted.py index d7db1172..51c6a5d7 100755 --- a/dak/process_accepted.py +++ b/dak/process_accepted.py @@ -210,7 +210,7 @@ def add_dsc_to_db(u, filename, session): df = DSCFile() df.source_id = source.source_id - # If the .orig.tar.gz is already in the pool, it's + # If the .orig tarball is already in the pool, it's # files id is stored in dsc_files by check_dsc(). files_id = dentry.get("files id", None) @@ -353,32 +353,37 @@ def install(u, session, log_urgency=True): add_deb_to_db(u, newfile, session) # If this is a sourceful diff only upload that is moving - # cross-component we need to copy the .orig.tar.gz into the new + # cross-component we need to copy the .orig files into the new # component too for the same reasons as above. - # - if u.pkg.changes["architecture"].has_key("source") and u.pkg.orig_tar_id and \ - u.pkg.orig_tar_location != dsc_location_id: - - oldf = get_poolfile_by_id(u.pkg.orig_tar_id, session) - old_filename = os.path.join(oldf.location.path, oldf.filename) - old_dat = {'size': oldf.filesize, 'md5sum': oldf.md5sum, - 'sha1sum': oldf.sha1sum, 'sha256sum': oldf.sha256sum} - - new_filename = os.path.join(utils.poolify(u.pkg.changes["source"], dsc_component), os.path.basename(old_filename)) - - # TODO: Care about size/md5sum collisions etc - (found, newf) = check_poolfile(new_filename, file_size, file_md5sum, dsc_location_id, session) - - if newf is None: - utils.copy(old_filename, os.path.join(cnf["Dir::Pool"], new_filename)) - newf = add_poolfile(new_filename, old_dat, dsc_location_id, session) - - # TODO: Check that there's only 1 here - source = get_sources_from_name(u.pkg.changes["source"], u.pkg.changes["version"])[0] - dscf = get_dscfiles(source_id = source.source_id, poolfile_id=u.pkg.orig_tar_id, session=session)[0] - dscf.poolfile_id = newf.file_id - session.add(dscf) - session.flush() + if u.pkg.changes["architecture"].has_key("source"): + for orig_file in u.pkg.orig_files.keys(): + if not u.pkg.orig_files[orig_file].has_key("id"): + continue # Skip if it's not in the pool + orig_file_id = u.pkg.orig_files[orig_file]["id"] + if u.pkg.orig_files[orig_file]["location"] == dsc_location_id: + continue # Skip if the location didn't change + + # Do the move + oldf = get_poolfile_by_id(orig_file_id, session) + old_filename = os.path.join(oldf.location.path, oldf.filename) + old_dat = {'size': oldf.filesize, 'md5sum': oldf.md5sum, + 'sha1sum': oldf.sha1sum, 'sha256sum': oldf.sha256sum} + + new_filename = os.path.join(utils.poolify(u.pkg.changes["source"], dsc_component), os.path.basename(old_filename)) + + # TODO: Care about size/md5sum collisions etc + (found, newf) = check_poolfile(new_filename, file_size, file_md5sum, dsc_location_id, session) + + if newf is None: + utils.copy(old_filename, os.path.join(cnf["Dir::Pool"], new_filename)) + newf = add_poolfile(new_filename, old_dat, dsc_location_id, session) + + # TODO: Check that there's only 1 here + source = get_sources_from_name(u.pkg.changes["source"], u.pkg.changes["version"])[0] + dscf = get_dscfiles(source_id=source.source_id, poolfile_id=orig_file_id, session=session)[0] + dscf.poolfile_id = newf.file_id + session.add(dscf) + session.flush() # Install the files into the pool for newfile, entry in u.pkg.files.items(): @@ -452,15 +457,17 @@ def install(u, session, log_urgency=True): os.unlink(dest) os.symlink(src, dest) - # Update last_used on any non-upload .orig.tar.gz symlink - if u.pkg.orig_tar_id: + # Update last_used on any non-uploaded .orig symlink + for orig_file in u.pkg.orig_files.keys(): # Determine the .orig.tar.gz file name - for dsc_file in u.pkg.dsc_files.keys(): - if dsc_file.endswith(".orig.tar.gz"): - u.pkg.orig_tar_gz = os.path.join(dest_dir, dsc_file) + if not u.pkg.orig_files[orig_file].has_key("id"): + continue # Skip files not in the pool + # XXX: do we really want to update the orig_files dict here + # instead of using a temporary variable? + u.pkg.orig_files[orig_file]["path"] = os.path.join(dest_dir, orig_file) # Remove it from the list of packages for later processing by apt-ftparchive - qb = get_queue_build(u.pkg.orig_tar_gz, suite.suite_id, session) + qb = get_queue_build(u.pkg.orig_files[orig_file]["path"], suite.suite_id, session) if qb: qb.in_queue = False qb.last_used = now_date diff --git a/dak/show_new.py b/dak/show_new.py index be3d5114..b21efcce 100755 --- a/dak/show_new.py +++ b/dak/show_new.py @@ -32,6 +32,7 @@ import examine_package from daklib.queue import determine_new, check_valid from daklib import utils +from daklib.regexes import re_source_ext # Globals Cnf = None @@ -160,8 +161,9 @@ def do_pkg(changes_file): filestoexamine = [] for pkg in new.keys(): for fn in new[pkg]["files"]: - if ( c.files[fn].has_key("new") and not - c.files[fn]["type"] in [ "orig.tar.gz", "orig.tar.bz2", "tar.gz", "tar.bz2", "diff.gz", "diff.bz2"] ): + if (c.files[fn].has_key("new") and + (c.files[fn]["type"] == "dsc" or + not re_source_ext.match(c.files[fn]["type"]))): filestoexamine.append(fn) html_header(c.changes["source"], filestoexamine) diff --git a/dak/update_db.py b/dak/update_db.py index 4999af3a..ecf5cd2a 100755 --- a/dak/update_db.py +++ b/dak/update_db.py @@ -44,7 +44,7 @@ from daklib.dak_exceptions import DBUpdateError ################################################################################ Cnf = None -required_database_schema = 14 +required_database_schema = 15 ################################################################################ diff --git a/daklib/changes.py b/daklib/changes.py index 1bb90753..59c7da17 100755 --- a/daklib/changes.py +++ b/daklib/changes.py @@ -76,6 +76,10 @@ CHANGESFIELDS_DSCFILES_OPTIONAL = [ "files id" ] __all__.append('CHANGESFIELDS_DSCFILES_OPTIONAL') +CHANGESFIELDS_ORIGFILES = [ "id", "location" ] + +__all__.append('CHANGESFIELDS_ORIGFILES') + ############################################################################### class Changes(object): @@ -91,10 +95,7 @@ class Changes(object): self.dsc = {} self.files = {} self.dsc_files = {} - - self.orig_tar_id = None - self.orig_tar_location = "" - self.orig_tar_gz = None + self.orig_files = {} def file_summary(self): # changes["distribution"] may not exist in corner cases @@ -189,8 +190,24 @@ class Changes(object): self.files.update(p.load()) self.dsc_files.update(p.load()) - self.orig_tar_id = p.load() - self.orig_tar_location = p.load() + next_obj = p.load() + if type(next_obj) is DictType: + self.pkg.orig_files.update(next_obj) + else: + # Auto-convert old dak files to new format supporting + # multiple tarballs + orig_tar_gz = None + for dsc_file in self.dsc_files.keys(): + if dsc_file.endswith(".orig.tar.gz"): + orig_tar_gz = dsc_file + self.orig_files[orig_tar_gz] = {} + if next_obj != None: + self.orig_files[orig_tar_gz]["id"] = next_obj + next_obj = p.load() + if next_obj != None and next_obj != "": + self.orig_files[orig_tar_gz]["location"] = next_obj + if len(self.orig_files[orig_tar_gz]) == 0: + del self.orig_files[orig_tar_gz] dump_file.close() @@ -240,6 +257,17 @@ class Changes(object): return ret + def sanitised_orig_files(self): + ret = {} + for name, entry in self.orig_files.items(): + ret[name] = {} + # Optional orig_files fields + for i in CHANGESFIELDS_ORIGFILES: + if entry.has_key(i): + ret[name][i] = entry[i] + + return ret + def write_dot_dak(self, dest_dir): """ Dump ourself into a cPickle file. @@ -281,8 +309,7 @@ class Changes(object): p.dump(self.sanitised_dsc()) p.dump(self.sanitised_files()) p.dump(self.sanitised_dsc_files()) - p.dump(self.orig_tar_id) - p.dump(self.orig_tar_location) + p.dump(self.sanitised_orig_files()) dump_file.close() diff --git a/daklib/dbconn.py b/daklib/dbconn.py index 48953f60..2f5fef30 100755 --- a/daklib/dbconn.py +++ b/daklib/dbconn.py @@ -1410,23 +1410,26 @@ class Queue(object): session.add(qb) - # If the .orig.tar.gz is in the pool, create a symlink to - # it (if one doesn't already exist) - if changes.orig_tar_id: - # Determine the .orig.tar.gz file name - for dsc_file in changes.dsc_files.keys(): - if dsc_file.endswith(".orig.tar.gz"): - filename = dsc_file - - dest = os.path.join(dest_dir, filename) + # If the .orig tarballs are in the pool, create a symlink to + # them (if one doesn't already exist) + for dsc_file in changes.dsc_files.keys(): + # Skip all files except orig tarballs + if not re_is_orig_source.match(dsc_file): + continue + # Skip orig files not identified in the pool + if not (changes.orig_files.has_key(dsc_file) and + changes.orig_files[dsc_file].has_key("id")): + continue + orig_file_id = changes.orig_files[dsc_file]["id"] + dest = os.path.join(dest_dir, dsc_file) # If it doesn't exist, create a symlink if not os.path.exists(dest): q = session.execute("SELECT l.path, f.filename FROM location l, files f WHERE f.id = :id and f.location = l.id", - {'id': changes.orig_tar_id}) + {'id': orig_file_id}) res = q.fetchone() if not res: - return "[INTERNAL ERROR] Couldn't find id %s in files table." % (changes.orig_tar_id) + return "[INTERNAL ERROR] Couldn't find id %s in files table." % (orig_file_id) src = os.path.join(res[0], res[1]) os.symlink(src, dest) @@ -1766,6 +1769,17 @@ __all__.append('SrcAssociation') ################################################################################ +class SrcFormat(object): + def __init__(self, *args, **kwargs): + pass + + def __repr__(self): + return '' % (self.format_name) + +__all__.append('SrcFormat') + +################################################################################ + class SrcUploader(object): def __init__(self, *args, **kwargs): pass @@ -1936,6 +1950,42 @@ __all__.append('get_suite_architectures') ################################################################################ +class SuiteSrcFormat(object): + def __init__(self, *args, **kwargs): + pass + + def __repr__(self): + return '' % (self.suite_id, self.src_format_id) + +__all__.append('SuiteSrcFormat') + +@session_wrapper +def get_suite_src_formats(suite, session=None): + """ + Returns list of allowed SrcFormat for C{suite}. + + @type suite: str + @param suite: Suite name to search for + + @type session: Session + @param session: Optional SQL session object (a temporary one will be + generated if not supplied) + + @rtype: list + @return: the list of allowed source formats for I{suite} + """ + + q = session.query(SrcFormat) + q = q.join(SuiteSrcFormat) + q = q.join(Suite).filter_by(suite_name=suite) + q = q.order_by('format_name') + + return q.all() + +__all__.append('get_suite_src_formats') + +################################################################################ + class Uid(object): def __init__(self, *args, **kwargs): pass @@ -2066,9 +2116,11 @@ class DBConn(Singleton): self.tbl_section = Table('section', self.db_meta, autoload=True) self.tbl_source = Table('source', self.db_meta, autoload=True) self.tbl_src_associations = Table('src_associations', self.db_meta, autoload=True) + self.tbl_src_format = Table('src_format', self.db_meta, autoload=True) self.tbl_src_uploaders = Table('src_uploaders', self.db_meta, autoload=True) self.tbl_suite = Table('suite', self.db_meta, autoload=True) self.tbl_suite_architectures = Table('suite_architectures', self.db_meta, autoload=True) + self.tbl_suite_src_formats = Table('suite_src_formats', self.db_meta, autoload=True) self.tbl_uid = Table('uid', self.db_meta, autoload=True) def __setupmappers(self): @@ -2230,6 +2282,10 @@ class DBConn(Singleton): source_id = self.tbl_src_associations.c.source, source = relation(DBSource))) + mapper(SrcFormat, self.tbl_src_format, + properties = dict(src_format_id = self.tbl_src_format.c.id, + format_name = self.tbl_src_format.c.format_name)) + mapper(SrcUploader, self.tbl_src_uploaders, properties = dict(uploader_id = self.tbl_src_uploaders.c.id, source_id = self.tbl_src_uploaders.c.source, @@ -2248,6 +2304,12 @@ class DBConn(Singleton): arch_id = self.tbl_suite_architectures.c.architecture, architecture = relation(Architecture))) + mapper(SuiteSrcFormat, self.tbl_suite_src_formats, + properties = dict(suite_id = self.tbl_suite_src_formats.c.suite, + suite = relation(Suite, backref='suitesrcformats'), + src_format_id = self.tbl_suite_src_formats.c.src_format, + src_format = relation(SrcFormat))) + mapper(Uid, self.tbl_uid, properties = dict(uid_id = self.tbl_uid.c.id, fingerprint = relation(Fingerprint))) diff --git a/daklib/queue.py b/daklib/queue.py index b1383be0..d70e60b1 100755 --- a/daklib/queue.py +++ b/daklib/queue.py @@ -50,7 +50,7 @@ from config import Config from holding import Holding from dbconn import * from summarystats import SummaryStats -from utils import parse_changes +from utils import parse_changes, check_dsc_files from textutils import fix_maintainer from binary import Binary @@ -72,8 +72,8 @@ def get_type(f, session): """ # Determine the type if f.has_key("dbtype"): - file_type = f["dbtype"] - elif f["type"] in [ "orig.tar.gz", "orig.tar.bz2", "tar.gz", "tar.bz2", "diff.gz", "diff.bz2", "dsc" ]: + file_type = file["dbtype"] + elif re_source_ext.match(f["type"]): file_type = "dsc" else: utils.fubar("invalid type (%s) for new. Dazed, confused and sure as heck not continuing." % (file_type)) @@ -713,7 +713,7 @@ class Upload(object): self.rejects.append("%s: changes file doesn't say %s for Source" % (f, entry["package"])) # Ensure the source version matches the version in the .changes file - if entry["type"] == "orig.tar.gz": + if re_is_orig_source.match(f): changes_version = self.pkg.changes["chopversion2"] else: changes_version = self.pkg.changes["chopversion"] @@ -921,7 +921,7 @@ class Upload(object): self.rejects.append("source only uploads are not supported.") ########################################################################### - def check_dsc(self, action=True): + def check_dsc(self, action=True, session=None): """Returns bool indicating whether or not the source changes are valid""" # Ensure there is source to check if not self.pkg.changes["architecture"].has_key("source"): @@ -981,10 +981,11 @@ class Upload(object): if not re_valid_version.match(self.pkg.dsc["version"]): self.rejects.append("%s: invalid version number '%s'." % (dsc_filename, self.pkg.dsc["version"])) - # Bumping the version number of the .dsc breaks extraction by stable's - # dpkg-source. So let's not do that... - if self.pkg.dsc["format"] != "1.0": - self.rejects.append("%s: incompatible 'Format' version produced by a broken version of dpkg-dev 1.9.1{3,4}." % (dsc_filename)) + # Only a limited list of source formats are allowed in each suite + for dist in self.pkg.changes["distribution"].keys(): + allowed = [ x.format_name for x in get_suite_src_formats(dist, session) ] + if self.pkg.dsc["format"] not in allowed: + self.rejects.append("%s: source format '%s' not allowed in %s (accepted: %s) " % (dsc_filename, self.pkg.dsc["format"], dist, ", ".join(allowed))) # Validate the Maintainer field try: @@ -1016,19 +1017,8 @@ class Upload(object): if epochless_dsc_version != self.pkg.files[dsc_filename]["version"]: self.rejects.append("version ('%s') in .dsc does not match version ('%s') in .changes." % (epochless_dsc_version, changes_version)) - # Ensure there is a .tar.gz in the .dsc file - has_tar = False - for f in self.pkg.dsc_files.keys(): - m = re_issource.match(f) - if not m: - self.rejects.append("%s: %s in Files field not recognised as source." % (dsc_filename, f)) - continue - ftype = m.group(3) - if ftype == "orig.tar.gz" or ftype == "tar.gz": - has_tar = True - - if not has_tar: - self.rejects.append("%s: no .tar.gz or .orig.tar.gz in 'Files' field." % (dsc_filename)) + # Ensure the Files field contain only what's expected + self.rejects.extend(check_dsc_files(dsc_filename, self.pkg.dsc, self.pkg.dsc_files)) # Ensure source is newer than existing source in target suites session = DBConn().session() @@ -1065,16 +1055,19 @@ class Upload(object): if not os.path.exists(src): return ftype = m.group(3) - if ftype == "orig.tar.gz" and self.pkg.orig_tar_gz: + if re_is_orig_source.match(f) and pkg.orig_files.has_key(f) and \ + pkg.orig_files[f].has_key("path"): continue dest = os.path.join(os.getcwd(), f) os.symlink(src, dest) - # If the orig.tar.gz is not a part of the upload, create a symlink to the - # existing copy. - if self.pkg.orig_tar_gz: - dest = os.path.join(os.getcwd(), os.path.basename(self.pkg.orig_tar_gz)) - os.symlink(self.pkg.orig_tar_gz, dest) + # If the orig files are not a part of the upload, create symlinks to the + # existing copies. + for orig_file in self.pkg.orig_files.keys(): + if not self.pkg.orig_files[orig_file].has_key("path"): + continue + dest = os.path.join(os.getcwd(), os.path.basename(orig_file)) + os.symlink(self.pkg.orig_files[orig_file]["path"], dest) # Extract the source cmd = "dpkg-source -sn -x %s" % (dsc_filename) @@ -1117,10 +1110,11 @@ class Upload(object): # We should probably scrap or rethink the whole reprocess thing # Bail out if: # a) there's no source - # or b) reprocess is 2 - we will do this check next time when orig.tar.gz is in 'files' - # or c) the orig.tar.gz is MIA + # or b) reprocess is 2 - we will do this check next time when orig + # tarball is in 'files' + # or c) the orig files are MIA if not self.pkg.changes["architecture"].has_key("source") or self.reprocess == 2 \ - or self.pkg.orig_tar_gz == -1: + or len(self.pkg.orig_files) == 0: return tmpdir = utils.temp_dirname() @@ -2057,7 +2051,7 @@ distribution.""" """ @warning: NB: this function can remove entries from the 'files' index [if - the .orig.tar.gz is a duplicate of the one in the archive]; if + the orig tarball is a duplicate of the one in the archive]; if you're iterating over 'files' and call this function as part of the loop, be sure to add a check to the top of the loop to ensure you haven't just tried to dereference the deleted entry. @@ -2065,7 +2059,8 @@ distribution.""" """ Cnf = Config() - self.pkg.orig_tar_gz = None + self.pkg.orig_files = {} # XXX: do we need to clear it? + orig_files = self.pkg.orig_files # Try and find all files mentioned in the .dsc. This has # to work harder to cope with the multiple possible @@ -2099,7 +2094,7 @@ distribution.""" if len(ql) > 0: # Ignore exact matches for .orig.tar.gz match = 0 - if dsc_name.endswith(".orig.tar.gz"): + if re_is_orig_source.match(dsc_name): for i in ql: if self.pkg.files.has_key(dsc_name) and \ int(self.pkg.files[dsc_name]["size"]) == int(i.filesize) and \ @@ -2109,13 +2104,15 @@ distribution.""" # This would fix the stupidity of changing something we often iterate over # whilst we're doing it del self.pkg.files[dsc_name] - self.pkg.orig_tar_gz = os.path.join(i.location.path, i.filename) + if not orig_files.has_key(dsc_name): + orig_files[dsc_name] = {} + orig_files[dsc_name]["path"] = os.path.join(i.location.path, i.filename) match = 1 if not match: self.rejects.append("can not overwrite existing copy of '%s' already in the archive." % (dsc_name)) - elif dsc_name.endswith(".orig.tar.gz"): + elif re_is_orig_source.match(dsc_name): # Check in the pool ql = get_poolfile_like_name(dsc_name, session) @@ -2153,9 +2150,11 @@ distribution.""" # need this for updating dsc_files in install() dsc_entry["files id"] = x.file_id # See install() in process-accepted... - self.pkg.orig_tar_id = x.file_id - self.pkg.orig_tar_gz = old_file - self.pkg.orig_tar_location = x.location.location_id + if not orig_files.has_key(dsc_name): + orig_files[dsc_name] = {} + orig_files[dsc_name]["id"] = x.file_id + orig_files[dsc_name]["path"] = old_file + orig_files[dsc_name]["location"] = x.location.location_id else: # TODO: Record the queues and info in the DB so we don't hardcode all this crap # Not there? Check the queue directories... @@ -2169,11 +2168,12 @@ distribution.""" in_otherdir_fh.close() actual_size = os.stat(in_otherdir)[stat.ST_SIZE] found = in_otherdir - self.pkg.orig_tar_gz = in_otherdir + if not orig_files.has_key(dsc_name): + orig_files[dsc_name] = {} + orig_files[dsc_name]["path"] = in_otherdir if not found: self.rejects.append("%s refers to %s, but I can't find it in the queue or in the pool." % (file, dsc_name)) - self.pkg.orig_tar_gz = -1 continue else: self.rejects.append("%s refers to %s, but I can't find it in the queue." % (file, dsc_name)) diff --git a/daklib/regexes.py b/daklib/regexes.py index d1f0d381..00896243 100755 --- a/daklib/regexes.py +++ b/daklib/regexes.py @@ -42,7 +42,11 @@ re_arch_from_filename = re.compile(r"/binary-[^/]+/") re_extract_src_version = re.compile (r"(\S+)\s*\((.*)\)") re_isadeb = re.compile (r"(.+?)_(.+?)_(.+)\.u?deb$") -re_issource = re.compile (r"(.+)_(.+?)\.(orig\.tar\.gz|diff\.gz|tar\.gz|dsc)$") +orig_source_ext_re = r"orig(?:-.+)?\.tar\.(?:gz|bz2)" +re_orig_source_ext = re.compile(orig_source_ext_re + "$") +re_source_ext = re.compile("(" + orig_source_ext_re + r"|debian\.tar\.(?:gz|bz2)|diff\.gz|tar\.(?:gz|bz2)|dsc)$") +re_issource = re.compile(r"(.+)_(.+?)\." + re_source_ext.pattern) +re_is_orig_source = re.compile (r"(.+)_(.+?)\.orig(?:-.+)?\.tar\.(?:gz|bz2)$") re_single_line_field = re.compile(r"^(\S*?)\s*:\s*(.*)") re_multi_line_field = re.compile(r"^\s(.*)") diff --git a/daklib/srcformats.py b/daklib/srcformats.py new file mode 100644 index 00000000..0a74c192 --- /dev/null +++ b/daklib/srcformats.py @@ -0,0 +1,65 @@ +import re + +srcformats = [] + +class SourceFormat(type): + def __new__(cls, name, bases, attrs): + klass = super(SourceFormat, cls).__new__(cls, name, bases, attrs) + srcformats.append(klass) + + assert str(klass.name) + assert iter(klass.requires) + assert iter(klass.disallowed) + + klass.re_format = re.compile(klass.format) + + return klass + + @classmethod + def reject_msgs(cls, has): + if len(cls.requires) != len([x for x in cls.requires if has[x]]): + yield "lack of required files for format %s" % cls.name + + for key in cls.disallowed: + if has[key]: + yield "contains source files not allowed in format %s" % cls.name + +class FormatOne(SourceFormat): + __metaclass__ = SourceFormat + + name = '1.0' + format = r'1.0' + + requires = () + disallowed = ('debian_tar', 'more_orig_tar') + + @classmethod + def reject_msgs(cls, has): + if not (has['native_tar_gz'] or (has['orig_tar_gz'] and has['debian_diff'])): + yield "no .tar.gz or .orig.tar.gz+.diff.gz in 'Files' field." + if has['native_tar_gz'] and has['debian_diff']: + yield "native package with diff makes no sense" + if (has['orig_tar_gz'] != has['orig_tar']) or \ + (has['native_tar_gz'] != has['native_tar']): + yield "contains source files not allowed in format %s" % cls.name + + for msg in super(FormatOne, cls).reject_msgs(has): + yield msg + +class FormatThree(SourceFormat): + __metaclass__ = SourceFormat + + name = '3.x (native)' + format = r'3\.\d+ \(native\)' + + requires = ('native_tar',) + disallowed = ('orig_tar', 'debian_diff', 'debian_tar', 'more_orig_tar') + +class FormatThreeQuilt(SourceFormat): + __metaclass__ = SourceFormat + + name = '3.x (quilt)' + format = r'3\.\d+ \(quilt\)' + + requires = ('orig_tar', 'debian_tar') + disallowed = ('debian_diff', 'native_tar') diff --git a/daklib/utils.py b/daklib/utils.py index a9dea920..6e363266 100755 --- a/daklib/utils.py +++ b/daklib/utils.py @@ -45,8 +45,11 @@ from dak_exceptions import * from textutils import fix_maintainer from regexes import re_html_escaping, html_escaping, re_single_line_field, \ re_multi_line_field, re_srchasver, re_verwithext, \ - re_parse_maintainer, re_taint_free, re_gpg_uid, re_re_mark, \ - re_whitespace_comment + re_parse_maintainer, re_taint_free, re_gpg_uid, \ + re_re_mark, re_whitespace_comment, re_issource + +from srcformats import srcformats +from collections import defaultdict ################################################################################ @@ -332,6 +335,83 @@ def check_size(where, files): ################################################################################ +def check_dsc_files(dsc_filename, dsc=None, dsc_files=None): + """ + Verify that the files listed in the Files field of the .dsc are + those expected given the announced Format. + + @type dsc_filename: string + @param dsc_filename: path of .dsc file + + @type dsc: dict + @param dsc: the content of the .dsc parsed by C{parse_changes()} + + @type dsc_files: dict + @param dsc_files: the file list returned by C{build_file_list()} + + @rtype: list + @return: all errors detected + """ + rejmsg = [] + + # Parse the file if needed + if dsc is None: + dsc = parse_changes(dsc_filename, signing_rules=1); + + if dsc_files is None: + dsc_files = build_file_list(dsc, is_a_dsc=1) + + # Ensure .dsc lists proper set of source files according to the format + # announced + has = defaultdict(lambda: 0) + + ftype_lookup = ( + (r'orig.tar.gz', ('orig_tar_gz', 'orig_tar')), + (r'diff.gz', ('debian_diff',)), + (r'tar.gz', ('native_tar_gz', 'native_tar')), + (r'debian\.tar\.(gz|bz2)', ('debian_tar',)), + (r'orig\.tar\.(gz|bz2)', ('orig_tar',)), + (r'tar\.(gz|bz2)', ('native_tar',)), + (r'orig-.+\.tar\.(gz|bz2)', ('more_orig_tar',)), + ) + + for f in dsc_files.keys(): + m = re_issource.match(f) + if not m: + rejmsg.append("%s: %s in Files field not recognised as source." + % (dsc_filename, f)) + continue + + # Populate 'has' dictionary by resolving keys in lookup table + matched = False + for regex, keys in ftype_lookup: + if re.match(regex, m.group(3)): + matched = True + for key in keys: + has[key] += 1 + break + + # File does not match anything in lookup table; reject + if not matched: + reject("%s: unexpected source file '%s'" % (dsc_filename, f)) + + # Check for multiple files + for file_type in ('orig_tar', 'native_tar', 'debian_tar', 'debian_diff'): + if has[file_type] > 1: + rejmsg.append("%s: lists multiple %s" % (dsc_filename, file_type)) + + # Source format specific tests + for format in srcformats: + if format.re_format.match(dsc['format']): + rejmsg.extend([ + '%s: %s' % (dsc_filename, x) for x in format.reject_msgs(has) + ]) + break + + return rejmsg + +################################################################################ + def check_hash_fields(what, manifest): """ check_hash_fields ensures that there are no checksum fields in the @@ -442,10 +522,10 @@ def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"): format = format[:2] if is_a_dsc: - # format = (1,0) are the only formats we currently accept, # format = (0,0) are missing format headers of which we still # have some in the archive. - if format != (1,0) and format != (0,0): + if format != (1,0) and format != (0,0) and \ + format != (3,0,"quilt") and format != (3,0,"native"): raise UnknownFormatError, "%s" % (changes.get("format","0.0")) else: if (format < (1,5) or format > (1,8)): diff --git a/tests/test_regexes.py b/tests/test_regexes.py index 7c43d097..32fd4c1d 100755 --- a/tests/test_regexes.py +++ b/tests/test_regexes.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + import unittest import os, sys @@ -29,3 +31,6 @@ class re_single_line_field(unittest.TestCase): self.assertEqual(self.MATCH(': ::').groups(), ('', '::')) self.assertEqual(self.MATCH('Foo::bar').groups(), ('Foo', ':bar')) self.assertEqual(self.MATCH('Foo: :bar').groups(), ('Foo', ':bar')) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_srcformats.py b/tests/test_srcformats.py new file mode 100755 index 00000000..9fec4a87 --- /dev/null +++ b/tests/test_srcformats.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python + +import unittest + +import os, sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from collections import defaultdict + +from daklib import srcformats + +class SourceFormatTestCase(unittest.TestCase): + def get_rejects(self, has_vars): + has = defaultdict(lambda: 0) + has.update(has_vars) + return list(self.fmt.reject_msgs(has)) + + def assertAccepted(self, has): + self.assertEqual(self.get_rejects(has), []) + + def assertRejected(self, has): + self.assertNotEqual(self.get_rejects(has), []) + +class FormatOneTestCase(SourceFormatTestCase): + fmt = srcformats.FormatOne + + def testEmpty(self): + self.assertRejected({}) + + def testNative(self): + self.assertAccepted({'native_tar': 1, 'native_tar_gz': 1}) + + def testStandard(self): + self.assertAccepted({ + 'orig_tar': 1, + 'orig_tar_gz': 1, + 'debian_diff': 1, + }) + + def testDisallowed(self): + self.assertRejected({ + 'native_tar': 1, + 'native_tar_gz': 1, + 'debian_tar': 1, + }) + self.assertRejected({ + 'orig_tar': 1, + 'orig_tar_gz': 1, + 'debian_diff': 0, + }) + self.assertRejected({ + 'native_tar': 1, + 'native_tar_gz': 1, + 'more_orig_tar': 1, + }) + self.assertRejected({ + 'native_tar': 1, + 'native_tar_gz': 1, + 'debian_diff': 1, + }) + +class FormatTreeTestCase(SourceFormatTestCase): + fmt = srcformats.FormatThree + + def testEmpty(self): + self.assertRejected({}) + + def testSimple(self): + self.assertAccepted({'native_tar': 1}) + + def testDisallowed(self): + self.assertRejected({'native_tar': 1, 'orig_tar': 1}) + self.assertRejected({'native_tar': 1, 'debian_diff': 1}) + self.assertRejected({'native_tar': 1, 'debian_tar': 1}) + self.assertRejected({'native_tar': 1, 'more_orig_tar': 1}) + +class FormatTreeQuiltTestCase(SourceFormatTestCase): + fmt = srcformats.FormatThreeQuilt + + def testEmpty(self): + self.assertRejected({}) + + def testSimple(self): + self.assertAccepted({'orig_tar': 1, 'debian_tar': 1}) + + def testMultipleTarballs(self): + self.assertAccepted({ + 'orig_tar': 1, + 'debian_tar': 1, + 'more_orig_tar': 42, + }) + + def testDisallowed(self): + self.assertRejected({ + 'orig_tar': 1, + 'debian_tar': 1, + 'debian_diff': 1 + }) + self.assertRejected({ + 'orig_tar': 1, + 'debian_tar': 1, + 'native_tar': 1, + }) + +if __name__ == '__main__': + unittest.main()