From 2495bbed9a0904fbe1a15976cad2974b305fb9ce Mon Sep 17 00:00:00 2001
From: =?utf8?q?Rapha=C3=ABl=20Hertzog?=
Date: Tue, 27 Oct 2009 02:39:53 +0100
Subject: [PATCH] Add support for multiple orig tarballs
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The structure of Upload.pkg is adapted to handle multiple upstream
tarballs: the orig_tar_id, orig_tar_location and orig_tar_gz attributes
are replaced by a single orig_files dictionary. This also means that
the .dak format is modified.

Since the .dak format has changed, the parser function is improved so
that it can read both the new and the old format. When reading the old
format, it auto-converts the data to the new structure on the fly,
inspecting the type of the next unpickled object to detect which format
is stored. This allows an in-place upgrade even while old .dak files
are still stored in the various queues.

Several other adaptations are required in other methods of the Upload
object, in process-accepted and in autobuild_queue().
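For illustration, the new structure has this shape (file names and
values below are made up; "id" and "location" are only present once a
tarball is known to the pool, "path" once it has been located on disk):

    self.orig_files = {
        "pkg_1.0.orig.tar.gz": {
            "id": 4242,        # id in the files table
            "location": 1,     # location id in the database
            "path": "/path/in/the/pool/pkg_1.0.orig.tar.gz",
        },
        # additional component tarballs get entries of their own
        "pkg_1.0.orig-doc.tar.gz": {},
    }

The old orig_tar_id, orig_tar_location and orig_tar_gz values map to
the "id", "location" and "path" fields of the single .orig.tar.gz
entry.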
Signed-off-by: Raphaël Hertzog
---
 dak/process_accepted.py | 71 ++++++++++++++++++++++-------------------
 daklib/changes.py       | 43 ++++++++++++++++++++-----
 daklib/dbconn.py        | 25 ++++++++-------
 daklib/queue.py         | 48 +++++++++++++++++-----------
 4 files changed, 117 insertions(+), 70 deletions(-)

diff --git a/dak/process_accepted.py b/dak/process_accepted.py
index d7db1172..51c6a5d7 100755
--- a/dak/process_accepted.py
+++ b/dak/process_accepted.py
@@ -210,7 +210,7 @@ def add_dsc_to_db(u, filename, session):
         df = DSCFile()
         df.source_id = source.source_id
 
-        # If the .orig.tar.gz is already in the pool, it's
+        # If the .orig tarball is already in the pool, its
         # files id is stored in dsc_files by check_dsc().
         files_id = dentry.get("files id", None)
 
@@ -353,32 +353,37 @@ def install(u, session, log_urgency=True):
             add_deb_to_db(u, newfile, session)
 
     # If this is a sourceful diff only upload that is moving
-    # cross-component we need to copy the .orig.tar.gz into the new
+    # cross-component we need to copy the .orig files into the new
     # component too for the same reasons as above.
-    #
-    if u.pkg.changes["architecture"].has_key("source") and u.pkg.orig_tar_id and \
-       u.pkg.orig_tar_location != dsc_location_id:
-
-        oldf = get_poolfile_by_id(u.pkg.orig_tar_id, session)
-        old_filename = os.path.join(oldf.location.path, oldf.filename)
-        old_dat = {'size': oldf.filesize,   'md5sum': oldf.md5sum,
-                   'sha1sum': oldf.sha1sum, 'sha256sum': oldf.sha256sum}
-
-        new_filename = os.path.join(utils.poolify(u.pkg.changes["source"], dsc_component), os.path.basename(old_filename))
-
-        # TODO: Care about size/md5sum collisions etc
-        (found, newf) = check_poolfile(new_filename, file_size, file_md5sum, dsc_location_id, session)
-
-        if newf is None:
-            utils.copy(old_filename, os.path.join(cnf["Dir::Pool"], new_filename))
-            newf = add_poolfile(new_filename, old_dat, dsc_location_id, session)
-
-        # TODO: Check that there's only 1 here
-        source = get_sources_from_name(u.pkg.changes["source"], u.pkg.changes["version"])[0]
-        dscf = get_dscfiles(source_id = source.source_id, poolfile_id=u.pkg.orig_tar_id, session=session)[0]
-        dscf.poolfile_id = newf.file_id
-        session.add(dscf)
-        session.flush()
+    if u.pkg.changes["architecture"].has_key("source"):
+        for orig_file in u.pkg.orig_files.keys():
+            if not u.pkg.orig_files[orig_file].has_key("id"):
+                continue # Skip if it's not in the pool
+            orig_file_id = u.pkg.orig_files[orig_file]["id"]
+            if u.pkg.orig_files[orig_file]["location"] == dsc_location_id:
+                continue # Skip if the location didn't change
+
+            # Do the move
+            oldf = get_poolfile_by_id(orig_file_id, session)
+            old_filename = os.path.join(oldf.location.path, oldf.filename)
+            old_dat = {'size': oldf.filesize,   'md5sum': oldf.md5sum,
+                       'sha1sum': oldf.sha1sum, 'sha256sum': oldf.sha256sum}
+
+            new_filename = os.path.join(utils.poolify(u.pkg.changes["source"], dsc_component), os.path.basename(old_filename))
+
+            # TODO: Care about size/md5sum collisions etc
+            (found, newf) = check_poolfile(new_filename, file_size, file_md5sum, dsc_location_id, session)
+
+            if newf is None:
+                utils.copy(old_filename, os.path.join(cnf["Dir::Pool"], new_filename))
+                newf = add_poolfile(new_filename, old_dat, dsc_location_id, session)
+
+            # TODO: Check that there's only 1 here
+            source = get_sources_from_name(u.pkg.changes["source"], u.pkg.changes["version"])[0]
+            dscf = get_dscfiles(source_id=source.source_id, poolfile_id=orig_file_id, session=session)[0]
+            dscf.poolfile_id = newf.file_id
+            session.add(dscf)
+            session.flush()
 
     # Install the files into the pool
     for newfile, entry in u.pkg.files.items():
@@ -452,15 +457,17 @@ def install(u, session, log_urgency=True):
                 os.unlink(dest)
             os.symlink(src, dest)
 
-    # Update last_used on any non-upload .orig.tar.gz symlink
-    if u.pkg.orig_tar_id:
+    # Update last_used on any non-uploaded .orig symlink
+    for orig_file in u.pkg.orig_files.keys():
         # Determine the .orig.tar.gz file name
-        for dsc_file in u.pkg.dsc_files.keys():
-            if dsc_file.endswith(".orig.tar.gz"):
-                u.pkg.orig_tar_gz = os.path.join(dest_dir, dsc_file)
+        if not u.pkg.orig_files[orig_file].has_key("id"):
+            continue # Skip files not in the pool
+        # XXX: do we really want to update the orig_files dict here
+        # instead of using a temporary variable?
+        u.pkg.orig_files[orig_file]["path"] = os.path.join(dest_dir, orig_file)
 
         # Remove it from the list of packages for later processing by apt-ftparchive
-        qb = get_queue_build(u.pkg.orig_tar_gz, suite.suite_id, session)
+        qb = get_queue_build(u.pkg.orig_files[orig_file]["path"], suite.suite_id, session)
         if qb:
             qb.in_queue = False
             qb.last_used = now_date
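The cross-component logic above now runs once per tarball rather than
once per upload, and the two skip conditions decide which entries are
touched at all. A self-contained model of that selection (Python 2
style, as in dak; names and values are illustrative):

    def origs_needing_copy(orig_files, dsc_location_id):
        """Return pool-known orig tarballs whose pool location differs
        from the location the .dsc is being installed into."""
        needed = []
        for name, entry in orig_files.items():
            if not entry.has_key("id"):
                continue                  # not in the pool, nothing to move
            if entry["location"] == dsc_location_id:
                continue                  # already in the right component
            needed.append(name)
        return needed

    origs_needing_copy({"a_1.orig.tar.gz": {"id": 7, "location": 2},
                        "a_1.orig-doc.tar.gz": {}}, 3)
    # -> ['a_1.orig.tar.gz']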
diff --git a/daklib/changes.py b/daklib/changes.py
index 1bb90753..59c7da17 100755
--- a/daklib/changes.py
+++ b/daklib/changes.py
@@ -76,6 +76,10 @@ CHANGESFIELDS_DSCFILES_OPTIONAL = [ "files id" ]
 
 __all__.append('CHANGESFIELDS_DSCFILES_OPTIONAL')
 
+CHANGESFIELDS_ORIGFILES = [ "id", "location" ]
+
+__all__.append('CHANGESFIELDS_ORIGFILES')
+
 ###############################################################################
 
 class Changes(object):
@@ -91,10 +95,7 @@ class Changes(object):
         self.dsc = {}
         self.files = {}
         self.dsc_files = {}
-
-        self.orig_tar_id = None
-        self.orig_tar_location = ""
-        self.orig_tar_gz = None
+        self.orig_files = {}
 
     def file_summary(self):
         # changes["distribution"] may not exist in corner cases
@@ -189,8 +190,24 @@ class Changes(object):
         self.files.update(p.load())
         self.dsc_files.update(p.load())
 
-        self.orig_tar_id = p.load()
-        self.orig_tar_location = p.load()
+        next_obj = p.load()
+        if isinstance(next_obj, dict):
+            self.orig_files.update(next_obj)
+        else:
+            # Auto-convert old dak files to the new format supporting
+            # multiple tarballs
+            orig_tar_gz = None
+            for dsc_file in self.dsc_files.keys():
+                if dsc_file.endswith(".orig.tar.gz"):
+                    orig_tar_gz = dsc_file
+            self.orig_files[orig_tar_gz] = {}
+            if next_obj != None:
+                self.orig_files[orig_tar_gz]["id"] = next_obj
+            next_obj = p.load()
+            if next_obj != None and next_obj != "":
+                self.orig_files[orig_tar_gz]["location"] = next_obj
+            if len(self.orig_files[orig_tar_gz]) == 0:
+                del self.orig_files[orig_tar_gz]
 
         dump_file.close()
 
@@ -240,6 +257,17 @@ class Changes(object):
 
         return ret
 
+    def sanitised_orig_files(self):
+        ret = {}
+        for name, entry in self.orig_files.items():
+            ret[name] = {}
+            # Optional orig_files fields
+            for i in CHANGESFIELDS_ORIGFILES:
+                if entry.has_key(i):
+                    ret[name][i] = entry[i]
+
+        return ret
+
     def write_dot_dak(self, dest_dir):
         """
        Dump ourself into a cPickle file.
@@ -281,8 +309,7 @@ class Changes(object):
         p.dump(self.sanitised_dsc())
         p.dump(self.sanitised_files())
         p.dump(self.sanitised_dsc_files())
-        p.dump(self.orig_tar_id)
-        p.dump(self.orig_tar_location)
+        p.dump(self.sanitised_orig_files())
 
         dump_file.close()
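To make the two on-disk layouts concrete: a new-format .dak pickles the
orig_files dict as a single object, while an old-format one pickled two
scalars (orig_tar_id, then orig_tar_location) in its place. A minimal
standalone model of the detection and conversion done in the hunk above
(simplified: a real .dak contains several other pickled objects before
this point, and the helper name is made up):

    def read_orig_files(p, dsc_files):
        """p is a cPickle.Unpickler positioned on the orig data."""
        next_obj = p.load()
        if isinstance(next_obj, dict):
            return next_obj                # new format, stored as-is
        # old format: two scalar fields follow
        entry = {}
        if next_obj is not None:
            entry["id"] = next_obj         # was orig_tar_id
        location = p.load()
        if location not in (None, ""):
            entry["location"] = location   # was orig_tar_location
        # attach the fields to the single .orig.tar.gz named in the .dsc
        names = [f for f in dsc_files if f.endswith(".orig.tar.gz")]
        if not names or not entry:
            return {}
        return {names[0]: entry}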
diff --git a/daklib/dbconn.py b/daklib/dbconn.py
index ff00135f..c0facc46 100755
--- a/daklib/dbconn.py
+++ b/daklib/dbconn.py
@@ -1609,23 +1609,26 @@ class Queue(object):
 
             session.add(qb)
 
-        # If the .orig.tar.gz is in the pool, create a symlink to
-        # it (if one doesn't already exist)
-        if changes.orig_tar_id:
-            # Determine the .orig.tar.gz file name
-            for dsc_file in changes.dsc_files.keys():
-                if dsc_file.endswith(".orig.tar.gz"):
-                    filename = dsc_file
-
-            dest = os.path.join(dest_dir, filename)
+        # If the .orig tarballs are in the pool, create symlinks to
+        # them (if they don't already exist)
+        for dsc_file in changes.dsc_files.keys():
+            # Skip all files except orig tarballs
+            if not re_is_orig_source.match(dsc_file):
+                continue
+            # Skip orig files not identified in the pool
+            if not (changes.orig_files.has_key(dsc_file) and
+                    changes.orig_files[dsc_file].has_key("id")):
+                continue
+            orig_file_id = changes.orig_files[dsc_file]["id"]
+            dest = os.path.join(dest_dir, dsc_file)
 
             # If it doesn't exist, create a symlink
             if not os.path.exists(dest):
                 q = session.execute("SELECT l.path, f.filename FROM location l, files f WHERE f.id = :id and f.location = l.id",
-                                    {'id': changes.orig_tar_id})
+                                    {'id': orig_file_id})
                 res = q.fetchone()
                 if not res:
-                    return "[INTERNAL ERROR] Couldn't find id %s in files table." % (changes.orig_tar_id)
+                    return "[INTERNAL ERROR] Couldn't find id %s in files table." % (orig_file_id)
 
                 src = os.path.join(res[0], res[1])
                 os.symlink(src, dest)
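Both this hunk and the queue.py changes below filter file names through
re_is_orig_source instead of a plain .endswith(".orig.tar.gz") test, so
that component tarballs such as foo_1.0.orig-doc.tar.gz are handled
too. The regex itself lives in daklib/regexes.py and is not part of
this patch; a pattern of roughly this shape (an assumption, shown only
to illustrate which names match) behaves the way the new code expects:

    import re

    # assumed shape of re_is_orig_source, not the authoritative definition
    re_is_orig_source = re.compile(r".+\.orig(?:-.+)?\.tar\.(?:gz|bz2)$")

    assert     re_is_orig_source.match("pkg_1.0.orig.tar.gz")
    assert     re_is_orig_source.match("pkg_1.0.orig-doc.tar.bz2")
    assert not re_is_orig_source.match("pkg_1.0-1.diff.gz")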
diff --git a/daklib/queue.py b/daklib/queue.py
index 96bf37d8..03bc7e05 100755
--- a/daklib/queue.py
+++ b/daklib/queue.py
@@ -1055,16 +1055,19 @@ class Upload(object):
             if not os.path.exists(src):
                 return
             ftype = m.group(3)
-            if ftype == "orig.tar.gz" and self.pkg.orig_tar_gz:
+            if re_is_orig_source.match(f) and self.pkg.orig_files.has_key(f) and \
+               self.pkg.orig_files[f].has_key("path"):
                 continue
             dest = os.path.join(os.getcwd(), f)
             os.symlink(src, dest)
 
-        # If the orig.tar.gz is not a part of the upload, create a symlink to the
-        # existing copy.
-        if self.pkg.orig_tar_gz:
-            dest = os.path.join(os.getcwd(), os.path.basename(self.pkg.orig_tar_gz))
-            os.symlink(self.pkg.orig_tar_gz, dest)
+        # If the orig files are not a part of the upload, create symlinks to the
+        # existing copies.
+        for orig_file in self.pkg.orig_files.keys():
+            if not self.pkg.orig_files[orig_file].has_key("path"):
+                continue
+            dest = os.path.join(os.getcwd(), os.path.basename(orig_file))
+            os.symlink(self.pkg.orig_files[orig_file]["path"], dest)
 
         # Extract the source
         cmd = "dpkg-source -sn -x %s" % (dsc_filename)
@@ -1107,10 +1110,11 @@ class Upload(object):
         # We should probably scrap or rethink the whole reprocess thing
         # Bail out if:
         #    a) there's no source
-        # or b) reprocess is 2 - we will do this check next time when orig.tar.gz is in 'files'
-        # or c) the orig.tar.gz is MIA
+        # or b) reprocess is 2 - we will do this check next time when the orig
+        #       tarball is in 'files'
+        # or c) the orig files are MIA
         if not self.pkg.changes["architecture"].has_key("source") or self.reprocess == 2 \
-           or self.pkg.orig_tar_gz == -1:
+           or len(self.pkg.orig_files) == 0:
             return
 
         tmpdir = utils.temp_dirname()
@@ -2047,7 +2051,7 @@ distribution."""
         """
         @warning: NB: this function can remove entries from the 'files' index [if
-                  the .orig.tar.gz is a duplicate of the one in the archive]; if
+                  the orig tarball is a duplicate of the one in the archive]; if
                   you're iterating over 'files' and call this function as part of
                   the loop, be sure to add a check to the top of the loop to
                   ensure you haven't just tried to dereference the deleted entry.
         """
@@ -2055,7 +2059,8 @@ distribution."""
         Cnf = Config()
 
-        self.pkg.orig_tar_gz = None
+        self.pkg.orig_files = {} # XXX: do we need to clear it?
+        orig_files = self.pkg.orig_files
 
         # Try and find all files mentioned in the .dsc.  This has
         # to work harder to cope with the multiple possible
@@ -2089,7 +2094,7 @@ distribution."""
         if len(ql) > 0:
             # Ignore exact matches for .orig.tar.gz
             match = 0
-            if dsc_name.endswith(".orig.tar.gz"):
+            if re_is_orig_source.match(dsc_name):
                 for i in ql:
                     if self.pkg.files.has_key(dsc_name) and \
                        int(self.pkg.files[dsc_name]["size"]) == int(i.filesize) and \
@@ -2099,13 +2104,15 @@ distribution."""
                         # This would fix the stupidity of changing something we often iterate over
                         # whilst we're doing it
                         del self.pkg.files[dsc_name]
-                        self.pkg.orig_tar_gz = os.path.join(i.location.path, i.filename)
+                        if not orig_files.has_key(dsc_name):
+                            orig_files[dsc_name] = {}
+                        orig_files[dsc_name]["path"] = os.path.join(i.location.path, i.filename)
                         match = 1
 
             if not match:
                 self.rejects.append("can not overwrite existing copy of '%s' already in the archive." % (dsc_name))
 
-        elif dsc_name.endswith(".orig.tar.gz"):
+        elif re_is_orig_source.match(dsc_name):
             # Check in the pool
             ql = get_poolfile_like_name(dsc_name, session)
 
@@ -2143,9 +2150,11 @@ distribution."""
                 # need this for updating dsc_files in install()
                 dsc_entry["files id"] = x.file_id
                 # See install() in process-accepted...
-                self.pkg.orig_tar_id = x.file_id
-                self.pkg.orig_tar_gz = old_file
-                self.pkg.orig_tar_location = x.location.location_id
+                if not orig_files.has_key(dsc_name):
+                    orig_files[dsc_name] = {}
+                orig_files[dsc_name]["id"] = x.file_id
+                orig_files[dsc_name]["path"] = old_file
+                orig_files[dsc_name]["location"] = x.location.location_id
             else:
                 # TODO: Record the queues and info in the DB so we don't hardcode all this crap
                 # Not there? Check the queue directories...
@@ -2159,11 +2168,12 @@ distribution."""
                     in_otherdir_fh.close()
                     actual_size = os.stat(in_otherdir)[stat.ST_SIZE]
                     found = in_otherdir
-                    self.pkg.orig_tar_gz = in_otherdir
+                    if not orig_files.has_key(dsc_name):
+                        orig_files[dsc_name] = {}
+                    orig_files[dsc_name]["path"] = in_otherdir
 
             if not found:
                 self.rejects.append("%s refers to %s, but I can't find it in the queue or in the pool." % (file, dsc_name))
-                self.pkg.orig_tar_gz = -1
                 continue
         else:
             self.rejects.append("%s refers to %s, but I can't find it in the queue." % (file, dsc_name))
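A note on the upgrade path: an old-format .dak is converted the first
time it is loaded, and every subsequent dump uses the new single-dict
layout, so the compatibility branch in the parser runs at most once per
file. Assuming the Changes loader is the usual counterpart of
write_dot_dak() (its name does not appear in the hunks above, and the
paths here are made up), the round trip looks like:

    c = Changes()
    c.load_dot_dak("/srv/queue/unchecked/pkg_1.0-1.dak")  # old or new layout
    c.write_dot_dak("/srv/queue/unchecked")               # always new layout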
-- 
2.39.5