4 # Copyright (C) 2000, 2001, 2002, 2003, 2004 James Troup <james@nocrew.org>
5 # $Id: utils.py,v 1.63 2004-02-27 20:07:40 troup Exp $
7 ################################################################################
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 ################################################################################
25 import commands, os, pwd, re, select, socket, shutil, string, sys, tempfile, traceback;
29 ################################################################################
# Pre-compiled regular expressions shared by the helpers in this module.

# Version / filename handling
re_comments = re.compile(r"\#.*")
re_no_epoch = re.compile(r"^\d*\:")
re_no_revision = re.compile(r"\-[^-]*$")
re_arch_from_filename = re.compile(r"/binary-[^/]+/")
re_extract_src_version = re.compile(r"(\S+)\s*\((.*)\)")
re_isadeb = re.compile(r"(.+?)_(.+?)_(.+)\.u?deb$")
re_issource = re.compile(r"(.+)_(.+?)\.(orig\.tar\.gz|diff\.gz|tar\.gz|dsc)$")

# Control-file ("Field: value") parsing
re_single_line_field = re.compile(r"^(\S*)\s*:\s*(.*)")
re_multi_line_field = re.compile(r"^\s(.*)")

# Filenames that are safe to interpolate into a shell command line
re_taint_free = re.compile(r"^[-+~\.\w]+$")

# "Name <email>" as found in a Maintainer: field
re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\> \t]+)\>")

# String exceptions raised by the functions in this module.
changes_parse_error_exc = "Can't parse line in .changes file"
invalid_dsc_format_exc = "Invalid .dsc file"
nk_format_exc = "Unknown Format: in .changes file"
no_files_exc = "No Files: field in .dsc or .changes file."
cant_open_exc = "Can't read file."
unknown_hostname_exc = "Unknown hostname"
cant_overwrite_exc = "Permission denied; can't overwrite existent file."
file_exists_exc = "Destination file exists"
sendmail_failed_exc = "Sendmail invocation failed"
tried_too_hard_exc = "Tried too hard to find a free filename."

# Configuration files read when no host-specific override is configured.
default_config = "/etc/katie/katie.conf"
default_apt_config = "/etc/katie/apt.conf"
59 ################################################################################
61 def open_file(filename, mode='r'):
63 f = open(filename, mode);
65 raise cant_open_exc, filename;
68 ################################################################################
70 def our_raw_input(prompt=""):
72 sys.stdout.write(prompt);
78 sys.stderr.write("\nUser interrupt (^D).\n");
81 ################################################################################
85 if c not in string.digits:
89 ################################################################################
91 def extract_component_from_section(section):
94 if section.find('/') != -1:
95 component = section.split('/')[0];
96 if component.lower() == "non-us" and section.find('/') != -1:
97 s = component + '/' + section.split('/')[1];
98 if Cnf.has_key("Component::%s" % s): # Avoid e.g. non-US/libs
101 if section.lower() == "non-us":
102 component = "non-US/main";
104 # non-US prefix is case insensitive
105 if component.lower()[:6] == "non-us":
106 component = "non-US"+component[6:];
108 # Expand default component
110 if Cnf.has_key("Component::%s" % section):
114 elif component == "non-US":
115 component = "non-US/main";
117 return (section, component);
119 ################################################################################
121 # Parses a changes file and returns a dictionary where each field is a
122 # key. The mandatory first argument is the filename of the .changes
125 # dsc_whitespace_rules is an optional boolean argument which defaults
126 # to off. If true, it turns on strict format checking to avoid
127 # allowing in source packages which are unextractable by the
128 # inappropriately fragile dpkg-source.
132 # o The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
133 # followed by any PGP header data and must end with a blank line.
135 # o The data section must end with a blank line and must be followed by
136 # "-----BEGIN PGP SIGNATURE-----".
138 def parse_changes(filename, dsc_whitespace_rules=0):
142 changes_in = open_file(filename);
143 lines = changes_in.readlines();
146 raise changes_parse_error_exc, "[Empty changes file]";
148 # Reindex by line number so we can easily verify the format of
154 indexed_lines[index] = line[:-1];
156 inside_signature = 0;
158 num_of_lines = len(indexed_lines.keys());
161 while index < num_of_lines:
163 line = indexed_lines[index];
165 if dsc_whitespace_rules:
167 if index > num_of_lines:
168 raise invalid_dsc_format_exc, index;
169 line = indexed_lines[index];
170 if not line.startswith("-----BEGIN PGP SIGNATURE"):
171 raise invalid_dsc_format_exc, index;
172 inside_signature = 0;
176 if line.startswith("-----BEGIN PGP SIGNATURE"):
178 if line.startswith("-----BEGIN PGP SIGNED MESSAGE"):
179 inside_signature = 1;
180 if dsc_whitespace_rules:
181 while index < num_of_lines and line != "":
183 line = indexed_lines[index];
185 # If we're not inside the signed data, don't process anything
186 if not inside_signature:
188 slf = re_single_line_field.match(line);
190 field = slf.groups()[0].lower();
191 changes[field] = slf.groups()[1];
195 changes[field] += '\n';
197 mlf = re_multi_line_field.match(line);
200 raise changes_parse_error_exc, "'%s'\n [Multi-line field continuing on from nothing?]" % (line);
201 if first == 1 and changes[field] != "":
202 changes[field] += '\n';
204 changes[field] += mlf.groups()[0] + '\n';
208 if dsc_whitespace_rules and inside_signature:
209 raise invalid_dsc_format_exc, index;
212 changes["filecontents"] = "".join(lines);
215 raise changes_parse_error_exc, error;
219 ################################################################################
221 # Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
223 def build_file_list(changes, is_a_dsc=0):
226 # Make sure we have a Files: field to parse...
227 if not changes.has_key("files"):
230 # Make sure we recognise the format of the Files: field
231 format = changes.get("format", "");
233 format = float(format);
234 if not is_a_dsc and (format < 1.5 or format > 2.0):
235 raise nk_format_exc, format;
237 # Parse each entry/line:
238 for i in changes["files"].split('\n'):
242 section = priority = "";
245 (md5, size, name) = s;
247 (md5, size, section, priority, name) = s;
249 raise changes_parse_error_exc, i;
256 (section, component) = extract_component_from_section(section);
258 files[name] = Dict(md5sum=md5, size=size, section=section,
259 priority=priority, component=component);
263 ################################################################################
265 # Fix the `Maintainer:' field to be an RFC822 compatible address.
266 # cf. Debian Policy Manual (D.2.4)
268 # 06:28|<Culus> 'The standard sucks, but my tool is supposed to
269 # interoperate with it. I know - I'll fix the suckage
270 # and make things incompatible!'
272 def fix_maintainer (maintainer):
273 m = re_parse_maintainer.match(maintainer);
277 if m != None and len(m.groups()) == 2:
280 if name.find(',') != -1 or name.find('.') != -1:
281 rfc822 = "%s (%s)" % (email, name);
282 return (rfc822, name, email)
284 ################################################################################
286 # sendmail wrapper, takes _either_ a message string or a file as arguments
287 def send_mail (message, filename=""):
288 # If we've been passed a string dump it into a temporary file
290 filename = tempfile.mktemp();
291 fd = os.open(filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, 0700);
292 os.write (fd, message);
296 (result, output) = commands.getstatusoutput("%s < %s" % (Cnf["Dinstall::SendmailCommand"], filename));
298 raise sendmail_failed_exc, output;
300 # Clean up any temporary files
302 os.unlink (filename);
304 ################################################################################
306 def poolify (source, component):
309 # FIXME: this is nasty
310 component = component.lower().replace("non-us/", "non-US/");
311 if source[:3] == "lib":
312 return component + source[:4] + '/' + source + '/'
314 return component + source[:1] + '/' + source + '/'
316 ################################################################################
318 def move (src, dest, overwrite = 0, perms = 0664):
319 if os.path.exists(dest) and os.path.isdir(dest):
322 dest_dir = os.path.dirname(dest);
323 if not os.path.exists(dest_dir):
324 umask = os.umask(00000);
325 os.makedirs(dest_dir, 02775);
327 #print "Moving %s to %s..." % (src, dest);
328 if os.path.exists(dest) and os.path.isdir(dest):
329 dest += '/' + os.path.basename(src);
330 # Don't overwrite unless forced to
331 if os.path.exists(dest):
333 fubar("Can't move %s to %s - file already exists." % (src, dest));
335 if not os.access(dest, os.W_OK):
336 fubar("Can't move %s to %s - can't write to existing file." % (src, dest));
337 shutil.copy2(src, dest);
338 os.chmod(dest, perms);
341 def copy (src, dest, overwrite = 0, perms = 0664):
342 if os.path.exists(dest) and os.path.isdir(dest):
345 dest_dir = os.path.dirname(dest);
346 if not os.path.exists(dest_dir):
347 umask = os.umask(00000);
348 os.makedirs(dest_dir, 02775);
350 #print "Copying %s to %s..." % (src, dest);
351 if os.path.exists(dest) and os.path.isdir(dest):
352 dest += '/' + os.path.basename(src);
353 # Don't overwrite unless forced to
354 if os.path.exists(dest):
356 raise file_exists_exc
358 if not os.access(dest, os.W_OK):
359 raise cant_overwrite_exc
360 shutil.copy2(src, dest);
361 os.chmod(dest, perms);
363 ################################################################################
366 res = socket.gethostbyaddr(socket.gethostname());
367 database_hostname = Cnf.get("Config::" + res[0] + "::DatabaseHostname");
368 if database_hostname:
369 return database_hostname;
def which_conf_file ():
    """Return the path of the katie configuration file for this host.

    The host name is the first element returned by socket.gethostbyaddr()
    for the local host name.  If Cnf has a non-empty
    "Config::<host>::KatieConfig" entry, that value is returned;
    otherwise default_config is returned.
    """
    host = socket.gethostbyaddr(socket.gethostname())[0]
    key = "Config::" + host + "::KatieConfig"
    # No (or empty) per-host override: use the stock location.
    if not Cnf.get(key):
        return default_config
    return Cnf[key]
def which_apt_conf_file ():
    """Return the path of the apt configuration file for this host.

    The host name is the first element returned by socket.gethostbyaddr()
    for the local host name.  If Cnf has a non-empty
    "Config::<host>::AptConfig" entry, that value is returned;
    otherwise default_apt_config is returned.
    """
    host = socket.gethostbyaddr(socket.gethostname())[0]
    key = "Config::" + host + "::AptConfig"
    # No (or empty) per-host override: use the stock location.
    if not Cnf.get(key):
        return default_apt_config
    return Cnf[key]
387 ################################################################################
389 # Escape characters which have meaning to SQL's regex comparison operator ('~')
390 # (woefully incomplete)
393 s = s.replace('+', '\\\\+');
394 s = s.replace('.', '\\\\.');
397 ################################################################################
399 # Perform a substitution of template
400 def TemplateSubst(map, filename):
401 file = open_file(filename);
402 template = file.read();
404 template = template.replace(x,map[x]);
408 ################################################################################
410 def fubar(msg, exit_code=1):
411 sys.stderr.write("E: %s\n" % (msg));
415 sys.stderr.write("W: %s\n" % (msg));
417 ################################################################################
419 # Returns the user name with a laughable attempt at rfc822 conformancy
420 # (read: removing stray periods).
422 return pwd.getpwuid(os.getuid())[4].split(',')[0].replace('.', '');
424 ################################################################################
434 return ("%d%s" % (c, t))
436 ################################################################################
438 def cc_fix_changes (changes):
439 o = changes.get("architecture", "");
441 del changes["architecture"];
442 changes["architecture"] = {};
444 changes["architecture"][j] = 1;
446 # Sort by source name, source version, 'have source', and then by filename
447 def changes_compare (a, b):
449 a_changes = parse_changes(a);
454 b_changes = parse_changes(b);
458 cc_fix_changes (a_changes);
459 cc_fix_changes (b_changes);
461 # Sort by source name
462 a_source = a_changes.get("source");
463 b_source = b_changes.get("source");
464 q = cmp (a_source, b_source);
468 # Sort by source version
469 a_version = a_changes.get("version");
470 b_version = b_changes.get("version");
471 q = apt_pkg.VersionCompare(a_version, b_version);
475 # Sort by 'have source'
476 a_has_source = a_changes["architecture"].get("source");
477 b_has_source = b_changes["architecture"].get("source");
478 if a_has_source and not b_has_source:
480 elif b_has_source and not a_has_source:
483 # Fall back to sort by filename
486 ################################################################################
488 def find_next_free (dest, too_many=100):
491 while os.path.exists(dest) and extra < too_many:
492 dest = orig_dest + '.' + repr(extra);
494 if extra >= too_many:
495 raise tried_too_hard_exc;
498 ################################################################################
500 def result_join (original, sep = '\t'):
502 for i in xrange(len(original)):
503 if original[i] == None:
506 list.append(original[i]);
507 return sep.join(list);
509 ################################################################################
511 def prefix_multi_line_string(str, prefix, include_blank_lines=0):
513 for line in str.split('\n'):
515 if line or include_blank_lines:
516 out += "%s%s\n" % (prefix, line);
517 # Strip trailing new line
522 ################################################################################
524 def validate_changes_file_arg(file, fatal=1):
528 if file.endswith(".katie"):
529 file = file[:-6]+".changes";
531 if not file.endswith(".changes"):
532 error = "invalid file type; not a changes file";
534 if not os.access(file,os.R_OK):
535 if os.path.exists(file):
536 error = "permission denied";
538 error = "file not found";
542 fubar("%s: %s." % (orig_filename, error));
544 warn("Skipping %s - %s" % (orig_filename, error));
549 ################################################################################
552 return (arch != "source" and arch != "all");
554 ################################################################################
def join_with_commas_and(list):
    """Render a sequence of strings as an English enumeration.

    An empty sequence gives "nothing", a single item is returned as-is,
    and longer sequences are joined as "a, b and c".
    """
    count = len(list)
    if count == 0:
        return "nothing"
    if count == 1:
        return list[0]
    # Everything but the last item is comma separated; the last is
    # attached with " and ".
    leading = ", ".join(list[:-1])
    return leading + " and " + list[-1]
561 ################################################################################
566 ################################################################################
568 # Handle -a, -c and -s arguments; returns them as SQL constraints
569 def parse_args(Options):
573 for suite in split_args(Options["Suite"]):
574 suite_id = db_access.get_suite_id(suite);
576 warn("suite '%s' not recognised." % (suite));
578 suite_ids_list.append(suite_id);
580 con_suites = "AND su.id IN (%s)" % ", ".join(map(str, suite_ids_list));
582 fubar("No valid suite given.");
587 if Options["Component"]:
588 component_ids_list = [];
589 for component in split_args(Options["Component"]):
590 component_id = db_access.get_component_id(component);
591 if component_id == -1:
592 warn("component '%s' not recognised." % (component));
594 component_ids_list.append(component_id);
595 if component_ids_list:
596 con_components = "AND c.id IN (%s)" % ", ".join(map(str, component_ids_list));
598 fubar("No valid component given.");
602 # Process architecture
603 con_architectures = "";
604 if Options["Architecture"]:
607 for architecture in split_args(Options["Architecture"]):
608 if architecture == "source":
611 architecture_id = db_access.get_architecture_id(architecture);
612 if architecture_id == -1:
613 warn("architecture '%s' not recognised." % (architecture));
615 arch_ids_list.append(architecture_id);
617 con_architectures = "AND a.id IN (%s)" % ", ".join(map(str, arch_ids_list));
620 fubar("No valid architecture given.");
624 return (con_suites, con_architectures, con_components, check_source);
626 ################################################################################
628 # Inspired(tm) by Bryn Keller's print_exc_plus (See
629 # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52215)
632 tb = sys.exc_info()[2];
639 frame = frame.f_back;
641 traceback.print_exc();
643 print "\nFrame %s in %s at line %s" % (frame.f_code.co_name,
644 frame.f_code.co_filename,
646 for key, value in frame.f_locals.items():
647 print "\t%20s = " % key,;
651 print "<unable to print>";
653 ################################################################################
655 def try_with_debug(function):
663 ################################################################################
665 # Function for use in sorting lists of architectures.
666 # Sorts normally except that 'source' dominates all others.
668 def arch_compare_sw (a, b):
669 if a == "source" and b == "source":
678 ################################################################################
680 # Split command line arguments which can be separated by either commas
681 # or whitespace. If dwim is set, it will complain about string ending
682 # in comma since this usually means someone did 'madison -a i386, m68k
683 # foo' or something and the inevitable confusion resulting from 'm68k'
684 # being treated as an argument is undesirable.
686 def split_args (s, dwim=1):
687 if s.find(",") == -1:
690 if s[-1:] == "," and dwim:
691 fubar("split_args: found trailing comma, spurious space maybe?");
694 ################################################################################
def Dict(**entries):
    """Return the given keyword arguments as a dictionary.

    Dict(md5sum="x", size="1") gives {"md5sum": "x", "size": "1"}.
    """
    return entries
698 ########################################
700 # Our very own version of commands.getstatusoutput(), hacked to support
702 def gpgv_get_status_output(cmd, status_read, status_write):
703 cmd = ['/bin/sh', '-c', cmd];
704 p2cread, p2cwrite = os.pipe();
705 c2pread, c2pwrite = os.pipe();
706 errout, errin = os.pipe();
716 for i in range(3, 256):
717 if i != status_write:
723 os.execvp(cmd[0], cmd);
729 os.dup2(c2pread, c2pwrite);
730 os.dup2(errout, errin);
732 output = status = "";
734 i, o, e = select.select([c2pwrite, errin, status_read], [], []);
737 r = os.read(fd, 8196);
739 more_data.append(fd);
740 if fd == c2pwrite or fd == errin:
742 elif fd == status_read:
745 fubar("Unexpected file descriptor [%s] returned from select\n" % (fd));
747 pid, exit_status = os.waitpid(pid, 0)
749 os.close(status_write);
750 os.close(status_read);
760 return output, status, exit_status;
762 ############################################################
765 def check_signature (filename, reject):
766 """Check the signature of a file and return the fingerprint if the
767 signature is valid or 'None' if it's not. The first argument is the
768 filename whose signature should be checked. The second argument is a
769 reject function and is called when an error is found. The reject()
770 function must allow for two arguments: the first is the error message,
771 the second is an optional prefix string. It's possible for reject()
772 to be called more than once during an invocation of check_signature()."""
774 # Ensure the filename contains no shell meta-characters or other badness
775 if not re_taint_free.match(os.path.basename(filename)):
776 reject("!!WARNING!! tainted filename: '%s'." % (filename));
779 # Invoke gpgv on the file
780 status_read, status_write = os.pipe();
781 cmd = "gpgv --status-fd %s --keyring %s --keyring %s %s" \
782 % (status_write, Cnf["Dinstall::PGPKeyring"], Cnf["Dinstall::GPGKeyring"], filename);
783 (output, status, exit_status) = gpgv_get_status_output(cmd, status_read, status_write);
785 # Process the status-fd output
787 bad = internal_error = "";
788 for line in status.split('\n'):
792 split = line.split();
794 internal_error += "gpgv status line is malformed (< 2 atoms) ['%s'].\n" % (line);
796 (gnupg, keyword) = split[:2];
797 if gnupg != "[GNUPG:]":
798 internal_error += "gpgv status line is malformed (incorrect prefix '%s').\n" % (gnupg);
801 if keywords.has_key(keyword) and (keyword != "NODATA" and keyword != "SIGEXPIRED"):
802 internal_error += "found duplicate status token ('%s').\n" % (keyword);
805 keywords[keyword] = args;
807 # If we failed to parse the status-fd output, let's just whine and bail now
809 reject("internal error while performing signature check on %s." % (filename));
810 reject(internal_error, "");
811 reject("Please report the above errors to the Archive maintainers by replying to this mail.", "");
814 # Now check for obviously bad things in the processed output
815 if keywords.has_key("SIGEXPIRED"):
816 reject("The key used to sign %s has expired." % (filename));
818 if keywords.has_key("KEYREVOKED"):
819 reject("The key used to sign %s has been revoked." % (filename));
821 if keywords.has_key("BADSIG"):
822 reject("bad signature on %s." % (filename));
824 if keywords.has_key("ERRSIG") and not keywords.has_key("NO_PUBKEY"):
825 reject("failed to check signature on %s." % (filename));
827 if keywords.has_key("NO_PUBKEY"):
828 args = keywords["NO_PUBKEY"];
831 reject("The key (0x%s) used to sign %s wasn't found in the keyring(s)." % (key, filename));
833 if keywords.has_key("BADARMOR"):
834 reject("ASCII armour of signature was corrupt in %s." % (filename));
836 if keywords.has_key("NODATA"):
837 reject("no signature found in %s." % (filename));
843 # Next check gpgv exited with a zero return code
845 reject("gpgv failed while checking %s." % (filename));
847 reject(prefix_multi_line_string(status, " [GPG status-fd output:] "), "");
849 reject(prefix_multi_line_string(output, " [GPG output:] "), "");
852 # Sanity check the good stuff we expect
853 if not keywords.has_key("VALIDSIG"):
854 reject("signature on %s does not appear to be valid [No VALIDSIG]." % (filename));
857 args = keywords["VALIDSIG"];
859 reject("internal error while checking signature on %s." % (filename));
862 fingerprint = args[0];
863 if not keywords.has_key("GOODSIG"):
864 reject("signature on %s does not appear to be valid [No GOODSIG]." % (filename));
866 if not keywords.has_key("SIG_ID"):
867 reject("signature on %s does not appear to be valid [No SIG_ID]." % (filename));
870 # Finally ensure there's not something we don't recognise
871 known_keywords = Dict(VALIDSIG="",SIG_ID="",GOODSIG="",BADSIG="",ERRSIG="",
872 SIGEXPIRED="",KEYREVOKED="",NO_PUBKEY="",BADARMOR="",
875 for keyword in keywords.keys():
876 if not known_keywords.has_key(keyword):
877 reject("found unknown status token '%s' from gpgv with args '%r' in %s." % (keyword, keywords[keyword], filename));
885 ################################################################################
887 # Inspired(tm) by http://www.zopelabs.com/cookbook/1022242603
889 def wrap(paragraph, max_length, prefix=""):
893 words = paragraph.split();
896 word_size = len(word);
897 if word_size > max_length:
899 s += line + '\n' + prefix;
900 s += word + '\n' + prefix;
903 new_length = len(line) + word_size + 1;
904 if new_length > max_length:
905 s += line + '\n' + prefix;
918 ################################################################################
920 # Relativize an absolute symlink from 'src' -> 'dest' relative to 'root'.
921 # Returns fixed 'src'
922 def clean_symlink (src, dest, root):
923 src = src.replace(root, '', 1);
924 dest = dest.replace(root, '', 1);
925 dest = os.path.dirname(dest);
926 new_src = '../' * len(dest.split('/'));
927 return new_src + src;
929 ################################################################################
933 Cnf = apt_pkg.newConfiguration();
934 apt_pkg.ReadConfigFileISC(Cnf,default_config);
936 if which_conf_file() != default_config:
937 apt_pkg.ReadConfigFileISC(Cnf,which_conf_file());
939 ################################################################################