2 # Import contents files
4 # Copyright (C) 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 ################################################################################
21 ################################################################################
23 ################################################################################
25 import sys, os, popen2, tempfile, stat, time, pg
26 import re, gzip, apt_pkg
27 from daklib import database, utils
28 from daklib.dak_exceptions import *
30 ################################################################################
# Module-level caches shared by the helper functions in this file.
# Maps a directory path string to the id returned by the
# content_file_paths INSERT (filled by set_contents_path_id).
36 content_path_id_cache = {}
# Maps a file name to the id returned by the content_file_names INSERT
# (filled by set_contents_file_id).
37 content_file_id_cache = {}
# Keyed by "<bin_id>_<fullpath>" strings; insert_content_path only tests
# membership of a key in the code visible in this listing.
38 insert_contents_file_cache = {}
40 ################################################################################
# Print the command-line help text for "dak import-contents".
# Takes an optional exit_code (default 0).
# NOTE(review): this listing is missing source lines inside and after the
# help string (the embedded line numbers jump 43 -> 46 and stop at 47), so
# the closing quotes and whatever uses exit_code are not visible here;
# restore them from the original file.
42 def usage (exit_code=0):
43 print """Usage: dak import-contents
46 -h, --help show this help and exit
47 -s, --suite=SUITE only write file lists for this suite
51 ################################################################################
def set_contents_file_id(file):
    """Return the content_file_names id for *file*, inserting a row on a cache miss.

    Ids are memoised in the module-level ``content_file_id_cache``.  On a
    cache miss a new row is always inserted; rows already present in the
    table are not looked up.

    :param file: file name to register.
    :return: integer id of the row associated with *file*.
    """
    global content_file_id_cache

    # "in" replaces dict.has_key(), which is deprecated in Python 2 and
    # removed in Python 3.
    if file not in content_file_id_cache:
        # since this can be called within a transaction, we can't use currval;
        # RETURNING hands back the new id in the same statement.
        # The name is escaped before interpolation: a name containing a single
        # quote would otherwise terminate the SQL string literal early.
        q = projectB.query("INSERT INTO content_file_names VALUES (DEFAULT, '%s') RETURNING id" % (pg.escape_string(file)))
        content_file_id_cache[file] = int(q.getresult()[0][0])
    return content_file_id_cache[file]
63 ################################################################################
def set_contents_path_id(path):
    """Return the content_file_paths id for *path*, inserting a row on a cache miss.

    Ids are memoised in the module-level ``content_path_id_cache``.  On a
    cache miss a new row is always inserted; rows already present in the
    table are not looked up.

    :param path: directory part of a file path to register.
    :return: integer id of the row associated with *path*.
    """
    global content_path_id_cache

    # "in" replaces dict.has_key(), which is deprecated in Python 2 and
    # removed in Python 3.
    if path not in content_path_id_cache:
        # RETURNING hands back the new id in the same statement as the INSERT.
        # The path is escaped before interpolation: a path containing a single
        # quote would otherwise terminate the SQL string literal early.
        q = projectB.query("INSERT INTO content_file_paths VALUES (DEFAULT, '%s') RETURNING id" % (pg.escape_string(path)))
        content_path_id_cache[path] = int(q.getresult()[0][0])
    return content_path_id_cache[path]
73 ################################################################################
# Record that binary package bin_id ships the file at fullpath by inserting
# a row into content_associations.
#   bin_id   -- formatted with '%d' below, so it must be an integer
#   fullpath -- full path of the file; split into directory and file name
# NOTE(review): this listing is missing source lines (embedded numbers 82
# and 93 onwards are absent): the body of the cache check below and anything
# following the INSERT are not visible, and no visible line ever adds
# cache_key to insert_contents_file_cache -- confirm against the original.
75 def insert_content_path(bin_id, fullpath):
76 global insert_contents_file_cache
77 cache_key = "%s_%s" % (bin_id, fullpath)
79 # have we seen this contents before?
80 # probably only relevant during package import
81 if insert_contents_file_cache.has_key(cache_key):
84 # split the path into basename, and pathname
85 (path, file) = os.path.split(fullpath)
87 # Get the necessary IDs ...
88 file_id = set_contents_file_id(file)
89 path_id = set_contents_path_id(path)
91 # Put them into content_associations
92 projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id))
95 ################################################################################
# Import the dists/<suite>/Contents-<arch>.gz files into the contents tables,
# bracketed by a single BEGIN WORK ... COMMIT.
#   suites -- presumably an iterable of suite names; the loop that binds `s`
#             is not visible in this listing (TODO confirm).
# NOTE(review): many source lines are missing here (the embedded numbers
# skip), including the statements that bind `s`, `line`, `arch_list`,
# `found_header` and `lines_processed`. The comments below describe only the
# visible statements.
97 def import_contents(suites):
101 projectB.query("BEGIN WORK")
103 # Needed to make sure postgreSQL doesn't freak out on some of the data
104 projectB.query("SET CLIENT_ENCODING TO 'LATIN1'")
# line_regex: everything up to the last run of whitespace, then the final
# whitespace-free token.
# pkg_regex: splits a token at its last '/' into two whitespace-free parts.
# file_regex: matches lines that begin with "FILE".
107 line_regex = re.compile(r'^(.+?)\s+(\S+)$')
108 pkg_regex = re.compile(r'(\S+)/(\S+)$')
109 file_regex = re.compile('^FILE')
111 # Get our suites, and the architectures
113 suite_id = database.get_suite_id(s)
115 q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id)
# Keep every architecture of the suite except "source" and "all".
118 for r in q.getresult():
119 if r[1] != "source" and r[1] != "all":
120 arch_list.append((r[0], r[1]))
122 arch_all_id = database.get_architecture_id("all")
# Only arch[1] (the architecture name) is used in the visible code; the id
# is looked up again from the name rather than taken from arch[0].
124 for arch in arch_list:
125 print "Processing %s/%s" % (s, arch[1])
126 arch_id = database.get_architecture_id(arch[1])
127 f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-%s.gz" % (s, arch[1]), "r")
# readlines() loads the whole file; the count feeds the progress output below.
# NOTE(review): no f.close() is visible in this listing -- confirm.
130 lines = f.readlines()
131 num_of_lines = len(lines)
133 # Ok, the file cursor is at the first entry, now comes the fun 'lets parse' bit
138 if found_header == False:
140 print "Unable to find end of Contents-%s.gz header!" % ( arch[1])
# presumably the line starting with "FILE" marks the end of the header --
# the surrounding statements are missing, TODO confirm.
144 if file_regex.match(line):
148 # The format is simple enough, *filename*, *section/package1,section/package2,etc*
149 # Each file appears once per Contents file, so first, use some regex match
150 # to split the two bits
152 # Print out progress bar
# "\r" plus the trailing comma (Python 2 print statement, no newline) rewrites
# the same terminal line; "%%" emits a literal percent sign.
153 print "\rProcessed %d lines of %d (%%%.2f)" % (lines_processed, num_of_lines, ((float(lines_processed)/num_of_lines)*100)),
155 # regex lifted from packages.d.o code
156 matchs = line_regex.findall(line)
157 filename = matchs[0][0]
158 packages = matchs[0][1].split(',')
160 # Iterate through each file's packages
161 for package in packages:
162 matchs = pkg_regex.findall(package)
# NOTE(review): the next comment is cut off; its continuation is on a line
# missing from this listing.
164 # Needed since the DB is unicode, and these files
166 section_name = matchs[0][0]
167 package_name = matchs[0][1]
169 section_id = database.get_section_id(section_name)
170 package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_id)
172 if package_id == None:
173 # Likely got an arch all package
174 package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_all_id)
# NOTE(review): no visible guard for package_id still being None after the
# fallback lookup; insert_content_path formats it with '%d' -- confirm
# against the original.
176 insert_content_path(package_id, filename)
182 print "Committing to database ..."
183 projectB.query("COMMIT")
185 ################################################################################
# Script driver: load the configuration, parse the command line, open the
# database connection and run import_contents().
# NOTE(review): the enclosing "def" line, the end of the Arguments list and
# the conditions around the two `suites = ...` assignments are missing from
# this listing (the embedded numbers skip); presumably this is the body of
# main() -- confirm against the original file.
# Cnf and projectB are published as module globals because the helper
# functions in this file read them directly.
188 global Cnf, projectB, out
191 Cnf = utils.get_conf()
193 Arguments = [('h',"help","Import-Contents::Options::Help"),
194 ('s',"suite","Import-Contents::Options::Suite","HasArg"),
# Give every option key an empty default if it is not already configured.
197 for i in [ "help", "suite" ]:
198 if not Cnf.has_key("Import-Contents::Options::%s" % (i)):
199 Cnf["Import-Contents::Options::%s" % (i)] = ""
201 suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
202 Options = Cnf.SubTree("Import-Contents::Options")
# Two alternative sources for the suite list: the Suite option, or every
# suite listed under the "Suite" configuration tree. The branch conditions
# are not visible here.
208 suites = utils.split_args(Options["Suite"])
210 suites = Cnf.SubTree("Suite").List()
212 projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
213 database.init(Cnf, projectB)
215 import_contents(suites)
217 #######################################################################################
# Run only when the file is executed as a script.
# NOTE(review): the guarded statement is missing from this listing.
219 if __name__ == '__main__':