2 # Import contents files
4 # Copyright (C) 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 ################################################################################
21 ################################################################################
23 ################################################################################
25 import sys, os, popen2, tempfile, stat, time, pg
26 import re, gzip, apt_pkg
27 from daklib import database, utils
28 from daklib.dak_exceptions import *
30 ################################################################################
# Module-level state used by cache_content_path().
# True once cache_content_path() has opened the two temporary list files.
36 has_opened_temp_file_lists = False
# Placeholders; cache_content_path() rebinds these to open file objects.
37 content_path_file = ""
38 content_name_file = ""
# Consulted by cache_content_path() with the full path of each entry.
# NOTE(review): no visible line ever inserts into this dict - confirm.
39 content_file_cache = {}
# File names (basename part) already written to content_name_file.
40 content_name_cache = {}
# Directory paths (dirname part) already written to content_path_file.
41 content_path_cache = {}
43 ################################################################################
# Print the command-line help text for "dak import-contents".
#
# exit_code defaults to 0.
# NOTE(review): the end of the help string and the statement that consumes
# exit_code are not visible in this excerpt - presumably it is handed to
# sys.exit(); confirm against the full file.
45 def usage (exit_code=0):
46 print """Usage: dak import-contents
49 -h, --help show this help and exit
50 -s, --suite=SUITE only write file lists for this suite
54 ################################################################################
# Record the directory part and the file-name part of `fullpath` in the two
# temporary list files, writing each distinct value only once.
#
# fullpath: a path string; it is split with os.path.split().
# Side effects: opens /tmp/content_file_path.tmp and
# /tmp/content_name_path.tmp for writing on first use, and updates the
# module-level caches.
56 def cache_content_path(fullpath):
# NOTE(review): `contents_name_cache` below does not match the module-level
# name `content_name_cache`. This function only mutates that dict and never
# rebinds it, so the lookups further down still reach the module global.
57 global content_file_cache, contents_name_cache, content_path_cache
58 global content_path_file, content_name_file, has_opened_temp_file_lists
60 # have we seen this contents before?
# NOTE(review): the body of this `if` is not visible in this excerpt -
# presumably an early return. No visible line adds `fullpath` to
# content_file_cache, so confirm whether this test can ever succeed.
61 if content_file_cache.has_key(fullpath):
64 # split the path into basename, and pathname
65 (path, file) = os.path.split(fullpath)
67 # Due to performance reasons, we need to get the entire filelists table
68 # sorted first before we can do association tables.
# Lazily open both list files; mode "w" truncates any existing file.
69 if has_opened_temp_file_lists == False:
70 content_path_file = open("/tmp/content_file_path.tmp", "w")
71 content_name_file = open("/tmp/content_name_path.tmp", "w")
72 has_opened_temp_file_lists = True
# Emit each distinct directory once, as a "DEFAULT <path>" line.
74 if not content_path_cache.has_key(path):
75 content_path_file.write("DEFAULT %s\n" % (path))
76 content_path_cache[path] = 1
# Emit each distinct file name once, as a "DEFAULT <name>" line.
78 if not content_name_cache.has_key(file):
79 content_name_file.write("DEFAULT %s\n" % (file))
80 content_name_cache[file] = 1
83 ################################################################################
# Read the gzipped Contents-<arch>.gz file for each architecture of a suite
# (excluding "source" and "all") and pass every listed file path to
# cache_content_path(). The work is bracketed by BEGIN WORK / COMMIT on the
# projectB connection.
#
# suites: presumably the suite names to process - the loop that binds `s`
# is not visible in this excerpt; confirm.
# NOTE(review): several original lines (loop headers, try/except, counters
# such as lines_processed and found_header, the arch_list initialisation)
# are not visible here; the comments below describe visible lines only.
85 def import_contents(suites):
89 projectB.query("BEGIN WORK")
91 # Needed to make sure PostgreSQL doesn't freak out on some of the data
92 projectB.query("SET CLIENT_ENCODING TO 'LATIN1'")
95 #print "Precaching binary information, this will take a few moments ..."
96 #database.preload_binary_id_cache()
# Splits a line into a lazily matched leading part and a final token that
# contains no whitespace.
99 line_regex = re.compile(r'^(.+?)\s+(\S+)$')
# Splits "a/b" at its last slash; only referenced by commented-out code below.
100 pkg_regex = re.compile(r'(\S+)/(\S+)$')
# Matches lines that begin with "FILE"; used below while looking for the
# end of the Contents header.
101 file_regex = re.compile('^FILE')
103 # Get our suites, and the architectures
105 suite_id = database.get_suite_id(s)
# Each result row is (architecture id, architecture name) for this suite.
107 q = projectB.query("SELECT s.architecture, a.arch_string FROM suite_architectures s JOIN architecture a ON (s.architecture=a.id) WHERE suite = '%d'" % suite_id)
# Keep only real binary architectures.
110 for r in q.getresult():
111 if r[1] != "source" and r[1] != "all":
112 arch_list.append((r[0], r[1]))
# NOTE(review): arch_all_id is not used by any visible line.
114 arch_all_id = database.get_architecture_id("all")
116 for arch in arch_list:
117 print "Processing %s/%s" % (s, arch[1])
# NOTE(review): arch_id is only referenced by commented-out code below.
118 arch_id = database.get_architecture_id(arch[1])
121 f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-%s.gz" % (s, arch[1]), "r")
124 print "Unable to open dists/%s/Contents-%s.gz" % (s, arch[1])
# readlines() loads the whole decompressed file into memory.
129 lines = f.readlines()
130 num_of_lines = len(lines)
132 # Ok, the file cursor is at the first entry, now comes the fun 'lets parse' bit
137 if found_header == False:
139 print "Unable to find end of Contents-%s.gz header!" % ( arch[1])
143 if file_regex.match(line):
147 # The format is simple enough, *filename*, *section/package1,section/package2,etc*
148 # Each file appears once per Contents file, so first, use some regex match
149 # to split the two bits
151 # Print out progress bar
# "\r" rewinds to the start of the line and the trailing comma suppresses the
# newline, so the progress text is overwritten in place; "%%" prints a
# literal percent sign.
152 print "\rProcessed %d lines of %d (%%%.2f)" % (lines_processed, num_of_lines, ((float(lines_processed)/num_of_lines)*100)),
154 # regex lifted from packages.d.o code
# findall() yields a list of (filename, packages) tuples; only the first
# match is used. NOTE(review): a line that does not match would make
# matchs[0] raise IndexError - confirm whether that can happen.
155 matchs = line_regex.findall(line)
156 filename = matchs[0][0]
# NOTE(review): `packages` is only consumed by the commented-out code below.
157 packages = matchs[0][1].split(',')
160 cache_content_path(filename)
162 # Iterate through each file's packages
163 #for package in packages:
164 # matchs = pkg_regex.findall(package)
166 # Needed since the DB is unicode, and these files
168 # section_name = matchs[0][0]
169 # package_name = matchs[0][1]
171 #section_id = database.get_section_id(section_name)
172 #package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_id)
174 # if package_id == None:
175 # This can happen if the Contents file refers to a non-existent package
176 # it seems Contents sometimes can be stale due to use of caches (i.e., hurd-i386)
177 # hurd-i386 was removed from the archive, but its Contents file still exists
178 # and is seemingly still updated. The sane thing to do is skip it and continue
184 print "" # newline since the Progress bar doesn't print one
# Close the list files opened by cache_content_path().
# NOTE(review): if cache_content_path() was never called these names still
# hold their initial "" placeholders, which have no close() - confirm.
189 content_name_file.close()
190 content_path_file.close()
192 print "Committing to database ..."
193 projectB.query("COMMIT")
195 ################################################################################
# Script entry point body: load the dak configuration, parse the command
# line, connect to the database and run import_contents().
# NOTE(review): the `def` line of this function, the end of the Arguments
# list and the branch that chooses between the two `suites` assignments are
# not visible in this excerpt.
198 global Cnf, projectB, out
201 Cnf = utils.get_conf()
# Each tuple is (short option, long option, configuration key[, "HasArg"]).
203 Arguments = [('h',"help","Import-Contents::Options::Help"),
204 ('s',"suite","Import-Contents::Options::Suite","HasArg"),
# Seed empty defaults so each option key exists before parsing.
207 for i in [ "help", "suite" ]:
208 if not Cnf.has_key("Import-Contents::Options::%s" % (i)):
209 Cnf["Import-Contents::Options::%s" % (i)] = ""
# NOTE(review): this value of `suites` is replaced by one of the two
# assignments below before it is used.
211 suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
212 Options = Cnf.SubTree("Import-Contents::Options")
# Suites named via the Suite option ...
218 suites = utils.split_args(Options["Suite"])
# ... or every suite listed under the "Suite" configuration tree
# (presumably the fallback when no suite option was given - confirm).
220 suites = Cnf.SubTree("Suite").List()
# Connect using the database name, host and port from the configuration.
222 projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
223 database.init(Cnf, projectB)
225 import_contents(suites)
227 #######################################################################################
229 if __name__ == '__main__':