1 #!/usr/bin/env python2.4
2 # Import contents files
4 # Copyright (C) 2008, 2009 Michael Casadevall <mcasadevall@debian.org>
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 ################################################################################
21 ################################################################################
23 ################################################################################
25 import sys, os, popen2, tempfile, stat, time, pg
26 import re, gzip, apt_pkg
27 from daklib import database, utils
28 from daklib.dak_exceptions import *
30 ################################################################################
# Module-level state.
# NOTE(review): the next three names are not referenced anywhere in the
# visible part of this file; presumably leftovers from a temp-file based
# approach -- confirm before removing.
has_opened_temp_file_lists = False
content_path_file = ""
content_name_file = ""

# De-duplication caches filled by cache_content_path():
#   content_file_cache -- every full path already seen
#   content_name_cache -- final path components (file names)
#   content_path_cache -- directory parts
content_file_cache = set()
content_name_cache = set()
content_path_cache = set()
43 ################################################################################
# Print the command-line help text for "dak import-contents".
# exit_code defaults to 0.
# NOTE(review): in this view the triple-quoted string opened on the print
# line is never closed and exit_code is never used, so lines appear to be
# missing here (presumably the closing quotes and an exit call) -- confirm
# against the original file before editing this function.
45 def usage (exit_code=0):
46 print """Usage: dak import-contents
49 -h, --help show this help and exit
50 -s, --suite=SUITE only write file lists for this suite
54 ################################################################################
def cache_content_path(fullpath):
    """Record one Contents path in the module-level caches.

    The full path is added to content_file_cache, its directory part to
    content_path_cache and its final component to content_name_cache.
    A path that has already been cached is skipped without being split
    again.

    fullpath -- path string, as parsed from a line of a Contents file

    Returns None; the three caches are updated in place.
    """
    # The caches are only mutated, never rebound, so no `global`
    # declaration is required (the old one also misspelled
    # content_name_cache as contents_name_cache).

    # Have we seen this path before?
    if fullpath in content_file_cache:
        return

    # Add the new key to the cache
    content_file_cache.add(fullpath)

    # Split the path into directory part and file name
    (path, name) = os.path.split(fullpath)

    # Due to performance reasons, we need to get the entire filelists table
    # sorted first before we can do association tables.
    # set.add() is a no-op for members that are already present, so no
    # membership test is needed first.
    content_path_cache.add(path)
    content_name_cache.add(name)
79 ################################################################################
# Import the file names found in the per-architecture Contents-<arch>.gz
# files into the database.
#
# Visible flow: open a transaction ("BEGIN WORK"), switch the client encoding
# to LATIN1, read each Contents file under Cnf["Dir::Root"] + "dists/...",
# split every entry line into a file name and a comma-separated package list,
# feed the file name to cache_content_path(), and finally COPY the collected
# content_name_cache entries into content_file_names (file) before "COMMIT".
#
# suites -- passed in by the caller; NOTE(review): the visible lines use `s`
#           rather than `suites`, so the loop header over suites is presumably
#           among the missing lines -- confirm.
#
# NOTE(review): `s`, `arch_list`, `found_header`, `line` and `lines_processed`
# are used below but never assigned in the visible lines; several statements
# (loop headers, try/except, initialisations) appear to be missing from this
# view. Confirm against the original file before changing any logic here.
81 def import_contents(suites):
85 projectB.query("BEGIN WORK")
87 # Needed to make sure postgreSQL doesn't freak out on some of the data
88 projectB.query("SET CLIENT_ENCODING TO 'LATIN1'")
91 #print "Precaching binary information, this will take a few moments ..."
92 #database.preload_binary_id_cache()
# line_regex splits an entry into (file name, trailing non-space package field).
# pkg_regex is only referenced from the commented-out code further down.
# file_regex matches lines that start with "FILE".
95 line_regex = re.compile(r'^(.+?)\s+(\S+)$')
96 pkg_regex = re.compile(r'(\S+)/(\S+)$')
97 file_regex = re.compile('^FILE')
99 # Get our suites, and the architectures
# suite_id, arch_all_id and arch_id are looked up here but, in the visible
# lines, are only consumed by the commented-out code further down.
101 suite_id = database.get_suite_id(s)
# "source" and "all" are filtered out of the suite's architecture list.
104 for r in Cnf.ValueList("Suite::%s::Architectures" % (s)):
105 if r != "source" and r != "all":
108 arch_all_id = database.get_architecture_id("all")
110 for arch in arch_list:
111 print "Processing %s/%s" % (s, arch)
112 arch_id = database.get_architecture_id(arch)
115 f = gzip.open(Cnf["Dir::Root"] + "dists/%s/Contents-%s.gz" % (s, arch), "r")
118 print "Unable to open dists/%s/Contents-%s.gz" % (s, arch)
# The whole decompressed file is read into memory; its length feeds the
# progress display below.
123 lines = f.readlines()
124 num_of_lines = len(lines)
126 # Ok, the file cursor is at the first entry, now comes the fun 'lets parse' bit
131 if found_header == False:
133 print "Unable to find end of Contents-%s.gz header!" % (arch)
# presumably a line starting with "FILE" marks the end of the header -- TODO confirm
137 if file_regex.match(line):
141 # The format is simple enough, *filename*, *section/package1,section/package2,etc*
142 # Each file appears once per Contents file, so first, use some regex match
143 # to split the two bits
145 # Print out progress bar
# "\r" returns to the start of the line, "%%" prints a literal percent sign,
# and the trailing comma stops the print statement from emitting a newline.
146 print "\rProcessed %d lines of %d (%%%.2f)" % (lines_processed, num_of_lines, ((float(lines_processed)/num_of_lines)*100)),
148 # regex lifted from packages.d.o code
149 matchs = line_regex.findall(line)
150 filename = matchs[0][0]
# `packages` is only used by the commented-out per-package code below.
151 packages = matchs[0][1].split(',')
154 cache_content_path(filename)
156 # Iterate through each file's packages
157 #for package in packages:
158 # matchs = pkg_regex.findall(package)
160 # Needed since the DB is unicode, and these files
162 # section_name = matchs[0][0]
163 # package_name = matchs[0][1]
165 #section_id = database.get_section_id(section_name)
166 #package_id = database.get_latest_binary_version_id(package_name, section_id, suite_id, arch_id)
168 # if package_id == None:
169 # This can happen if the Contents file refers to a non-existent package
170 # it seems Contents sometimes can be stale due to use of caches (i.e., hurd-i386)
171 # hurd-i386 was removed from the archive, but its Contents file still exists
172 # and is seemingly still updated. The sane thing to do is skip it and continue
178 print "" # newline since the Progress bar doesn't print one
183 print "Committing to database ..."
# Bulk-load the cached file names: one line is sent to the COPY stream per name.
184 projectB.query("COPY content_file_names (file) FROM STDIN")
186 for line in content_name_cache:
187 projectB.putline("%s\n" % (line))
# NOTE(review): no end-of-COPY call is visible between the putline loop and
# COMMIT in this view (PyGreSQL's classic interface pairs putline() with
# endcopy()) -- confirm it exists in the original file.
191 projectB.query("COMMIT")
193 ################################################################################
# Script entry routine: load the dak configuration, parse the command line,
# choose the suites to process, connect to the database and run
# import_contents().
# NOTE(review): the enclosing `def` line is not visible in this view, the
# Arguments list literal below is not closed, and the handling of the "help"
# option is not shown; lines appear to be missing -- confirm against the
# original file before editing.
196 global Cnf, projectB, out
199 Cnf = utils.get_conf()
201 Arguments = [('h',"help","Import-Contents::Options::Help"),
202 ('s',"suite","Import-Contents::Options::Suite","HasArg"),
# Give every option key an empty default if it is not already set.
205 for i in [ "help", "suite" ]:
206 if not Cnf.has_key("Import-Contents::Options::%s" % (i)):
207 Cnf["Import-Contents::Options::%s" % (i)] = ""
209 suites = apt_pkg.ParseCommandLine(Cnf,Arguments,sys.argv)
210 Options = Cnf.SubTree("Import-Contents::Options")
# NOTE(review): the two assignments below are presumably the two arms of a
# condition that is not visible here (suites given via the Suite option versus
# every suite listed under the "Suite" config tree) -- confirm.
216 suites = utils.split_args(Options["Suite"])
218 suites = Cnf.SubTree("Suite").List()
# Connect using the configured DB name, host and (integer) port, then hand the
# connection to the database helper module.
220 projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
221 database.init(Cnf, projectB)
223 import_contents(suites)
225 #######################################################################################
# Run only when the file is executed as a script, not when it is imported.
# NOTE(review): the guarded statement is not visible in this view --
# presumably a call to the entry routine above; confirm.
227 if __name__ == '__main__':