3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
28 from daklib.dbconn import *
29 from daklib.config import Config
30 from daklib.threadpool import ThreadPool
31 from multiprocessing import Pool
33 from sqlalchemy import desc, or_
34 from sqlalchemy.exc import IntegrityError
35 from subprocess import Popen, PIPE, call
class ContentsWriter(object):
    '''
    ContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component = None):
        '''
        Stores the objects that select which Contents file gets written.

        The suite, architecture, overridetype and (optional) component
        objects are kept as passed in; the database session used for all
        queries is the one returned by suite.session().
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        The query yields (file, package) rows ordered by file name, where
        package has the form "section/package". If the suite names an
        overridesuite, sections are looked up there instead of in the suite
        itself. If a component was given, only overrides of that component
        are considered.
        '''
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        # NOTE(review): the 'package text' column definition and the
        # 'with unique_override as' wrapper were reconstructed from the
        # surrounding statements -- confirm against the deployed schema.
        if self.component is not None:
            params['component'] = self.component.component_id
            sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, o.section || '/' || b.package as package
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    order by bc.file, b.package'''

        else:
            sql = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with

unique_override as
    (select distinct on (o.package, s.section) o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id
        order by o.package, s.section, o.modified desc)

select bc.file, o.section || '/' || b.package as package
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    order by bc.file, b.package'''

        return self.session.query("file", "package").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.

        The file name is left-aligned and padded to 55 characters, followed
        by a space, the comma separated package_list and a newline.
        '''
        package_list = ','.join(package_list)
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.

        Consecutive rows of query() that share a file name are merged into
        one line listing all their packages.
        '''
        last_filename = None
        package_list = []
        try:
            for filename, package in self.query().yield_per(100):
                if filename != last_filename:
                    if last_filename is not None:
                        yield self.formatline(last_filename, package_list)
                    last_filename = filename
                    package_list = []
                package_list.append(package)
            if last_filename is not None:
                yield self.formatline(last_filename, package_list)
        finally:
            # end transaction to return connection to pool; done in a
            # finally clause so that it also happens when the consumer stops
            # iterating early or the query raises
            self.session.rollback()

    # NOTE(review): the 'def' line of this method is not present in the
    # reviewed source; the name get_list is reconstructed -- confirm.
    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def output_filename(self):
        '''
        Returns the name of the output file.

        The file lives below Dir::Root in dists/<suite>/, with an additional
        <component>/ directory level if a component was given.
        '''
        values = {
            'root':         Config()['Dir::Root'],
            'suite':        self.suite.suite_name,
            'architecture': self.architecture.arch_string
        }
        if self.component is None:
            return "%(root)s/dists/%(suite)s/Contents-%(architecture)s.gz" % values
        values['component'] = self.component.component_name
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.

        The header is the content of the file 'contents' in Dir::Templates.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # the with statement closes the file even if read() raises
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.

        The header and all lines of fetch() are piped through
        'gzip --rsyncable' into <output_filename>.new, which is then renamed
        to the final name and made group writable (mode 0664).
        '''
        command = ['gzip', '--rsyncable']
        final_filename = self.output_filename()
        temp_filename = final_filename + '.new'
        with open(temp_filename, 'w') as output_file:
            gzip = Popen(command, stdin = PIPE, stdout = output_file)
            try:
                gzip.stdin.write(self.get_header())
                for item in self.fetch():
                    gzip.stdin.write(item)
            finally:
                # always close the pipe and reap the child, otherwise a
                # failure while writing would leave a gzip process behind
                gzip.stdin.close()
                gzip.wait()
        # os.rename() atomically replaces an existing destination on POSIX.
        # Removing the old file first is unnecessary, would fail if no
        # previous file exists and would leave a moment without any file.
        os.rename(temp_filename, final_filename)
        os.chmod(final_filename, 0o664)

    @classmethod
    def write_all(class_, suite_names = None, force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.

        The work is done by 'dak contents ... generate_helper' subprocesses
        that are run in parallel by a multiprocessing pool; this method
        returns after all of them have finished.
        '''
        # Trailing arguments of each 'generate_helper' invocation: all 'deb'
        # packages, then 'udeb' packages for 'main' and for 'non-free'.
        helper_args = (['deb'], ['udeb', 'main'], ['udeb', 'non-free'])
        session = DBConn().session()
        try:
            suite_query = session.query(Suite)
            if suite_names:
                suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
            if not force:
                suite_query = suite_query.filter_by(untouchable = False)
            pool = Pool()
            try:
                for suite in suite_query:
                    for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                        for extra_args in helper_args:
                            command = ['dak', 'contents', '-s', suite.suite_name,
                                'generate_helper', architecture.arch_string] + extra_args
                            pool.apply_async(call, (command, ))
            finally:
                pool.close()
                pool.join()
        finally:
            session.close()
class ContentsScanner(object):
    '''
    ContentsScanner provides a threadsafe method scan() to scan the contents of
    a DBBinary object.
    '''
    def __init__(self, binary):
        '''
        The argument binary is the actual DBBinary object that should be
        scanned. Only its binary_id is stored; scan() loads the object again
        in a session of its own.
        '''
        self.binary_id = binary.binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.

        A binary without any files gets the single entry 'EMPTY_PACKAGE'.
        '''
        session = DBConn().session()
        try:
            binary = session.query(DBBinary).get(self.binary_id)
            fileset = set(binary.scan_contents())
            if not fileset:
                fileset.add('EMPTY_PACKAGE')
            for filename in fileset:
                binary.contents.append(BinContents(file = filename))
            session.commit()
        finally:
            # close the session even if scanning or committing failed so
            # that a worker thread does not keep its connection
            session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict with the keys 'processed' and 'remaining'.
        '''
        session = DBConn().session()
        try:
            # '== None' is intentional: SQLAlchemy turns the comparison into
            # a SQL condition, 'is None' would not work here
            query = session.query(DBBinary).filter(DBBinary.contents == None)
            # keep the counter of the unlimited query; it is evaluated only
            # after all scans have finished
            count_remaining = query.count
            if limit is not None:
                query = query.limit(limit)
            processed = query.count()
            threadpool = ThreadPool()
            for binary in query.yield_per(100):
                threadpool.queueTask(class_(binary).scan)
            threadpool.joinAll()
            remaining = count_remaining()
        finally:
            session.close()
        return { 'processed': processed, 'remaining': remaining }