'''
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
'''
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
import os.path

from multiprocessing import Pool
from subprocess import Popen, PIPE, call

from sqlalchemy import desc, or_

from daklib.config import Config
from daklib.dbconn import *
from daklib.threadpool import ThreadPool
class ContentsWriter(object):
    '''
    ContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component = None):
        '''
        The constructor clones its arguments into a new session object to make
        sure that the new ContentsWriter object can be executed in a different
        thread.

        @param suite: the Suite object the Contents file is generated for
        @param architecture: the Architecture object ('all' is handled
            implicitly by query())
        @param overridetype: the OverrideType object ('deb' or 'udeb')
        @param component: optional Component object; restricts the override
            lookup to a single component
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        # reuse the session the suite object is attached to so that all
        # queries run inside the same transaction
        self.session = suite.session()
56 Returns a query object that is doing most of the work.
58 overridesuite = self.suite
59 if self.suite.overridesuite is not None:
60 overridesuite = self.suite.overridesuite
62 'suite': self.suite.suite_id,
63 'overridesuite': overridesuite.suite_id
64 'arch_all': get_architecture('all', self.session).arch_id,
65 'arch': self.architecture.arch_id,
66 'type_id': self.overridetype.overridetype_id,
67 'type': self.overridetype.overridetype,
70 if self.component is not None:
71 params['component'] = self.component.component_id
73 create temp table newest_binaries (
74 id integer primary key,
77 create index newest_binaries_by_package on newest_binaries (package);
79 insert into newest_binaries (id, package)
80 select distinct on (package) id, package from binaries
81 where type = :type and
82 (architecture = :arch_all or architecture = :arch) and
83 id in (select bin from bin_associations where suite = :suite)
84 order by package, version desc;
89 (select o.package, s.section
90 from override o, section s
91 where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
92 o.component = :component)
94 select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
95 from newest_binaries b, bin_contents bc, unique_override o
96 where b.id = bc.binary_id and o.package = b.package
97 order by bc.file, b.package'''
101 create temp table newest_binaries (
102 id integer primary key,
105 create index newest_binaries_by_package on newest_binaries (package);
107 insert into newest_binaries (id, package)
108 select distinct on (package) id, package from binaries
109 where type = :type and
110 (architecture = :arch_all or architecture = :arch) and
111 id in (select bin from bin_associations where suite = :suite)
112 order by package, version desc;
117 (select distinct on (o.package, s.section) o.package, s.section
118 from override o, section s
119 where o.suite = :overridesuite and o.type = :type_id and o.section = s.id
120 order by o.package, s.section, o.modified desc)
122 select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
123 from newest_binaries b, bin_contents bc, unique_override o
124 where b.id = bc.binary_id and o.package = b.package
125 order by bc.file, b.package'''
127 return self.session.query("file", "package").from_statement(sql). \
130 def formatline(self, filename, package_list):
132 Returns a formatted string for the filename argument.
134 package_list = ','.join(package_list)
135 return "%-55s %s\n" % (filename, package_list)
139 Yields a new line of the Contents-$arch.gz file in filename order.
143 for filename, package in self.query().yield_per(100):
144 if filename != last_filename:
145 if last_filename is not None:
146 yield self.formatline(last_filename, package_list)
147 last_filename = filename
149 package_list.append(package)
150 if last_filename is not None:
151 yield self.formatline(last_filename, package_list)
152 # end transaction to return connection to pool
153 self.session.rollback()
157 Returns a list of lines for the Contents-$arch.gz file.
159 return [item for item in self.fetch()]
161 def output_filename(self):
163 Returns the name of the output file.
166 'root': Config()['Dir::Root'],
167 'suite': self.suite.suite_name,
168 'architecture': self.architecture.arch_string
170 if self.component is None:
171 return "%(root)s/dists/%(suite)s/Contents-%(architecture)s.gz" % values
172 values['component'] = self.component.component_name
173 return "%(root)s/dists/%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values
175 def get_header(self):
177 Returns the header for the Contents files as a string.
181 filename = os.path.join(Config()['Dir::Templates'], 'contents')
182 header_file = open(filename)
183 return header_file.read()
188 def write_file(self):
190 Write the output file.
192 command = ['gzip', '--rsyncable']
193 output_file = open(self.output_filename(), 'w')
194 gzip = Popen(command, stdin = PIPE, stdout = output_file)
195 gzip.stdin.write(self.get_header())
196 for item in self.fetch():
197 gzip.stdin.write(item)
203 def write_all(class_, suite_names = [], force = False):
205 Writes all Contents files for suites in list suite_names which defaults
206 to all 'touchable' suites if not specified explicitely. Untouchable
207 suites will be included if the force argument is set to True.
209 session = DBConn().session()
210 suite_query = session.query(Suite)
211 if len(suite_names) > 0:
212 suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
214 suite_query = suite_query.filter_by(untouchable = False)
216 for suite in suite_query:
217 for architecture in suite.get_architectures(skipsrc = True, skipall = True):
218 # handle 'deb' packages
219 command = ['dak', 'contents', '-s', suite.suite_name, \
220 'generate_helper', architecture.arch_string, 'deb']
221 pool.apply_async(call, (command, ))
222 # handle 'udeb' packages for 'main' and 'non-free'
223 command = ['dak', 'contents', '-s', suite.suite_name, \
224 'generate_helper', architecture.arch_string, 'udeb', 'main']
225 pool.apply_async(call, (command, ))
226 command = ['dak', 'contents', '-s', suite.suite_name, \
227 'generate_helper', architecture.arch_string, 'udeb', 'non-free']
228 pool.apply_async(call, (command, ))
class ContentsScanner(object):
    '''
    ContentsScanner provides a threadsafe method scan() to scan the contents of
    a DBBinary object.
    '''
    def __init__(self, binary):
        '''
        The argument binary is the actual DBBinary object that should be
        scanned.
        '''
        # store only the id; scan() opens its own session and re-fetches the
        # object so the scanner can run in a different thread
        self.binary_id = binary.binary_id
246 def scan(self, dummy_arg = None):
248 This method does the actual scan and fills in the associated BinContents
249 property. It commits any changes to the database. The argument dummy_arg
250 is ignored but needed by our threadpool implementation.
252 session = DBConn().session()
253 binary = session.query(DBBinary).get(self.binary_id)
254 for filename in binary.scan_contents():
255 binary.contents.append(BinContents(file = filename))
260 def scan_all(class_, limit = None):
262 The class method scan_all() scans all binaries using multiple threads.
263 The number of binaries to be scanned can be limited with the limit
264 argument. Returns the number of processed and remaining packages as a
267 session = DBConn().session()
268 query = session.query(DBBinary).filter(DBBinary.contents == None)
269 remaining = query.count
270 if limit is not None:
271 query = query.limit(limit)
272 processed = query.count()
273 threadpool = ThreadPool()
274 for binary in query.yield_per(100):
275 threadpool.queueTask(ContentsScanner(binary).scan)
277 remaining = remaining()
279 return { 'processed': processed, 'remaining': remaining }