"""
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
"""

################################################################################

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

################################################################################

import os.path
from subprocess import Popen, PIPE

from sqlalchemy import desc, or_

from daklib.dbconn import *
from daklib.config import Config
from daklib.threadpool import ThreadPool


class ContentsWriter(object):
    '''
    ContentsWriter writes the Contents-$arch.gz files.

    One instance handles a single suite / architecture / override type
    combination, optionally restricted to one component.
    '''

    # Statements shared by both query variants: collect one row per package
    # name (highest version first) for the suite and architecture into a
    # temporary table.
    # NOTE(review): the column type of "package" is assumed to be text -
    # confirm against the schema of the binaries table.
    _NEWEST_BINARIES_SQL = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;
'''

    # Override lookup used when the writer is restricted to one component.
    _COMPONENT_OVERRIDE_SQL = '''
with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :suite and o.type = :type_id and o.section = s.id and
        o.component = :component)
'''

    # Override lookup used when no component was given: one row per
    # (package, section) pair, preferring the most recently modified override.
    _ANY_COMPONENT_OVERRIDE_SQL = '''
with

unique_override as
    (select distinct on (o.package, s.section) o.package, s.section
        from override o, section s
        where o.suite = :suite and o.type = :type_id and o.section = s.id
        order by o.package, s.section, o.modified desc)
'''

    # Final select shared by both variants; it produces the (file, package)
    # rows consumed by fetch().
    _CONTENTS_SELECT_SQL = '''
select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    order by bc.file, b.package'''

    def __init__(self, suite, architecture, overridetype, component=None):
        '''
        The constructor clones its arguments into a new session object to make
        sure that the new ContentsWriter object can be executed in a different
        thread.

        The attribute component is always set; it is None if no component has
        been given.
        '''
        self.suite = suite.clone()
        self.session = self.suite.session()
        self.architecture = architecture.clone(self.session)
        self.overridetype = overridetype.clone(self.session)
        # query() and output_filename() test "self.component is None", so the
        # attribute has to exist even without a component.
        if component is not None:
            self.component = component.clone(self.session)
        else:
            self.component = None

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        The rows of the query are (file, package) pairs ordered by file and
        package name. The package value is the override section with
        everything up to its first '/' removed, followed by '/' and the
        package name.
        '''
        params = {
            'suite': self.suite.suite_id,
            'arch_all': get_architecture('all', self.session).arch_id,
            'arch': self.architecture.arch_id,
            'type_id': self.overridetype.overridetype_id,
            'type': self.overridetype.overridetype,
        }
        if self.component is not None:
            params['component'] = self.component.component_id
            override_sql = self._COMPONENT_OVERRIDE_SQL
        else:
            override_sql = self._ANY_COMPONENT_OVERRIDE_SQL
        sql = self._NEWEST_BINARIES_SQL + override_sql + \
            self._CONTENTS_SELECT_SQL
        return self.session.query("file", "package").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.

        The filename is left-aligned and padded to 55 characters, followed by
        a space, the comma separated package_list and a newline.
        '''
        package_list = ','.join(package_list)
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.

        Consecutive rows of query() that share a filename are merged into a
        single line. The transaction is rolled back when the generator
        finishes, fails or is closed early.
        '''
        last_filename = None
        package_list = []
        try:
            for filename, package in self.query().yield_per(100):
                if filename != last_filename:
                    if last_filename is not None:
                        yield self.formatline(last_filename, package_list)
                    last_filename = filename
                    # start collecting packages for the next filename
                    package_list = []
                package_list.append(package)
            if last_filename is not None:
                yield self.formatline(last_filename, package_list)
        finally:
            # end transaction to return connection to pool
            self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def output_filename(self):
        '''
        Returns the name of the output file.

        The name is located below Dir::Root of the configuration and contains
        the component name only if the writer has a component.
        '''
        values = {
            'root': Config()['Dir::Root'],
            'suite': self.suite.suite_name,
            'architecture': self.architecture.arch_string,
        }
        if self.component is None:
            return "%(root)s/dists/%(suite)s/Contents-%(architecture)s.gz" % values
        values['component'] = self.component.component_name
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.

        The header is read from the file 'contents' in Dir::Templates.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # the context manager closes the template even if read() fails
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self, dummy_arg=None):
        '''
        Write the output file. The argument dummy_arg is ignored but needed by
        our threadpool implementation.

        The header and all lines of fetch() are piped through
        'gzip --rsyncable' into output_filename(). The method returns after
        gzip has exited.
        '''
        command = ['gzip', '--rsyncable']
        with open(self.output_filename(), 'w') as output_file:
            gzip = Popen(command, stdin=PIPE, stdout=output_file)
            try:
                gzip.stdin.write(self.get_header())
                for item in self.fetch():
                    gzip.stdin.write(item)
            finally:
                # closing stdin signals end of input; waiting makes sure the
                # compressed data is complete before the file is closed
                gzip.stdin.close()
                gzip.wait()

    @classmethod
    def write_all(class_, suite_names=None, force=False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.

        For every architecture of a suite one file for 'deb' packages and one
        file each for the 'udeb' packages of 'main' and 'non-free' is queued
        in a thread pool.
        '''
        session = DBConn().session()
        try:
            suite_query = session.query(Suite)
            if suite_names:
                suite_query = suite_query.filter(
                    Suite.suite_name.in_(suite_names))
            if not force:
                suite_query = suite_query.filter_by(untouchable=False)
            main = get_component('main', session)
            non_free = get_component('non-free', session)
            deb = get_override_type('deb', session)
            udeb = get_override_type('udeb', session)
            threadpool = ThreadPool()
            for suite in suite_query:
                for architecture in suite.get_architectures(skipsrc=True, skipall=True):
                    # handle 'deb' packages
                    writer = ContentsWriter(suite, architecture, deb)
                    threadpool.queueTask(writer.write_file)
                    # handle 'udeb' packages for 'main' and 'non-free'
                    for component in (main, non_free):
                        writer = ContentsWriter(suite, architecture, udeb,
                                                component=component)
                        threadpool.queueTask(writer.write_file)
            # NOTE(review): assumes ThreadPool.joinAll() blocks until all
            # queued tasks have finished - confirm against daklib.threadpool.
            threadpool.joinAll()
        finally:
            session.close()


class ContentsScanner(object):
    '''
    ContentsScanner provides a threadsafe method scan() to scan the contents of
    a DBBinary object.
    '''

    def __init__(self, binary):
        '''
        The argument binary is the actual DBBinary object that should be
        scanned. Only its binary_id is stored; scan() loads the object again
        in a session of its own.
        '''
        self.binary_id = binary.binary_id

    def scan(self, dummy_arg=None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        try:
            binary = session.query(DBBinary).get(self.binary_id)
            for filename in binary.scan_contents():
                binary.contents.append(BinContents(file=filename))
            session.commit()
        finally:
            # release the session even if the scan or the commit failed
            session.close()

    @classmethod
    def scan_all(class_, limit=None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict with the keys 'processed' and 'remaining'.
        '''
        session = DBConn().session()
        try:
            # "== None" is intentional: it builds an SQL expression
            unscanned = session.query(DBBinary).filter(DBBinary.contents == None)  # noqa: E711
            query = unscanned
            if limit is not None:
                query = unscanned.limit(limit)
            processed = query.count()
            threadpool = ThreadPool()
            for binary in query.yield_per(100):
                threadpool.queueTask(ContentsScanner(binary).scan)
            # NOTE(review): assumes ThreadPool.joinAll() blocks until all
            # queued tasks have finished - confirm against daklib.threadpool.
            threadpool.joinAll()
            # counted without the limit and only after the queued scans ran
            remaining = unscanned.count()
        finally:
            session.close()
        return {'processed': processed, 'remaining': remaining}