3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
28 from daklib.dbconn import *
29 from daklib.config import Config
30 from daklib.threadpool import ThreadPool
31 from multiprocessing import Pool
33 from sqlalchemy import desc, or_
34 from subprocess import Popen, PIPE
class ContentsWriter(object):
    '''
    ContentsWriter writes the Contents-$arch.gz files.

    One writer handles a single combination of suite, architecture, override
    type and (optionally) component.
    '''

    # First part of the SQL statement, shared by both variants of query():
    # fill a temporary table with the newest version of every binary package
    # of the requested type and architecture in the suite, then open the
    # "unique_override" common table expression.
    _SQL_NEWEST_BINARIES = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;

with

unique_override as
'''

    # Body of "unique_override" when the writer is limited to one component.
    _SQL_OVERRIDE_FOR_COMPONENT = '''\
    (select o.package, s.section
        from override o, section s
        where o.suite = :suite and o.type = :type_id and o.section = s.id and
        o.component = :component)
'''

    # Body of "unique_override" when all components are considered; the
    # "distinct on" keeps one row per (package, section) pair.
    _SQL_OVERRIDE_ANY_COMPONENT = '''\
    (select distinct on (o.package, s.section) o.package, s.section
        from override o, section s
        where o.suite = :suite and o.type = :type_id and o.section = s.id
        order by o.package, s.section, o.modified desc)
'''

    # Last part of the SQL statement, shared by both variants of query():
    # one (file, "section/package") row per file of every selected binary,
    # ordered by file name.
    _SQL_SELECT_CONTENTS = '''
select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    order by bc.file, b.package'''

    def __init__(self, suite, architecture, overridetype, component=None):
        '''
        Clones the arguments into a session of their own.

        The suite is cloned first; its session is then used to clone the
        architecture, the override type and, if given, the component, so the
        writer does not share a session with the objects passed in.
        self.component is None when no component is given.
        '''
        self.suite = suite.clone()
        self.session = self.suite.session()
        self.architecture = architecture.clone(self.session)
        self.overridetype = overridetype.clone(self.session)
        # Always define the attribute: query() and output_filename() test it.
        if component is None:
            self.component = None
        else:
            self.component = component.clone(self.session)

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        The query yields (file, package) rows ordered by file name. If the
        writer has a component, only overrides of that component are used.
        '''
        params = {
            'suite': self.suite.suite_id,
            'arch_all': get_architecture('all', self.session).arch_id,
            'arch': self.architecture.arch_id,
            'type_id': self.overridetype.overridetype_id,
            'type': self.overridetype.overridetype,
        }
        if self.component is not None:
            params['component'] = self.component.component_id
            override_sql = self._SQL_OVERRIDE_FOR_COMPONENT
        else:
            override_sql = self._SQL_OVERRIDE_ANY_COMPONENT
        sql = self._SQL_NEWEST_BINARIES + override_sql + \
            self._SQL_SELECT_CONTENTS
        return self.session.query("file", "package").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.

        The filename is left-justified in a field of 55 characters, followed
        by a space, the comma separated package_list and a newline.
        '''
        return "%-55s %s\n" % (filename, ','.join(package_list))

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.

        Consecutive rows of query() that share a filename are merged into a
        single line listing all their packages.
        '''
        last_filename = None
        package_list = []
        try:
            for filename, package in self.query().yield_per(100):
                if filename != last_filename:
                    if last_filename is not None:
                        yield self.formatline(last_filename, package_list)
                    last_filename = filename
                    # start a fresh list for the new filename
                    package_list = []
                package_list.append(package)
            if last_filename is not None:
                yield self.formatline(last_filename, package_list)
        finally:
            # end transaction to return connection to pool, even if the
            # consumer stops iterating early or the query fails
            self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def output_filename(self):
        '''
        Returns the name of the output file.

        The file lives below <Dir::Root>/dists/<suite>/, with an additional
        <component>/ directory if the writer has a component.
        '''
        values = {
            'root': Config()['Dir::Root'],
            'suite': self.suite.suite_name,
            'architecture': self.architecture.arch_string,
        }
        if self.component is None:
            return "%(root)s/dists/%(suite)s/Contents-%(architecture)s.gz" % values
        values['component'] = self.component.component_name
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.

        The header is the content of the file 'contents' in the directory
        configured as Dir::Templates.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.

        The header and all lines from fetch() are piped through
        'gzip --rsyncable' whose output goes to output_filename().
        '''
        command = ['gzip', '--rsyncable']
        with open(self.output_filename(), 'w') as output_file:
            gzip = Popen(command, stdin=PIPE, stdout=output_file)
            try:
                gzip.stdin.write(self.get_header())
                for item in self.fetch():
                    gzip.stdin.write(item)
            finally:
                # closing stdin signals end of input to gzip; wait for it to
                # finish before the output file is closed
                gzip.stdin.close()
                gzip.wait()

    @classmethod
    def write_all(class_, suite_names=None, force=False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.

        For every architecture of a suite (source and 'all' are skipped) one
        file is written for 'deb' packages and one each for 'udeb' packages of
        the components 'main' and 'non-free'.
        '''
        session = DBConn().session()
        try:
            suite_query = session.query(Suite)
            if suite_names:
                suite_query = suite_query.filter(
                    Suite.suite_name.in_(suite_names))
            if not force:
                suite_query = suite_query.filter_by(untouchable=False)
            main = get_component('main', session)
            non_free = get_component('non-free', session)
            deb = get_override_type('deb', session)
            udeb = get_override_type('udeb', session)
            # (override type, component) combinations written per architecture
            jobs = [(deb, None), (udeb, main), (udeb, non_free)]
            pool = Pool()
            try:
                for suite in suite_query:
                    architectures = suite.get_architectures(
                        skipsrc=True, skipall=True)
                    for architecture in architectures:
                        for overridetype, component in jobs:
                            writer = ContentsWriter(
                                suite, architecture, overridetype,
                                component=component)
                            pool.apply(writer.write_file)
            finally:
                pool.close()
                pool.join()
        finally:
            session.close()
class ContentsScanner(object):
    '''
    ContentsScanner provides a threadsafe method scan() to scan the contents of
    a binary package. Every call of scan() works on a database session of its
    own.
    '''

    def __init__(self, binary):
        '''
        The argument binary is the actual DBBinary object that should be
        scanned. Only its binary_id is stored; scan() loads the object again
        in its own session.
        '''
        self.binary_id = binary.binary_id

    def scan(self, dummy_arg=None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        try:
            binary = session.query(DBBinary).get(self.binary_id)
            for filename in binary.scan_contents():
                binary.contents.append(BinContents(file=filename))
            session.commit()
        finally:
            session.close()

    @classmethod
    def scan_all(class_, limit=None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dictionary with the keys 'processed' and 'remaining'.
        '''
        session = DBConn().session()
        try:
            # "== None" is intentional: it builds an SQL expression selecting
            # the binaries without contents and must not be written "is None".
            query = session.query(DBBinary).filter(DBBinary.contents == None)
            # Keep the count method of the unlimited query; it is called only
            # after the scan so that it reports what is still left to do.
            count_remaining = query.count
            if limit is not None:
                query = query.limit(limit)
            processed = query.count()
            threadpool = ThreadPool()
            for binary in query.yield_per(100):
                threadpool.queueTask(ContentsScanner(binary).scan)
            # NOTE(review): joinAll() is assumed to be the ThreadPool method
            # that waits for all queued tasks - confirm against
            # daklib.threadpool.
            threadpool.joinAll()
            remaining = count_remaining()
        finally:
            session.close()
        return {'processed': processed, 'remaining': remaining}