3 Helper code for contents generation.
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2011 Torsten Werner <twerner@debian.org>
7 @license: GNU General Public License version 2 or later
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
28 from daklib.dbconn import *
29 from daklib.config import Config
30 from daklib.threadpool import ThreadPool
32 from sqlalchemy import desc, or_
33 from subprocess import Popen, PIPE
class ContentsWriter(object):
    '''
    ContentsWriter writes the Contents-$arch.gz files.
    '''

    # Builds the temporary table holding, per package name, the id of the
    # first binary in "version desc" order that is in the suite and matches
    # the requested type and architecture (or architecture 'all').
    _NEWEST_BINARIES_SQL = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;
'''

    # Override lookup restricted to a single component.
    _OVERRIDE_FOR_COMPONENT_SQL = '''
with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :suite and o.type = :type_id and o.section = s.id and
        o.component = :component)
'''

    # Override lookup across all components, one row per (package, section),
    # preferring the most recently modified override.
    _OVERRIDE_ANY_COMPONENT_SQL = '''
with

unique_override as
    (select distinct on (o.package, s.section) o.package, s.section
        from override o, section s
        where o.suite = :suite and o.type = :type_id and o.section = s.id
        order by o.package, s.section, o.modified desc)
'''

    # Final listing: one row per (file, "section/package") in file order.
    _FILE_LISTING_SQL = '''
select bc.file, substring(o.section from position('/' in o.section) + 1) || '/' || b.package as package
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    order by bc.file, b.package'''

    def __init__(self, suite, architecture, overridetype, component = None):
        '''
        The constructor clones its arguments into a new session object so that
        the new ContentsWriter object does not depend on the caller's session.

        The component argument is optional; when it is None the writer is not
        restricted to a single component.
        '''
        self.suite = suite.clone()
        self.session = self.suite.session()
        self.architecture = architecture.clone(self.session)
        self.overridetype = overridetype.clone(self.session)
        # Always define the attribute: query() and output_filename() test it
        # against None.
        if component is not None:
            self.component = component.clone(self.session)
        else:
            self.component = None

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        The query yields (file, package) rows ordered by file name, where
        package is the section (without its leading "area/" part) joined with
        the package name.
        '''
        params = {
            'suite':    self.suite.suite_id,
            'arch_all': get_architecture('all', self.session).arch_id,
            'arch':     self.architecture.arch_id,
            'type_id':  self.overridetype.overridetype_id,
            'type':     self.overridetype.overridetype,
        }

        if self.component is not None:
            # Use the cloned component stored on the instance; a bare
            # "component" name does not exist in this scope.
            params['component'] = self.component.component_id
            override_sql = self._OVERRIDE_FOR_COMPONENT_SQL
        else:
            override_sql = self._OVERRIDE_ANY_COMPONENT_SQL

        sql = self._NEWEST_BINARIES_SQL + override_sql + self._FILE_LISTING_SQL
        return self.session.query("file", "package").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.

        The file name is left-justified in a 60 character wide column and is
        followed by the comma separated package_list and a newline.
        '''
        package_list = ','.join(package_list)
        return "%-60s%s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.

        Consecutive rows of the query that share a file name are merged into a
        single line. Nothing is yielded when the query returns no rows.
        '''
        last_filename = None
        package_list = []
        for filename, package in self.query().yield_per(100):
            if filename != last_filename:
                if last_filename is not None:
                    yield self.formatline(last_filename, package_list)
                last_filename = filename
                package_list = []
            package_list.append(package)
        # Flush the last group, but only if there was at least one row;
        # otherwise a bogus line for the file name "None" would be emitted.
        if last_filename is not None:
            yield self.formatline(last_filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def output_filename(self):
        '''
        Returns the name of the output file.

        The name is built from the configured Dir::Root, the suite name, the
        component name (only if a component was given) and the architecture.
        '''
        values = {
            'root':         Config()['Dir::Root'],
            'suite':        self.suite.suite_name,
            'architecture': self.architecture.arch_string
        }
        if self.component is None:
            return "%(root)s%(suite)s/Contents-%(architecture)s.gz" % values
        values['component'] = self.component.component_name
        return "%(root)s%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values

    def write_file(self):
        '''
        Write the output file.

        The lines produced by fetch() are piped through "gzip --rsyncable"
        whose output goes to the file named by output_filename().
        '''
        command = ['gzip', '--rsyncable']
        with open(self.output_filename(), 'w') as output_file:
            gzip = Popen(command, stdin = PIPE, stdout = output_file)
            try:
                for item in self.fetch():
                    gzip.stdin.write(item)
            finally:
                # Closing stdin signals end of input to gzip; waiting for it
                # makes sure the compressed data is completely written (and
                # the child is reaped) before the output file is closed.
                gzip.stdin.close()
                gzip.wait()
class ContentsScanner(object):
    '''
    ContentsScanner provides a threadsafe method scan() to scan the contents of
    a binary package. Only the id of the binary is stored; every scan() call
    works on a session of its own.
    '''

    def __init__(self, binary):
        '''
        The argument binary is the actual DBBinary object that should be
        scanned. Only its binary_id is remembered.
        '''
        self.binary_id = binary.binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        try:
            # Reload the binary in this method's own session instead of
            # reusing an object that belongs to another session.
            binary = session.query(DBBinary).get(self.binary_id)
            for filename in binary.scan_contents():
                binary.contents.append(BinContents(file = filename))
            session.commit()
        finally:
            # Release the session even if scanning or committing fails.
            session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict with the keys 'processed' and 'remaining'.
        '''
        session = DBConn().session()
        try:
            query = session.query(DBBinary).filter(DBBinary.contents == None)
            # Keep the bound count method of the unlimited query; it is only
            # called after all scans have finished so that it reports what is
            # still left to do.
            remaining = query.count
            if limit is not None:
                query = query.limit(limit)
            processed = query.count()
            threadpool = ThreadPool()
            for binary in query.yield_per(100):
                threadpool.queueTask(ContentsScanner(binary).scan)
            # Wait for all queued scans before counting the remainder.
            threadpool.joinAll()
            remaining = remaining()
        finally:
            session.close()
        return { 'processed': processed, 'remaining': remaining }