"""
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
"""
################################################################################

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

################################################################################
import os
from multiprocessing import Pool
from shutil import rmtree
from subprocess import CalledProcessError, PIPE, Popen, check_call
from tempfile import mkdtemp

from daklib.config import Config
from daklib.dbconn import *
class ContentsWriter(object):
    '''
    ContentsWriter writes the Contents-$arch.gz files.

    A writer is bound to one suite, one architecture and one override type
    (and optionally one component).  write_file() runs the database query,
    pipes the formatted rows through gzip and moves the compressed result to
    the path computed by output_filename().
    '''

    # Fills a temporary table with one row per package name.  "distinct on"
    # combined with "order by package, version desc" keeps the row with the
    # highest version of each package for the requested architecture (or
    # architecture 'all') in the requested suite.
    # NOTE(review): the type of the "package" column is assumed to be text.
    _NEWEST_BINARIES_SQL = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;
'''

    # Override lookup restricted to a single component.
    _OVERRIDES_OF_COMPONENT_SQL = '''
with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)
'''

    # Override lookup over all components; the most recently modified
    # override wins for each (package, section) pair.
    _OVERRIDES_OF_ANY_COMPONENT_SQL = '''
with

unique_override as
    (select distinct on (o.package, s.section) o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id
        order by o.package, s.section, o.modified desc)
'''

    # Produces one row per file with a comma separated "section/package" list.
    _FILE_LIST_SQL = '''
select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    group by bc.file'''

    def __init__(self, suite, architecture, overridetype, component = None):
        '''
        Stores the objects that select what is written.

        @param suite: suite object; its session() is used for all queries
        @param architecture: architecture object of the binaries to list
        @param overridetype: override type object ('deb' or 'udeb' in
            write_all())
        @param component: optional component object; if given, only the
            overrides of that component are used and the output file is
            placed in the component's directory
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        The query yields (file, pkglist) rows.  Overrides are looked up in
        the suite's override suite if one is configured and in the suite
        itself otherwise.
        '''
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }

        if self.component is not None:
            params['component'] = self.component.component_id
            overrides_sql = self._OVERRIDES_OF_COMPONENT_SQL
        else:
            overrides_sql = self._OVERRIDES_OF_ANY_COMPONENT_SQL
        # Only fixed fragments are concatenated; all values are passed as
        # bound parameters.
        sql = self._NEWEST_BINARIES_SQL + overrides_sql + self._FILE_LIST_SQL

        return self.session.query("file", "pkglist").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.

        The filename is left-justified in a 55 character wide column and
        followed by the package list and a newline.
        '''
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields one formatted line of the Contents-$arch.gz file for every row
        returned by query().
        '''
        try:
            for filename, package_list in self.query().yield_per(100):
                yield self.formatline(filename, package_list)
        finally:
            # end transaction to return connection to pool; the finally
            # clause makes sure this also happens when the consumer stops
            # iterating early or the query fails
            self.session.rollback()

    # NOTE(review): the method name get_list is assumed - confirm against
    # the callers.
    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def output_filename(self):
        '''
        Returns the name of the output file.

        The file lives below Dir::Root in dists/<suite>/ or, if a component
        was given, in dists/<suite>/<component>/.
        '''
        values = {
            'root': Config()['Dir::Root'],
            'suite': self.suite.suite_name,
            'architecture': self.architecture.arch_string
        }
        if self.component is None:
            return "%(root)s/dists/%(suite)s/Contents-%(architecture)s.gz" % values
        values['component'] = self.component.component_name
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.

        The header is the content of the file 'contents' in Dir::Templates.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file.

        The data is compressed into '<output file>.new' first and only moved
        over the final name after gzip finished successfully.

        @raise CalledProcessError: if gzip exits with a non-zero status; the
            temporary file is removed and the previous output file is left
            untouched in that case
        '''
        command = ['gzip', '--rsyncable']
        final_filename = self.output_filename()
        temp_filename = final_filename + '.new'
        with open(temp_filename, 'w') as output_file:
            gzip = Popen(command, stdin = PIPE, stdout = output_file)
            try:
                gzip.stdin.write(self.get_header())
                for item in self.fetch():
                    gzip.stdin.write(item)
            finally:
                # gzip only terminates after it has seen EOF on stdin, so
                # the pipe must be closed before waiting - even on errors
                try:
                    gzip.stdin.close()
                finally:
                    returncode = gzip.wait()
        if returncode != 0:
            os.remove(temp_filename)
            raise CalledProcessError(returncode, command)
        try:
            os.remove(final_filename)
        except OSError:
            # best effort: the file does not exist on the first run
            pass
        os.rename(temp_filename, final_filename)
        os.chmod(final_filename, 0o664)

    @classmethod
    def log_result(class_, result):
        '''
        Writes a result message to the logfile.

        Uses the logger that was stored on the class by write_all().
        '''
        class_.logger.log(result)

    @classmethod
    def write_all(class_, logger, suite_names = [], force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.

        For every architecture of every selected suite three jobs are handed
        to a process pool: one for 'deb' packages and one each for 'udeb'
        packages in 'main' and 'non-free'.  The result of every job is passed
        to log_result().  The method returns after all jobs have finished.
        '''
        class_.logger = logger
        session = DBConn().session()
        try:
            suite_query = session.query(Suite)
            # suite_names is only read, never modified
            if suite_names:
                suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
            if not force:
                suite_query = suite_query.filter_by(untouchable = False)
            deb_id = get_override_type('deb', session).overridetype_id
            udeb_id = get_override_type('udeb', session).overridetype_id
            main_id = get_component('main', session).component_id
            non_free_id = get_component('non-free', session).component_id
            # Only plain ids are handed to the worker processes; each worker
            # loads the objects again in its own session.
            job_suffixes = (
                (deb_id, ),               # 'deb' packages
                (udeb_id, main_id),       # 'udeb' packages for 'main'
                (udeb_id, non_free_id),   # 'udeb' packages for 'non-free'
            )
            pool = Pool()
            for suite in suite_query:
                suite_id = suite.suite_id
                for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                    arch_id = architecture.arch_id
                    for suffix in job_suffixes:
                        pool.apply_async(generate_helper, (suite_id, arch_id) + suffix,
                            callback = class_.log_result)
            pool.close()
            pool.join()
        finally:
            session.close()
def generate_helper(suite_id, arch_id, overridetype_id, component_id = None):
    '''
    This function is called in a new subprocess.

    It loads the objects for the given ids in a session of its own, writes
    the matching Contents file and returns a list with the suite name, the
    architecture name, the override type and - if a component was given -
    the component name.  ContentsWriter.write_all() passes that list on to
    ContentsWriter.log_result().
    '''
    session = DBConn().session()
    suite = Suite.get(suite_id, session)
    architecture = Architecture.get(arch_id, session)
    overridetype = OverrideType.get(overridetype_id, session)
    log_message = [suite.suite_name, architecture.arch_string, overridetype.overridetype]
    if component_id is None:
        component = None
    else:
        component = Component.get(component_id, session)
        log_message.append(component.component_name)
    contents_writer = ContentsWriter(suite, architecture, overridetype, component)
    contents_writer.write_file()
    return log_message
class ContentsScanner(object):
    '''
    ContentsScanner provides a method scan() to scan the contents of the
    DBBinary object with a given id.  Every call of scan() works in a
    database session of its own.
    '''

    def __init__(self, binary_id):
        '''
        The argument binary_id is the id of the DBBinary object that
        should be scanned.
        '''
        self.binary_id = binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.

        A package without any files gets the single entry 'EMPTY_PACKAGE'.
        '''
        session = DBConn().session()
        try:
            binary = session.query(DBBinary).get(self.binary_id)
            fileset = set(binary.scan_contents())
            if not fileset:
                fileset.add('EMPTY_PACKAGE')
            for filename in fileset:
                binary.contents.append(BinContents(file = filename))
            session.commit()
        finally:
            # release the session even if the scan or the commit failed
            session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries that have no contents
        yet using multiple processes. The number of binaries to be scanned
        can be limited with the limit argument. Returns the number of
        processed and remaining packages as a dict with the keys 'processed'
        and 'remaining'.
        '''
        session = DBConn().session()
        try:
            # "== None" is intentional: the comparison builds a query
            # expression and must not be replaced by "is None".
            query = session.query(DBBinary).filter(DBBinary.contents == None)
            # Keep the count method of the unlimited query; it is called
            # after the workers are done to find out what is still left.
            count_unscanned = query.count
            if limit is not None:
                query = query.limit(limit)
            processed = query.count()
            pool = Pool()
            for binary in query.yield_per(100):
                pool.apply_async(scan_helper, (binary.binary_id, ))
            pool.close()
            pool.join()
            remaining = count_unscanned()
        finally:
            session.close()
        return { 'processed': processed, 'remaining': remaining }
def scan_helper(binary_id):
    '''
    This function runs in a subprocess.

    It scans the binary with the given id; ContentsScanner.scan_all() submits
    it to the process pool once for every binary.
    '''
    scanner = ContentsScanner(binary_id)
    scanner.scan()
class UnpackedSource(object):
    '''
    UnpackedSource extracts a source package into a temporary location and
    gives you some convenient functions for accessing it.
    '''

    def __init__(self, dscfilename):
        '''
        The dscfilename is a name of a DSC file that will be extracted.

        The package is unpacked with dpkg-source into the directory 'root'
        inside a freshly created temporary directory.  If the extraction
        fails the temporary directory is removed again and the exception of
        check_call() is passed on.
        '''
        self.root_directory = os.path.join(mkdtemp(), 'root')
        command = ('dpkg-source', '--no-copy', '--no-check', '-x', dscfilename,
            self.root_directory)
        try:
            # dpkg-source does not have a --quiet option
            with open(os.devnull, 'w') as devnull:
                check_call(command, stdout = devnull, stderr = devnull)
        except Exception:
            # do not leave the temporary directory behind
            self.cleanup()
            raise

    def get_root_directory(self):
        '''
        Returns the name of the package's root directory which is the directory
        where the debian subdirectory is located.
        '''
        return self.root_directory

    def get_changelog_file(self):
        '''
        Returns a file object for debian/changelog or None if no such file exists.

        The caller is responsible for closing the returned file.
        '''
        changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
        try:
            return open(changelog_name)
        except IOError:
            return None

    def get_all_filenames(self):
        '''
        Returns an iterator over all filenames. The filenames will be relative
        to the root directory.
        '''
        # number of characters to cut off: the root directory and the
        # path separator that follows it
        skip = len(self.root_directory) + 1
        for root, _, files in os.walk(self.root_directory):
            for name in files:
                yield os.path.join(root[skip:], name)

    # NOTE(review): the method name cleanup is assumed - confirm against the
    # callers.
    def cleanup(self):
        '''
        Removes all temporary files.

        The whole temporary directory (the parent of the root directory) is
        deleted.  Calling the method again afterwards does nothing.
        '''
        if self.root_directory is None:
            return
        parent_directory = os.path.dirname(self.root_directory)
        rmtree(parent_directory)
        self.root_directory = None