3 """ DB access functions
4 @group readonly: get_suite_id, get_section_id, get_priority_id, get_override_type_id,
5 get_architecture_id, get_archive_id, get_component_id, get_location_id,
6 get_source_id, get_suite_version, get_files_id, get_maintainer, get_suites
7 @group read/write: get_or_set*, set_files_id
9 @contact: Debian FTP Master <ftpmaster@debian.org>
10 @copyright: 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
11 @copyright: 2009 Joerg Jaspert <joerg@debian.org>
12 @license: GNU General Public License version 2 or later
15 # This program is free software; you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation; either version 2 of the License, or
18 # (at your option) any later version.
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
25 # You should have received a copy of the GNU General Public License
26 # along with this program; if not, write to the Free Software
27 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ################################################################################
35 ################################################################################
37 Cnf = None #: Configuration, apt_pkg.Configuration
38 projectB = None #: database connection, pgobject
39 suite_id_cache = {} #: cache for suites
40 section_id_cache = {} #: cache for sections
41 priority_id_cache = {} #: cache for priorities
42 override_type_id_cache = {} #: cache for overrides
43 architecture_id_cache = {} #: cache for architectures
44 archive_id_cache = {} #: cache for archives
45 component_id_cache = {} #: cache for components
46 location_id_cache = {} #: cache for locations
47 maintainer_id_cache = {} #: cache for maintainers
48 keyring_id_cache = {} #: cache for keyrings
49 source_id_cache = {} #: cache for sources
50 files_id_cache = {} #: cache for files
51 maintainer_cache = {} #: cache for maintainer names
52 fingerprint_id_cache = {} #: cache for fingerprints
53 queue_id_cache = {} #: cache for queues
54 uid_id_cache = {} #: cache for uids
55 suite_version_cache = {} #: cache for suite_versions (packages)
56 suite_bin_version_cache = {} #: cache for binary ids, keyed by "binary_section_suite_arch"
57 content_path_id_cache = {} #: cache for content_file_paths ids, keyed by path
58 content_file_id_cache = {} #: cache for content_file_names ids, keyed by file name
59 insert_contents_file_cache = {} #: "binid_fullpath" keys checked by insert_content_path before inserting
60 cache_preloaded = False #: set to True by preload_binary_id_cache; read by get_latest_binary_version_id
62 ################################################################################
64 def init (config, sql):
68 @type config: apt_pkg.Configuration
69 @param config: apt config, see U{http://apt.alioth.debian.org/python-apt-doc/apt_pkg/cache.html#Configuration}
72 @param sql: database connection
83 Executes a database query. Writes statistics / timing to stderr.
86 @param query: database query string, passed unmodified
90 @warning: The query is passed B{unmodified}, so be careful what you use this for.
92 sys.stderr.write("query: \"%s\" ... " % (query))
94 r = projectB.query(query)
95 time_diff = time.time()-before
96 sys.stderr.write("took %.3f seconds.\n" % (time_diff))
98 sys.stderr.write("int result: %s\n" % (r))
99 elif type(r) is types.NoneType:
100 sys.stderr.write("result: None\n")
102 sys.stderr.write("pgresult: %s\n" % (r.getresult()))
105 ################################################################################
107 def get_suite_id (suite):
109 Returns database id for given C{suite}.
110 Results are kept in a cache during runtime to minimize database queries.
113 @param suite: The name of the suite
116 @return: the database id for the given suite
119 global suite_id_cache
121 if suite_id_cache.has_key(suite):
122 return suite_id_cache[suite]
124 q = projectB.query("SELECT id FROM suite WHERE suite_name = '%s'" % (suite))
130 suite_id_cache[suite] = suite_id
134 def get_section_id (section):
136 Returns database id for given C{section}.
137 Results are kept in a cache during runtime to minimize database queries.
139 @type section: string
140 @param section: The name of the section
143 @return: the database id for the given section
146 global section_id_cache
148 if section_id_cache.has_key(section):
149 return section_id_cache[section]
151 q = projectB.query("SELECT id FROM section WHERE section = '%s'" % (section))
156 section_id = ql[0][0]
157 section_id_cache[section] = section_id
161 def get_priority_id (priority):
163 Returns database id for given C{priority}.
164 Results are kept in a cache during runtime to minimize database queries.
166 @type priority: string
167 @param priority: The name of the priority
170 @return: the database id for the given priority
173 global priority_id_cache
175 if priority_id_cache.has_key(priority):
176 return priority_id_cache[priority]
178 q = projectB.query("SELECT id FROM priority WHERE priority = '%s'" % (priority))
183 priority_id = ql[0][0]
184 priority_id_cache[priority] = priority_id
188 def get_override_type_id (type):
190 Returns database id for given override C{type}.
191 Results are kept in a cache during runtime to minimize database queries.
194 @param type: The name of the override type
197 @return: the database id for the given override type
200 global override_type_id_cache
202 if override_type_id_cache.has_key(type):
203 return override_type_id_cache[type]
205 q = projectB.query("SELECT id FROM override_type WHERE type = '%s'" % (type))
210 override_type_id = ql[0][0]
211 override_type_id_cache[type] = override_type_id
213 return override_type_id
215 def get_architecture_id (architecture):
217 Returns database id for given C{architecture}.
218 Results are kept in a cache during runtime to minimize database queries.
220 @type architecture: string
221 @param architecture: The name of the architecture
224 @return: the database id for the given architecture
227 global architecture_id_cache
229 if architecture_id_cache.has_key(architecture):
230 return architecture_id_cache[architecture]
232 q = projectB.query("SELECT id FROM architecture WHERE arch_string = '%s'" % (architecture))
237 architecture_id = ql[0][0]
238 architecture_id_cache[architecture] = architecture_id
240 return architecture_id
242 def get_archive_id (archive):
244 Returns database id for given C{archive}.
245 Results are kept in a cache during runtime to minimize database queries.
247 @type archive: string
248 @param archive: The name of the archive
251 @return: the database id for the given archive
254 global archive_id_cache
256 archive = archive.lower()
258 if archive_id_cache.has_key(archive):
259 return archive_id_cache[archive]
261 q = projectB.query("SELECT id FROM archive WHERE lower(name) = '%s'" % (archive))
266 archive_id = ql[0][0]
267 archive_id_cache[archive] = archive_id
271 def get_component_id (component):
273 Returns database id for given C{component}.
274 Results are kept in a cache during runtime to minimize database queries.
276 @type component: string
277 @param component: The name of the component
280 @return: the database id for the given component
283 global component_id_cache
285 component = component.lower()
287 if component_id_cache.has_key(component):
288 return component_id_cache[component]
290 q = projectB.query("SELECT id FROM component WHERE lower(name) = '%s'" % (component))
295 component_id = ql[0][0]
296 component_id_cache[component] = component_id
300 def get_location_id (location, component, archive):
302 Returns database id for the location behind the given combination of
303 - B{location} - the path of the location, eg. I{/srv/ftp.debian.org/ftp/pool/}
304 - B{component} - the id of the component as returned by L{get_component_id}
305 - B{archive} - the id of the archive as returned by L{get_archive_id}
306 Results are kept in a cache during runtime to minimize database queries.
308 @type location: string
309 @param location: the path of the location
312 @param component: the id of the component
315 @param archive: the id of the archive
318 @return: the database id for the location
321 global location_id_cache
# NOTE(review): the cache key ends with `location` a second time and never
# includes `archive`, so two lookups that differ only in their archive share
# one cache entry. The last term was presumably meant to be `archive` --
# TODO confirm and fix.
323 cache_key = location + '_' + component + '_' + location
324 if location_id_cache.has_key(cache_key):
325 return location_id_cache[cache_key]
# NOTE(review): `archive` and `component` are handed to get_archive_id /
# get_component_id here and are concatenated as strings in the cache key, so
# they are used as names, although the docstring describes them as ids.
327 archive_id = get_archive_id (archive)
329 component_id = get_component_id (component)
# -1 is treated as "no such component" for the value returned above.
330 if component_id != -1:
331 q = projectB.query("SELECT id FROM location WHERE path = '%s' AND component = %d AND archive = %d" % (location, component_id, archive_id))
333 q = projectB.query("SELECT id FROM location WHERE path = '%s' AND archive = %d" % (location, archive_id))
338 location_id = ql[0][0]
339 location_id_cache[cache_key] = location_id
343 def get_source_id (source, version):
345 Returns database id for the combination of C{source} and C{version}
346 - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc}
348 Results are kept in a cache during runtime to minimize database queries.
351 @param source: source package name
353 @type version: string
354 @param version: the source version
357 @return: the database id for the source
360 global source_id_cache
362 cache_key = source + '_' + version + '_'
363 if source_id_cache.has_key(cache_key):
364 return source_id_cache[cache_key]
366 q = projectB.query("SELECT id FROM source s WHERE s.source = '%s' AND s.version = '%s'" % (source, version))
368 if not q.getresult():
371 source_id = q.getresult()[0][0]
372 source_id_cache[cache_key] = source_id
376 def get_suite_version(source, suite):
378 Returns the version of C{source} in C{suite}.
380 - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc}
381 - B{suite} - a suite name, eg. I{unstable}
383 Results are kept in a cache during runtime to minimize database queries.
386 @param source: source package name
389 @param suite: the suite name
392 @return: the version for I{source} in I{suite}
396 global suite_version_cache
397 cache_key = "%s_%s" % (source, suite)
399 if suite_version_cache.has_key(cache_key):
400 return suite_version_cache[cache_key]
402 q = projectB.query("""
403 SELECT s.version FROM source s, suite su, src_associations sa
406 AND su.suite_name='%s'
410 if not q.getresult():
413 version = q.getresult()[0][0]
414 suite_version_cache[cache_key] = version
418 def get_latest_binary_version_id(binary, section, suite, arch):
419 global suite_bin_version_cache
420 cache_key = "%s_%s_%s_%s" % (binary, section, suite, arch)
421 cache_key_all = "%s_%s_%s_%s" % (binary, section, suite, get_architecture_id("all"))
423 # Check for the cache hit for its arch, then arch all
424 if suite_bin_version_cache.has_key(cache_key):
425 return suite_bin_version_cache[cache_key]
426 if suite_bin_version_cache.has_key(cache_key_all):
427 return suite_bin_version_cache[cache_key_all]
428 if cache_preloaded == True:
429 return # package does not exist
431 q = projectB.query("SELECT DISTINCT b.id FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section)))
433 if not q.getresult():
436 highest_bid = q.getresult()[0][0]
438 suite_bin_version_cache[cache_key] = highest_bid
441 def preload_binary_id_cache():
442 global suite_bin_version_cache, cache_preloaded
445 q = projectB.query("SELECT id FROM suite")
446 suites = q.getresult()
449 q = projectB.query("SELECT id FROM architecture")
450 arches = q.getresult()
454 q = projectB.query("SELECT DISTINCT b.id, b.package, o.section FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.architecture = '%d' AND ba.suite = '%d'" % (int(arch[0]), int(suite[0])))
456 for bi in q.getresult():
457 cache_key = "%s_%s_%s_%s" % (bi[1], bi[2], suite[0], arch[0])
458 suite_bin_version_cache[cache_key] = int(bi[0])
460 cache_preloaded = True
462 ################################################################################
def get_or_set_maintainer_id (maintainer):
    """
    Return the database id of C{maintainer}, creating the row if needed.

    If C{maintainer} does not have an entry in the maintainer table yet, create one
    and return the new id.
    If C{maintainer} already has an entry, simply return the existing id.

    Results are kept in a cache during runtime to minimize database queries.

    @type maintainer: string
    @param maintainer: the maintainer name; it is interpolated into the SQL
                       text as-is, so it must already be safe to place inside
                       a single-quoted SQL literal

    @return: the database id for the maintainer
    """
    global maintainer_id_cache

    if maintainer in maintainer_id_cache:
        return maintainer_id_cache[maintainer]

    q = projectB.query("SELECT id FROM maintainer WHERE name = '%s'" % (maintainer))
    if not q.getresult():
        # Unknown maintainer: insert it, then read back the id it was given.
        projectB.query("INSERT INTO maintainer (name) VALUES ('%s')" % (maintainer))
        q = projectB.query("SELECT id FROM maintainer WHERE name = '%s'" % (maintainer))
    maintainer_id = q.getresult()[0][0]
    maintainer_id_cache[maintainer] = maintainer_id

    return maintainer_id
493 ################################################################################
def get_or_set_keyring_id (keyring):
    """
    Return the database id of C{keyring}, creating the row if needed.

    If C{keyring} does not have an entry in the C{keyrings} table yet, create one
    and return the new id.
    If C{keyring} already has an entry, simply return the existing id.

    Results are kept in a cache during runtime to minimize database queries.

    @type keyring: string
    @param keyring: the keyring name; it is interpolated into the SQL text
                    as-is, so it must already be safe to place inside a
                    single-quoted SQL literal

    @return: the database id for the keyring
    """
    global keyring_id_cache

    if keyring in keyring_id_cache:
        return keyring_id_cache[keyring]

    q = projectB.query("SELECT id FROM keyrings WHERE name = '%s'" % (keyring))
    if not q.getresult():
        # Unknown keyring: insert it, then read back the id it was given.
        projectB.query("INSERT INTO keyrings (name) VALUES ('%s')" % (keyring))
        q = projectB.query("SELECT id FROM keyrings WHERE name = '%s'" % (keyring))
    keyring_id = q.getresult()[0][0]
    keyring_id_cache[keyring] = keyring_id

    return keyring_id
524 ################################################################################
def get_or_set_uid_id (uid):
    """
    Return the database id of C{uid}, creating the row if needed.

    If C{uid} does not have an entry in the uid table yet, create one
    and return the new id.
    If C{uid} already has an entry, simply return the existing id.

    Results are kept in a cache during runtime to minimize database queries.

    @param uid: the uid to look up or create; it is interpolated into the
                SQL text as-is, so it must already be safe to place inside a
                single-quoted SQL literal

    @return: the database id for the uid
    """
    global uid_id_cache

    if uid in uid_id_cache:
        return uid_id_cache[uid]

    q = projectB.query("SELECT id FROM uid WHERE uid = '%s'" % (uid))
    if not q.getresult():
        # Unknown uid: insert it, then read back the id it was given.
        projectB.query("INSERT INTO uid (uid) VALUES ('%s')" % (uid))
        q = projectB.query("SELECT id FROM uid WHERE uid = '%s'" % (uid))
    uid_id = q.getresult()[0][0]
    uid_id_cache[uid] = uid_id

    return uid_id
556 ################################################################################
def get_or_set_fingerprint_id (fingerprint):
    """
    Return the database id of C{fingerprint}, creating the row if needed.

    If C{fingerprint} does not have an entry in the fingerprint table yet, create one
    and return the new id.
    If C{fingerprint} already has an entry, simply return the existing id.

    Results are kept in a cache during runtime to minimize database queries.

    @type fingerprint: string
    @param fingerprint: the fingerprint; it is interpolated into the SQL
                        text as-is, so it must already be safe to place
                        inside a single-quoted SQL literal

    @return: the database id for the fingerprint
    """
    global fingerprint_id_cache

    if fingerprint in fingerprint_id_cache:
        return fingerprint_id_cache[fingerprint]

    q = projectB.query("SELECT id FROM fingerprint WHERE fingerprint = '%s'" % (fingerprint))
    if not q.getresult():
        # Unknown fingerprint: insert it, then read back the id it was given.
        projectB.query("INSERT INTO fingerprint (fingerprint) VALUES ('%s')" % (fingerprint))
        q = projectB.query("SELECT id FROM fingerprint WHERE fingerprint = '%s'" % (fingerprint))
    fingerprint_id = q.getresult()[0][0]
    fingerprint_id_cache[fingerprint] = fingerprint_id

    return fingerprint_id
587 ################################################################################
589 def get_files_id (filename, size, md5sum, location_id):
591 Returns -1, -2 or the file_id for filename, if its C{size} and C{md5sum} match an
594 The database is queried using the C{filename} and C{location_id}. If a file does exist
595 at that location, the existing size and md5sum are checked against the provided
596 parameters. A size or checksum mismatch returns -2. If more than one entry is
597 found within the database, a -1 is returned, no result returns None, otherwise
600 Results are kept in a cache during runtime to minimize database queries.
602 @type filename: string
603 @param filename: the filename of the file to check against the DB
606 @param size: the size of the file to check against the DB
609 @param md5sum: the md5sum of the file to check against the DB
611 @type location_id: int
612 @param location_id: the id of the location as returned by L{get_location_id}
615 @return: Various return values are possible:
616 - -2: size/checksum error
617 - -1: more than one file found in database
618 - None: no file found in database
622 global files_id_cache
624 cache_key = "%s_%d" % (filename, location_id)
626 if files_id_cache.has_key(cache_key):
627 return files_id_cache[cache_key]
630 q = projectB.query("SELECT id, size, md5sum FROM files WHERE filename = '%s' AND location = %d" % (filename, location_id))
636 orig_size = int(ql[1])
638 if orig_size != size or orig_md5sum != md5sum:
640 files_id_cache[cache_key] = ql[0]
641 return files_id_cache[cache_key]
645 ################################################################################
def get_or_set_queue_id (queue):
    """
    Return the database id of C{queue}, creating the row if needed.

    If C{queue} does not have an entry in the queue table yet, create one
    and return the new id.
    If C{queue} already has an entry, simply return the existing id.

    Results are kept in a cache during runtime to minimize database queries.

    @param queue: the queue name (no full path); it is interpolated into the
                  SQL text as-is, so it must already be safe to place inside
                  a single-quoted SQL literal

    @return: the database id for the queue
    """
    global queue_id_cache

    if queue in queue_id_cache:
        return queue_id_cache[queue]

    q = projectB.query("SELECT id FROM queue WHERE queue_name = '%s'" % (queue))
    if not q.getresult():
        # Unknown queue: insert it, then read back the id it was given.
        projectB.query("INSERT INTO queue (queue_name) VALUES ('%s')" % (queue))
        q = projectB.query("SELECT id FROM queue WHERE queue_name = '%s'" % (queue))
    queue_id = q.getresult()[0][0]
    queue_id_cache[queue] = queue_id

    return queue_id
676 ################################################################################
def set_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id):
    """
    Insert a new entry into the files table and return its id.

    @type filename: string
    @param filename: the filename

    @param size: the size in bytes; converted with C{int()} before being
                 formatted into the INSERT statement

    @param md5sum: md5sum of the file

    @type sha1sum: string
    @param sha1sum: sha1sum of the file

    @type sha256sum: string
    @param sha256sum: sha256sum of the file

    @type location_id: int
    @param location_id: the id of the location as returned by L{get_location_id}

    @return: the database id for the new file, as reported by L{get_files_id}
    """
    # The string values are interpolated into the SQL text as-is, so they
    # must already be safe to place inside single-quoted SQL literals.
    projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', '%s', '%s', %d)" % (filename, int(size), md5sum, sha1sum, sha256sum, location_id))

    # The id is looked up again through get_files_id rather than through
    # currval('files_id_seq'): currval has issues with postgresql 7.1.3 when
    # the table is big -- it was taking ~3 seconds to return on auric.
    return get_files_id (filename, size, md5sum, location_id)
720 ################################################################################
def get_maintainer (maintainer_id):
    """
    Return the name of the maintainer behind C{maintainer_id}.

    Results are kept in a cache during runtime to minimize database queries.

    @type maintainer_id: int
    @param maintainer_id: the id of the maintainer, eg. from L{get_or_set_maintainer_id}

    @return: the name of the maintainer

    @note: the first row of the query result is indexed unconditionally, so an
           id without a matching row is not handled here.
    """
    global maintainer_cache

    if maintainer_id not in maintainer_cache:
        q = projectB.query("SELECT name FROM maintainer WHERE id = %s" % (maintainer_id))
        maintainer_cache[maintainer_id] = q.getresult()[0][0]

    return maintainer_cache[maintainer_id]
743 ################################################################################
745 def get_suites(pkgname, src=False):
747 Return the suites in which C{pkgname} can be found. If C{src} is True query for source
748 package, else binary package.
750 @type pkgname: string
751 @param pkgname: name of the package
754 @param src: if True look for source packages, false (default) looks for binary.
757 @return: list of suites, or empty list if no match
766 WHERE source.id = src_associations.source
767 AND source.source = '%s'
768 AND src_associations.suite = suite.id
776 WHERE binaries.id = bin_associations.bin
778 AND bin_associations.suite = suite.id
781 q = projectB.query(sql)
782 return map(lambda x: x[0], q.getresult())
784 ################################################################################
def get_or_set_contents_file_id(file):
    """
    Return the id of C{file} in the content_file_names table, inserting a
    new row first if the name is not stored yet.

    Results are kept in a cache during runtime to minimize database queries.

    @param file: the file name to look up or create; it is interpolated into
                 the SQL text as-is, so it must already be safe to place
                 inside a single-quoted SQL literal

    @return: the database id for the file name, converted with C{int()}
    """
    global content_file_id_cache

    if file not in content_file_id_cache:
        sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % file
        q = projectB.query(sql_select)
        if not q.getresult():
            # since this can be called within a transaction, we can't use currval
            q = projectB.query("INSERT INTO content_file_names VALUES (DEFAULT, '%s') RETURNING id" % (file))
        # q is either the SELECT that found the row or the INSERT ... RETURNING.
        content_file_id_cache[file] = int(q.getresult()[0][0])
    return content_file_id_cache[file]
798 ################################################################################
def get_or_set_contents_path_id(path):
    """
    Return the id of C{path} in the content_file_paths table, inserting a
    new row first if the path is not stored yet.

    Results are kept in a cache during runtime to minimize database queries.

    @param path: the path to look up or create; it is interpolated into the
                 SQL text as-is, so it must already be safe to place inside
                 a single-quoted SQL literal

    @return: the database id for the path, converted with C{int()}
    """
    global content_path_id_cache

    if path not in content_path_id_cache:
        sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % path
        q = projectB.query(sql_select)
        if not q.getresult():
            # since this can be called within a transaction, we can't use currval
            q = projectB.query("INSERT INTO content_file_paths VALUES (DEFAULT, '%s') RETURNING id" % (path))
        # q is either the SELECT that found the row or the INSERT ... RETURNING.
        content_path_id_cache[path] = int(q.getresult()[0][0])
    return content_path_id_cache[path]
812 ################################################################################
814 def insert_content_path(bin_id, fullpath):
815 global insert_contents_file_cache
816 cache_key = "%s_%s" % (bin_id, fullpath)
818 # have we seen this contents before?
819 # probably only relevant during package import
820 if insert_contents_file_cache.has_key(cache_key):
823 # split the path into basename, and pathname
824 (path, file) = os.path.split(fullpath)
826 # Get the necessary IDs ...
827 file_id = get_or_set_contents_file_id(file)
828 path_id = get_or_set_contents_path_id(path)
830 # Determine if we're inserting a duplicate row
831 q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id))
833 # Yes we are, return without doing the insert
836 # Put them into content_associations
837 projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id))