]> git.decadent.org.uk Git - dak.git/blobdiff - dak/stats.py
Debug suites might also miss the source package
[dak.git] / dak / stats.py
index 7c61e2a42948fa58e2bce7a0d50493d981ebfee0..6f69e1a7f4239429fac841b78cdb47c92605a278 100755 (executable)
@@ -2,6 +2,7 @@
 
 """ Various statistical pr0nography fun and games """
 # Copyright (C) 2000, 2001, 2002, 2003, 2006  James Troup <james@nocrew.org>
+# Copyright (C) 2013  Luca Falavigna <dktrkranz@debian.org>
 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 import sys
 import apt_pkg
 
+from datetime import datetime
+from email.utils import mktime_tz, parsedate_tz
+from mailbox import mbox
+from os import listdir, system, unlink
+from os.path import isfile, join, splitext
+from re import findall, DOTALL, MULTILINE
+from sys import stderr
+from yaml import safe_load, safe_dump
+
 from daklib import utils
 from daklib.dbconn import DBConn, get_suite_architectures, Suite, Architecture
 
@@ -40,6 +50,19 @@ from daklib.dbconn import DBConn, get_suite_architectures, Suite, Architecture
 
 Cnf = None
 
+stats = {}
+users = {}
+buffer = 0
+FORMAT_SWITCH = '2009-08'
+blacklisted = ('dak', 'katie')
+
+NEW = ('^(\d{14})\|(?:jennifer|process-unchecked|.*?\|dak)'
+       '\|(Moving to new|ACCEPT-TO-NEW)')
+new_ACTIONS = '^(\d{14})\|[^\|]*\|(\S+)\|NEW (\S+)[:\|]'
+old_ACTIONS = ('(?:lisa|process-new)\|program start\|(.*?)\|'
+               '(?:lisa|process-new)\|program end')
+old_ACTION = '^(\d{14})\|(?:lisa|process-new)\|(Accepting changes|rejected)\|'
+
 ################################################################################
 
 def usage(exit_code=0):
@@ -53,6 +76,7 @@ The following MODEs are available:
   arch-space    - displays space used by each architecture
   pkg-nums      - displays the number of packages by suite/architecture
   daily-install - displays daily install stats suitable for graphing
+  new           - stores stats about the NEW queue
 """
     sys.exit(exit_code)
 
@@ -111,17 +135,6 @@ def longest(list):
             longest = l
     return longest
 
-def suite_sort(a, b):
-    if Cnf.has_key("Suite::%s::Priority" % (a)):
-        a_priority = int(Cnf["Suite::%s::Priority" % (a)])
-    else:
-        a_priority = 0
-    if Cnf.has_key("Suite::%s::Priority" % (b)):
-        b_priority = int(Cnf["Suite::%s::Priority" % (b)])
-    else:
-        b_priority = 0
-    return cmp(a_priority, b_priority)
-
 def output_format(suite):
     output_suite = []
     for word in suite.split("-"):
@@ -164,7 +177,6 @@ def number_of_packages():
     ## Print the results
     # Setup
     suite_list = suites.values()
-    suite_list.sort(suite_sort)
     suite_id_list = []
     suite_arches = {}
     for suite in suite_list:
@@ -201,8 +213,179 @@ def number_of_packages():
 
 ################################################################################
 
+def parse_new_uploads(data):
+    global stats
+    latest_timestamp = stats['timestamp']
+    for entry in findall(NEW, data, MULTILINE):
+        timestamp = entry[0]
+        if stats['timestamp'] >= timestamp:
+            continue
+        date = parse_timestamp(timestamp)
+        if date not in stats:
+            stats[date] = {'stats': {'NEW': 0, 'ACCEPT': 0,
+                           'REJECT': 0, 'PROD': 0}, 'members': {}}
+        stats[date]['stats']['NEW'] += 1
+        stats['history']['stats']['NEW'] += 1
+        latest_timestamp = timestamp
+    return latest_timestamp
+
+
+def parse_actions(data, logdate):
+    global stats
+    latest_timestamp = stats['timestamp']
+    if logdate <= FORMAT_SWITCH:
+        for batch in findall(old_ACTIONS, data, DOTALL):
+            who = batch.split()[0]
+            if who in blacklisted:
+                continue
+            for entry in findall(old_ACTION, batch, MULTILINE):
+                action = entry[1]
+                if action.startswith('Accepting'):
+                    action = 'ACCEPT'
+                elif action.startswith('rejected'):
+                    action = 'REJECT'
+                timestamp = entry[0]
+                if stats['timestamp'] >= timestamp:
+                    continue
+                date = parse_timestamp(entry[0])
+                if date not in stats:
+                    stats[date] = {'stats': {'NEW': 0, 'ACCEPT': 0,
+                                   'REJECT': 0, 'PROD': 0}, 'members': {}}
+                stats[date]['stats'][action] += 1
+                stats['history']['stats'][action] += 1
+                if who not in stats[date]['members']:
+                    stats[date]['members'][who] = {'ACCEPT': 0, 'REJECT': 0,
+                                                   'PROD': 0}
+                stats[date]['members'][who][action] += 1
+                if who not in stats['history']['members']:
+                    stats['history']['members'][who] = {'ACCEPT': 0, 'REJECT': 0,
+                                                    'PROD': 0}
+                stats['history']['members'][who][action] += 1
+                latest_timestamp = timestamp
+        parse_prod(logdate)
+    if logdate >= FORMAT_SWITCH:
+        for entry in findall(new_ACTIONS, data, MULTILINE):
+            action = entry[2]
+            timestamp = entry[0]
+            if stats['timestamp'] >= timestamp:
+                continue
+            date = parse_timestamp(timestamp)
+            if date not in stats:
+                stats[date] = {'stats': {'NEW': 0, 'ACCEPT': 0,
+                               'REJECT': 0, 'PROD': 0}, 'members': {}}
+            member = entry[1]
+            if member in blacklisted:
+                continue
+            if date not in stats:
+                stats[date] = {'stats': {'NEW': 0, 'ACCEPT': 0,
+                               'REJECT': 0, 'PROD': 0}, 'members': {}}
+            if member not in stats[date]['members']:
+                stats[date]['members'][member] = {'ACCEPT': 0, 'REJECT': 0,
+                                                  'PROD': 0}
+            if member not in stats['history']['members']:
+                stats['history']['members'][member] = {'ACCEPT': 0,
+                                                       'REJECT': 0, 'PROD': 0}
+            stats[date]['stats'][action] += 1
+            stats[date]['members'][member][action] += 1
+            stats['history']['stats'][action] += 1
+            stats['history']['members'][member][action] += 1
+            latest_timestamp = timestamp
+    return latest_timestamp
+
+
+def parse_prod(logdate):
+    global stats
+    global users
+    maildate = ''.join([x[-2:] for x in logdate.split('-')])
+    mailarchive = join(utils.get_conf()['Dir::Base'], 'mail/archive',
+                       'mail-%s.xz' % maildate)
+    if not isfile(mailarchive):
+        return
+    (fd, tmpfile) = utils.temp_filename(utils.get_conf()['Dir::TempPath'])
+    system('xzcat %s > %s' % (mailarchive, tmpfile))
+    for message in mbox(tmpfile):
+        if (message['subject'] and
+                message['subject'].startswith('Comments regarding')):
+            try:
+                member = users[' '.join(message['From'].split()[:-1])]
+            except KeyError:
+                continue
+            ts = mktime_tz(parsedate_tz(message['date']))
+            timestamp = datetime.fromtimestamp(ts).strftime("%Y%m%d%H%M%S")
+            date = parse_timestamp(timestamp)
+            if date not in stats:
+                stats[date] = {'stats': {'NEW': 0, 'ACCEPT': 0,
+                                 'REJECT': 0, 'PROD': 0}, 'members': {}}
+            if member not in stats[date]['members']:
+                stats[date]['members'][member] = {'ACCEPT': 0, 'REJECT': 0,
+                                                     'PROD': 0}
+            if member not in stats['history']['members']:
+                stats['history']['members'][member] = {'ACCEPT': 0,
+                                                       'REJECT': 0, 'PROD': 0}
+            stats[date]['stats']['PROD'] += 1
+            stats[date]['members'][member]['PROD'] += 1
+            stats['history']['stats']['PROD'] += 1
+            stats['history']['members'][member]['PROD'] += 1
+    unlink(tmpfile)
+
+
+def parse_timestamp(timestamp):
+    y = int(timestamp[:4])
+    m = int(timestamp[4:6])
+    return '%d-%02d' % (y, m)
+
+
+def new_stats(logdir, yaml):
+    global Cnf
+    global stats
+    try:
+        with open(yaml, 'r') as fd:
+            stats = safe_load(fd)
+    except IOError:
+        pass
+    if not stats:
+        stats = {'history': {'stats': {'NEW': 0, 'ACCEPT': 0,
+                 'REJECT': 0, 'PROD': 0}, 'members': {}},
+                 'timestamp': '19700101000000'}
+    latest_timestamp = stats['timestamp']
+    for fn in sorted(listdir(logdir)):
+        if fn == 'current':
+            continue
+        log = splitext(fn)[0]
+        if log < parse_timestamp(stats['timestamp']):
+            continue
+        logfile = join(logdir, fn)
+        if isfile(logfile):
+            if fn.endswith('.bz2'):
+                # This hack is required becaue python2 does not support
+                # multi-stream files (http://bugs.python.org/issue1625)
+                (fd, tmpfile) = utils.temp_filename(Cnf['Dir::TempPath'])
+                system('bzcat %s > %s' % (logfile, tmpfile))
+                with open(tmpfile, 'r') as fd:
+                    data = fd.read()
+                unlink(tmpfile)
+            else:
+                with open(logfile, 'r') as fd:
+                    data = fd.read()
+            ts = parse_new_uploads(data)
+            if ts > latest_timestamp:
+                latest_timestamp = ts
+            ts = parse_actions(data, log)
+            if ts > latest_timestamp:
+                latest_timestamp = ts
+            stderr.write('.')
+            stderr.flush()
+    stderr.write('\n')
+    stderr.flush()
+    stats['timestamp'] = latest_timestamp
+    with open(yaml, 'w') as fd:
+        safe_dump(stats, fd)
+
+################################################################################
+
 def main ():
     global Cnf
+    global users
 
     Cnf = utils.get_conf()
     Arguments = [('h',"help","Stats::Options::Help")]
@@ -210,9 +393,9 @@ def main ():
         if not Cnf.has_key("Stats::Options::%s" % (i)):
             Cnf["Stats::Options::%s" % (i)] = ""
 
-    args = apt_pkg.ParseCommandLine(Cnf, Arguments, sys.argv)
+    args = apt_pkg.parse_commandline(Cnf, Arguments, sys.argv)
 
-    Options = Cnf.SubTree("Stats::Options")
+    Options = Cnf.subtree("Stats::Options")
     if Options["Help"]:
         usage()
 
@@ -220,8 +403,12 @@ def main ():
         utils.warn("dak stats requires a MODE argument")
         usage(1)
     elif len(args) > 1:
-        utils.warn("dak stats accepts only one MODE argument")
-        usage(1)
+        if args[0].lower() != "new":
+            utils.warn("dak stats accepts only one MODE argument")
+            usage(1)
+    elif args[0].lower() == "new":
+            utils.warn("new MODE requires an output file")
+            usage(1)
     mode = args[0].lower()
 
     if mode == "arch-space":
@@ -230,6 +417,9 @@ def main ():
         number_of_packages()
     elif mode == "daily-install":
         daily_install_stats()
+    elif mode == "new":
+        users = utils.get_users_from_ldap()
+        new_stats(Cnf["Dir::Log"], args[1])
     else:
         utils.warn("unknown mode '%s'" % (mode))
         usage(1)