Browse Source

Makes counting commits work again

fixes #1113
Hamilton Turner 11 years ago
parent
commit
0922f90d80
1 changed file with 33 additions and 5 deletions
  1. 33 5
      toolset/benchmark/benchmarker.py

+ 33 - 5
toolset/benchmark/benchmarker.py

@@ -16,6 +16,8 @@ import csv
 import sys
 import logging
 import socket
+import threading
+
 from multiprocessing import Process
 from datetime import datetime
 
@@ -766,18 +768,44 @@ class Benchmarker:
 
   ############################################################
   # __count_commits
+  #
   ############################################################
   def __count_commits(self):
-    frameworks = gather_frameworks(benchmarker=self)
+    frameworks = gather_frameworks(include=self.test,
+      exclude=self.exclude, benchmarker=self)
 
-    jsonResult = {}
-    for framework, testlist in frameworks.iteritems():
-      command = "git rev-list HEAD -- " + testlist[0].directory + " | sort -u | wc -l"
+    def count_commit(directory, jsonResult):
+      command = "git rev-list HEAD -- " + directory + " | sort -u | wc -l"
       try:
         commitCount = subprocess.check_output(command, shell=True)
         jsonResult[framework] = int(commitCount)
       except subprocess.CalledProcessError:
-        continue
+        pass
+
+    # Because git can be slow when run in large batches, this 
+    # calls git up to 4 times in parallel. Normal improvement is ~3-4x
+    # in my trials, or ~100 seconds down to ~25
+    # This is safe to parallelize as long as each thread only 
+    # accesses one key in the dictionary
+    threads = []
+    jsonResult = {}
+    t1 = datetime.now()
+    for framework, testlist in frameworks.iteritems():
+      directory = testlist[0].directory
+      t = threading.Thread(target=count_commit, args=(directory,jsonResult))
+      t.start()
+      threads.append(t)
+      # Git has internal locks, full parallel will just cause contention
+      # and slowness, so we rate-limit a bit
+      if len(threads) >= 4:
+        threads[0].join()
+        threads.remove(threads[0])
+
+    # Wait for remaining threads
+    for t in threads:
+      t.join()
+    t2 = datetime.now()
+    # print "Took %s seconds " % (t2 - t1).seconds
 
     self.results['rawData']['commitCounts'] = jsonResult
     self.commits = jsonResult