gcpausevis.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. #!/usr/bin/env python
  2. import matplotlib.pyplot as plt
  3. from matplotlib.dates import DateFormatter, MinuteLocator, SecondLocator
  4. import numpy as np
  5. from StringIO import StringIO
  6. import os
  7. import re
  8. import sys
  9. from optparse import OptionParser
  10. import subprocess
  11. parser = OptionParser (usage = "Usage: %prog [options] BINARY-PROTOCOL")
  12. parser.add_option ('--histogram', action = 'store_true', dest = 'histogram', help = "pause time histogram")
  13. parser.add_option ('--scatter', action = 'store_true', dest = 'scatter', help = "pause time scatterplot")
  14. parser.add_option ('--minor', action = 'store_true', dest = 'minor', help = "only show minor collections in histogram")
  15. parser.add_option ('--major', action = 'store_true', dest = 'major', help = "only show major collections in histogram")
  16. (options, files) = parser.parse_args ()
  17. show_histogram = False
  18. show_scatter = False
  19. show_minor = True
  20. show_major = True
  21. if options.minor:
  22. show_major = False
  23. if options.major:
  24. show_minor = False
  25. if options.histogram:
  26. show_histogram = True
  27. if options.scatter:
  28. show_scatter = True
  29. if (options.minor or options.major) and not options.scatter:
  30. show_histogram = True
  31. script_path = os.path.realpath (__file__)
  32. sgen_grep_path = os.path.join (os.path.dirname (script_path), 'sgen-grep-binprot')
  33. if not os.path.isfile (sgen_grep_path):
  34. sys.stderr.write ('Error: `%s` does not exist.\n' % sgen_grep_path)
  35. sys.exit (1)
  36. if len (files) != 1:
  37. parser.print_help ()
  38. sys.exit (1)
  39. data = []
  40. class Event:
  41. def __init__(self, **kwargs):
  42. self.minor_work = kwargs['minor_work']
  43. self.major_work = kwargs['major_work']
  44. self.start = kwargs['start']
  45. self.stop = kwargs['stop']
  46. self.gc_type = kwargs['gc_type']
  47. def __repr__(self):
  48. return 'Event(minor_work={}, major_work={}, start={}, stop={}, gc_type={})'.format(
  49. self.minor_work,
  50. self.major_work,
  51. self.start,
  52. self.stop,
  53. self.gc_type,
  54. )
  55. grep_input = open (files [0])
  56. proc = subprocess.Popen ([sgen_grep_path, '--pause-times'], stdin = grep_input, stdout = subprocess.PIPE)
  57. for line in iter (proc.stdout.readline, ''):
  58. m = re.match ('^pause-time (\d+) (\d+) (\d+) (\d+) (\d+)', line)
  59. if m:
  60. minor_work = major_work = False
  61. generation = int (m.group (1))
  62. concurrent = int (m.group (2)) != 0
  63. finish = int (m.group (3)) != 0
  64. msecs = int (m.group (4)) / 10.0 / 1000.0
  65. start = int (m.group (5)) / 10.0 / 1000.0
  66. if concurrent:
  67. kind = "CONC"
  68. else:
  69. kind = "SYNC"
  70. if generation == 0:
  71. minor_work = True
  72. if concurrent:
  73. major_work = True
  74. gc_type = "nursery+update"
  75. else:
  76. gc_type = "nursery"
  77. else:
  78. major_work = True
  79. if concurrent:
  80. if finish:
  81. minor_work = True
  82. gc_type = "nursery+finish"
  83. else:
  84. gc_type = "start"
  85. else:
  86. gc_type = "full"
  87. rec = Event(
  88. minor_work=minor_work,
  89. major_work=major_work,
  90. start=start,
  91. stop=start + msecs,
  92. kind=kind,
  93. gc_type=gc_type,
  94. )
  95. print rec
  96. data.append (rec)
  97. class MajorGCEventGroup:
  98. pass
  99. class FullMajorGCEventGroup(MajorGCEventGroup):
  100. def __init__(self, event):
  101. self.event = event
  102. def __repr__(self):
  103. return 'FullMajorGCEventGroup({})'.format(
  104. self.event,
  105. )
  106. class ConcurrentMajorGCEventGroup(MajorGCEventGroup):
  107. def __init__(self, start, updates, finish):
  108. self.start = start
  109. self.updates = updates
  110. self.finish = finish
  111. def __repr__(self):
  112. return 'ConcurrentMajorEventGroup({}, {}, {})'.format(
  113. self.start,
  114. self.updates,
  115. self.finish,
  116. )
  117. # ([Event], int) -> (MajorGCEventGroup, int) | None
  118. def parse_next_major_gc(data, i):
  119. assert i >= 0
  120. # Find start or full event.
  121. while i < len(data) and data[i].gc_type not in ['start', 'full', 'nursery+update']:
  122. i += 1
  123. if i == len(data):
  124. return None
  125. # If full event, done.
  126. if data[i].gc_type == 'full':
  127. return (FullMajorGCEventGroup(data[i]), i + 1)
  128. start_event = data[i]
  129. update_events = []
  130. # Filter update events and find finish event.
  131. while i < len(data) and data[i].gc_type != 'nursery+finish':
  132. if data[i].gc_type == 'nursery+update':
  133. update_events.append(data[i])
  134. i += 1
  135. if i == len(data):
  136. return None
  137. finish_event = data[i]
  138. i += 1
  139. return (ConcurrentMajorGCEventGroup(start_event, update_events, finish_event), i)
  140. # [Event] -> [MajorGCEventGroup]
  141. def parse_major_gcs(data):
  142. major_gc_events = []
  143. i = 0
  144. while True:
  145. maybe_event_group = parse_next_major_gc(data, i)
  146. if maybe_event_group is None:
  147. return major_gc_events
  148. event_group, i = maybe_event_group
  149. major_gc_events.append(event_group)
  150. if show_histogram or show_scatter:
  151. bin_data_minor = []
  152. bin_data_both = []
  153. bin_data_major = []
  154. bin_names = []
  155. timeline_x = []
  156. timeline_y = []
  157. timeline_c = []
  158. for rec in data:
  159. pause = rec.stop - rec.start
  160. color = None
  161. if rec.major_work:
  162. if rec.minor_work:
  163. color = 'purple'
  164. else:
  165. color = 'red' if show_major else None
  166. else:
  167. color = 'blue' if show_minor else None
  168. if color:
  169. timeline_x.append(rec.start)
  170. timeline_y.append(pause)
  171. timeline_c.append(color)
  172. for i in range(100):
  173. time = (1.3)**(i+6)
  174. prev_time = 0 if i==0 else (1.3)**(i+5)
  175. if len(bin_names) <= i:
  176. bin_data_minor.append(0)
  177. bin_data_both.append(0)
  178. bin_data_major.append(0)
  179. bin_names.append('%d-%dms' % (int(prev_time), int(time)))
  180. if pause <= time:
  181. if rec.major_work:
  182. if rec.minor_work:
  183. bin_data_both[i] += pause
  184. else:
  185. bin_data_major[i] += pause
  186. else:
  187. bin_data_minor[i] += pause
  188. break
  189. bin_data_minor=np.array(bin_data_minor)
  190. bin_data_both=np.array(bin_data_both)
  191. bin_data_major=np.array(bin_data_major)
  192. if show_scatter:
  193. plt.scatter(timeline_x, timeline_y, c=timeline_c)
  194. else:
  195. if show_minor:
  196. plt.bar(range(len(bin_data_minor)), bin_data_minor, color='blue', label="minor") #, align='center')
  197. plt.bar(range(len(bin_data_both)), bin_data_both, bottom=bin_data_minor, color='purple', label="minor & major")
  198. if show_major:
  199. plt.bar(range(len(bin_data_major)), bin_data_major, bottom=(bin_data_minor+bin_data_both), color='red', label="only major")
  200. else:
  201. plt.bar(range(len(bin_data_both)), bin_data_both, color='purple', label="minor & major")
  202. plt.bar(range(len(bin_data_major)), bin_data_major, bottom=bin_data_both, color='red')
  203. plt.xticks(range(len(bin_names)), bin_names)
  204. plt.ylabel('Cumulative time spent in GC pauses (ms)')
  205. plt.xlabel('GC pause length')
  206. plt.xticks(rotation=60)
  207. plt.legend(loc='upper left')
  208. else:
  209. major_gc_event_groups = parse_major_gcs(data)
  210. def bar(**kwargs):
  211. indices = kwargs['indices']
  212. pauses = kwargs['pauses']
  213. color = kwargs['color']
  214. if 'bottom' in kwargs:
  215. bottom = kwargs['bottom']
  216. else:
  217. bottom = 0
  218. plt.bar(
  219. [index for index in indices if pauses[index] is not None],
  220. np.array([pause for pause in pauses if pause is not None]),
  221. color=color,
  222. bottom=bottom,
  223. )
  224. indices = np.arange(len(major_gc_event_groups))
  225. start_pauses = [
  226. event_group.start.stop - event_group.start.start
  227. if isinstance(event_group, ConcurrentMajorGCEventGroup) else None
  228. for event_group in major_gc_event_groups
  229. ]
  230. bar(
  231. indices=indices,
  232. pauses=start_pauses,
  233. color='red',
  234. )
  235. update_pauses = [
  236. sum([
  237. update_event.stop - update_event.start
  238. for update_event in event_group.updates
  239. ]) if isinstance(event_group, ConcurrentMajorGCEventGroup) else None
  240. for event_group in major_gc_event_groups
  241. ]
  242. bar(
  243. indices=indices,
  244. pauses=update_pauses,
  245. color='green',
  246. bottom=[pause for pause in start_pauses if pause is not None],
  247. )
  248. finish_pauses = [
  249. event_group.finish.stop - event_group.finish.start
  250. if isinstance(event_group, ConcurrentMajorGCEventGroup) else None
  251. for event_group in major_gc_event_groups
  252. ]
  253. start_update_pauses = [
  254. a + b
  255. for a, b in zip(start_pauses, update_pauses)
  256. if a is not None and b is not None
  257. ]
  258. bar(
  259. indices=indices,
  260. pauses=finish_pauses,
  261. color='blue',
  262. bottom=start_update_pauses,
  263. )
  264. full_pauses = [
  265. event_group.event.stop - event_group.event.start
  266. if isinstance(event_group, FullMajorGCEventGroup) else None
  267. for event_group in major_gc_event_groups
  268. ]
  269. bar(
  270. indices=indices,
  271. pauses=full_pauses,
  272. color='black',
  273. )
  274. plt.ylabel("Pause Time (ms)")
  275. plt.xlabel("Collection")
  276. plt.show()