framework_test.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887
  1. from benchmark.fortune_html_parser import FortuneHTMLParser
  2. from setup.linux import setup_util
  3. from benchmark.test_types import *
  4. import importlib
  5. import os
  6. import subprocess
  7. import time
  8. import re
  9. from pprint import pprint
  10. import sys
  11. import traceback
  12. import json
  13. import logging
  14. import csv
  15. import shlex
  16. import math
  17. from collections import OrderedDict
  18. from threading import Thread
  19. from threading import Event
  20. from utils import header
  21. class FrameworkTest:
  # Header arguments added to every wrk command line. `{accept}` is replaced
  # with the Accept header line of the test type being benchmarked.
  headers_template = "-H 'Host: localhost' -H '{accept}' -H 'Connection: keep-alive'"
  # Used for test types that require no pipelining or query string params.
  #
  # Bash script text, filled in with str.format by
  # __generate_concurrency_script. It runs, in order:
  #   1. a primer      (-d 5, 8 connections, 8 threads)
  #   2. a warmup      ({max_concurrency} connections, {max_threads} threads)
  #   3. a clock sync  (ntpdate)
  #   4. one timed run per value in {levels}; the level is used as both the
  #      connection count and the --timeout value, and the thread count is
  #      capped at {max_threads}
  # The "STARTTIME"/"ENDTIME" lines echoed around each timed run are what
  # __parse_test reads back to delimit the run.
  concurrency_template = """
echo ""
echo "---------------------------------------------------------"
echo " Running Primer {name}"
echo " {wrk} {headers} -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}\""
echo "---------------------------------------------------------"
echo ""
{wrk} {headers} -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}"
sleep 5
echo ""
echo "---------------------------------------------------------"
echo " Running Warmup {name}"
echo " {wrk} {headers} -d {duration} -c {max_concurrency} --timeout {max_concurrency} -t {max_threads} \"http://{server_host}:{port}{url}\""
echo "---------------------------------------------------------"
echo ""
{wrk} {headers} -d {duration} -c {max_concurrency} --timeout {max_concurrency} -t {max_threads} "http://{server_host}:{port}{url}"
sleep 5
echo ""
echo "---------------------------------------------------------"
echo " Synchronizing time"
echo "---------------------------------------------------------"
echo ""
ntpdate -s pool.ntp.org
for c in {levels}
do
echo ""
echo "---------------------------------------------------------"
echo " Concurrency: $c for {name}"
echo " {wrk} {headers} -d {duration} -c $c --timeout $c -t $(($c>{max_threads}?{max_threads}:$c)) \"http://{server_host}:{port}{url}\""
echo "---------------------------------------------------------"
echo ""
STARTTIME=$(date +"%s")
{wrk} {headers} -d {duration} -c $c --timeout $c -t "$(($c>{max_threads}?{max_threads}:$c))" http://{server_host}:{port}{url}
echo "STARTTIME $STARTTIME"
echo "ENDTIME $(date +"%s")"
sleep 2
done
"""
  # Used for test types that require pipelining.
  #
  # Bash script text, filled in with str.format by __generate_pipeline_script.
  # Same phases as concurrency_template (primer, warmup, ntpdate clock sync,
  # one timed run per value in {levels}), except that each timed run also
  # passes "-s ~/pipeline.lua -- {pipeline}" to wrk. The primer and warmup
  # runs do not use the pipeline script.
  pipeline_template = """
echo ""
echo "---------------------------------------------------------"
echo " Running Primer {name}"
echo " {wrk} {headers} -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}\""
echo "---------------------------------------------------------"
echo ""
{wrk} {headers} -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}"
sleep 5
echo ""
echo "---------------------------------------------------------"
echo " Running Warmup {name}"
echo " {wrk} {headers} -d {duration} -c {max_concurrency} --timeout {max_concurrency} -t {max_threads} \"http://{server_host}:{port}{url}\""
echo "---------------------------------------------------------"
echo ""
{wrk} {headers} -d {duration} -c {max_concurrency} --timeout {max_concurrency} -t {max_threads} "http://{server_host}:{port}{url}"
sleep 5
echo ""
echo "---------------------------------------------------------"
echo " Synchronizing time"
echo "---------------------------------------------------------"
echo ""
ntpdate -s pool.ntp.org
for c in {levels}
do
echo ""
echo "---------------------------------------------------------"
echo " Concurrency: $c for {name}"
echo " {wrk} {headers} -d {duration} -c $c --timeout $c -t $(($c>{max_threads}?{max_threads}:$c)) \"http://{server_host}:{port}{url}\" -s ~/pipeline.lua -- {pipeline}"
echo "---------------------------------------------------------"
echo ""
STARTTIME=$(date +"%s")
{wrk} {headers} -d {duration} -c $c --timeout $c -t "$(($c>{max_threads}?{max_threads}:$c))" http://{server_host}:{port}{url} -s ~/pipeline.lua -- {pipeline}
echo "STARTTIME $STARTTIME"
echo "ENDTIME $(date +"%s")"
sleep 2
done
"""
  # Used for test types that require a database -
  # These tests run at a static concurrency level and vary the size of
  # the query sent with each request
  #
  # Bash script text, filled in with str.format by __generate_query_script.
  # Every run uses {max_concurrency} connections; the loop variable $c is
  # appended to the URL instead of being used as the connection count. The
  # primer and warmup runs append a literal "2" to the URL.
  # NOTE(review): unlike the other two templates, this one hard-codes "wrk"
  # rather than using a {wrk} placeholder - confirm whether that is intended.
  query_template = """
echo ""
echo "---------------------------------------------------------"
echo " Running Primer {name}"
echo " wrk {headers} -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}2\""
echo "---------------------------------------------------------"
echo ""
wrk {headers} -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}2"
sleep 5
echo ""
echo "---------------------------------------------------------"
echo " Running Warmup {name}"
echo " wrk {headers} -d {duration} -c {max_concurrency} --timeout {max_concurrency} -t {max_threads} \"http://{server_host}:{port}{url}2\""
echo "---------------------------------------------------------"
echo ""
wrk {headers} -d {duration} -c {max_concurrency} --timeout {max_concurrency} -t {max_threads} "http://{server_host}:{port}{url}2"
sleep 5
echo ""
echo "---------------------------------------------------------"
echo " Synchronizing time"
echo "---------------------------------------------------------"
echo ""
ntpdate -s pool.ntp.org
for c in {levels}
do
echo ""
echo "---------------------------------------------------------"
echo " Queries: $c for {name}"
echo " wrk {headers} -d {duration} -c {max_concurrency} --timeout {max_concurrency} -t {max_threads} \"http://{server_host}:{port}{url}$c\""
echo "---------------------------------------------------------"
echo ""
STARTTIME=$(date +"%s")
wrk {headers} -d {duration} -c {max_concurrency} --timeout {max_concurrency} -t {max_threads} "http://{server_host}:{port}{url}$c"
echo "STARTTIME $STARTTIME"
echo "ENDTIME $(date +"%s")"
sleep 2
done
"""
  142. ############################################################
  143. # start(benchmarker)
  # Start the test using its setup file
  145. ############################################################
  def start(self, out, err):
    """Run this test's setup script and stream its output to the console.

    The script is run through ``sudo`` as ``self.benchmarker.runner_user``
    on a background thread. That thread prefixes and echoes every line the
    script writes; once the script has exited it keeps echoing whatever is
    still written to the same pipe (processes the script left running)
    until stop() is called.

    Args:
        out: log file object; only ``out.name`` is used, exported to the
            setup script as ``$OUT``.
        err: log file object; only ``err.name`` is used, exported to the
            setup script as ``$ERR``.

    Returns:
        The exit code of the setup script, or None if the script could not
        be launched at all.
    """
    # Load profile for this installation
    profile = "$FWROOT/config/benchmark_profile"

    out_path = os.path.join(self.fwroot, out.name)
    err_path = os.path.join(self.fwroot, err.name)

    # Setup variables for TROOT and IROOT
    setup_util.replace_environ(
        config=profile,
        command='export TROOT=%s && export IROOT=%s && export DBHOST=%s && export MAX_THREADS=%s && export OUT=%s && export ERR=%s' %
        (self.directory, self.install_root, self.database_host,
         self.benchmarker.threads, out_path, err_path))

    # Run setup.sh
    # Printing output until subprocess terminates
    class RunSetup(Thread):
        """Thread that runs the setup command and relays its output."""

        def __init__(self, command, directory, stopFlag, setupFinished, name):
            Thread.__init__(self)
            self.command = command
            self.directory = directory
            self.stopFlag = stopFlag
            self.setupFinished = setupFinished
            self.setupResult = None
            self.name = name
            self.prefix = "Setup %s: " % self.name

        def get_setup_result(self):
            return self.setupResult

        def _output(self, line):
            # Log to current terminal
            # Needs to be one atomic write, so we join because
            # list operations are faster than string concat
            sys.stdout.write("".join([self.prefix, line]))
            sys.stdout.flush()

        # Goal: Stream output of both benchmark toolset and
        #       server to the console and to a file
        # Problem: Capturing output of subprocess and children
        # Solution: Use pipes provided by python
        # Future-proof: Add unit tests that ensure this code works in all situations
        def run(self):
            try:
                # Run in setup.sh in background, using line buffered output and PIPEs
                p = subprocess.Popen(self.command, cwd=self.directory,
                                     shell=True, stdout=subprocess.PIPE, bufsize=1,
                                     stderr=subprocess.STDOUT)

                # Flush output until setup.sh process dies. Note that
                # the child processes forked using & will still be alive
                # and directing their output to the pipes
                while p.poll() is None:
                    line = p.stdout.readline()
                    if line:
                        self._output(line)
                    else:
                        time.sleep(0.5)
                self._output("Exited with code %s\n" % p.returncode)
                self.prefix = "Server %s: " % self.name
                self.setupResult = p.returncode
            finally:
                # Always release start(), even if the script could not be
                # launched; otherwise start() would wait forever.
                self.setupFinished.set()

            # Setup.sh process has terminated, now we watch for output from
            # child processes until the framework_test#stop is called.
            # (This also captures any remaining output that may have happened
            # between readline and poll)
            # Loop while the flag is NOT set; is_set() is used instead of the
            # return value of wait() because that value is None before 2.7.
            while not self.stopFlag.is_set():
                line = p.stdout.readline()
                if line:
                    self._output(line)
                else:
                    # EOF: every writer has closed the pipe. Idle until stop().
                    self.stopFlag.wait(0.5)

            # Grab any remaining output. keepends=True because _output()
            # does not append a newline of its own.
            pout = p.communicate()[0]
            for line in pout.splitlines(True):
                self._output(line)

    # Run the module start (inside parent of TROOT)
    #  - we use the parent as a historical accident - a lot of tests
    #    use subprocess's cwd argument already
    previousDir = os.getcwd()
    os.chdir(os.path.dirname(self.troot))
    try:
        logging.info("Running setup module start (cwd=%s)", self.directory)

        # Run the start script for the test as the "testrunner" user.
        # This requires superuser privs, so `sudo` is necessary.
        #   -u [username] The username
        #   -E Preserves the current environment variables
        #   -H Forces the home var (~) to be reset to the user specified
        #   -e Force bash to exit on first error
        #   -x Turn on bash tracing e.g. print commands before running
        # Note: any startup script run by the framework that needs to keep
        # running (read: the server) should be executed in the background.
        command = 'sudo -u %s -E -H bash -ex %s.sh' % (self.benchmarker.runner_user, self.setup_file)

        debug_command = (
            'export FWROOT=%s && \\\n'
            'export TROOT=%s && \\\n'
            'export IROOT=%s && \\\n'
            'export DBHOST=%s && \\\n'
            'export MAX_THREADS=%s && \\\n'
            'export OUT=%s && \\\n'
            'export ERR=%s && \\\n'
            'cd %s && \\\n'
            '%s') % (self.fwroot,
                     self.directory,
                     self.install_root,
                     self.database_host,
                     self.benchmarker.threads,
                     out_path,
                     err_path,
                     self.directory,
                     command)
        logging.info("To run framework manually, copy/paste this:\n%s", debug_command)

        self.stopFlag = Event()
        setupFinished = Event()
        setup_thread = RunSetup(command, self.directory, self.stopFlag, setupFinished, self.name)
        setup_thread.start()

        # We can return once the port is bound or the setup process is dead
        while not setupFinished.wait(5):
            sys.stderr.write("Waiting for start to return...\n")

        logging.info("Executed %s.sh, returning %s", self.setup_file, setup_thread.get_setup_result())
        return setup_thread.get_setup_result()
    finally:
        # Restore the working directory even if something above raised.
        os.chdir(previousDir)
  290. ############################################################
  291. # End start
  292. ############################################################
  293. def stop(self, out, err):
  294. if self.stopFlag:
  295. self.stopFlag.set()
  296. ############################################################
  297. # verify_urls
  # Verifies each of the URLs for this test. This will simply
  # curl the URL and check for its return status.
  300. # For each url, a flag will be set on this object for whether
  301. # or not it passed
  302. # Returns True if all verifications succeeded
  303. ############################################################
  def verify_urls(self, out, err):
    """Verify every test type in self.runTests and report the outcome.

    For each test type, test.verify(base_url) is called and is expected to
    return a list of (result, reason, url) tuples where result is 'pass',
    'warn' or 'fail'. The outcome is stored on the test object (failed /
    warned / passed), written to ``out`` and stdout, and reported to the
    benchmarker.

    Args:
        out: writable log file for verification output.
        err: log file handed to each test via setup_out_err.

    Returns:
        True if no test type failed (warnings do not count as failure).

    Raises:
        Exception: if a test ends up neither failed, warned nor passed.
    """
    def output_result(result, reason, url):
        out.write(" %s for %s\n" % (result.upper(), url))
        print(" %s for %s" % (result.upper(), url))
        if reason is not None and len(reason) != 0:
            for line in reason.splitlines():
                out.write(" " + line + '\n')
                print(" " + line)

    def verify_type(test_type):
        test = self.runTests[test_type]
        test.setup_out_err(out, err)
        out.write(header("VERIFYING %s" % test_type.upper()))

        base_url = "http://%s:%s" % (self.benchmarker.server_host, self.port)

        try:
            results = test.verify(base_url)
        except Exception as e:
            # A crash inside the verifier is reported as a failure of the
            # test, with the traceback included in the reason text.
            results = [('fail',
                        ("Caused Exception in TFB\n"
                         "This almost certainly means your return value is incorrect,\n"
                         "but also that you have found a bug. Please submit an issue\n"
                         "including this message: %s\n%s") % (e, traceback.format_exc()),
                        base_url)]
            logging.warning("Verifying test %s for %s caused an exception: %s", test_type, self.name, e)

        # Compare by value: `is` on strings only works by interning accident.
        outcomes = [outcome for (outcome, reason, url) in results]
        test.failed = any(outcome == 'fail' for outcome in outcomes)
        test.warned = any(outcome == 'warn' for outcome in outcomes)
        test.passed = all(outcome == 'pass' for outcome in outcomes)

        for (outcome, reason, url) in results:
            output_result(outcome, reason, url)

        if test.failed:
            self.benchmarker.report_verify_results(self, test_type, 'fail')
        elif test.warned:
            self.benchmarker.report_verify_results(self, test_type, 'warn')
        elif test.passed:
            self.benchmarker.report_verify_results(self, test_type, 'pass')
        else:
            raise Exception("Unknown error - test did not pass,warn,or fail")

    all_verified = True
    for test_type in self.runTests:
        verify_type(test_type)
        if self.runTests[test_type].failed:
            all_verified = False

    return all_verified
  346. ############################################################
  347. # End verify_urls
  348. ############################################################
  349. ############################################################
  350. # benchmark
  351. # Runs the benchmark for each type of test that it implements
  352. # JSON/DB/Query.
  353. ############################################################
  def benchmark(self, out, err):
    """Benchmark every test type in self.runTests and report the results.

    For each test type that did not fail verification, a wrk script is
    generated and piped over ssh to the client machine while dstat records
    resource usage; the raw output is then parsed and handed to the
    benchmarker. Test types that failed verification skip the run, but
    their (empty) output file is still parsed and reported.

    Args:
        out: writable log file for progress messages.
        err: log file receiving the stderr of the ssh process.
    """
    def benchmark_type(test_type):
        out.write("BENCHMARKING %s ... " % test_type.upper())

        test = self.runTests[test_type]
        test.setup_out_err(out, err)
        output_file = self.benchmarker.output_file(self.name, test_type)
        if not os.path.exists(output_file):
            # Open to create the empty file
            with open(output_file, 'w'):
                pass

        if not test.failed:
            if test_type == 'plaintext':  # One special case
                remote_script = self.__generate_pipeline_script(test.get_url(), self.port, test.accept_header)
            elif test_type == 'query' or test_type == 'update':
                remote_script = self.__generate_query_script(test.get_url(), self.port, test.accept_header)
            else:
                remote_script = self.__generate_concurrency_script(test.get_url(), self.port, test.accept_header)

            # Begin resource usage metrics collection
            self.__begin_logging(test_type)
            try:
                # Run the benchmark
                with open(output_file, 'w') as raw_file:
                    p = subprocess.Popen(self.benchmarker.client_ssh_string.split(" "),
                                         stdin=subprocess.PIPE, stdout=raw_file, stderr=err)
                    p.communicate(remote_script)
                    err.flush()
            finally:
                # End resource usage metrics collection, even when the run
                # raised, so the dstat process is not left behind.
                self.__end_logging()

        results = self.__parse_test(test_type)
        print("Benchmark results:")
        pprint(results)

        if results is None:
            # __parse_test returns None when it hits an IOError; there is
            # nothing to report in that case.
            logging.warning("No benchmark results could be parsed for %s (%s)", self.name, test_type)
        else:
            self.benchmarker.report_benchmark_results(framework=self, test=test_type, results=results['results'])
        out.write("Complete\n")
        out.flush()

    for test_type in self.runTests:
        benchmark_type(test_type)
  388. ############################################################
  389. # End benchmark
  390. ############################################################
  391. ############################################################
  392. # parse_all
  393. # Method meant to be run for a given timestamp
  394. ############################################################
  395. def parse_all(self):
  396. for test_type in self.runTests:
  397. if os.path.exists(self.benchmarker.get_output_file(self.name, test_type)):
  398. results = self.__parse_test(test_type)
  399. self.benchmarker.report_benchmark_results(framework=self, test=test_type, results=results['results'])
  400. ############################################################
  401. # __parse_test(test_type)
  402. ############################################################
  403. def __parse_test(self, test_type):
  404. try:
  405. results = dict()
  406. results['results'] = []
  407. stats = []
  408. if os.path.exists(self.benchmarker.get_output_file(self.name, test_type)):
  409. with open(self.benchmarker.output_file(self.name, test_type)) as raw_data:
  410. is_warmup = True
  411. rawData = None
  412. for line in raw_data:
  413. if "Queries:" in line or "Concurrency:" in line:
  414. is_warmup = False
  415. rawData = None
  416. continue
  417. if "Warmup" in line or "Primer" in line:
  418. is_warmup = True
  419. continue
  420. if not is_warmup:
  421. if rawData == None:
  422. rawData = dict()
  423. results['results'].append(rawData)
  424. #if "Requests/sec:" in line:
  425. # m = re.search("Requests/sec:\s+([0-9]+)", line)
  426. # rawData['reportedResults'] = m.group(1)
  427. # search for weighttp data such as succeeded and failed.
  428. if "Latency" in line:
  429. m = re.findall("([0-9]+\.*[0-9]*[us|ms|s|m|%]+)", line)
  430. if len(m) == 4:
  431. rawData['latencyAvg'] = m[0]
  432. rawData['latencyStdev'] = m[1]
  433. rawData['latencyMax'] = m[2]
  434. # rawData['latencyStdevPercent'] = m[3]
  435. #if "Req/Sec" in line:
  436. # m = re.findall("([0-9]+\.*[0-9]*[k|%]*)", line)
  437. # if len(m) == 4:
  438. # rawData['requestsAvg'] = m[0]
  439. # rawData['requestsStdev'] = m[1]
  440. # rawData['requestsMax'] = m[2]
  441. # rawData['requestsStdevPercent'] = m[3]
  442. #if "requests in" in line:
  443. # m = re.search("requests in ([0-9]+\.*[0-9]*[ms|s|m|h]+)", line)
  444. # if m != None:
  445. # # parse out the raw time, which may be in minutes or seconds
  446. # raw_time = m.group(1)
  447. # if "ms" in raw_time:
  448. # rawData['total_time'] = float(raw_time[:len(raw_time)-2]) / 1000.0
  449. # elif "s" in raw_time:
  450. # rawData['total_time'] = float(raw_time[:len(raw_time)-1])
  451. # elif "m" in raw_time:
  452. # rawData['total_time'] = float(raw_time[:len(raw_time)-1]) * 60.0
  453. # elif "h" in raw_time:
  454. # rawData['total_time'] = float(raw_time[:len(raw_time)-1]) * 3600.0
  455. if "requests in" in line:
  456. m = re.search("([0-9]+) requests in", line)
  457. if m != None:
  458. rawData['totalRequests'] = int(m.group(1))
  459. if "Socket errors" in line:
  460. if "connect" in line:
  461. m = re.search("connect ([0-9]+)", line)
  462. rawData['connect'] = int(m.group(1))
  463. if "read" in line:
  464. m = re.search("read ([0-9]+)", line)
  465. rawData['read'] = int(m.group(1))
  466. if "write" in line:
  467. m = re.search("write ([0-9]+)", line)
  468. rawData['write'] = int(m.group(1))
  469. if "timeout" in line:
  470. m = re.search("timeout ([0-9]+)", line)
  471. rawData['timeout'] = int(m.group(1))
  472. if "Non-2xx" in line:
  473. m = re.search("Non-2xx or 3xx responses: ([0-9]+)", line)
  474. if m != None:
  475. rawData['5xx'] = int(m.group(1))
  476. if "STARTTIME" in line:
  477. m = re.search("[0-9]+", line)
  478. rawData["startTime"] = int(m.group(0))
  479. if "ENDTIME" in line:
  480. m = re.search("[0-9]+", line)
  481. rawData["endTime"] = int(m.group(0))
  482. test_stats = self.__parse_stats(test_type, rawData["startTime"], rawData["endTime"], 1)
  483. # rawData["averageStats"] = self.__calculate_average_stats(test_stats)
  484. stats.append(test_stats)
  485. with open(self.benchmarker.stats_file(self.name, test_type) + ".json", "w") as stats_file:
  486. json.dump(stats, stats_file, indent=2)
  487. return results
  488. except IOError:
  489. return None
  490. ############################################################
  # End __parse_test
  492. ############################################################
  493. ##########################################################################################
  494. # Private Methods
  495. ##########################################################################################
  496. ############################################################
  497. # __generate_concurrency_script(url, port)
  498. # Generates the string containing the bash script that will
  499. # be run on the client to benchmark a single test. This
  500. # specifically works for the variable concurrency tests (JSON
  501. # and DB)
  502. ############################################################
  503. def __generate_concurrency_script(self, url, port, accept_header, wrk_command="wrk"):
  504. headers = self.headers_template.format(accept=accept_header)
  505. return self.concurrency_template.format(max_concurrency=max(self.benchmarker.concurrency_levels),
  506. max_threads=self.benchmarker.threads, name=self.name, duration=self.benchmarker.duration,
  507. levels=" ".join("{}".format(item) for item in self.benchmarker.concurrency_levels),
  508. server_host=self.benchmarker.server_host, port=port, url=url, headers=headers, wrk=wrk_command)
  509. ############################################################
  510. # __generate_pipeline_script(url, port)
  511. # Generates the string containing the bash script that will
  512. # be run on the client to benchmark a single pipeline test.
  513. ############################################################
  514. def __generate_pipeline_script(self, url, port, accept_header, wrk_command="wrk"):
  515. headers = self.headers_template.format(accept=accept_header)
  516. return self.pipeline_template.format(max_concurrency=16384,
  517. max_threads=self.benchmarker.threads, name=self.name, duration=self.benchmarker.duration,
  518. levels=" ".join("{}".format(item) for item in [256,1024,4096,16384]),
  519. server_host=self.benchmarker.server_host, port=port, url=url, headers=headers, wrk=wrk_command,
  520. pipeline=16)
  521. ############################################################
  522. # __generate_query_script(url, port)
  523. # Generates the string containing the bash script that will
  524. # be run on the client to benchmark a single test. This
  525. # specifically works for the variable query tests (Query)
  526. ############################################################
  527. def __generate_query_script(self, url, port, accept_header):
  528. headers = self.headers_template.format(accept=accept_header)
  529. return self.query_template.format(max_concurrency=max(self.benchmarker.concurrency_levels),
  530. max_threads=self.benchmarker.threads, name=self.name, duration=self.benchmarker.duration,
  531. levels=" ".join("{}".format(item) for item in self.benchmarker.query_levels),
  532. server_host=self.benchmarker.server_host, port=port, url=url, headers=headers)
  533. ############################################################
  # Returns True if any test type of this framework test will use a DB
  535. ############################################################
  536. def requires_database(self):
  537. '''Returns True/False if this test requires a database'''
  538. return any(tobj.requires_db for (ttype,tobj) in self.runTests.iteritems())
  539. ############################################################
  540. # __begin_logging
  541. # Starts a thread to monitor the resource usage, to be synced with the client's time
  542. # TODO: MySQL and InnoDB are possible. Figure out how to implement them.
  543. ############################################################
  def __begin_logging(self, test_type):
    """Start a dstat process that records resource usage for one test type.

    dstat writes to the path returned by the benchmarker's get_stats_file
    (via --output); its console output is discarded. The process handle is
    kept in self.subprocess_handle so __end_logging can stop it.

    Args:
        test_type: key of the test type being benchmarked.
    """
    output_file = self.benchmarker.get_stats_file(self.name, test_type)
    # NOTE(review): "--raw --socket --tcp" appears twice; kept as-is because
    # dropping the repeats could change the columns dstat writes - confirm.
    dstat_string = ("dstat -afilmprsT --aio --fs --ipc --lock --raw --socket --tcp "
                    "--raw --socket --tcp --udp --unix --vm --disk-util "
                    "--rpc --rpcd --output")
    # The path is appended as its own argument so that whitespace or quotes
    # in it are not re-split by shlex.
    cmd = shlex.split(dstat_string) + ["{file_name}".format(file_name=output_file)]
    # The child gets its own copy of the descriptor, so this process's
    # handle can be closed as soon as Popen returns.
    with open(os.devnull, "w") as dev_null:
        self.subprocess_handle = subprocess.Popen(cmd, stdout=dev_null)
  552. ##############################################################
  553. # Begin __end_logging
  554. # Stops the logger thread and blocks until shutdown is complete.
  555. ##############################################################
  556. def __end_logging(self):
  557. self.subprocess_handle.terminate()
  558. self.subprocess_handle.communicate()
  559. ##############################################################
  560. # Begin __parse_stats
  561. # For each test type, process all the statistics, and return a multi-layered dictionary
  562. # that has a structure as follows:
  563. # (timestamp)
  564. # | (main header) - group that the stat is in
  565. # | | (sub header) - title of the stat
  566. # | | | (stat) - the stat itself, usually a floating point number
  567. ##############################################################
  568. def __parse_stats(self, test_type, start_time, end_time, interval):
  569. stats_dict = dict()
  570. stats_file = self.benchmarker.stats_file(self.name, test_type)
  571. with open(stats_file) as stats:
  572. while(stats.next() != "\n"): # dstat doesn't output a completely compliant CSV file - we need to strip the header
  573. pass
  574. stats_reader = csv.reader(stats)
  575. main_header = stats_reader.next()
  576. sub_header = stats_reader.next()
  577. time_row = sub_header.index("epoch")
  578. int_counter = 0
  579. for row in stats_reader:
  580. time = float(row[time_row])
  581. int_counter+=1
  582. if time < start_time:
  583. continue
  584. elif time > end_time:
  585. return stats_dict
  586. if int_counter % interval != 0:
  587. continue
  588. row_dict = dict()
  589. for nextheader in main_header:
  590. if nextheader != "":
  591. row_dict[nextheader] = dict()
  592. header = ""
  593. for item_num, column in enumerate(row):
  594. if(len(main_header[item_num]) != 0):
  595. header = main_header[item_num]
  596. row_dict[header][sub_header[item_num]] = float(column) # all the stats are numbers, so we want to make sure that they stay that way in json
  597. stats_dict[time] = row_dict
  598. return stats_dict
  599. ##############################################################
  600. # End __parse_stats
  601. ##############################################################
  602. def __getattr__(self, name):
  603. """For backwards compatibility, we used to pass benchmarker
  604. as the argument to the setup.sh files"""
  605. try:
  606. x = getattr(self.benchmarker, name)
  607. except AttributeError:
  608. print "AttributeError: %s not a member of FrameworkTest or Benchmarker" % name
  609. print "This is probably a bug"
  610. raise
  611. return x
  612. ##############################################################
  613. # Begin __calculate_average_stats
  614. # We have a large amount of raw data for the statistics that
  615. # may be useful for the stats nerds, but most people care about
  616. # a couple of numbers. For now, we're only going to supply:
  617. # * Average CPU
  618. # * Average Memory
  619. # * Total network use
  620. # * Total disk use
  621. # More may be added in the future. If they are, please update
  622. # the above list.
  623. # Note: raw_stats is directly from the __parse_stats method.
  624. # Recall that this consists of a dictionary of timestamps,
  625. # each of which contains a dictionary of stat categories, each of
  626. # which contains a dictionary of stats.
  627. ##############################################################
  628. def __calculate_average_stats(self, raw_stats):
  629. raw_stat_collection = dict()
  630. for timestamp, time_dict in raw_stats.items():
  631. for main_header, sub_headers in time_dict.items():
  632. item_to_append = None
  633. if 'cpu' in main_header:
  634. # We want to take the idl stat and subtract it from 100
  635. # to get the time that the CPU is NOT idle.
  636. item_to_append = sub_headers['idl'] - 100.0
  637. elif main_header == 'memory usage':
  638. item_to_append = sub_headers['used']
  639. elif 'net' in main_header:
  640. # Network stats have two parts - recieve and send. We'll use a tuple of
  641. # style (recieve, send)
  642. item_to_append = (sub_headers['recv'], sub_headers['send'])
  643. elif 'dsk' or 'io' in main_header:
  644. # Similar for network, except our tuple looks like (read, write)
  645. item_to_append = (sub_headers['read'], sub_headers['writ'])
  646. if item_to_append is not None:
  647. if main_header not in raw_stat_collection:
  648. raw_stat_collection[main_header] = list()
  649. raw_stat_collection[main_header].append(item_to_append)
  650. # Simple function to determine human readable size
  651. # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
  652. def sizeof_fmt(num):
  653. # We'll assume that any number we get is convertable to a float, just in case
  654. num = float(num)
  655. for x in ['bytes','KB','MB','GB']:
  656. if num < 1024.0 and num > -1024.0:
  657. return "%3.1f%s" % (num, x)
  658. num /= 1024.0
  659. return "%3.1f%s" % (num, 'TB')
  660. # Now we have our raw stats in a readable format - we need to format it for display
  661. # We need a floating point sum, so the built in sum doesn't cut it
  662. display_stat_collection = dict()
  663. for header, values in raw_stat_collection.items():
  664. display_stat = None
  665. if 'cpu' in header:
  666. display_stat = sizeof_fmt(math.fsum(values) / len(values))
  667. elif main_header == 'memory usage':
  668. display_stat = sizeof_fmt(math.fsum(values) / len(values))
  669. elif 'net' in main_header:
  670. receive, send = zip(*values) # unzip
  671. display_stat = {'receive': sizeof_fmt(math.fsum(receive)), 'send': sizeof_fmt(math.fsum(send))}
  672. else: # if 'dsk' or 'io' in header:
  673. read, write = zip(*values) # unzip
  674. display_stat = {'read': sizeof_fmt(math.fsum(read)), 'write': sizeof_fmt(math.fsum(write))}
  675. display_stat_collection[header] = display_stat
  676. return display_stat
  677. ###########################################################################################
  678. # End __calculate_average_stats
  679. #########################################################################################
  680. ##########################################################################################
  681. # Constructor
  682. ##########################################################################################
  683. def __init__(self, name, directory, benchmarker, runTests, args):
  684. self.name = name
  685. self.directory = directory
  686. self.benchmarker = benchmarker
  687. self.runTests = runTests
  688. self.fwroot = benchmarker.fwroot
  689. self.approach = ""
  690. self.classification = ""
  691. self.database = ""
  692. self.framework = ""
  693. self.language = ""
  694. self.orm = ""
  695. self.platform = ""
  696. self.webserver = ""
  697. self.os = ""
  698. self.database_os = ""
  699. self.display_name = ""
  700. self.notes = ""
  701. self.versus = ""
  702. # setup logging
  703. logging.basicConfig(stream=sys.stderr, level=logging.INFO)
  704. self.install_root="%s/%s" % (self.fwroot, "installs")
  705. if benchmarker.install_strategy is 'pertest':
  706. self.install_root="%s/pertest/%s" % (self.install_root, name)
  707. # Used in setup.sh scripts for consistency with
  708. # the bash environment variables
  709. self.troot = self.directory
  710. self.iroot = self.install_root
  711. self.__dict__.update(args)
  712. ############################################################
  713. # End __init__
  714. ############################################################
  715. ############################################################
  716. # End FrameworkTest
  717. ############################################################
  718. ##########################################################################################
  719. # Static methods
  720. ##########################################################################################
  721. ##############################################################
  722. # parse_config(config, directory, benchmarker)
  723. # parses a config file and returns a list of FrameworkTest
  724. # objects based on that config file.
  725. ##############################################################
  def parse_config(config, directory, benchmarker):
      """Parse a config object into a list of FrameworkTest objects.

      Args:
          config: dict with a 'framework' name and a 'tests' list. Each entry
              of 'tests' is a dict mapping a test name to that test's keys.
          directory: passed through to every FrameworkTest.
          benchmarker: passed through to every FrameworkTest; its ``types``
              mapping (type name -> test type object) decides which test
              types are attempted for each test.

      Returns:
          A list with one FrameworkTest per named test in ``config['tests']``.

      Raises:
          Exception: if a test is missing any of the required keys.

      Note:
          A test's key dict is updated in place with a 'framework' entry when
          it has none (or an empty one).
      """
      # TODO have a TechEmpower person confirm this list - I don't know what the website requires....
      required = ['language', 'webserver', 'classification', 'database', 'approach',
                  'orm', 'framework', 'os', 'database_os']
      tests = []

      # The config object can specify multiple tests
      # Loop over them and parse each into a FrameworkTest
      for test in config['tests']:
          if "default" not in test:
              logging.warning("Framework %s does not define a default test in benchmark_config", config['framework'])

          for test_name, test_keys in test.items():
              # Prefix all test names with framework except 'default' test
              if test_name == 'default':
                  test_name = config['framework']
              else:
                  test_name = "%s-%s" % (config['framework'], test_name)

              # Ensure FrameworkTest.framework is available. Use .get so a
              # test that omits the key entirely is defaulted as well,
              # instead of failing with a bare KeyError.
              if not test_keys.get('framework'):
                  test_keys['framework'] = config['framework']

              # Confirm required keys are present
              missing = [key for key in required if key not in test_keys]
              if missing:
                  raise Exception("benchmark_config for test %s is invalid - missing required keys: %s"
                                  % (test_name, ", ".join(missing)))

              # Map test type to a parsed FrameworkTestType object
              runTests = dict()
              for type_name, type_obj in benchmarker.types.items():
                  try:
                      runTests[type_name] = type_obj.copy().parse(test_keys)
                  except AttributeError:
                      # This is quite common - most tests don't support all
                      # types. Deliberately ignored: logging it produced
                      # ~1500 lines of debug output in travis.
                      pass

              # Run order is by length of the test type name. There were many
              # problems with 'plaintext' not being run last, and sorting by
              # name length was chosen to make it run last for every framework.
              sortedRunTests = OrderedDict()
              for type_name in sorted(runTests, key=len):
                  sortedRunTests[type_name] = runTests[type_name]

              # By passing the entire set of keys, each FrameworkTest will have a member for each key
              tests.append(FrameworkTest(test_name, directory, benchmarker, sortedRunTests, test_keys))

      return tests
  776. ##############################################################
  777. # End parse_config
  778. ##############################################################