framework_test.py 45 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059
  1. from benchmark.fortune_html_parser import FortuneHTMLParser
  2. from setup.linux import setup_util
  3. from benchmark.test_types import *
  4. import importlib
  5. import os
  6. import subprocess
  7. import time
  8. import re
  9. from pprint import pprint
  10. import sys
  11. import traceback
  12. import json
  13. import logging
  14. import csv
  15. import shlex
  16. import math
  17. from collections import OrderedDict
  18. from requests import ConnectionError
  19. from threading import Thread
  20. from threading import Event
  21. from utils import header
  22. # Cross-platform colored text
  23. from colorama import Fore, Back, Style
  24. from datetime import datetime
  25. from datetime import timedelta
  26. class FrameworkTest:
  27. headers_template = "-H 'Host: localhost' -H '{accept}' -H 'Connection: keep-alive'"
  28. # Used for test types that require no pipelining or query string params.
  29. concurrency_template = """
  30. echo ""
  31. echo "---------------------------------------------------------"
  32. echo " Running Primer {name}"
  33. echo " {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}\""
  34. echo "---------------------------------------------------------"
  35. echo ""
  36. {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}"
  37. sleep 5
  38. echo ""
  39. echo "---------------------------------------------------------"
  40. echo " Running Warmup {name}"
  41. echo " {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} \"http://{server_host}:{port}{url}\""
  42. echo "---------------------------------------------------------"
  43. echo ""
  44. {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} "http://{server_host}:{port}{url}"
  45. sleep 5
  46. echo ""
  47. echo "---------------------------------------------------------"
  48. echo " Synchronizing time"
  49. echo "---------------------------------------------------------"
  50. echo ""
  51. ntpdate -s pool.ntp.org
  52. for c in {levels}
  53. do
  54. echo ""
  55. echo "---------------------------------------------------------"
  56. echo " Concurrency: $c for {name}"
  57. echo " {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t $(($c>{max_threads}?{max_threads}:$c)) \"http://{server_host}:{port}{url}\""
  58. echo "---------------------------------------------------------"
  59. echo ""
  60. STARTTIME=$(date +"%s")
  61. {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t "$(($c>{max_threads}?{max_threads}:$c))" http://{server_host}:{port}{url}
  62. echo "STARTTIME $STARTTIME"
  63. echo "ENDTIME $(date +"%s")"
  64. sleep 2
  65. done
  66. """
  67. # Used for test types that require pipelining.
  68. pipeline_template = """
  69. echo ""
  70. echo "---------------------------------------------------------"
  71. echo " Running Primer {name}"
  72. echo " {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}\""
  73. echo "---------------------------------------------------------"
  74. echo ""
  75. {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}"
  76. sleep 5
  77. echo ""
  78. echo "---------------------------------------------------------"
  79. echo " Running Warmup {name}"
  80. echo " {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} \"http://{server_host}:{port}{url}\""
  81. echo "---------------------------------------------------------"
  82. echo ""
  83. {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} "http://{server_host}:{port}{url}"
  84. sleep 5
  85. echo ""
  86. echo "---------------------------------------------------------"
  87. echo " Synchronizing time"
  88. echo "---------------------------------------------------------"
  89. echo ""
  90. ntpdate -s pool.ntp.org
  91. for c in {levels}
  92. do
  93. echo ""
  94. echo "---------------------------------------------------------"
  95. echo " Concurrency: $c for {name}"
  96. echo " {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t $(($c>{max_threads}?{max_threads}:$c)) \"http://{server_host}:{port}{url}\" -s ~/pipeline.lua -- {pipeline}"
  97. echo "---------------------------------------------------------"
  98. echo ""
  99. STARTTIME=$(date +"%s")
  100. {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t "$(($c>{max_threads}?{max_threads}:$c))" http://{server_host}:{port}{url} -s ~/pipeline.lua -- {pipeline}
  101. echo "STARTTIME $STARTTIME"
  102. echo "ENDTIME $(date +"%s")"
  103. sleep 2
  104. done
  105. """
  106. # Used for test types that require a database -
  107. # These tests run at a static concurrency level and vary the size of
  108. # the query sent with each request
  109. query_template = """
  110. echo ""
  111. echo "---------------------------------------------------------"
  112. echo " Running Primer {name}"
  113. echo " wrk {headers} --latency -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}2\""
  114. echo "---------------------------------------------------------"
  115. echo ""
  116. wrk {headers} --latency -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}2"
  117. sleep 5
  118. echo ""
  119. echo "---------------------------------------------------------"
  120. echo " Running Warmup {name}"
  121. echo " wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} \"http://{server_host}:{port}{url}2\""
  122. echo "---------------------------------------------------------"
  123. echo ""
  124. wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} "http://{server_host}:{port}{url}2"
  125. sleep 5
  126. echo ""
  127. echo "---------------------------------------------------------"
  128. echo " Synchronizing time"
  129. echo "---------------------------------------------------------"
  130. echo ""
  131. ntpdate -s pool.ntp.org
  132. for c in {levels}
  133. do
  134. echo ""
  135. echo "---------------------------------------------------------"
  136. echo " Queries: $c for {name}"
  137. echo " wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} \"http://{server_host}:{port}{url}$c\""
  138. echo "---------------------------------------------------------"
  139. echo ""
  140. STARTTIME=$(date +"%s")
  141. wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} "http://{server_host}:{port}{url}$c"
  142. echo "STARTTIME $STARTTIME"
  143. echo "ENDTIME $(date +"%s")"
  144. sleep 2
  145. done
  146. """
  147. ############################################################
  148. # start(benchmarker)
  149. # Start the test using its setup file
  150. ############################################################
  151. def start(self, out):
  152. # Setup environment variables
  153. logDir = os.path.join(self.fwroot, self.benchmarker.latest_results_directory, 'logs', self.name.lower())
  154. bash_functions_path= os.path.join(self.fwroot, 'toolset/setup/linux/bash_functions.sh')
  155. setup_util.replace_environ(config='$FWROOT/config/benchmark_profile',
  156. command='''\
  157. export TROOT=%s && \
  158. export IROOT=%s && \
  159. export DBHOST=%s && \
  160. export LOGDIR=%s && \
  161. export MAX_THREADS=%s && \
  162. export MAX_CONCURRENCY=%s \
  163. ''' % (
  164. self.directory,
  165. self.install_root,
  166. self.database_host,
  167. logDir,
  168. self.benchmarker.threads,
  169. max(self.benchmarker.concurrency_levels)))
  170. # Always ensure that IROOT belongs to the runner_user
  171. if not os.path.exists(self.install_root):
  172. os.mkdir(self.install_root)
  173. chown = "sudo chown -R %s:%s %s" % (self.benchmarker.runner_user,
  174. self.benchmarker.runner_user, os.path.join(self.fwroot, self.install_root))
  175. subprocess.check_call(chown, shell=True, cwd=self.fwroot, executable='/bin/bash')
  176. # Run the module start inside parent of TROOT
  177. # - we use the parent as a historical accident, a number of tests
  178. # refer to their TROOT maually still
  179. previousDir = os.getcwd()
  180. os.chdir(os.path.dirname(self.troot))
  181. logging.info("Running setup module start (cwd=%s)", self.directory)
  182. # Run the start script for the test as the "testrunner" user
  183. #
  184. # `sudo` - Switching user requires superuser privs
  185. # -u [username] The username
  186. # -E Preserves the current environment variables
  187. # -H Forces the home var (~) to be reset to the user specified
  188. # `stdbuf` - Disable buffering, send output to python ASAP
  189. # -o0 zero-sized buffer for stdout
  190. # -e0 zero-sized buffer for stderr
  191. # `bash` - Run the setup.sh script using bash
  192. # -e Force bash to exit on first error
  193. # -x Turn on bash tracing e.g. print commands before running
  194. #
  195. # Most servers do not output to stdout/stderr while serving
  196. # requests so there is no performance hit from disabling
  197. # output buffering. This disabling is necessary to
  198. # a) allow TFB to show output in real time and b) avoid loosing
  199. # output in the buffer when the testrunner processes are forcibly
  200. # killed
  201. #
  202. # See http://www.pixelbeat.org/programming/stdio_buffering/
  203. # See https://blogs.gnome.org/markmc/2013/06/04/async-io-and-python/
  204. # See http://eyalarubas.com/python-subproc-nonblock.html
  205. command = 'sudo -u %s -E -H stdbuf -o0 -e0 bash -exc "source %s && source %s.sh"' % (
  206. self.benchmarker.runner_user,
  207. bash_functions_path,
  208. os.path.join(self.troot, self.setup_file))
  209. debug_command = '''\
  210. export FWROOT=%s && \\
  211. export TROOT=%s && \\
  212. export IROOT=%s && \\
  213. export DBHOST=%s && \\
  214. export LOGDIR=%s && \\
  215. export MAX_THREADS=%s && \\
  216. export MAX_CONCURRENCY=%s && \\
  217. cd %s && \\
  218. %s''' % (self.fwroot,
  219. self.directory,
  220. self.install_root,
  221. self.database_host,
  222. logDir,
  223. self.benchmarker.threads,
  224. max(self.benchmarker.concurrency_levels),
  225. self.directory,
  226. command)
  227. logging.info("To run %s manually, copy/paste this:\n%s", self.name, debug_command)
  228. def tee_output(prefix, line):
  229. # Needs to be one atomic write
  230. # Explicitly use UTF-8 as it's the most common framework output
  231. # TODO improve encoding handling
  232. line = prefix.encode('utf-8') + line
  233. # Log to current terminal
  234. sys.stdout.write(line)
  235. sys.stdout.flush()
  236. # logging.error("".join([prefix, line]))
  237. out.write(line)
  238. out.flush()
  239. # Start the setup.sh command
  240. p = subprocess.Popen(command, cwd=self.directory,
  241. shell=True, stdout=subprocess.PIPE,
  242. stderr=subprocess.STDOUT)
  243. nbsr = setup_util.NonBlockingStreamReader(p.stdout,
  244. "%s: %s.sh and framework processes have terminated" % (self.name, self.setup_file))
  245. # Set a limit on total execution time of setup.sh
  246. timeout = datetime.now() + timedelta(minutes = 105)
  247. time_remaining = timeout - datetime.now()
  248. # Need to print to stdout once every 10 minutes or Travis-CI will abort
  249. travis_timeout = datetime.now() + timedelta(minutes = 5)
  250. # Flush output until setup.sh work is finished. This is
  251. # either a) when setup.sh exits b) when the port is bound
  252. # c) when we run out of time. Note that 'finished' doesn't
  253. # guarantee setup.sh process is dead - the OS may choose to make
  254. # setup.sh a zombie process if it still has living children
  255. #
  256. # Note: child processes forked (using &) will remain alive
  257. # after setup.sh has exited. The will have inherited the
  258. # stdout/stderr descriptors and will be directing their
  259. # output to the pipes.
  260. #
  261. prefix = "Setup %s: " % self.name
  262. while (p.poll() is None
  263. and not self.benchmarker.is_port_bound(self.port)
  264. and not time_remaining.total_seconds() < 0):
  265. # The conditions above are slow to check, so
  266. # we will delay output substantially if we only
  267. # print one line per condition check.
  268. # Adding a tight loop here mitigates the effect,
  269. # ensuring that most of the output directly from
  270. # setup.sh is sent to tee_output before the outer
  271. # loop exits and prints things like "setup.sh exited"
  272. #
  273. for i in xrange(10):
  274. try:
  275. line = nbsr.readline(0.05)
  276. if line:
  277. tee_output(prefix, line)
  278. # Reset Travis-CI timer
  279. travis_timeout = datetime.now() + timedelta(minutes = 5)
  280. except setup_util.EndOfStream:
  281. tee_output(prefix, "Setup has terminated\n")
  282. break
  283. time_remaining = timeout - datetime.now()
  284. if (travis_timeout - datetime.now()).total_seconds() < 0:
  285. sys.stdout.write(prefix + 'Printing so Travis-CI does not time out\n')
  286. sys.stdout.write(prefix + "Status: Poll: %s, Port %s bound: %s, Time Left: %s\n" % (
  287. p.poll(), self.port, self.benchmarker.is_port_bound(self.port), time_remaining))
  288. sys.stdout.flush()
  289. travis_timeout = datetime.now() + timedelta(minutes = 5)
  290. # Did we time out?
  291. if time_remaining.total_seconds() < 0:
  292. tee_output(prefix, "%s.sh timed out!! Aborting...\n" % self.setup_file)
  293. p.kill()
  294. return 1
  295. # What's our return code?
  296. # If setup.sh has terminated, use that code
  297. # Otherwise, detect if the port was bound
  298. tee_output(prefix, "Status: Poll: %s, Port %s bound: %s, Time Left: %s\n" % (
  299. p.poll(), self.port, self.benchmarker.is_port_bound(self.port), time_remaining))
  300. retcode = (p.poll() if p.poll() is not None else 0 if self.benchmarker.is_port_bound(self.port) else 1)
  301. if p.poll() is not None:
  302. tee_output(prefix, "%s.sh process exited naturally with %s\n" % (self.setup_file, p.poll()))
  303. elif self.benchmarker.is_port_bound(self.port):
  304. tee_output(prefix, "Bound port detected on %s\n" % self.port)
  305. # Before we return control to the benchmarker, spin up a
  306. # thread to keep an eye on the pipes in case the running
  307. # framework uses stdout/stderr. Once all processes accessing
  308. # the subprocess.PIPEs are dead, this thread will terminate.
  309. # Use a different prefix to indicate this is the framework
  310. # speaking
  311. prefix = "Server %s: " % self.name
  312. def watch_child_pipes(nbsr, prefix):
  313. while True:
  314. try:
  315. line = nbsr.readline(60)
  316. if line:
  317. tee_output(prefix, line)
  318. except setup_util.EndOfStream:
  319. tee_output(prefix, "Framework processes have terminated\n")
  320. return
  321. watch_thread = Thread(target = watch_child_pipes,
  322. args = (nbsr, prefix))
  323. watch_thread.daemon = True
  324. watch_thread.start()
  325. logging.info("Executed %s.sh, returning %s", self.setup_file, retcode)
  326. os.chdir(previousDir)
  327. return retcode
  328. ############################################################
  329. # End start
  330. ############################################################
  331. ############################################################
  332. # verify_urls
  333. # Verifys each of the URLs for this test. THis will sinply
  334. # curl the URL and check for it's return status.
  335. # For each url, a flag will be set on this object for whether
  336. # or not it passed
  337. # Returns True if all verifications succeeded
  338. ############################################################
  339. def verify_urls(self, verificationPath):
  340. result = True
  341. def verify_type(test_type):
  342. with open(os.path.join(verificationPath, (test_type + '.txt')), 'w') as verification:
  343. test = self.runTests[test_type]
  344. test.setup_out(verification)
  345. verification.write(header("VERIFYING %s" % test_type.upper()))
  346. base_url = "http://%s:%s" % (self.benchmarker.server_host, self.port)
  347. try:
  348. results = test.verify(base_url)
  349. except ConnectionError as e:
  350. results = [('fail',"Server did not respond to request", base_url)]
  351. logging.warning("Verifying test %s for %s caused an exception: %s", test_type, self.name, e)
  352. except Exception as e:
  353. results = [('fail',"""Caused Exception in TFB
  354. This almost certainly means your return value is incorrect,
  355. but also that you have found a bug. Please submit an issue
  356. including this message: %s\n%s""" % (e, traceback.format_exc()),
  357. base_url)]
  358. logging.warning("Verifying test %s for %s caused an exception: %s", test_type, self.name, e)
  359. traceback.format_exc()
  360. test.failed = any(result == 'fail' for (result, reason, url) in results)
  361. test.warned = any(result == 'warn' for (result, reason, url) in results)
  362. test.passed = all(result == 'pass' for (result, reason, url) in results)
  363. def output_result(result, reason, url):
  364. specific_rules_url = "http://frameworkbenchmarks.readthedocs.org/en/latest/Project-Information/Framework-Tests/#specific-test-requirements"
  365. color = Fore.GREEN
  366. if result.upper() == "WARN":
  367. color = Fore.YELLOW
  368. elif result.upper() == "FAIL":
  369. color = Fore.RED
  370. verification.write((" " + color + "%s" + Style.RESET_ALL + " for %s\n") % (result.upper(), url))
  371. print (" " + color + "%s" + Style.RESET_ALL + " for %s\n") % (result.upper(), url)
  372. if reason is not None and len(reason) != 0:
  373. for line in reason.splitlines():
  374. verification.write(" " + line + '\n')
  375. print " " + line
  376. if not test.passed:
  377. verification.write(" See %s\n" % specific_rules_url)
  378. print " See %s\n" % specific_rules_url
  379. [output_result(r1,r2,url) for (r1, r2, url) in results]
  380. if test.failed:
  381. self.benchmarker.report_verify_results(self, test_type, 'fail')
  382. elif test.warned:
  383. self.benchmarker.report_verify_results(self, test_type, 'warn')
  384. elif test.passed:
  385. self.benchmarker.report_verify_results(self, test_type, 'pass')
  386. else:
  387. raise Exception("Unknown error - test did not pass,warn,or fail")
  388. verification.flush()
  389. result = True
  390. for test_type in self.runTests:
  391. verify_type(test_type)
  392. if self.runTests[test_type].failed:
  393. result = False
  394. return result
  395. ############################################################
  396. # End verify_urls
  397. ############################################################
  398. ############################################################
  399. # benchmark
  400. # Runs the benchmark for each type of test that it implements
  401. # JSON/DB/Query.
  402. ############################################################
  403. def benchmark(self, benchmarkPath):
  404. def benchmark_type(test_type):
  405. with open(os.path.join(benchmarkPath, (test_type + '.txt')), 'w') as out:
  406. out.write("BENCHMARKING %s ... " % test_type.upper())
  407. test = self.runTests[test_type]
  408. test.setup_out(out)
  409. output_file = self.benchmarker.output_file(self.name, test_type)
  410. if not os.path.exists(output_file):
  411. # Open to create the empty file
  412. with open(output_file, 'w'):
  413. pass
  414. if not test.failed:
  415. if test_type == 'plaintext': # One special case
  416. remote_script = self.__generate_pipeline_script(test.get_url(), self.port, test.accept_header)
  417. elif test_type == 'query' or test_type == 'update':
  418. remote_script = self.__generate_query_script(test.get_url(), self.port, test.accept_header)
  419. else:
  420. remote_script = self.__generate_concurrency_script(test.get_url(), self.port, test.accept_header)
  421. # Begin resource usage metrics collection
  422. self.__begin_logging(test_type)
  423. # Run the benchmark
  424. with open(output_file, 'w') as raw_file:
  425. p = subprocess.Popen(self.benchmarker.client_ssh_string.split(" "), stdin=subprocess.PIPE, stdout=raw_file, stderr=raw_file)
  426. p.communicate(remote_script)
  427. out.flush()
  428. # End resource usage metrics collection
  429. self.__end_logging()
  430. results = self.__parse_test(test_type)
  431. print "Benchmark results:"
  432. pprint(results)
  433. self.benchmarker.report_benchmark_results(framework=self, test=test_type, results=results['results'])
  434. rmtmp = "find /tmp -maxdepth 1 -name \"*\" -print0 | xargs -0 sudo -u %s rm" % (self.benchmarker.runner_user)
  435. subprocess.Popen(rmtmp, shell=True, cwd=self.fwroot, executable='/bin/bash')
  436. out.write( "Complete\n" )
  437. out.flush()
  438. for test_type in self.runTests:
  439. benchmark_type(test_type)
  440. ############################################################
  441. # End benchmark
  442. ############################################################
  443. ############################################################
  444. # parse_all
  445. # Method meant to be run for a given timestamp
  446. ############################################################
  447. def parse_all(self):
  448. for test_type in self.runTests:
  449. if os.path.exists(self.benchmarker.get_output_file(self.name, test_type)):
  450. results = self.__parse_test(test_type)
  451. self.benchmarker.report_benchmark_results(framework=self, test=test_type, results=results['results'])
  452. ##########################################################################################
  453. # Private Methods
  454. ##########################################################################################
  455. ############################################################
  456. # __parse_test(test_type)
  457. ############################################################
  458. def __parse_test(self, test_type):
  459. try:
  460. results = dict()
  461. results['results'] = []
  462. stats = []
  463. if os.path.exists(self.benchmarker.get_output_file(self.name, test_type)):
  464. with open(self.benchmarker.output_file(self.name, test_type)) as raw_data:
  465. is_warmup = True
  466. rawData = None
  467. for line in raw_data:
  468. if "Queries:" in line or "Concurrency:" in line:
  469. is_warmup = False
  470. rawData = None
  471. continue
  472. if "Warmup" in line or "Primer" in line:
  473. is_warmup = True
  474. continue
  475. if not is_warmup:
  476. if rawData == None:
  477. rawData = dict()
  478. results['results'].append(rawData)
  479. #if "Requests/sec:" in line:
  480. # m = re.search("Requests/sec:\s+([0-9]+)", line)
  481. # rawData['reportedResults'] = m.group(1)
  482. # search for weighttp data such as succeeded and failed.
  483. if "Latency" in line:
  484. m = re.findall("([0-9]+\.*[0-9]*[us|ms|s|m|%]+)", line)
  485. if len(m) == 4:
  486. rawData['latencyAvg'] = m[0]
  487. rawData['latencyStdev'] = m[1]
  488. rawData['latencyMax'] = m[2]
  489. # rawData['latencyStdevPercent'] = m[3]
  490. #if "Req/Sec" in line:
  491. # m = re.findall("([0-9]+\.*[0-9]*[k|%]*)", line)
  492. # if len(m) == 4:
  493. # rawData['requestsAvg'] = m[0]
  494. # rawData['requestsStdev'] = m[1]
  495. # rawData['requestsMax'] = m[2]
  496. # rawData['requestsStdevPercent'] = m[3]
  497. #if "requests in" in line:
  498. # m = re.search("requests in ([0-9]+\.*[0-9]*[ms|s|m|h]+)", line)
  499. # if m != None:
  500. # # parse out the raw time, which may be in minutes or seconds
  501. # raw_time = m.group(1)
  502. # if "ms" in raw_time:
  503. # rawData['total_time'] = float(raw_time[:len(raw_time)-2]) / 1000.0
  504. # elif "s" in raw_time:
  505. # rawData['total_time'] = float(raw_time[:len(raw_time)-1])
  506. # elif "m" in raw_time:
  507. # rawData['total_time'] = float(raw_time[:len(raw_time)-1]) * 60.0
  508. # elif "h" in raw_time:
  509. # rawData['total_time'] = float(raw_time[:len(raw_time)-1]) * 3600.0
  510. if "requests in" in line:
  511. m = re.search("([0-9]+) requests in", line)
  512. if m != None:
  513. rawData['totalRequests'] = int(m.group(1))
  514. if "Socket errors" in line:
  515. if "connect" in line:
  516. m = re.search("connect ([0-9]+)", line)
  517. rawData['connect'] = int(m.group(1))
  518. if "read" in line:
  519. m = re.search("read ([0-9]+)", line)
  520. rawData['read'] = int(m.group(1))
  521. if "write" in line:
  522. m = re.search("write ([0-9]+)", line)
  523. rawData['write'] = int(m.group(1))
  524. if "timeout" in line:
  525. m = re.search("timeout ([0-9]+)", line)
  526. rawData['timeout'] = int(m.group(1))
  527. if "Non-2xx" in line:
  528. m = re.search("Non-2xx or 3xx responses: ([0-9]+)", line)
  529. if m != None:
  530. rawData['5xx'] = int(m.group(1))
  531. if "STARTTIME" in line:
  532. m = re.search("[0-9]+", line)
  533. rawData["startTime"] = int(m.group(0))
  534. if "ENDTIME" in line:
  535. m = re.search("[0-9]+", line)
  536. rawData["endTime"] = int(m.group(0))
  537. test_stats = self.__parse_stats(test_type, rawData["startTime"], rawData["endTime"], 1)
  538. # rawData["averageStats"] = self.__calculate_average_stats(test_stats)
  539. stats.append(test_stats)
  540. with open(self.benchmarker.stats_file(self.name, test_type) + ".json", "w") as stats_file:
  541. json.dump(stats, stats_file, indent=2)
  542. return results
  543. except IOError:
  544. return None
  545. ############################################################
  546. # End benchmark
  547. ############################################################
  548. ############################################################
  549. # __generate_concurrency_script(url, port)
  550. # Generates the string containing the bash script that will
  551. # be run on the client to benchmark a single test. This
  552. # specifically works for the variable concurrency tests (JSON
  553. # and DB)
  554. ############################################################
  555. def __generate_concurrency_script(self, url, port, accept_header, wrk_command="wrk"):
  556. headers = self.headers_template.format(accept=accept_header)
  557. return self.concurrency_template.format(max_concurrency=max(self.benchmarker.concurrency_levels),
  558. max_threads=self.benchmarker.threads, name=self.name, duration=self.benchmarker.duration,
  559. levels=" ".join("{}".format(item) for item in self.benchmarker.concurrency_levels),
  560. server_host=self.benchmarker.server_host, port=port, url=url, headers=headers, wrk=wrk_command)
  561. ############################################################
  562. # __generate_pipeline_script(url, port)
  563. # Generates the string containing the bash script that will
  564. # be run on the client to benchmark a single pipeline test.
  565. ############################################################
  566. def __generate_pipeline_script(self, url, port, accept_header, wrk_command="wrk"):
  567. headers = self.headers_template.format(accept=accept_header)
  568. return self.pipeline_template.format(max_concurrency=16384,
  569. max_threads=self.benchmarker.threads, name=self.name, duration=self.benchmarker.duration,
  570. levels=" ".join("{}".format(item) for item in [256,1024,4096,16384]),
  571. server_host=self.benchmarker.server_host, port=port, url=url, headers=headers, wrk=wrk_command,
  572. pipeline=16)
  573. ############################################################
  574. # __generate_query_script(url, port)
  575. # Generates the string containing the bash script that will
  576. # be run on the client to benchmark a single test. This
  577. # specifically works for the variable query tests (Query)
  578. ############################################################
  579. def __generate_query_script(self, url, port, accept_header):
  580. headers = self.headers_template.format(accept=accept_header)
  581. return self.query_template.format(max_concurrency=max(self.benchmarker.concurrency_levels),
  582. max_threads=self.benchmarker.threads, name=self.name, duration=self.benchmarker.duration,
  583. levels=" ".join("{}".format(item) for item in self.benchmarker.query_levels),
  584. server_host=self.benchmarker.server_host, port=port, url=url, headers=headers)
  585. ############################################################
  586. # Returns True if any test type this this framework test will use a DB
  587. ############################################################
  588. def requires_database(self):
  589. '''Returns True/False if this test requires a database'''
  590. return any(tobj.requires_db for (ttype,tobj) in self.runTests.iteritems())
  591. ############################################################
  592. # __begin_logging
  593. # Starts a thread to monitor the resource usage, to be synced with the client's time
  594. # TODO: MySQL and InnoDB are possible. Figure out how to implement them.
  595. ############################################################
  596. def __begin_logging(self, test_type):
  597. output_file = "{file_name}".format(file_name=self.benchmarker.get_stats_file(self.name, test_type))
  598. dstat_string = "dstat -afilmprsT --aio --fs --ipc --lock --raw --socket --tcp \
  599. --raw --socket --tcp --udp --unix --vm --disk-util \
  600. --rpc --rpcd --output {output_file}".format(output_file=output_file)
  601. cmd = shlex.split(dstat_string)
  602. dev_null = open(os.devnull, "w")
  603. self.subprocess_handle = subprocess.Popen(cmd, stdout=dev_null)
  604. ##############################################################
  605. # Begin __end_logging
  606. # Stops the logger thread and blocks until shutdown is complete.
  607. ##############################################################
  608. def __end_logging(self):
  609. self.subprocess_handle.terminate()
  610. self.subprocess_handle.communicate()
  611. ##############################################################
  612. # Begin __parse_stats
  613. # For each test type, process all the statistics, and return a multi-layered dictionary
  614. # that has a structure as follows:
  615. # (timestamp)
  616. # | (main header) - group that the stat is in
  617. # | | (sub header) - title of the stat
  618. # | | | (stat) - the stat itself, usually a floating point number
  619. ##############################################################
  620. def __parse_stats(self, test_type, start_time, end_time, interval):
  621. stats_dict = dict()
  622. stats_file = self.benchmarker.stats_file(self.name, test_type)
  623. with open(stats_file) as stats:
  624. while(stats.next() != "\n"): # dstat doesn't output a completely compliant CSV file - we need to strip the header
  625. pass
  626. stats_reader = csv.reader(stats)
  627. main_header = stats_reader.next()
  628. sub_header = stats_reader.next()
  629. time_row = sub_header.index("epoch")
  630. int_counter = 0
  631. for row in stats_reader:
  632. time = float(row[time_row])
  633. int_counter+=1
  634. if time < start_time:
  635. continue
  636. elif time > end_time:
  637. return stats_dict
  638. if int_counter % interval != 0:
  639. continue
  640. row_dict = dict()
  641. for nextheader in main_header:
  642. if nextheader != "":
  643. row_dict[nextheader] = dict()
  644. header = ""
  645. for item_num, column in enumerate(row):
  646. if(len(main_header[item_num]) != 0):
  647. header = main_header[item_num]
  648. row_dict[header][sub_header[item_num]] = float(column) # all the stats are numbers, so we want to make sure that they stay that way in json
  649. stats_dict[time] = row_dict
  650. return stats_dict
  651. ##############################################################
  652. # End __parse_stats
  653. ##############################################################
  654. def __getattr__(self, name):
  655. """For backwards compatibility, we used to pass benchmarker
  656. as the argument to the setup.sh files"""
  657. try:
  658. x = getattr(self.benchmarker, name)
  659. except AttributeError:
  660. print "AttributeError: %s not a member of FrameworkTest or Benchmarker" % name
  661. print "This is probably a bug"
  662. raise
  663. return x
  664. ##############################################################
  665. # Begin __calculate_average_stats
  666. # We have a large amount of raw data for the statistics that
  667. # may be useful for the stats nerds, but most people care about
  668. # a couple of numbers. For now, we're only going to supply:
  669. # * Average CPU
  670. # * Average Memory
  671. # * Total network use
  672. # * Total disk use
  673. # More may be added in the future. If they are, please update
  674. # the above list.
  675. # Note: raw_stats is directly from the __parse_stats method.
  676. # Recall that this consists of a dictionary of timestamps,
  677. # each of which contain a dictionary of stat categories which
  678. # contain a dictionary of stats
  679. ##############################################################
  680. def __calculate_average_stats(self, raw_stats):
  681. raw_stat_collection = dict()
  682. for timestamp, time_dict in raw_stats.items():
  683. for main_header, sub_headers in time_dict.items():
  684. item_to_append = None
  685. if 'cpu' in main_header:
  686. # We want to take the idl stat and subtract it from 100
  687. # to get the time that the CPU is NOT idle.
  688. item_to_append = sub_headers['idl'] - 100.0
  689. elif main_header == 'memory usage':
  690. item_to_append = sub_headers['used']
  691. elif 'net' in main_header:
  692. # Network stats have two parts - recieve and send. We'll use a tuple of
  693. # style (recieve, send)
  694. item_to_append = (sub_headers['recv'], sub_headers['send'])
  695. elif 'dsk' or 'io' in main_header:
  696. # Similar for network, except our tuple looks like (read, write)
  697. item_to_append = (sub_headers['read'], sub_headers['writ'])
  698. if item_to_append is not None:
  699. if main_header not in raw_stat_collection:
  700. raw_stat_collection[main_header] = list()
  701. raw_stat_collection[main_header].append(item_to_append)
  702. # Simple function to determine human readable size
  703. # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
  704. def sizeof_fmt(num):
  705. # We'll assume that any number we get is convertable to a float, just in case
  706. num = float(num)
  707. for x in ['bytes','KB','MB','GB']:
  708. if num < 1024.0 and num > -1024.0:
  709. return "%3.1f%s" % (num, x)
  710. num /= 1024.0
  711. return "%3.1f%s" % (num, 'TB')
  712. # Now we have our raw stats in a readable format - we need to format it for display
  713. # We need a floating point sum, so the built in sum doesn't cut it
  714. display_stat_collection = dict()
  715. for header, values in raw_stat_collection.items():
  716. display_stat = None
  717. if 'cpu' in header:
  718. display_stat = sizeof_fmt(math.fsum(values) / len(values))
  719. elif main_header == 'memory usage':
  720. display_stat = sizeof_fmt(math.fsum(values) / len(values))
  721. elif 'net' in main_header:
  722. receive, send = zip(*values) # unzip
  723. display_stat = {'receive': sizeof_fmt(math.fsum(receive)), 'send': sizeof_fmt(math.fsum(send))}
  724. else: # if 'dsk' or 'io' in header:
  725. read, write = zip(*values) # unzip
  726. display_stat = {'read': sizeof_fmt(math.fsum(read)), 'write': sizeof_fmt(math.fsum(write))}
  727. display_stat_collection[header] = display_stat
  728. return display_stat
  729. ###########################################################################################
  730. # End __calculate_average_stats
  731. #########################################################################################
  732. ##########################################################################################
  733. # Constructor
  734. ##########################################################################################
  735. def __init__(self, name, directory, benchmarker, runTests, args):
  736. self.name = name
  737. self.directory = directory
  738. self.benchmarker = benchmarker
  739. self.runTests = runTests
  740. self.fwroot = benchmarker.fwroot
  741. self.approach = ""
  742. self.classification = ""
  743. self.database = ""
  744. self.framework = ""
  745. self.language = ""
  746. self.orm = ""
  747. self.platform = ""
  748. self.webserver = ""
  749. self.os = ""
  750. self.database_os = ""
  751. self.display_name = ""
  752. self.notes = ""
  753. self.versus = ""
  754. # setup logging
  755. logging.basicConfig(stream=sys.stderr, level=logging.INFO)
  756. self.install_root="%s/%s" % (self.fwroot, "installs")
  757. if benchmarker.install_strategy is 'pertest':
  758. self.install_root="%s/pertest/%s" % (self.install_root, name)
  759. # Used in setup.sh scripts for consistency with
  760. # the bash environment variables
  761. self.troot = self.directory
  762. self.iroot = self.install_root
  763. self.__dict__.update(args)
  764. ############################################################
  765. # End __init__
  766. ############################################################
  767. ############################################################
  768. # End FrameworkTest
  769. ############################################################
  770. # Static methods
  771. def test_order(type_name):
  772. """
  773. This sort ordering is set up specifically to return the length
  774. of the test name. There were SO many problems involved with
  775. 'plaintext' being run first (rather, just not last) that we
  776. needed to ensure that it was run last for every framework.
  777. """
  778. return len(type_name)
  779. def validate_urls(test_name, test_keys):
  780. """
  781. Separated from validate_test because urls are not required anywhere. We know a url is incorrect if it is
  782. empty or does not start with a "/" character. There is no validation done to ensure the url conforms to
  783. the suggested url specifications, although those suggestions are presented if a url fails validation here.
  784. """
  785. example_urls = {
  786. "json_url": "/json",
  787. "db_url": "/mysql/db",
  788. "query_url": "/mysql/queries?queries= or /mysql/queries/",
  789. "fortune_url": "/mysql/fortunes",
  790. "update_url": "/mysql/updates?queries= or /mysql/updates/",
  791. "plaintext_url": "/plaintext"
  792. }
  793. for test_url in ["json_url","db_url","query_url","fortune_url","update_url","plaintext_url"]:
  794. key_value = test_keys.get(test_url, None)
  795. if key_value != None and not key_value.startswith('/'):
  796. errmsg = """`%s` field in test \"%s\" does not appear to be a valid url: \"%s\"\n
  797. Example `%s` url: \"%s\"
  798. """ % (test_url, test_name, key_value, test_url, example_urls[test_url])
  799. raise Exception(errmsg)
  800. def validate_test(test_name, test_keys, directory):
  801. """
  802. Validate benchmark config values for this test based on a schema
  803. """
  804. # Ensure that each FrameworkTest has a framework property, inheriting from top-level if not
  805. if not test_keys['framework']:
  806. test_keys['framework'] = config['framework']
  807. recommended_lang = directory.split('/')[-2]
  808. windows_url = "https://github.com/TechEmpower/FrameworkBenchmarks/milestones/Windows%%20Compatibility"
  809. schema = {
  810. 'language': {
  811. 'help': ('language', 'The language of the framework used, suggestion: %s' % recommended_lang)
  812. },
  813. 'webserver': {
  814. 'help': ('webserver', 'Name of the webserver also referred to as the "front-end server"')
  815. },
  816. 'classification': {
  817. 'allowed': [
  818. ('Fullstack', '...'),
  819. ('Micro', '...'),
  820. ('Platform', '...')
  821. ]
  822. },
  823. 'database': {
  824. 'allowed': [
  825. ('MySQL', 'One of the most popular databases around the web and in TFB'),
  826. ('Postgres', 'An advanced SQL database with a larger feature set than MySQL'),
  827. ('MongoDB', 'A popular document-store database'),
  828. ('Cassandra', 'A highly performant and scalable NoSQL database'),
  829. ('Elasticsearch', 'A distributed RESTful search engine that is used as a database for TFB tests'),
  830. ('Redis', 'An open-sourced, BSD licensed, advanced key-value cache and store'),
  831. ('SQLite', 'A network-less database, still supported for backwards compatibility'),
  832. ('SQLServer', 'Microsoft\'s SQL implementation'),
  833. ('None', 'No database was used for these tests, as is the case with Json Serialization and Plaintext')
  834. ]
  835. },
  836. 'approach': {
  837. 'allowed': [
  838. ('Realistic', '...'),
  839. ('Stripped', '...')
  840. ]
  841. },
  842. 'orm': {
  843. 'allowed': [
  844. ('Full', 'Has a full suite of features like lazy loading, caching, multiple language support, sometimes pre-configured with scripts.'),
  845. ('Micro', 'Has basic database driver capabilities such as establishing a connection and sending queries.'),
  846. ('Raw', 'Tests that do not use an ORM will be classified as "raw" meaning they use the platform\'s raw database connectivity.')
  847. ]
  848. },
  849. 'platform': {
  850. 'help': ('platform', 'Name of the platform this framework runs on, e.g. Node.js, Pypy, hhvm, JRuby ...')
  851. },
  852. 'framework': {
  853. # Guranteed to be here and correct at this point
  854. # key is left here to produce the set of required keys
  855. },
  856. 'os': {
  857. 'allowed': [
  858. ('Linux', 'Our best-supported host OS, it is recommended that you build your tests for Linux hosts'),
  859. ('Windows', 'TFB is not fully-compatible on windows, contribute towards our work on compatibility: %s' % windows_url)
  860. ]
  861. },
  862. 'database_os': {
  863. 'allowed': [
  864. ('Linux', 'Our best-supported host OS, it is recommended that you build your tests for Linux hosts'),
  865. ('Windows', 'TFB is not fully-compatible on windows, contribute towards our work on compatibility: %s' % windows_url)
  866. ]
  867. }
  868. }
  869. # Confirm required keys are present
  870. required_keys = schema.keys()
  871. missing = list(set(required_keys) - set(test_keys))
  872. if len(missing) > 0:
  873. missingstr = (", ").join(map(str, missing))
  874. raise Exception("benchmark_config.json for test %s is invalid, please amend by adding the following required keys: [%s]"
  875. % (test_name, missingstr))
  876. # Check the (all optional) test urls
  877. validate_urls(test_name, test_keys)
  878. # Check values of keys against schema
  879. for key in required_keys:
  880. val = test_keys.get(key, "").lower()
  881. has_predefined_acceptables = 'allowed' in schema[key]
  882. if has_predefined_acceptables:
  883. allowed = schema[key].get('allowed', [])
  884. acceptable_values, descriptors = zip(*allowed)
  885. acceptable_values = [a.lower() for a in acceptable_values]
  886. if val not in acceptable_values:
  887. msg = ("Invalid `%s` value specified for test \"%s\" in framework \"%s\"; suggestions:\n"
  888. % (key, test_name, test_keys['framework']))
  889. helpinfo = ('\n').join([" `%s` -- %s" % (v, desc) for (v, desc) in zip(acceptable_values, descriptors)])
  890. fullerr = msg + helpinfo + "\n"
  891. raise Exception(fullerr)
  892. elif not has_predefined_acceptables and val == "":
  893. msg = ("Value for `%s` in test \"%s\" in framework \"%s\" was missing:\n"
  894. % (key, test_name, test_keys['framework']))
  895. helpinfo = " %s -- %s" % schema[key]['help']
  896. fullerr = msg + helpinfo + '\n'
  897. raise Exception(fullerr)
  898. def parse_config(config, directory, benchmarker):
  899. """
  900. Parses a config file into a list of FrameworkTest objects
  901. """
  902. tests = []
  903. # The config object can specify multiple tests
  904. # Loop over them and parse each into a FrameworkTest
  905. for test in config['tests']:
  906. tests_to_run = [name for (name,keys) in test.iteritems()]
  907. if "default" not in tests_to_run:
  908. logging.warn("Framework %s does not define a default test in benchmark_config.json", config['framework'])
  909. # Check that each test configuration is acceptable
  910. # Throw exceptions if a field is missing, or how to improve the field
  911. for test_name, test_keys in test.iteritems():
  912. # Validates the benchmark_config entry
  913. validate_test(test_name, test_keys, directory)
  914. # Map test type to a parsed FrameworkTestType object
  915. runTests = dict()
  916. for type_name, type_obj in benchmarker.types.iteritems():
  917. try:
  918. # Makes a FrameWorkTestType object using some of the keys in config
  919. # e.g. JsonTestType uses "json_url"
  920. runTests[type_name] = type_obj.copy().parse(test_keys)
  921. except AttributeError as ae:
  922. # This is quite common - most tests don't support all types
  923. # Quitely log it and move on (debug logging is on in travis and this causes
  924. # ~1500 lines of debug, so I'm totally ignoring it for now
  925. # logging.debug("Missing arguments for test type %s for framework test %s", type_name, test_name)
  926. pass
  927. # We need to sort by test_type to run
  928. sortedTestKeys = sorted(runTests.keys(), key=test_order)
  929. sortedRunTests = OrderedDict()
  930. for sortedTestKey in sortedTestKeys:
  931. sortedRunTests[sortedTestKey] = runTests[sortedTestKey]
  932. # Prefix all test names with framework except 'default' test
  933. # Done at the end so we may still refer to the primary test as `default` in benchmark config error messages
  934. if test_name == 'default':
  935. test_name = config['framework']
  936. else:
  937. test_name = "%s-%s" % (config['framework'], test_name)
  938. # By passing the entire set of keys, each FrameworkTest will have a member for each key
  939. tests.append(FrameworkTest(test_name, directory, benchmarker, sortedRunTests, test_keys))
  940. return tests