framework_test.py 47 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101
  1. from benchmark.fortune_html_parser import FortuneHTMLParser
  2. from setup.linux import setup_util
  3. from benchmark.test_types import *
  4. import importlib
  5. import os
  6. import subprocess
  7. import socket
  8. import time
  9. import re
  10. from pprint import pprint
  11. import sys
  12. import traceback
  13. import json
  14. import logging
  15. import csv
  16. import shlex
  17. import math
  18. from collections import OrderedDict
  19. from requests import ConnectionError
  20. from threading import Thread
  21. from threading import Event
  22. from utils import header
  23. from utils import gather_docker_dependencies
  24. # Cross-platform colored text
  25. from colorama import Fore, Back, Style
  26. from datetime import datetime
  27. from datetime import timedelta
  28. class FrameworkTest:
  29. headers_template = "-H 'Host: {server_host}' -H 'Accept: {accept}' -H 'Connection: keep-alive'"
  30. # Used for test types that require no pipelining or query string params.
  31. concurrency_template = """
  32. let max_threads=$(cat /proc/cpuinfo | grep processor | wc -l)
  33. echo ""
  34. echo "---------------------------------------------------------"
  35. echo " Running Primer {name}"
  36. echo " {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}\""
  37. echo "---------------------------------------------------------"
  38. echo ""
  39. {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}"
  40. sleep 5
  41. echo ""
  42. echo "---------------------------------------------------------"
  43. echo " Running Warmup {name}"
  44. echo " {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t $max_threads \"http://{server_host}:{port}{url}\""
  45. echo "---------------------------------------------------------"
  46. echo ""
  47. {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t $max_threads "http://{server_host}:{port}{url}"
  48. sleep 5
  49. echo ""
  50. echo "---------------------------------------------------------"
  51. echo " Synchronizing time"
  52. echo "---------------------------------------------------------"
  53. echo ""
  54. ntpdate -s pool.ntp.org
  55. for c in {levels}
  56. do
  57. echo ""
  58. echo "---------------------------------------------------------"
  59. echo " Concurrency: $c for {name}"
  60. echo " {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t $(($c>$max_threads?$max_threads:$c)) \"http://{server_host}:{port}{url}\""
  61. echo "---------------------------------------------------------"
  62. echo ""
  63. STARTTIME=$(date +"%s")
  64. {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t "$(($c>$max_threads?$max_threads:$c))" http://{server_host}:{port}{url}
  65. echo "STARTTIME $STARTTIME"
  66. echo "ENDTIME $(date +"%s")"
  67. sleep 2
  68. done
  69. """
  70. # Used for test types that require pipelining.
  71. pipeline_template = """
  72. let max_threads=$(cat /proc/cpuinfo | grep processor | wc -l)
  73. echo ""
  74. echo "---------------------------------------------------------"
  75. echo " Running Primer {name}"
  76. echo " {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}\""
  77. echo "---------------------------------------------------------"
  78. echo ""
  79. {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}"
  80. sleep 5
  81. echo ""
  82. echo "---------------------------------------------------------"
  83. echo " Running Warmup {name}"
  84. echo " {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t $max_threads \"http://{server_host}:{port}{url}\""
  85. echo "---------------------------------------------------------"
  86. echo ""
  87. {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t $max_threads "http://{server_host}:{port}{url}"
  88. sleep 5
  89. echo ""
  90. echo "---------------------------------------------------------"
  91. echo " Synchronizing time"
  92. echo "---------------------------------------------------------"
  93. echo ""
  94. ntpdate -s pool.ntp.org
  95. for c in {levels}
  96. do
  97. echo ""
  98. echo "---------------------------------------------------------"
  99. echo " Concurrency: $c for {name}"
  100. echo " {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t $(($c>$max_threads?$max_threads:$c)) \"http://{server_host}:{port}{url}\" -s ~/pipeline.lua -- {pipeline}"
  101. echo "---------------------------------------------------------"
  102. echo ""
  103. STARTTIME=$(date +"%s")
  104. {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t "$(($c>$max_threads?$max_threads:$c))" http://{server_host}:{port}{url} -s ~/pipeline.lua -- {pipeline}
  105. echo "STARTTIME $STARTTIME"
  106. echo "ENDTIME $(date +"%s")"
  107. sleep 2
  108. done
  109. """
  110. # Used for test types that require a database -
  111. # These tests run at a static concurrency level and vary the size of
  112. # the query sent with each request
  113. query_template = """
  114. let max_threads=$(cat /proc/cpuinfo | grep processor | wc -l)
  115. echo ""
  116. echo "---------------------------------------------------------"
  117. echo " Running Primer {name}"
  118. echo " wrk {headers} --latency -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}2\""
  119. echo "---------------------------------------------------------"
  120. echo ""
  121. wrk {headers} --latency -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}2"
  122. sleep 5
  123. echo ""
  124. echo "---------------------------------------------------------"
  125. echo " Running Warmup {name}"
  126. echo " wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t $max_threads \"http://{server_host}:{port}{url}2\""
  127. echo "---------------------------------------------------------"
  128. echo ""
  129. wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t $max_threads "http://{server_host}:{port}{url}2"
  130. sleep 5
  131. echo ""
  132. echo "---------------------------------------------------------"
  133. echo " Synchronizing time"
  134. echo "---------------------------------------------------------"
  135. echo ""
  136. ntpdate -s pool.ntp.org
  137. for c in {levels}
  138. do
  139. echo ""
  140. echo "---------------------------------------------------------"
  141. echo " Queries: $c for {name}"
  142. echo " wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t $max_threads \"http://{server_host}:{port}{url}$c\""
  143. echo "---------------------------------------------------------"
  144. echo ""
  145. STARTTIME=$(date +"%s")
  146. wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t $max_threads "http://{server_host}:{port}{url}$c"
  147. echo "STARTTIME $STARTTIME"
  148. echo "ENDTIME $(date +"%s")"
  149. sleep 2
  150. done
  151. """
  152. ############################################################
  153. # start(benchmarker)
  154. # Start the test using its setup file
  155. ############################################################
  156. def start(self, out):
  157. # Setup environment variables
  158. logDir = os.path.join(self.fwroot, self.benchmarker.full_results_directory(), 'logs', self.name.lower())
  159. # bash_functions_path= os.path.join(self.fwroot, 'toolset/setup/linux/bash_functions.sh')
  160. # os.environ['TROOT'] = self.directory
  161. # os.environ['IROOT'] = self.install_root
  162. # os.environ['DBHOST'] = socket.gethostbyname(self.database_host)
  163. # os.environ['LOGDIR'] = logDir
  164. # os.environ['MAX_CONCURRENCY'] = str(max(self.benchmarker.concurrency_levels))
  165. # Always ensure that IROOT exists
  166. # if not os.path.exists(self.install_root):
  167. # os.mkdir(self.install_root)
  168. # if not os.path.exists(os.path.join(self.install_root,"TFBReaper")):
  169. # subprocess.check_call(['gcc',
  170. # '-std=c99',
  171. # '-o%s/TFBReaper' % self.install_root,
  172. # os.path.join(self.fwroot,'toolset/setup/linux/TFBReaper.c')],
  173. # stderr=out, stdout=out)
  174. # Check that the client is setup
  175. # if not os.path.exists(os.path.join(self.install_root, 'client.installed')):
  176. # print("\nINSTALL: Installing client software\n")
  177. # # TODO: hax; should dynamically know where this file is
  178. # with open (self.fwroot + "/toolset/setup/linux/client.sh", "r") as myfile:
  179. # remote_script=myfile.read()
  180. # print("\nINSTALL: {!s}".format(self.benchmarker.client_ssh_string))
  181. # p = subprocess.Popen(self.benchmarker.client_ssh_string.split(" ") + ["bash"], stdin=subprocess.PIPE)
  182. # p.communicate(remote_script)
  183. # returncode = p.returncode
  184. # if returncode != 0:
  185. # self.__install_error("status code %s running subprocess '%s'." % (returncode, self.benchmarker.client_ssh_string))
  186. # print("\nINSTALL: Finished installing client software\n")
  187. # subprocess.check_call('touch client.installed', shell=True, cwd=self.install_root, executable='/bin/bash')
  188. # Run the module start inside parent of TROOT
  189. # - we use the parent as a historical accident, a number of tests
  190. # refer to their TROOT maually still
  191. # previousDir = os.getcwd()
  192. # os.chdir(os.path.dirname(self.troot))
  193. # logging.info("Running setup module start (cwd=%s)", self.directory)
  194. # command = 'bash -exc "source %s && source %s.sh"' % (
  195. # bash_functions_path,
  196. # os.path.join(self.troot, self.setup_file))
  197. # debug_command = '''\
  198. # export FWROOT=%s && \\
  199. # export TROOT=%s && \\
  200. # export IROOT=%s && \\
  201. # export DBHOST=%s && \\
  202. # export LOGDIR=%s && \\
  203. # export MAX_CONCURRENCY=%s && \\
  204. # cd %s && \\
  205. # %s/TFBReaper "bash -exc \\\"source %s && source %s.sh\\\"''' % (self.fwroot,
  206. # self.directory,
  207. # self.install_root,
  208. # socket.gethostbyname(self.database_host),
  209. # logDir,
  210. # max(self.benchmarker.concurrency_levels),
  211. # self.directory,
  212. # self.install_root,
  213. # bash_functions_path,
  214. # os.path.join(self.troot, self.setup_file))
  215. # logging.info("To run %s manually, copy/paste this:\n%s", self.name, debug_command)
  216. def tee_output(prefix, line):
  217. # Needs to be one atomic write
  218. # Explicitly use UTF-8 as it's the most common framework output
  219. # TODO improve encoding handling
  220. line = prefix.encode('utf-8') + line
  221. # Log to current terminal
  222. sys.stdout.write(line)
  223. sys.stdout.flush()
  224. # logging.error("".join([prefix, line]))
  225. out.write(line)
  226. out.flush()
  227. prefix = "Setup %s: " % self.name
  228. ##########################
  229. # Build the Docker images
  230. ##########################
  231. test_docker_file = os.path.join(self.directory, "%s.dockerfile" % self.name)
  232. deps = list(reversed(gather_docker_dependencies( test_docker_file )))
  233. docker_dir = os.path.join(setup_util.get_fwroot(), "toolset", "setup", "linux", "docker")
  234. for dependency in deps:
  235. docker_file = os.path.join(self.directory, dependency + ".dockerfile")
  236. if not os.path.exists(docker_file):
  237. docker_file = os.path.join(docker_dir, dependency + ".dockerfile")
  238. p = subprocess.Popen(["docker", "build", "-f", docker_file, "-t", dependency, os.path.dirname(docker_file)],
  239. stdout=subprocess.PIPE,
  240. stderr=subprocess.STDOUT)
  241. nbsr = setup_util.NonBlockingStreamReader(p.stdout)
  242. while (p.poll() is None):
  243. for i in xrange(10):
  244. try:
  245. line = nbsr.readline(0.05)
  246. if line:
  247. tee_output(prefix, line)
  248. except setup_util.EndOfStream:
  249. break
  250. p = subprocess.Popen(["docker", "build", "-f", test_docker_file, "-t", "tfb-test-%s" % self.name, self.directory],
  251. stdout=subprocess.PIPE,
  252. stderr=subprocess.STDOUT)
  253. nbsr = setup_util.NonBlockingStreamReader(p.stdout)
  254. while (p.poll() is None):
  255. for i in xrange(10):
  256. try:
  257. line = nbsr.readline(0.05)
  258. if line:
  259. tee_output(prefix, line)
  260. except setup_util.EndOfStream:
  261. break
  262. ##########################
  263. # Run the Docker container
  264. ##########################
  265. p = subprocess.Popen(["docker", "run", "--rm", "-p", "%s:%s" % (self.port, self.port), "--network=host", "tfb-test-%s" % self.name],
  266. stdout=subprocess.PIPE,
  267. stderr=subprocess.STDOUT)
  268. nbsr = setup_util.NonBlockingStreamReader(p.stdout,
  269. "%s: framework processes have terminated" % self.name)
  270. # Set a limit on total execution time of setup.sh
  271. timeout = datetime.now() + timedelta(minutes = 105)
  272. time_remaining = timeout - datetime.now()
  273. # Need to print to stdout once every 10 minutes or Travis-CI will abort
  274. travis_timeout = datetime.now() + timedelta(minutes = 5)
  275. # Flush output until docker run work is finished. This is
  276. # either a) when docker run exits b) when the port is bound
  277. # c) when we run out of time.
  278. prefix = "Server %s: " % self.name
  279. while (p.poll() is None
  280. and not self.benchmarker.is_port_bound(self.port)
  281. and not time_remaining.total_seconds() < 0):
  282. # The conditions above are slow to check, so
  283. # we will delay output substantially if we only
  284. # print one line per condition check.
  285. # Adding a tight loop here mitigates the effect,
  286. # ensuring that most of the output directly from
  287. # docker is sent to tee_output before the outer
  288. # loop exits and prints things like "docker exited"
  289. for i in xrange(10):
  290. try:
  291. line = nbsr.readline(0.05)
  292. if line:
  293. tee_output(prefix, line)
  294. # Reset Travis-CI timer
  295. travis_timeout = datetime.now() + timedelta(minutes = 5)
  296. except setup_util.EndOfStream:
  297. tee_output(prefix, "Docker has terminated\n")
  298. break
  299. time_remaining = timeout - datetime.now()
  300. if (travis_timeout - datetime.now()).total_seconds() < 0:
  301. sys.stdout.write(prefix + 'Printing so Travis-CI does not time out\n')
  302. sys.stdout.write(prefix + "Status: Poll: %s, Port %s bound: %s, Time Left: %s\n" % (
  303. p.poll(), self.port, self.benchmarker.is_port_bound(self.port), time_remaining))
  304. sys.stdout.flush()
  305. travis_timeout = datetime.now() + timedelta(minutes = 5)
  306. # Did we time out?
  307. if time_remaining.total_seconds() < 0:
  308. tee_output(prefix, "Docker run has timed out!! Aborting...\n" % self.setup_file)
  309. p.kill()
  310. return 1
  311. # What's our return code?
  312. # If docker run has terminated, use that code
  313. # Otherwise, detect if the port was bound
  314. tee_output(prefix, "Status: Poll: %s, Port %s bound: %s, Time Left: %s\n" % (
  315. p.poll(), self.port, self.benchmarker.is_port_bound(self.port), time_remaining))
  316. retcode = (p.poll() if p.poll() is not None else 0 if self.benchmarker.is_port_bound(self.port) else 1)
  317. if p.poll() is not None:
  318. tee_output(prefix, "Docker run process exited naturally with %s\n" % p.poll())
  319. elif self.benchmarker.is_port_bound(self.port):
  320. tee_output(prefix, "Bound port detected on %s\n" % self.port)
  321. # Before we return control to the benchmarker, spin up a
  322. # thread to keep an eye on the pipes in case the running
  323. # framework uses stdout/stderr. Once all processes accessing
  324. # the subprocess.PIPEs are dead, this thread will terminate.
  325. # Use a different prefix to indicate this is the framework
  326. # speaking
  327. def watch_child_pipes(nbsr, prefix):
  328. while True:
  329. try:
  330. line = nbsr.readline(60)
  331. if line:
  332. tee_output(prefix, line)
  333. except setup_util.EndOfStream:
  334. tee_output(prefix, "Framework processes have terminated\n")
  335. return
  336. watch_thread = Thread(target = watch_child_pipes,
  337. args = (nbsr, prefix))
  338. watch_thread.daemon = True
  339. watch_thread.start()
  340. return retcode
  341. ############################################################
  342. # End start
  343. ############################################################
  344. ############################################################
  345. # verify_urls
  346. # Verifys each of the URLs for this test. THis will sinply
  347. # curl the URL and check for it's return status.
  348. # For each url, a flag will be set on this object for whether
  349. # or not it passed
  350. # Returns True if all verifications succeeded
  351. ############################################################
  352. def verify_urls(self, logPath):
  353. result = True
  354. def verify_type(test_type):
  355. verificationPath = os.path.join(logPath, test_type)
  356. try:
  357. os.makedirs(verificationPath)
  358. except OSError:
  359. pass
  360. with open(os.path.join(verificationPath, 'verification.txt'), 'w') as verification:
  361. test = self.runTests[test_type]
  362. test.setup_out(verification)
  363. verification.write(header("VERIFYING %s" % test_type.upper()))
  364. base_url = "http://%s:%s" % (self.benchmarker.server_host, self.port)
  365. try:
  366. # Verifies headers from the server. This check is made from the
  367. # App Server using Pythons requests module. Will do a second check from
  368. # the client to make sure the server isn't only accepting connections
  369. # from localhost on a multi-machine setup.
  370. results = test.verify(base_url)
  371. # Now verify that the url is reachable from the client machine, unless
  372. # we're already failing
  373. if not any(result == 'fail' for (result, reason, url) in results):
  374. p = subprocess.call(["ssh", "TFB-client", "curl -sSf %s" % base_url + test.get_url()], shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  375. if p is not 0:
  376. results = [('fail', "Server did not respond to request from client machine.", base_url)]
  377. logging.warning("""This error usually means your server is only accepting
  378. requests from localhost.""")
  379. except ConnectionError as e:
  380. results = [('fail',"Server did not respond to request", base_url)]
  381. logging.warning("Verifying test %s for %s caused an exception: %s", test_type, self.name, e)
  382. except Exception as e:
  383. results = [('fail',"""Caused Exception in TFB
  384. This almost certainly means your return value is incorrect,
  385. but also that you have found a bug. Please submit an issue
  386. including this message: %s\n%s""" % (e, traceback.format_exc()),
  387. base_url)]
  388. logging.warning("Verifying test %s for %s caused an exception: %s", test_type, self.name, e)
  389. traceback.format_exc()
  390. test.failed = any(result == 'fail' for (result, reason, url) in results)
  391. test.warned = any(result == 'warn' for (result, reason, url) in results)
  392. test.passed = all(result == 'pass' for (result, reason, url) in results)
  393. def output_result(result, reason, url):
  394. specific_rules_url = "http://frameworkbenchmarks.readthedocs.org/en/latest/Project-Information/Framework-Tests/#specific-test-requirements"
  395. color = Fore.GREEN
  396. if result.upper() == "WARN":
  397. color = Fore.YELLOW
  398. elif result.upper() == "FAIL":
  399. color = Fore.RED
  400. verification.write((" " + color + "%s" + Style.RESET_ALL + " for %s\n") % (result.upper(), url))
  401. print(" {!s}{!s}{!s} for {!s}\n".format(color, result.upper(), Style.RESET_ALL, url))
  402. if reason is not None and len(reason) != 0:
  403. for line in reason.splitlines():
  404. verification.write(" " + line + '\n')
  405. print(" " + line)
  406. if not test.passed:
  407. verification.write(" See %s\n" % specific_rules_url)
  408. print(" See {!s}\n".format(specific_rules_url))
  409. [output_result(r1,r2,url) for (r1, r2, url) in results]
  410. if test.failed:
  411. self.benchmarker.report_verify_results(self, test_type, 'fail')
  412. elif test.warned:
  413. self.benchmarker.report_verify_results(self, test_type, 'warn')
  414. elif test.passed:
  415. self.benchmarker.report_verify_results(self, test_type, 'pass')
  416. else:
  417. raise Exception("Unknown error - test did not pass,warn,or fail")
  418. verification.flush()
  419. result = True
  420. for test_type in self.runTests:
  421. verify_type(test_type)
  422. if self.runTests[test_type].failed:
  423. result = False
  424. return result
  425. ############################################################
  426. # End verify_urls
  427. ############################################################
  428. ############################################################
  429. # benchmark
  430. # Runs the benchmark for each type of test that it implements
  431. # JSON/DB/Query.
  432. ############################################################
  433. def benchmark(self, logPath):
  434. def benchmark_type(test_type):
  435. benchmarkPath = os.path.join(logPath, test_type)
  436. try:
  437. os.makedirs(benchmarkPath)
  438. except OSError:
  439. pass
  440. with open(os.path.join(benchmarkPath, 'benchmark.txt'), 'w') as out:
  441. out.write("BENCHMARKING %s ... " % test_type.upper())
  442. test = self.runTests[test_type]
  443. test.setup_out(out)
  444. output_file = self.benchmarker.output_file(self.name, test_type)
  445. if not os.path.exists(output_file):
  446. # Open to create the empty file
  447. with open(output_file, 'w'):
  448. pass
  449. if not test.failed:
  450. if test_type == 'plaintext': # One special case
  451. remote_script = self.__generate_pipeline_script(test.get_url(), self.port, test.accept_header)
  452. elif test_type == 'query' or test_type == 'update':
  453. remote_script = self.__generate_query_script(test.get_url(), self.port, test.accept_header, self.benchmarker.query_levels)
  454. elif test_type == 'cached_query':
  455. remote_script = self.__generate_query_script(test.get_url(), self.port, test.accept_header, self.benchmarker.cached_query_levels)
  456. else:
  457. remote_script = self.__generate_concurrency_script(test.get_url(), self.port, test.accept_header)
  458. # Begin resource usage metrics collection
  459. self.__begin_logging(test_type)
  460. # Run the benchmark
  461. with open(output_file, 'w') as raw_file:
  462. p = subprocess.Popen(self.benchmarker.client_ssh_string.split(" "), stdin=subprocess.PIPE, stdout=raw_file, stderr=raw_file)
  463. p.communicate(remote_script)
  464. out.flush()
  465. # End resource usage metrics collection
  466. self.__end_logging()
  467. results = self.__parse_test(test_type)
  468. print("Benchmark results:")
  469. pprint(results)
  470. self.benchmarker.report_benchmark_results(framework=self, test=test_type, results=results['results'])
  471. out.write( "Complete\n" )
  472. out.flush()
  473. for test_type in self.runTests:
  474. benchmark_type(test_type)
  475. ############################################################
  476. # End benchmark
  477. ############################################################
  478. ############################################################
  479. # parse_all
  480. # Method meant to be run for a given timestamp
  481. ############################################################
  482. def parse_all(self):
  483. for test_type in self.runTests:
  484. if os.path.exists(self.benchmarker.get_output_file(self.name, test_type)):
  485. results = self.__parse_test(test_type)
  486. self.benchmarker.report_benchmark_results(framework=self, test=test_type, results=results['results'])
  487. ##########################################################################################
  488. # Private Methods
  489. ##########################################################################################
  490. ############################################################
  491. # __parse_test(test_type)
  492. ############################################################
  493. def __parse_test(self, test_type):
  494. try:
  495. results = dict()
  496. results['results'] = []
  497. stats = []
  498. if os.path.exists(self.benchmarker.get_output_file(self.name, test_type)):
  499. with open(self.benchmarker.output_file(self.name, test_type)) as raw_data:
  500. is_warmup = True
  501. rawData = None
  502. for line in raw_data:
  503. if "Queries:" in line or "Concurrency:" in line:
  504. is_warmup = False
  505. rawData = None
  506. continue
  507. if "Warmup" in line or "Primer" in line:
  508. is_warmup = True
  509. continue
  510. if not is_warmup:
  511. if rawData == None:
  512. rawData = dict()
  513. results['results'].append(rawData)
  514. #if "Requests/sec:" in line:
  515. # m = re.search("Requests/sec:\s+([0-9]+)", line)
  516. # rawData['reportedResults'] = m.group(1)
  517. # search for weighttp data such as succeeded and failed.
  518. if "Latency" in line:
  519. m = re.findall("([0-9]+\.*[0-9]*[us|ms|s|m|%]+)", line)
  520. if len(m) == 4:
  521. rawData['latencyAvg'] = m[0]
  522. rawData['latencyStdev'] = m[1]
  523. rawData['latencyMax'] = m[2]
  524. # rawData['latencyStdevPercent'] = m[3]
  525. #if "Req/Sec" in line:
  526. # m = re.findall("([0-9]+\.*[0-9]*[k|%]*)", line)
  527. # if len(m) == 4:
  528. # rawData['requestsAvg'] = m[0]
  529. # rawData['requestsStdev'] = m[1]
  530. # rawData['requestsMax'] = m[2]
  531. # rawData['requestsStdevPercent'] = m[3]
  532. #if "requests in" in line:
  533. # m = re.search("requests in ([0-9]+\.*[0-9]*[ms|s|m|h]+)", line)
  534. # if m != None:
  535. # # parse out the raw time, which may be in minutes or seconds
  536. # raw_time = m.group(1)
  537. # if "ms" in raw_time:
  538. # rawData['total_time'] = float(raw_time[:len(raw_time)-2]) / 1000.0
  539. # elif "s" in raw_time:
  540. # rawData['total_time'] = float(raw_time[:len(raw_time)-1])
  541. # elif "m" in raw_time:
  542. # rawData['total_time'] = float(raw_time[:len(raw_time)-1]) * 60.0
  543. # elif "h" in raw_time:
  544. # rawData['total_time'] = float(raw_time[:len(raw_time)-1]) * 3600.0
  545. if "requests in" in line:
  546. m = re.search("([0-9]+) requests in", line)
  547. if m != None:
  548. rawData['totalRequests'] = int(m.group(1))
  549. if "Socket errors" in line:
  550. if "connect" in line:
  551. m = re.search("connect ([0-9]+)", line)
  552. rawData['connect'] = int(m.group(1))
  553. if "read" in line:
  554. m = re.search("read ([0-9]+)", line)
  555. rawData['read'] = int(m.group(1))
  556. if "write" in line:
  557. m = re.search("write ([0-9]+)", line)
  558. rawData['write'] = int(m.group(1))
  559. if "timeout" in line:
  560. m = re.search("timeout ([0-9]+)", line)
  561. rawData['timeout'] = int(m.group(1))
  562. if "Non-2xx" in line:
  563. m = re.search("Non-2xx or 3xx responses: ([0-9]+)", line)
  564. if m != None:
  565. rawData['5xx'] = int(m.group(1))
  566. if "STARTTIME" in line:
  567. m = re.search("[0-9]+", line)
  568. rawData["startTime"] = int(m.group(0))
  569. if "ENDTIME" in line:
  570. m = re.search("[0-9]+", line)
  571. rawData["endTime"] = int(m.group(0))
  572. test_stats = self.__parse_stats(test_type, rawData["startTime"], rawData["endTime"], 1)
  573. # rawData["averageStats"] = self.__calculate_average_stats(test_stats)
  574. stats.append(test_stats)
  575. with open(self.benchmarker.stats_file(self.name, test_type) + ".json", "w") as stats_file:
  576. json.dump(stats, stats_file, indent=2)
  577. return results
  578. except IOError:
  579. return None
  580. ############################################################
  581. # End benchmark
  582. ############################################################
  583. ############################################################
  584. # __generate_concurrency_script(url, port)
  585. # Generates the string containing the bash script that will
  586. # be run on the client to benchmark a single test. This
  587. # specifically works for the variable concurrency tests (JSON
  588. # and DB)
  589. ############################################################
  590. def __generate_concurrency_script(self, url, port, accept_header, wrk_command="wrk"):
  591. headers = self.headers_template.format(server_host=self.benchmarker.server_host, accept=accept_header)
  592. return self.concurrency_template.format(max_concurrency=max(self.benchmarker.concurrency_levels),
  593. name=self.name, duration=self.benchmarker.duration,
  594. levels=" ".join("{}".format(item) for item in self.benchmarker.concurrency_levels),
  595. server_host=self.benchmarker.server_host, port=port, url=url, headers=headers, wrk=wrk_command)
  596. ############################################################
  597. # __generate_pipeline_script(url, port)
  598. # Generates the string containing the bash script that will
  599. # be run on the client to benchmark a single pipeline test.
  600. ############################################################
  601. def __generate_pipeline_script(self, url, port, accept_header, wrk_command="wrk"):
  602. headers = self.headers_template.format(server_host=self.benchmarker.server_host, accept=accept_header)
  603. return self.pipeline_template.format(max_concurrency=max(self.benchmarker.pipeline_concurrency_levels),
  604. name=self.name, duration=self.benchmarker.duration,
  605. levels=" ".join("{}".format(item) for item in self.benchmarker.pipeline_concurrency_levels),
  606. server_host=self.benchmarker.server_host, port=port, url=url, headers=headers, wrk=wrk_command,
  607. pipeline=16)
  608. ############################################################
  609. # __generate_query_script(url, port)
  610. # Generates the string containing the bash script that will
  611. # be run on the client to benchmark a single test. This
  612. # specifically works for the variable query tests (Query)
  613. ############################################################
  614. def __generate_query_script(self, url, port, accept_header, query_levels):
  615. headers = self.headers_template.format(server_host=self.benchmarker.server_host, accept=accept_header)
  616. return self.query_template.format(max_concurrency=max(self.benchmarker.concurrency_levels),
  617. name=self.name, duration=self.benchmarker.duration,
  618. levels=" ".join("{}".format(item) for item in query_levels),
  619. server_host=self.benchmarker.server_host, port=port, url=url, headers=headers)
  620. ############################################################
  621. # Returns True if any test type this this framework test will use a DB
  622. ############################################################
  623. def requires_database(self):
  624. '''Returns True/False if this test requires a database'''
  625. return any(tobj.requires_db for (ttype,tobj) in self.runTests.iteritems())
  626. ############################################################
  627. # __begin_logging
  628. # Starts a thread to monitor the resource usage, to be synced with the client's time
  629. # TODO: MySQL and InnoDB are possible. Figure out how to implement them.
  630. ############################################################
  631. def __begin_logging(self, test_type):
  632. output_file = "{file_name}".format(file_name=self.benchmarker.get_stats_file(self.name, test_type))
  633. dstat_string = "dstat -Tafilmprs --aio --fs --ipc --lock --raw --socket --tcp \
  634. --raw --socket --tcp --udp --unix --vm --disk-util \
  635. --rpc --rpcd --output {output_file}".format(output_file=output_file)
  636. cmd = shlex.split(dstat_string)
  637. dev_null = open(os.devnull, "w")
  638. self.subprocess_handle = subprocess.Popen(cmd, stdout=dev_null, stderr=subprocess.STDOUT)
  639. ##############################################################
  640. # Begin __end_logging
  641. # Stops the logger thread and blocks until shutdown is complete.
  642. ##############################################################
  643. def __end_logging(self):
  644. self.subprocess_handle.terminate()
  645. self.subprocess_handle.communicate()
  646. ##############################################################
  647. # Begin __parse_stats
  648. # For each test type, process all the statistics, and return a multi-layered dictionary
  649. # that has a structure as follows:
  650. # (timestamp)
  651. # | (main header) - group that the stat is in
  652. # | | (sub header) - title of the stat
  653. # | | | (stat) - the stat itself, usually a floating point number
  654. ##############################################################
  655. def __parse_stats(self, test_type, start_time, end_time, interval):
  656. stats_dict = dict()
  657. stats_file = self.benchmarker.stats_file(self.name, test_type)
  658. with open(stats_file) as stats:
  659. while(stats.next() != "\n"): # dstat doesn't output a completely compliant CSV file - we need to strip the header
  660. pass
  661. stats_reader = csv.reader(stats)
  662. main_header = stats_reader.next()
  663. sub_header = stats_reader.next()
  664. time_row = sub_header.index("epoch")
  665. int_counter = 0
  666. for row in stats_reader:
  667. time = float(row[time_row])
  668. int_counter+=1
  669. if time < start_time:
  670. continue
  671. elif time > end_time:
  672. return stats_dict
  673. if int_counter % interval != 0:
  674. continue
  675. row_dict = dict()
  676. for nextheader in main_header:
  677. if nextheader != "":
  678. row_dict[nextheader] = dict()
  679. header = ""
  680. for item_num, column in enumerate(row):
  681. if(len(main_header[item_num]) != 0):
  682. header = main_header[item_num]
  683. row_dict[header][sub_header[item_num]] = float(column) # all the stats are numbers, so we want to make sure that they stay that way in json
  684. stats_dict[time] = row_dict
  685. return stats_dict
  686. ##############################################################
  687. # End __parse_stats
  688. ##############################################################
  689. def __getattr__(self, name):
  690. """For backwards compatibility, we used to pass benchmarker
  691. as the argument to the setup.sh files"""
  692. try:
  693. x = getattr(self.benchmarker, name)
  694. except AttributeError:
  695. print("AttributeError: {!s} not a member of FrameworkTest or Benchmarker".format(name))
  696. print("This is probably a bug")
  697. raise
  698. return x
  699. ##############################################################
  700. # Begin __calculate_average_stats
  701. # We have a large amount of raw data for the statistics that
  702. # may be useful for the stats nerds, but most people care about
  703. # a couple of numbers. For now, we're only going to supply:
  704. # * Average CPU
  705. # * Average Memory
  706. # * Total network use
  707. # * Total disk use
  708. # More may be added in the future. If they are, please update
  709. # the above list.
  710. # Note: raw_stats is directly from the __parse_stats method.
  711. # Recall that this consists of a dictionary of timestamps,
  712. # each of which contain a dictionary of stat categories which
  713. # contain a dictionary of stats
  714. ##############################################################
  715. def __calculate_average_stats(self, raw_stats):
  716. raw_stat_collection = dict()
  717. for timestamp, time_dict in raw_stats.items():
  718. for main_header, sub_headers in time_dict.items():
  719. item_to_append = None
  720. if 'cpu' in main_header:
  721. # We want to take the idl stat and subtract it from 100
  722. # to get the time that the CPU is NOT idle.
  723. item_to_append = sub_headers['idl'] - 100.0
  724. elif main_header == 'memory usage':
  725. item_to_append = sub_headers['used']
  726. elif 'net' in main_header:
  727. # Network stats have two parts - recieve and send. We'll use a tuple of
  728. # style (recieve, send)
  729. item_to_append = (sub_headers['recv'], sub_headers['send'])
  730. elif 'dsk' or 'io' in main_header:
  731. # Similar for network, except our tuple looks like (read, write)
  732. item_to_append = (sub_headers['read'], sub_headers['writ'])
  733. if item_to_append is not None:
  734. if main_header not in raw_stat_collection:
  735. raw_stat_collection[main_header] = list()
  736. raw_stat_collection[main_header].append(item_to_append)
  737. # Simple function to determine human readable size
  738. # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
  739. def sizeof_fmt(num):
  740. # We'll assume that any number we get is convertable to a float, just in case
  741. num = float(num)
  742. for x in ['bytes','KB','MB','GB']:
  743. if num < 1024.0 and num > -1024.0:
  744. return "%3.1f%s" % (num, x)
  745. num /= 1024.0
  746. return "%3.1f%s" % (num, 'TB')
  747. # Now we have our raw stats in a readable format - we need to format it for display
  748. # We need a floating point sum, so the built in sum doesn't cut it
  749. display_stat_collection = dict()
  750. for header, values in raw_stat_collection.items():
  751. display_stat = None
  752. if 'cpu' in header:
  753. display_stat = sizeof_fmt(math.fsum(values) / len(values))
  754. elif main_header == 'memory usage':
  755. display_stat = sizeof_fmt(math.fsum(values) / len(values))
  756. elif 'net' in main_header:
  757. receive, send = zip(*values) # unzip
  758. display_stat = {'receive': sizeof_fmt(math.fsum(receive)), 'send': sizeof_fmt(math.fsum(send))}
  759. else: # if 'dsk' or 'io' in header:
  760. read, write = zip(*values) # unzip
  761. display_stat = {'read': sizeof_fmt(math.fsum(read)), 'write': sizeof_fmt(math.fsum(write))}
  762. display_stat_collection[header] = display_stat
  763. return display_stat
  764. ###########################################################################################
  765. # End __calculate_average_stats
  766. #########################################################################################
  767. ##########################################################################################
  768. # Constructor
  769. ##########################################################################################
  770. def __init__(self, name, directory, benchmarker, runTests, args):
  771. self.name = name
  772. self.directory = directory
  773. self.benchmarker = benchmarker
  774. self.runTests = runTests
  775. self.fwroot = benchmarker.fwroot
  776. self.approach = ""
  777. self.classification = ""
  778. self.database = ""
  779. self.framework = ""
  780. self.language = ""
  781. self.orm = ""
  782. self.platform = ""
  783. self.webserver = ""
  784. self.os = ""
  785. self.database_os = ""
  786. self.display_name = ""
  787. self.notes = ""
  788. self.versus = ""
  789. # setup logging
  790. logging.basicConfig(stream=sys.stderr, level=logging.INFO)
  791. self.install_root="%s/%s" % (self.fwroot, "installs")
  792. # Used in setup.sh scripts for consistency with
  793. # the bash environment variables
  794. self.troot = self.directory
  795. self.iroot = self.install_root
  796. self.__dict__.update(args)
  797. ############################################################
  798. # End __init__
  799. ############################################################
  800. ############################################################
  801. # End FrameworkTest
  802. ############################################################
  803. # Static methods
  804. def test_order(type_name):
  805. """
  806. This sort ordering is set up specifically to return the length
  807. of the test name. There were SO many problems involved with
  808. 'plaintext' being run first (rather, just not last) that we
  809. needed to ensure that it was run last for every framework.
  810. """
  811. return len(type_name)
  812. def validate_urls(test_name, test_keys):
  813. """
  814. Separated from validate_test because urls are not required anywhere. We know a url is incorrect if it is
  815. empty or does not start with a "/" character. There is no validation done to ensure the url conforms to
  816. the suggested url specifications, although those suggestions are presented if a url fails validation here.
  817. """
  818. example_urls = {
  819. "json_url": "/json",
  820. "db_url": "/mysql/db",
  821. "query_url": "/mysql/queries?queries= or /mysql/queries/",
  822. "fortune_url": "/mysql/fortunes",
  823. "update_url": "/mysql/updates?queries= or /mysql/updates/",
  824. "plaintext_url": "/plaintext",
  825. "cached_query_url": "/mysql/cached_queries?queries= or /mysql/cached_queries"
  826. }
  827. for test_url in ["json_url","db_url","query_url","fortune_url","update_url","plaintext_url","cached_query_url"]:
  828. key_value = test_keys.get(test_url, None)
  829. if key_value != None and not key_value.startswith('/'):
  830. errmsg = """`%s` field in test \"%s\" does not appear to be a valid url: \"%s\"\n
  831. Example `%s` url: \"%s\"
  832. """ % (test_url, test_name, key_value, test_url, example_urls[test_url])
  833. raise Exception(errmsg)
  834. def validate_test(test_name, test_keys, directory):
  835. """
  836. Validate benchmark config values for this test based on a schema
  837. """
  838. # Ensure that each FrameworkTest has a framework property, inheriting from top-level if not
  839. if not test_keys['framework']:
  840. test_keys['framework'] = config['framework']
  841. recommended_lang = directory.split('/')[-2]
  842. windows_url = "https://github.com/TechEmpower/FrameworkBenchmarks/issues/1038"
  843. schema = {
  844. 'language': {
  845. 'help': ('language', 'The language of the framework used, suggestion: %s' % recommended_lang)
  846. },
  847. 'webserver': {
  848. 'help': ('webserver', 'Name of the webserver also referred to as the "front-end server"')
  849. },
  850. 'classification': {
  851. 'allowed': [
  852. ('Fullstack', '...'),
  853. ('Micro', '...'),
  854. ('Platform', '...')
  855. ]
  856. },
  857. 'database': {
  858. 'allowed': [
  859. ('MySQL', 'One of the most popular databases around the web and in TFB'),
  860. ('Postgres', 'An advanced SQL database with a larger feature set than MySQL'),
  861. ('MongoDB', 'A popular document-store database'),
  862. ('Cassandra', 'A highly performant and scalable NoSQL database'),
  863. ('Elasticsearch', 'A distributed RESTful search engine that is used as a database for TFB tests'),
  864. ('Redis', 'An open-sourced, BSD licensed, advanced key-value cache and store'),
  865. ('SQLite', 'A network-less database, still supported for backwards compatibility'),
  866. ('SQLServer', 'Microsoft\'s SQL implementation'),
  867. ('None', 'No database was used for these tests, as is the case with Json Serialization and Plaintext')
  868. ]
  869. },
  870. 'approach': {
  871. 'allowed': [
  872. ('Realistic', '...'),
  873. ('Stripped', '...')
  874. ]
  875. },
  876. 'orm': {
  877. 'allowed': [
  878. ('Full', 'Has a full suite of features like lazy loading, caching, multiple language support, sometimes pre-configured with scripts.'),
  879. ('Micro', 'Has basic database driver capabilities such as establishing a connection and sending queries.'),
  880. ('Raw', 'Tests that do not use an ORM will be classified as "raw" meaning they use the platform\'s raw database connectivity.')
  881. ]
  882. },
  883. 'platform': {
  884. 'help': ('platform', 'Name of the platform this framework runs on, e.g. Node.js, PyPy, hhvm, JRuby ...')
  885. },
  886. 'framework': {
  887. # Guranteed to be here and correct at this point
  888. # key is left here to produce the set of required keys
  889. },
  890. 'os': {
  891. 'allowed': [
  892. ('Linux', 'Our best-supported host OS, it is recommended that you build your tests for Linux hosts'),
  893. ('Windows', 'TFB is not fully-compatible on windows, contribute towards our work on compatibility: %s' % windows_url)
  894. ]
  895. },
  896. 'database_os': {
  897. 'allowed': [
  898. ('Linux', 'Our best-supported host OS, it is recommended that you build your tests for Linux hosts'),
  899. ('Windows', 'TFB is not fully-compatible on windows, contribute towards our work on compatibility: %s' % windows_url)
  900. ]
  901. }
  902. }
  903. # Confirm required keys are present
  904. required_keys = schema.keys()
  905. missing = list(set(required_keys) - set(test_keys))
  906. if len(missing) > 0:
  907. missingstr = (", ").join(map(str, missing))
  908. raise Exception("benchmark_config.json for test %s is invalid, please amend by adding the following required keys: [%s]"
  909. % (test_name, missingstr))
  910. # Check the (all optional) test urls
  911. validate_urls(test_name, test_keys)
  912. # Check values of keys against schema
  913. for key in required_keys:
  914. val = test_keys.get(key, "").lower()
  915. has_predefined_acceptables = 'allowed' in schema[key]
  916. if has_predefined_acceptables:
  917. allowed = schema[key].get('allowed', [])
  918. acceptable_values, descriptors = zip(*allowed)
  919. acceptable_values = [a.lower() for a in acceptable_values]
  920. if val not in acceptable_values:
  921. msg = ("Invalid `%s` value specified for test \"%s\" in framework \"%s\"; suggestions:\n"
  922. % (key, test_name, test_keys['framework']))
  923. helpinfo = ('\n').join([" `%s` -- %s" % (v, desc) for (v, desc) in zip(acceptable_values, descriptors)])
  924. fullerr = msg + helpinfo + "\n"
  925. raise Exception(fullerr)
  926. elif not has_predefined_acceptables and val == "":
  927. msg = ("Value for `%s` in test \"%s\" in framework \"%s\" was missing:\n"
  928. % (key, test_name, test_keys['framework']))
  929. helpinfo = " %s -- %s" % schema[key]['help']
  930. fullerr = msg + helpinfo + '\n'
  931. raise Exception(fullerr)
  932. def parse_config(config, directory, benchmarker):
  933. """
  934. Parses a config file into a list of FrameworkTest objects
  935. """
  936. tests = []
  937. # The config object can specify multiple tests
  938. # Loop over them and parse each into a FrameworkTest
  939. for test in config['tests']:
  940. tests_to_run = [name for (name,keys) in test.iteritems()]
  941. if "default" not in tests_to_run:
  942. logging.warn("Framework %s does not define a default test in benchmark_config.json", config['framework'])
  943. # Check that each test configuration is acceptable
  944. # Throw exceptions if a field is missing, or how to improve the field
  945. for test_name, test_keys in test.iteritems():
  946. # Validates the benchmark_config entry
  947. validate_test(test_name, test_keys, directory)
  948. # Map test type to a parsed FrameworkTestType object
  949. runTests = dict()
  950. for type_name, type_obj in benchmarker.types.iteritems():
  951. try:
  952. # Makes a FrameWorkTestType object using some of the keys in config
  953. # e.g. JsonTestType uses "json_url"
  954. runTests[type_name] = type_obj.copy().parse(test_keys)
  955. except AttributeError as ae:
  956. # This is quite common - most tests don't support all types
  957. # Quitely log it and move on (debug logging is on in travis and this causes
  958. # ~1500 lines of debug, so I'm totally ignoring it for now
  959. # logging.debug("Missing arguments for test type %s for framework test %s", type_name, test_name)
  960. pass
  961. # We need to sort by test_type to run
  962. sortedTestKeys = sorted(runTests.keys(), key=test_order)
  963. sortedRunTests = OrderedDict()
  964. for sortedTestKey in sortedTestKeys:
  965. sortedRunTests[sortedTestKey] = runTests[sortedTestKey]
  966. # Prefix all test names with framework except 'default' test
  967. # Done at the end so we may still refer to the primary test as `default` in benchmark config error messages
  968. if test_name == 'default':
  969. test_name = config['framework']
  970. else:
  971. test_name = "%s-%s" % (config['framework'], test_name)
  972. # By passing the entire set of keys, each FrameworkTest will have a member for each key
  973. tests.append(FrameworkTest(test_name, directory, benchmarker, sortedRunTests, test_keys))
  974. return tests