# benchmarker.py
  1. from toolset.utils.output_helper import header
  2. from toolset.utils.metadata_helper import gather_tests, gather_remaining_tests
  3. from toolset.utils import docker_helper
  4. import os
  5. import subprocess
  6. import traceback
  7. import sys
  8. import logging
  9. import socket
  10. import time
  11. import json
  12. import shlex
  13. from pprint import pprint
  14. from multiprocessing import Process
  15. class Benchmarker:
  16. def __init__(self, config, results):
  17. '''
  18. Initialize the benchmarker.
  19. '''
  20. self.config = config
  21. self.results = results
  22. ##########################################################################################
  23. # Public methods
  24. ##########################################################################################
  25. def run(self):
  26. '''
  27. This process involves setting up the client/server machines
  28. with any necessary change. Then going through each test,
  29. running their setup script, verifying the URLs, and
  30. running benchmarks against them.
  31. '''
  32. # Generate metadata
  33. self.__run_list_test_metadata()
  34. # Get a list of all known tests that we can run.
  35. all_tests = gather_remaining_tests(self.config, self.results)
  36. # Setup client/server
  37. print(
  38. header(
  39. "Preparing Server, Database, and Client ...",
  40. top='=',
  41. bottom='='))
  42. with self.config.quiet_out.enable():
  43. self.__setup_server()
  44. self.__setup_database()
  45. self.__setup_client()
  46. # Run tests
  47. print(header("Running Tests...", top='=', bottom='='))
  48. result = self.__run_tests(all_tests)
  49. # Parse results
  50. if self.config.mode == "benchmark":
  51. print(header("Parsing Results ...", top='=', bottom='='))
  52. self.results.parse(all_tests)
  53. self.results.set_completion_time()
  54. self.results.upload()
  55. self.results.finish()
  56. return result
  57. ##########################################################################################
  58. # Private methods
  59. ##########################################################################################
  60. def __benchmark(self, framework_test, logPath):
  61. '''
  62. Runs the benchmark for each type of test that it implements
  63. '''
  64. def benchmark_type(test_type):
  65. benchmarkPath = os.path.join(logPath, test_type)
  66. try:
  67. os.makedirs(benchmarkPath)
  68. except OSError:
  69. pass
  70. with open(os.path.join(benchmarkPath, 'benchmark.txt'),
  71. 'w') as out:
  72. out.write("BENCHMARKING %s ... " % test_type.upper())
  73. test = framework_test.runTests[test_type]
  74. test.setup_out(out)
  75. raw_file = self.results.get_raw_file(framework_test.name,
  76. test_type)
  77. if not os.path.exists(raw_file):
  78. # Open to create the empty file
  79. with open(raw_file, 'w'):
  80. pass
  81. if not test.failed:
  82. remote_script = self.config.types[
  83. test_type].get_remote_script(self.config, test.name,
  84. test.get_url(),
  85. framework_test.port)
  86. # Begin resource usage metrics collection
  87. self.__begin_logging(framework_test, test_type)
  88. # Run the benchmark
  89. with open(raw_file, 'w') as raw_file:
  90. p = subprocess.Popen(
  91. self.config.client_ssh_string.split(" "),
  92. stdin=subprocess.PIPE,
  93. stdout=raw_file,
  94. stderr=raw_file)
  95. p.communicate(remote_script)
  96. out.flush()
  97. # End resource usage metrics collection
  98. self.__end_logging()
  99. results = self.results.parse_test(framework_test, test_type)
  100. print("Benchmark results:")
  101. pprint(results)
  102. self.results.report_benchmark_results(
  103. framework_test, test_type, results['results'])
  104. out.write("Complete\n")
  105. out.flush()
  106. for test_type in framework_test.runTests:
  107. benchmark_type(test_type)
  108. def __run_list_test_metadata(self):
  109. '''
  110. Prints the metadata for all the available tests
  111. '''
  112. all_tests = gather_tests(benchmarker_config=self.config)
  113. all_tests_json = json.dumps(map(lambda test: {
  114. "name": test.name,
  115. "approach": test.approach,
  116. "classification": test.classification,
  117. "database": test.database,
  118. "framework": test.framework,
  119. "language": test.language,
  120. "orm": test.orm,
  121. "platform": test.platform,
  122. "webserver": test.webserver,
  123. "os": test.os,
  124. "database_os": test.database_os,
  125. "display_name": test.display_name,
  126. "notes": test.notes,
  127. "versus": test.versus
  128. }, all_tests))
  129. with open(
  130. os.path.join(self.results.directory, "test_metadata.json"),
  131. "w") as f:
  132. f.write(all_tests_json)
  133. def __begin_logging(self, framework_test, test_type):
  134. '''
  135. Starts a thread to monitor the resource usage, to be synced with the
  136. client's time.
  137. TODO: MySQL and InnoDB are possible. Figure out how to implement them.
  138. '''
  139. output_file = "{file_name}".format(
  140. file_name=self.results.get_stats_file(framework_test.name,
  141. test_type))
  142. dstat_string = "dstat -Tafilmprs --aio --fs --ipc --lock --raw --socket --tcp \
  143. --raw --socket --tcp --udp --unix --vm --disk-util \
  144. --rpc --rpcd --output {output_file}".format(
  145. output_file=output_file)
  146. cmd = shlex.split(dstat_string)
  147. dev_null = open(os.devnull, "w")
  148. self.subprocess_handle = subprocess.Popen(
  149. cmd, stdout=dev_null, stderr=subprocess.STDOUT)
  150. def __end_logging(self):
  151. '''
  152. Stops the logger thread and blocks until shutdown is complete.
  153. '''
  154. self.subprocess_handle.terminate()
  155. self.subprocess_handle.communicate()
  156. def __setup_server(self):
  157. '''
  158. Makes any necessary changes to the server that should be
  159. made before running the tests. This involves setting kernal
  160. settings to allow for more connections, or more file
  161. descriptiors
  162. http://redmine.lighttpd.net/projects/weighttp/wiki#Troubleshooting
  163. '''
  164. try:
  165. if os.name == 'nt':
  166. return True
  167. subprocess.call(
  168. ['sudo', 'sysctl', '-w', 'net.ipv4.tcp_max_syn_backlog=65535'],
  169. stdout=self.config.quiet_out,
  170. stderr=subprocess.STDOUT)
  171. subprocess.call(
  172. ['sudo', 'sysctl', '-w', 'net.core.somaxconn=65535'],
  173. stdout=self.config.quiet_out,
  174. stderr=subprocess.STDOUT)
  175. subprocess.call(
  176. ['sudo', 'sysctl', 'net.ipv4.tcp_tw_reuse=1'],
  177. stdout=self.config.quiet_out,
  178. stderr=subprocess.STDOUT)
  179. subprocess.call(
  180. ['sudo', 'sysctl', 'net.ipv4.tcp_tw_recycle=1'],
  181. stdout=self.config.quiet_out,
  182. stderr=subprocess.STDOUT)
  183. subprocess.call(
  184. ['sudo', 'sysctl', '-w', 'kernel.shmmax=134217728'],
  185. stdout=self.config.quiet_out,
  186. stderr=subprocess.STDOUT)
  187. subprocess.call(
  188. ['sudo', 'sysctl', '-w', 'kernel.shmall=2097152'],
  189. stdout=self.config.quiet_out,
  190. stderr=subprocess.STDOUT)
  191. with open(os.path.join(self.results.directory, 'sysctl.txt'),
  192. 'w') as f:
  193. f.write(subprocess.check_output(['sudo', 'sysctl', '-a']))
  194. except subprocess.CalledProcessError:
  195. return False
  196. def __setup_database(self):
  197. '''
  198. Makes any necessary changes to the database machine that should be made
  199. before running the tests. Is very similar to the server setup, but may also
  200. include database specific changes.
  201. Explanations:
  202. net.ipv4.tcp_max_syn_backlog, net.core.somaxconn, kernel.sched_autogroup_enabled: http://tweaked.io/guide/kernel/
  203. ulimit -n: http://www.cyberciti.biz/faq/linux-increase-the-maximum-number-of-open-files/
  204. net.ipv4.tcp_tw_*: http://www.linuxbrigade.com/reduce-time_wait-socket-connections/
  205. kernel.shm*: http://seriousbirder.com/blogs/linux-understanding-shmmax-and-shmall-settings/
  206. For kernel.sem: https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/5/html/Tuning_and_Optimizing_Red_Hat_Enterprise_Linux_for_Oracle_9i_and_10g_Databases/chap-Oracle_9i_and_10g_Tuning_Guide-Setting_Semaphores.html
  207. '''
  208. p = subprocess.Popen(
  209. self.config.database_ssh_string,
  210. stdin=subprocess.PIPE,
  211. shell=True,
  212. stdout=self.config.quiet_out,
  213. stderr=subprocess.STDOUT)
  214. p.communicate("""
  215. sudo sysctl -w net.ipv4.tcp_max_syn_backlog=65535
  216. sudo sysctl -w net.core.somaxconn=65535
  217. sudo sysctl -w kernel.sched_autogroup_enabled=0
  218. sudo -s ulimit -n 65535
  219. sudo sysctl net.ipv4.tcp_tw_reuse=1
  220. sudo sysctl net.ipv4.tcp_tw_recycle=1
  221. sudo sysctl -w kernel.shmmax=2147483648
  222. sudo sysctl -w kernel.shmall=2097152
  223. sudo sysctl -w kernel.sem="250 32000 256 512"
  224. """)
  225. # TODO - print kernel configuration to file
  226. # echo "Printing kernel configuration:" && sudo sysctl -a
  227. def __setup_client(self):
  228. '''
  229. Makes any necessary changes to the client machine that should be made
  230. before running the tests. Is very similar to the server setup, but may also
  231. include client specific changes.
  232. '''
  233. p = subprocess.Popen(
  234. self.config.client_ssh_string,
  235. stdin=subprocess.PIPE,
  236. shell=True,
  237. stdout=self.config.quiet_out,
  238. stderr=subprocess.STDOUT)
  239. p.communicate("""
  240. sudo sysctl -w net.ipv4.tcp_max_syn_backlog=65535
  241. sudo sysctl -w net.core.somaxconn=65535
  242. sudo -s ulimit -n 65535
  243. sudo sysctl net.ipv4.tcp_tw_reuse=1
  244. sudo sysctl net.ipv4.tcp_tw_recycle=1
  245. sudo sysctl -w kernel.shmmax=2147483648
  246. sudo sysctl -w kernel.shmall=2097152
  247. """)
  248. def __run_tests(self, tests):
  249. '''
  250. Calls each test passed in tests to __run_test in a separate process.
  251. Each test is given a set amount of time and if kills the child process
  252. (and subsequently all of its child processes).
  253. '''
  254. if len(tests) == 0:
  255. return 0
  256. logging.debug("Start __run_tests.")
  257. logging.debug("__name__ = %s", __name__)
  258. error_happened = False
  259. if self.config.os.lower() == 'windows':
  260. logging.debug("Executing __run_tests on Windows")
  261. for test in tests:
  262. with self.config.quiet_out.enable():
  263. if self.__run_test(test) != 0:
  264. error_happened = True
  265. else:
  266. logging.debug("Executing __run_tests on Linux")
  267. # These features do not work on Windows
  268. for test in tests:
  269. print(header("Running Test: %s" % test.name))
  270. with self.config.quiet_out.enable():
  271. test_process = Process(
  272. target=self.__run_test,
  273. name="Test Runner (%s)" % test.name,
  274. args=(test, ))
  275. test_process.start()
  276. test_process.join(self.config.run_test_timeout_seconds)
  277. # Load intermediate result from child process
  278. self.results.load()
  279. if (test_process.is_alive()):
  280. logging.debug(
  281. "Child process for {name} is still alive. Terminating.".
  282. format(name=test.name))
  283. self.results.write_intermediate(
  284. test.name, "__run_test timeout (=" +
  285. str(self.config.run_test_timeout_seconds) +
  286. " seconds)")
  287. test_process.terminate()
  288. test_process.join()
  289. if test_process.exitcode != 0:
  290. error_happened = True
  291. logging.debug("End __run_tests.")
  292. if error_happened:
  293. return 1
  294. return 0
  295. def __run_test(self, test):
  296. '''
  297. Ensures that the system has all necessary software to run the tests.
  298. This does not include that software for the individual test, but covers
  299. software such as curl and weighttp that are needed.
  300. '''
  301. logDir = os.path.join(self.results.directory, test.name.lower())
  302. try:
  303. os.makedirs(logDir)
  304. except Exception:
  305. pass
  306. with open(os.path.join(logDir, 'out.txt'), 'w') as out:
  307. if test.os.lower() != self.config.os.lower(
  308. ) or test.database_os.lower() != self.config.database_os.lower():
  309. out.write(
  310. "OS or Database OS specified in benchmark_config.json does not match the current environment. Skipping.\n"
  311. )
  312. return sys.exit(0)
  313. # If the test is in the excludes list, we skip it
  314. if self.config.exclude != None and test.name in self.config.exclude:
  315. out.write(
  316. "Test {name} has been added to the excludes list. Skipping.\n".
  317. format(name=test.name))
  318. return sys.exit(0)
  319. database_container_id = None
  320. try:
  321. if self.__is_port_bound(test.port):
  322. time.sleep(60)
  323. if self.__is_port_bound(test.port):
  324. # We gave it our all
  325. self.results.write_intermediate(test.name, "port " + str(
  326. test.port) + " is not available before start")
  327. out.write(
  328. header(
  329. "Error: Port %s is not available, cannot start %s"
  330. % (test.port, test.name)))
  331. out.flush()
  332. print("Error: Unable to recover port, cannot start test")
  333. return sys.exit(1)
  334. # Start database container
  335. if test.database.lower() != "none":
  336. database_container_id = docker_helper.start_database(
  337. self.config, test.database.lower())
  338. if not database_container_id:
  339. out.write(
  340. "ERROR: Problem building/running database container"
  341. )
  342. out.flush()
  343. self.results.write_intermediate(
  344. test.name, "ERROR: Problem starting")
  345. return sys.exit(1)
  346. # Start webapp
  347. result = test.start(out, database_container_id)
  348. if result != 0:
  349. docker_helper.stop(self.config, database_container_id,
  350. test, out)
  351. time.sleep(5)
  352. out.write("ERROR: Problem starting {name}\n".format(
  353. name=test.name))
  354. out.flush()
  355. self.results.write_intermediate(test.name,
  356. "ERROR: Problem starting")
  357. return sys.exit(1)
  358. logging.info("Sleeping %s seconds to ensure framework is ready"
  359. % self.config.sleep)
  360. time.sleep(self.config.sleep)
  361. # Verify URLs
  362. if self.config.mode == "debug":
  363. logging.info(
  364. "Entering debug mode. Server has started. CTRL-c to stop."
  365. )
  366. while True:
  367. time.sleep(1)
  368. else:
  369. logging.info("Verifying framework URLs")
  370. passed_verify = test.verify_urls(logDir)
  371. # Benchmark this test
  372. if self.config.mode == "benchmark":
  373. logging.info("Benchmarking")
  374. out.write(header("Benchmarking %s" % test.name))
  375. out.flush()
  376. self.__benchmark(test, logDir)
  377. # Stop this test
  378. docker_helper.stop(self.config, database_container_id, test,
  379. out)
  380. # Remove contents of /tmp folder
  381. try:
  382. subprocess.check_call(
  383. 'sudo rm -rf /tmp/*',
  384. shell=True,
  385. stderr=out,
  386. stdout=out)
  387. except Exception:
  388. out.write(header("Error: Could not empty /tmp"))
  389. # Save results thus far into the latest results directory
  390. self.results.write_intermediate(test.name,
  391. time.strftime(
  392. "%Y%m%d%H%M%S",
  393. time.localtime()))
  394. # Upload the results thus far to another server (optional)
  395. self.results.upload()
  396. if self.config.mode == "verify" and not passed_verify:
  397. print("Failed verify!")
  398. return sys.exit(1)
  399. except KeyboardInterrupt:
  400. docker_helper.stop(self.config, database_container_id, test,
  401. out)
  402. except (OSError, IOError, subprocess.CalledProcessError) as e:
  403. traceback.print_exc()
  404. self.results.write_intermediate(
  405. test.name, "error during test setup: " + str(e))
  406. out.write(header("Subprocess Error %s" % test.name))
  407. traceback.print_exc(file=out)
  408. out.flush()
  409. out.close()
  410. return sys.exit(1)
  411. out.close()
  412. return sys.exit(0)
  413. def __is_port_bound(self, port):
  414. '''
  415. Check if the requested port is available. If it isn't available, then a
  416. previous test probably didn't shutdown properly.
  417. '''
  418. port = int(port)
  419. s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  420. try:
  421. # Try to bind to all IP addresses, this port
  422. s.bind(("", port))
  423. # If we get here, we were able to bind successfully,
  424. # which means the port is free.
  425. except socket.error:
  426. # If we get an exception, it might be because the port is still bound
  427. # which would be bad, or maybe it is a privileged port (<1024) and we
  428. # are not running as root, or maybe the server is gone, but sockets are
  429. # still in TIME_WAIT (SO_REUSEADDR). To determine which scenario, try to
  430. # connect.
  431. try:
  432. s.connect(("127.0.0.1", port))
  433. # If we get here, we were able to connect to something, which means
  434. # that the port is still bound.
  435. return True
  436. except socket.error:
  437. # An exception means that we couldn't connect, so a server probably
  438. # isn't still running on the port.
  439. pass
  440. finally:
  441. s.close()
  442. return False