benchmarker.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505
  1. from toolset.utils.output_helper import header, tee_output
  2. from toolset.utils.metadata_helper import gather_tests, gather_remaining_tests
  3. from toolset.utils import docker_helper
  4. import os
  5. import subprocess
  6. import traceback
  7. import sys
  8. import logging
  9. import socket
  10. import time
  11. import json
  12. import shlex
  13. from pprint import pprint
  14. from multiprocessing import Process
  15. class Benchmarker:
  16. def __init__(self, config, results):
  17. '''
  18. Initialize the benchmarker.
  19. '''
  20. self.config = config
  21. self.results = results
  22. ##########################################################################################
  23. # Public methods
  24. ##########################################################################################
  25. def run(self):
  26. '''
  27. This process involves setting up the client/server machines
  28. with any necessary change. Then going through each test,
  29. running their setup script, verifying the URLs, and
  30. running benchmarks against them.
  31. '''
  32. # Generate metadata
  33. self.__run_list_test_metadata()
  34. # Get a list of all known tests that we can run.
  35. all_tests = gather_remaining_tests(self.config, self.results)
  36. # Setup client/server
  37. print(
  38. header(
  39. "Preparing Server, Database, and Client ...",
  40. top='=',
  41. bottom='='))
  42. with self.config.quiet_out.enable():
  43. self.__setup_server()
  44. self.__setup_database()
  45. self.__setup_client()
  46. # Run tests
  47. print(header("Running Tests...", top='=', bottom='='))
  48. result = self.__run_tests(all_tests)
  49. # Parse results
  50. if self.config.mode == "benchmark":
  51. print(header("Parsing Results ...", top='=', bottom='='))
  52. self.results.parse(all_tests)
  53. self.results.set_completion_time()
  54. self.results.upload()
  55. self.results.finish()
  56. return result
  57. ##########################################################################################
  58. # Private methods
  59. ##########################################################################################
  60. def __benchmark(self, framework_test, logPath):
  61. '''
  62. Runs the benchmark for each type of test that it implements
  63. '''
  64. def benchmark_type(test_type):
  65. benchmarkPath = os.path.join(logPath, test_type)
  66. try:
  67. os.makedirs(benchmarkPath)
  68. except OSError:
  69. pass
  70. with open(os.path.join(benchmarkPath, 'benchmark.txt'),
  71. 'w') as out:
  72. out.write("BENCHMARKING %s ... " % test_type.upper())
  73. test = framework_test.runTests[test_type]
  74. test.setup_out(out)
  75. raw_file = self.results.get_raw_file(framework_test.name,
  76. test_type)
  77. if not os.path.exists(raw_file):
  78. # Open to create the empty file
  79. with open(raw_file, 'w'):
  80. pass
  81. if not test.failed:
  82. remote_script = self.config.types[
  83. test_type].get_remote_script(self.config, test.name,
  84. test.get_url(),
  85. framework_test.port)
  86. # Begin resource usage metrics collection
  87. self.__begin_logging(framework_test, test_type)
  88. # Run the benchmark
  89. with open(raw_file, 'w') as raw_file:
  90. p = subprocess.Popen(
  91. self.config.client_ssh_command,
  92. stdin=subprocess.PIPE,
  93. stdout=raw_file,
  94. stderr=raw_file)
  95. p.communicate(remote_script)
  96. out.flush()
  97. # End resource usage metrics collection
  98. self.__end_logging()
  99. results = self.results.parse_test(framework_test, test_type)
  100. print("Benchmark results:")
  101. pprint(results)
  102. self.results.report_benchmark_results(
  103. framework_test, test_type, results['results'])
  104. out.write("Complete\n")
  105. out.flush()
  106. for test_type in framework_test.runTests:
  107. benchmark_type(test_type)
  108. def __run_list_test_metadata(self):
  109. '''
  110. Prints the metadata for all the available tests
  111. '''
  112. all_tests = gather_tests(benchmarker_config=self.config)
  113. all_tests_json = json.dumps(map(lambda test: {
  114. "name": test.name,
  115. "approach": test.approach,
  116. "classification": test.classification,
  117. "database": test.database,
  118. "framework": test.framework,
  119. "language": test.language,
  120. "orm": test.orm,
  121. "platform": test.platform,
  122. "webserver": test.webserver,
  123. "os": test.os,
  124. "database_os": test.database_os,
  125. "display_name": test.display_name,
  126. "notes": test.notes,
  127. "versus": test.versus
  128. }, all_tests))
  129. with open(
  130. os.path.join(self.results.directory, "test_metadata.json"),
  131. "w") as f:
  132. f.write(all_tests_json)
  133. def __begin_logging(self, framework_test, test_type):
  134. '''
  135. Starts a thread to monitor the resource usage, to be synced with the
  136. client's time.
  137. TODO: MySQL and InnoDB are possible. Figure out how to implement them.
  138. '''
  139. output_file = "{file_name}".format(
  140. file_name=self.results.get_stats_file(framework_test.name,
  141. test_type))
  142. dstat_string = "dstat -Tafilmprs --aio --fs --ipc --lock --raw --socket --tcp \
  143. --raw --socket --tcp --udp --unix --vm --disk-util \
  144. --rpc --rpcd --output {output_file}".format(
  145. output_file=output_file)
  146. cmd = shlex.split(dstat_string)
  147. dev_null = open(os.devnull, "w")
  148. self.subprocess_handle = subprocess.Popen(
  149. cmd, stdout=dev_null, stderr=subprocess.STDOUT)
  150. def __end_logging(self):
  151. '''
  152. Stops the logger thread and blocks until shutdown is complete.
  153. '''
  154. self.subprocess_handle.terminate()
  155. self.subprocess_handle.communicate()
  156. def __setup_server(self):
  157. '''
  158. Makes any necessary changes to the server that should be
  159. made before running the tests. This involves setting kernal
  160. settings to allow for more connections, or more file
  161. descriptiors
  162. http://redmine.lighttpd.net/projects/weighttp/wiki#Troubleshooting
  163. '''
  164. try:
  165. if os.name == 'nt':
  166. return True
  167. subprocess.call(
  168. ['sudo', 'sysctl', '-w', 'net.ipv4.tcp_max_syn_backlog=65535'],
  169. stdout=self.config.quiet_out,
  170. stderr=subprocess.STDOUT)
  171. subprocess.call(
  172. ['sudo', 'sysctl', '-w', 'net.core.somaxconn=65535'],
  173. stdout=self.config.quiet_out,
  174. stderr=subprocess.STDOUT)
  175. subprocess.call(
  176. ['sudo', 'sysctl', 'net.ipv4.tcp_tw_reuse=1'],
  177. stdout=self.config.quiet_out,
  178. stderr=subprocess.STDOUT)
  179. subprocess.call(
  180. ['sudo', 'sysctl', 'net.ipv4.tcp_tw_recycle=1'],
  181. stdout=self.config.quiet_out,
  182. stderr=subprocess.STDOUT)
  183. subprocess.call(
  184. ['sudo', 'sysctl', '-w', 'kernel.shmmax=134217728'],
  185. stdout=self.config.quiet_out,
  186. stderr=subprocess.STDOUT)
  187. subprocess.call(
  188. ['sudo', 'sysctl', '-w', 'kernel.shmall=2097152'],
  189. stdout=self.config.quiet_out,
  190. stderr=subprocess.STDOUT)
  191. with open(os.path.join(self.results.directory, 'sysctl.txt'),
  192. 'w') as f:
  193. f.write(subprocess.check_output(['sudo', 'sysctl', '-a']))
  194. except subprocess.CalledProcessError:
  195. return False
  196. def __setup_database(self):
  197. '''
  198. Makes any necessary changes to the database machine that should be made
  199. before running the tests. Is very similar to the server setup, but may also
  200. include database specific changes.
  201. Explanations:
  202. net.ipv4.tcp_max_syn_backlog, net.core.somaxconn, kernel.sched_autogroup_enabled: http://tweaked.io/guide/kernel/
  203. ulimit -n: http://www.cyberciti.biz/faq/linux-increase-the-maximum-number-of-open-files/
  204. net.ipv4.tcp_tw_*: http://www.linuxbrigade.com/reduce-time_wait-socket-connections/
  205. kernel.shm*: http://seriousbirder.com/blogs/linux-understanding-shmmax-and-shmall-settings/
  206. For kernel.sem: https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/5/html/Tuning_and_Optimizing_Red_Hat_Enterprise_Linux_for_Oracle_9i_and_10g_Databases/chap-Oracle_9i_and_10g_Tuning_Guide-Setting_Semaphores.html
  207. '''
  208. command = list(self.config.database_ssh_command)
  209. command.extend([
  210. """
  211. sudo sysctl -w net.ipv4.tcp_max_syn_backlog=65535
  212. sudo sysctl -w net.core.somaxconn=65535
  213. sudo sysctl -w kernel.sched_autogroup_enabled=0
  214. sudo -s ulimit -n 65535
  215. sudo sysctl net.ipv4.tcp_tw_reuse=1
  216. sudo sysctl net.ipv4.tcp_tw_recycle=1
  217. sudo sysctl -w kernel.shmmax=2147483648
  218. sudo sysctl -w kernel.shmall=2097152
  219. sudo sysctl -w kernel.sem="250 32000 256 512"
  220. """
  221. ])
  222. subprocess.check_call(command)
  223. # TODO - print kernel configuration to file
  224. # echo "Printing kernel configuration:" && sudo sysctl -a
  225. def __setup_client(self):
  226. '''
  227. Makes any necessary changes to the client machine that should be made
  228. before running the tests. Is very similar to the server setup, but may also
  229. include client specific changes.
  230. '''
  231. command = list(self.config.client_ssh_command)
  232. command.extend([
  233. """
  234. sudo sysctl -w net.ipv4.tcp_max_syn_backlog=65535
  235. sudo sysctl -w net.core.somaxconn=65535
  236. sudo -s ulimit -n 65535
  237. sudo sysctl net.ipv4.tcp_tw_reuse=1
  238. sudo sysctl net.ipv4.tcp_tw_recycle=1
  239. sudo sysctl -w kernel.shmmax=2147483648
  240. sudo sysctl -w kernel.shmall=2097152
  241. """
  242. ])
    def __run_tests(self, tests):
        '''
        Calls each test passed in tests to __run_test in a separate process.
        Each test is given a set amount of time and if kills the child process
        (and subsequently all of its child processes).

        @param tests: list of framework tests to execute
        @return 0 when every test completed without error, 1 otherwise
        '''
        if len(tests) == 0:
            return 0

        logging.debug("Start __run_tests.")
        logging.debug("__name__ = %s", __name__)

        error_happened = False
        if self.config.os.lower() == 'windows':
            logging.debug("Executing __run_tests on Windows")
            # multiprocessing-based timeouts are unavailable here, so each
            # test runs inline in this process with no time limit.
            for test in tests:
                with self.config.quiet_out.enable():
                    if self.__run_test(test) != 0:
                        error_happened = True
        else:
            logging.debug("Executing __run_tests on Linux")

            # These features do not work on Windows
            for test in tests:
                print(header("Running Test: %s" % test.name))
                with self.config.quiet_out.enable():
                    # Run the test in a child process so a hung test can be
                    # killed once run_test_timeout_seconds elapses.
                    test_process = Process(
                        target=self.__run_test,
                        name="Test Runner (%s)" % test.name,
                        args=(test, ))
                    test_process.start()
                    test_process.join(self.config.run_test_timeout_seconds)

                # Load intermediate result from child process
                self.results.load()

                # join() returning while the child is alive means the
                # timeout expired: record the timeout, then kill the child.
                if (test_process.is_alive()):
                    logging.debug(
                        "Child process for {name} is still alive. Terminating.".
                        format(name=test.name))
                    self.results.write_intermediate(
                        test.name, "__run_test timeout (=" +
                        str(self.config.run_test_timeout_seconds) +
                        " seconds)")
                    test_process.terminate()
                    test_process.join()
                # Non-zero exit code (including termination) counts as error.
                if test_process.exitcode != 0:
                    error_happened = True

        logging.debug("End __run_tests.")

        if error_happened:
            return 1

        return 0
    def __run_test(self, test):
        '''
        Runs a single framework test, normally inside a child process
        spawned by __run_tests: checks port availability, starts the
        database and web-app containers, waits for startup, verifies URLs,
        optionally benchmarks, then tears everything down.

        Exits via sys.exit() so the parent process can observe an exit
        code (0 = success/skip, 1 = failure). The `return sys.exit(...)`
        form is redundant but harmless, since sys.exit raises.

        @param test: the framework test to run
        '''
        # Each test logs into its own directory under the results directory.
        logDir = os.path.join(self.results.directory, test.name.lower())
        try:
            os.makedirs(logDir)
        except Exception:
            # Directory may already exist from a previous run.
            pass
        with open(os.path.join(logDir, 'out.txt'), 'w') as out:
            # Skip tests whose declared OS does not match this environment.
            if test.os.lower() != self.config.os.lower(
            ) or test.database_os.lower() != self.config.database_os.lower():
                out.write(
                    "OS or Database OS specified in benchmark_config.json does not match the current environment. Skipping.\n"
                )
                return sys.exit(0)

            # If the test is in the excludes list, we skip it
            if self.config.exclude != None and test.name in self.config.exclude:
                out.write(
                    "Test {name} has been added to the excludes list. Skipping.\n".
                    format(name=test.name))
                return sys.exit(0)

            database_container_id = None
            try:
                # A bound port means a previous test did not shut down
                # cleanly; wait one minute for it to free up before failing.
                if self.__is_port_bound(test.port):
                    time.sleep(60)
                    if self.__is_port_bound(test.port):
                        # We gave it our all
                        self.results.write_intermediate(test.name, "port " + str(
                            test.port) + " is not available before start")
                        out.write(
                            header(
                                "Error: Port %s is not available, cannot start %s"
                                % (test.port, test.name)))
                        out.flush()
                        print("Error: Unable to recover port, cannot start test")
                        return sys.exit(1)

                # Start database container
                if test.database.lower() != "none":
                    database_container_id = docker_helper.start_database(
                        self.config, test.database.lower())
                    if not database_container_id:
                        out.write(
                            "ERROR: Problem building/running database container"
                        )
                        out.flush()
                        self.results.write_intermediate(
                            test.name, "ERROR: Problem starting")
                        return sys.exit(1)

                # Start webapp
                result = test.start(out, database_container_id)
                if result != 0:
                    docker_helper.stop(self.config, database_container_id,
                                       test, out)
                    time.sleep(5)
                    out.write("ERROR: Problem starting {name}\n".format(
                        name=test.name))
                    out.flush()
                    self.results.write_intermediate(test.name,
                                                    "ERROR: Problem starting")
                    return sys.exit(1)

                # Poll for up to max_sleep seconds for the app to come up,
                # while watching for any expected container exiting early.
                slept = 0
                max_sleep = 60
                while not test.is_running() and slept < max_sleep:
                    if not docker_helper.successfully_running_containers(
                            test.get_docker_files(), database_container_id,
                            out):
                        tee_output(
                            out,
                            "ERROR: One or more expected docker container exited early"
                            + os.linesep)
                        return sys.exit(1)
                    time.sleep(1)
                    slept += 1

                # Debug mode blocks execution here until ctrl+c
                if self.config.mode == "debug":
                    logging.info(
                        "Entering debug mode. Server has started. CTRL-c to stop."
                    )
                    while True:
                        time.sleep(1)

                # Verify URLs
                logging.info("Verifying framework URLs")
                passed_verify = test.verify_urls(logDir)

                # Benchmark this test
                if self.config.mode == "benchmark":
                    logging.info("Benchmarking")
                    out.write(header("Benchmarking %s" % test.name))
                    out.flush()
                    self.__benchmark(test, logDir)

                # Stop this test
                docker_helper.stop(self.config, database_container_id, test,
                                   out)

                # Remove contents of /tmp folder
                try:
                    subprocess.check_call(
                        'sudo rm -rf /tmp/*',
                        shell=True,
                        stderr=out,
                        stdout=out)
                except Exception:
                    # Best-effort cleanup; note the failure but keep going.
                    out.write(header("Error: Could not empty /tmp"))

                # Save results thus far into the latest results directory
                self.results.write_intermediate(test.name,
                                                time.strftime(
                                                    "%Y%m%d%H%M%S",
                                                    time.localtime()))

                # Upload the results thus far to another server (optional)
                self.results.upload()

                if self.config.mode == "verify" and not passed_verify:
                    print("Failed verify!")
                    return sys.exit(1)
            except KeyboardInterrupt:
                # User abort: still stop any containers that were started.
                docker_helper.stop(self.config, database_container_id, test,
                                   out)
            except (OSError, IOError, subprocess.CalledProcessError) as e:
                # Setup-stage failure: record it both in the intermediate
                # results and in this test's own log before exiting non-zero.
                traceback.print_exc()
                self.results.write_intermediate(
                    test.name, "error during test setup: " + str(e))
                out.write(header("Subprocess Error %s" % test.name))
                traceback.print_exc(file=out)
                out.flush()
                out.close()
                return sys.exit(1)
            out.close()
            return sys.exit(0)
  418. def __is_port_bound(self, port):
  419. '''
  420. Check if the requested port is available. If it isn't available, then a
  421. previous test probably didn't shutdown properly.
  422. '''
  423. port = int(port)
  424. s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  425. try:
  426. # Try to bind to all IP addresses, this port
  427. s.bind(("", port))
  428. # If we get here, we were able to bind successfully,
  429. # which means the port is free.
  430. except socket.error:
  431. # If we get an exception, it might be because the port is still bound
  432. # which would be bad, or maybe it is a privileged port (<1024) and we
  433. # are not running as root, or maybe the server is gone, but sockets are
  434. # still in TIME_WAIT (SO_REUSEADDR). To determine which scenario, try to
  435. # connect.
  436. try:
  437. s.connect(("127.0.0.1", port))
  438. # If we get here, we were able to connect to something, which means
  439. # that the port is still bound.
  440. return True
  441. except socket.error:
  442. # An exception means that we couldn't connect, so a server probably
  443. # isn't still running on the port.
  444. pass
  445. finally:
  446. s.close()
  447. return False