benchmarker.py

from toolset.utils.output_helper import header, log, log_error, FNULL
from toolset.utils.metadata_helper import gather_tests, gather_remaining_tests
from toolset.utils import docker_helper

import os
import subprocess
import traceback
import socket
import time
import json
import shlex

from pprint import pprint


class Benchmarker:
    def __init__(self, config, results):
        '''
        Initialize the benchmarker.
        '''
        self.config = config
        self.results = results
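
    # Typical usage (a sketch; the config and results objects come from the
    # surrounding toolset, e.g. a BenchmarkConfig and a Results instance):
    #
    #   benchmarker = Benchmarker(config, results)
    #   succeeded = benchmarker.run()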

    ##########################################################################################
    # Public methods
    ##########################################################################################

    def run(self):
        '''
        This process involves setting up the client/server machines
        with any necessary changes. It then goes through each test,
        running its setup script, verifying its URLs, and
        running benchmarks against it.
        '''
        # Generate metadata
        self.__run_list_test_metadata()

        # Get a list of all known tests that we can run.
        all_tests = gather_remaining_tests(self.config, self.results)

        # Setup client/server
        header(
            "Preparing Server, Database, and Client ...", top='=', bottom='=')
        with self.config.quiet_out.enable():
            self.__setup_server()
            self.__setup_database()
            self.__setup_client()

        # Run tests
        success = True
        header("Running Tests...", top='=', bottom='=')
        with open(os.path.join(self.results.directory, 'benchmark.log'),
                  'w') as benchmark_log:
            for test in all_tests:
                header("Running Test: %s" % test.name)
                with self.config.quiet_out.enable():
                    success = self.__run_test(test, benchmark_log) and success
                # Load intermediate result from child process
                self.results.load()

        # Parse results
        if self.config.mode == "benchmark":
            header("Parsing Results ...", top='=', bottom='=')
            self.results.parse(all_tests)

        self.results.set_completion_time()
        self.results.upload()
        self.results.finish()

        return success

    ##########################################################################################
    # Private methods
    ##########################################################################################

    def __run_list_test_metadata(self):
        '''
        Writes the metadata for all the available tests to test_metadata.json
        '''
        all_tests = gather_tests(benchmarker_config=self.config)
        all_tests_json = json.dumps([{
            "name": test.name,
            "approach": test.approach,
            "classification": test.classification,
            "database": test.database,
            "framework": test.framework,
            "language": test.language,
            "orm": test.orm,
            "platform": test.platform,
            "webserver": test.webserver,
            "os": test.os,
            "database_os": test.database_os,
            "display_name": test.display_name,
            "notes": test.notes,
            "versus": test.versus
        } for test in all_tests])

        with open(
                os.path.join(self.results.directory, "test_metadata.json"),
                "w") as f:
            f.write(all_tests_json)
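
        # The resulting test_metadata.json is a JSON array with one object per
        # gathered test, built from the test attributes listed above (these
        # mirror the fields each framework declares in benchmark_config.json).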

    def __setup_server(self):
        '''
        Makes any necessary changes to the server that should be
        made before running the tests. This involves setting kernel
        settings to allow for more connections and more file
        descriptors.
        http://redmine.lighttpd.net/projects/weighttp/wiki#Troubleshooting
        '''
        try:
            subprocess.call(
                ['sudo', 'sysctl', '-w', 'net.ipv4.tcp_max_syn_backlog=65535'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', '-w', 'net.core.somaxconn=65535'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', 'net.ipv4.tcp_tw_reuse=1'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', 'net.ipv4.tcp_tw_recycle=1'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', '-w', 'kernel.shmmax=134217728'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', '-w', 'kernel.shmall=2097152'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)

            with open(os.path.join(self.results.directory, 'sysctl.txt'),
                      'w') as f:
                f.write(subprocess.check_output(['sudo', 'sysctl', '-a']))
        except subprocess.CalledProcessError:
            return False

    def __setup_database(self):
        '''
        Makes any necessary changes to the database machine that should be made
        before running the tests. This is very similar to the server setup, but
        may also include database-specific changes.

        Explanations:
        net.ipv4.tcp_max_syn_backlog, net.core.somaxconn, kernel.sched_autogroup_enabled: http://tweaked.io/guide/kernel/
        ulimit -n: http://www.cyberciti.biz/faq/linux-increase-the-maximum-number-of-open-files/
        net.ipv4.tcp_tw_*: http://www.linuxbrigade.com/reduce-time_wait-socket-connections/
        kernel.shm*: http://seriousbirder.com/blogs/linux-understanding-shmmax-and-shmall-settings/
        For kernel.sem: https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/5/html/Tuning_and_Optimizing_Red_Hat_Enterprise_Linux_for_Oracle_9i_and_10g_Databases/chap-Oracle_9i_and_10g_Tuning_Guide-Setting_Semaphores.html
        '''
        command = list(self.config.database_ssh_command)
        command.extend([
            """
            sudo sysctl -w net.ipv4.tcp_max_syn_backlog=65535
            sudo sysctl -w net.core.somaxconn=65535
            sudo sysctl -w kernel.sched_autogroup_enabled=0
            sudo -s ulimit -n 65535
            sudo sysctl net.ipv4.tcp_tw_reuse=1
            sudo sysctl net.ipv4.tcp_tw_recycle=1
            sudo sysctl -w kernel.shmmax=2147483648
            sudo sysctl -w kernel.shmall=2097152
            sudo sysctl -w kernel.sem="250 32000 256 512"
            """
        ])
        subprocess.check_call(command, stdout=FNULL, stderr=subprocess.STDOUT)
        # TODO - print kernel configuration to file
        # echo "Printing kernel configuration:" && sudo sysctl -a
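        # A possible sketch for the TODO above (hypothetical file name; assumes
        # the same database_ssh_command list used for the tuning commands):
        #
        #   with open(os.path.join(self.results.directory,
        #                          'database-sysctl.txt'), 'w') as f:
        #       f.write(subprocess.check_output(
        #           list(self.config.database_ssh_command) +
        #           ['echo "Printing kernel configuration:" && sudo sysctl -a']))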

    def __setup_client(self):
        '''
        Makes any necessary changes to the client machine that should be made
        before running the tests. This is very similar to the server setup, but
        may also include client-specific changes.
        '''
        command = list(self.config.client_ssh_command)
        command.extend([
            """
            sudo sysctl -w net.ipv4.tcp_max_syn_backlog=65535
            sudo sysctl -w net.core.somaxconn=65535
            sudo -s ulimit -n 65535
            sudo sysctl net.ipv4.tcp_tw_reuse=1
            sudo sysctl net.ipv4.tcp_tw_recycle=1
            sudo sysctl -w kernel.shmmax=2147483648
            sudo sysctl -w kernel.shmall=2097152
            """
        ])
        subprocess.check_call(command, stdout=FNULL, stderr=subprocess.STDOUT)

    def __run_test(self, test, benchmark_log):
        '''
        Runs the given test, verifies that the webapp is accepting requests,
        optionally benchmarks the webapp, and ultimately stops all services
        started for this test.
        '''
        log_prefix = "%s: " % test.name

        if test.os.lower() != self.config.os.lower() or test.database_os.lower(
        ) != self.config.database_os.lower():
            log("OS or Database OS specified in benchmark_config.json does not match the current environment. Skipping.",
                log_prefix, benchmark_log)
            return False

        # If the test is in the excludes list, we skip it
        if self.config.exclude is not None and test.name in self.config.exclude:
            log("Test {name} has been added to the excludes list. Skipping.".
                format(name=test.name),
                log_prefix,
                benchmark_log)
            return False

        database_container_id = None
        try:
            if self.__is_port_bound(test.port):
                time.sleep(60)
                if self.__is_port_bound(test.port):
                    # We gave it our all
                    self.results.write_intermediate(test.name, "port " + str(
                        test.port) + " is not available before start")
                    header("Error: Port %s is not available, cannot start %s" %
                           (test.port, test.name), log_prefix, benchmark_log)
                    return False

            # Start database container
            if test.database.lower() != "none":
                database_container_id = docker_helper.start_database(
                    self.config, test.database.lower())
                if not database_container_id:
                    self.results.write_intermediate(test.name,
                                                    "ERROR: Problem starting")
                    log("ERROR: Problem building/running database container",
                        log_prefix, benchmark_log)
                    return False

            # Start webapp
            result = test.start(database_container_id)
            if result != 0:
                docker_helper.stop(self.config, database_container_id, test)
                self.results.write_intermediate(test.name,
                                                "ERROR: Problem starting")
                log("ERROR: Problem starting {name}".format(name=test.name),
                    log_prefix,
                    benchmark_log)
                return False

            slept = 0
            max_sleep = 60
            while not test.is_running() and slept < max_sleep:
                if not docker_helper.successfully_running_containers(
                        test.get_docker_files(), benchmark_log):
                    docker_helper.stop(self.config, database_container_id,
                                       test)
                    log("ERROR: One or more expected docker containers exited early",
                        log_prefix, benchmark_log)
                    return False
                time.sleep(1)
                slept += 1

            # Debug mode blocks execution here until ctrl+c
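            # (KeyboardInterrupt is caught below, so the webapp and database
            # containers are still stopped cleanly when the user interrupts.)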
            if self.config.mode == "debug":
                log("Entering debug mode. Server has started. CTRL-c to stop.",
                    log_prefix, benchmark_log)
                while True:
                    time.sleep(1)

            # Verify URLs
            log("Verifying framework URLs", log_prefix)
            passed_verify = test.verify_urls()

            # Benchmark this test
            if self.config.mode == "benchmark":
                header(
                    message="Benchmarking %s" % test.name,
                    log_file=benchmark_log)
                self.__benchmark(test, benchmark_log)

            # Stop this test
            docker_helper.stop(self.config, database_container_id, test)

            # Remove contents of /tmp folder
            try:
                subprocess.check_call(
                    'sudo rm -rf /tmp/*',
                    shell=True,
                    stderr=benchmark_log,
                    stdout=benchmark_log)
            except Exception:
                header(
                    message="Error: Could not empty /tmp",
                    log_file=benchmark_log)

            # Save results thus far into the latest results directory
            self.results.write_intermediate(test.name,
                                            time.strftime(
                                                "%Y%m%d%H%M%S",
                                                time.localtime()))

            # Upload the results thus far to another server (optional)
            self.results.upload()

            if self.config.mode == "verify" and not passed_verify:
                log("Failed verify!", log_prefix, benchmark_log)
                return False
        except KeyboardInterrupt:
            docker_helper.stop(self.config, database_container_id, test)
        except (OSError, IOError, subprocess.CalledProcessError) as e:
            tb = traceback.format_exc()
            self.results.write_intermediate(
                test.name, "error during test setup: " + str(e))
            header(
                message="Subprocess Error %s" % test.name,
                log_file=benchmark_log)
            log_error(tb, log_prefix, benchmark_log)
            return False

        return True

    def __benchmark(self, framework_test, benchmark_log):
        '''
        Runs the benchmark for each type of test that it implements
        '''

        def benchmark_type(test_type):
            log("BENCHMARKING %s ... " % test_type.upper(), benchmark_log)

            test = framework_test.runTests[test_type]
            test.setup_out(benchmark_log)
            raw_file = self.results.get_raw_file(framework_test.name,
                                                 test_type)
            if not os.path.exists(raw_file):
                # Open to create the empty file
                with open(raw_file, 'w'):
                    pass

            if not test.failed:
                remote_script = self.config.types[test_type].get_remote_script(
                    self.config, test.name, test.get_url(),
                    framework_test.port)

                # Begin resource usage metrics collection
                self.__begin_logging(framework_test, test_type)

                # Run the benchmark
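                # The script returned by get_remote_script is piped to the
                # client machine over the SSH connection's stdin; everything
                # the client prints is captured in the raw results file.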
                with open(raw_file, 'w') as raw_file:
                    p = subprocess.Popen(
                        self.config.client_ssh_command,
                        stdin=subprocess.PIPE,
                        stdout=raw_file,
                        stderr=raw_file)
                    p.communicate(remote_script)

                # End resource usage metrics collection
                self.__end_logging()

            results = self.results.parse_test(framework_test, test_type)
            log("Benchmark results:", benchmark_log)
            # TODO move into log somehow
            pprint(results)

            self.results.report_benchmark_results(framework_test, test_type,
                                                  results['results'])
            log("Complete", benchmark_log)

        for test_type in framework_test.runTests:
            benchmark_type(test_type)

    def __begin_logging(self, framework_test, test_type):
        '''
        Starts a dstat subprocess to monitor resource usage, to be synced with
        the client's time.
        TODO: MySQL and InnoDB are possible. Figure out how to implement them.
        '''
        output_file = "{file_name}".format(
            file_name=self.results.get_stats_file(framework_test.name,
                                                  test_type))
        dstat_string = "dstat -Tafilmprs --aio --fs --ipc --lock --raw --socket --tcp \
                --udp --unix --vm --disk-util \
                --rpc --rpcd --output {output_file}".format(
            output_file=output_file)
        cmd = shlex.split(dstat_string)
        self.subprocess_handle = subprocess.Popen(
            cmd, stdout=FNULL, stderr=subprocess.STDOUT)
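        # dstat appends CSV samples to output_file (via --output) until
        # __end_logging() terminates the process.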

    def __end_logging(self):
        '''
        Stops the dstat logging subprocess and blocks until shutdown is complete.
        '''
        self.subprocess_handle.terminate()
        self.subprocess_handle.communicate()

    def __is_port_bound(self, port):
        '''
        Check if the requested port is available. If it isn't available, then a
        previous test probably didn't shut down properly.
        '''
        port = int(port)
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            # Try to bind to all IP addresses, this port
            s.bind(("", port))
            # If we get here, we were able to bind successfully,
            # which means the port is free.
        except socket.error:
            # If we get an exception, it might be because the port is still bound
            # which would be bad, or maybe it is a privileged port (<1024) and we
            # are not running as root, or maybe the server is gone, but sockets are
            # still in TIME_WAIT (SO_REUSEADDR). To determine which scenario, try to
            # connect.
            try:
                s.connect(("127.0.0.1", port))
                # If we get here, we were able to connect to something, which means
                # that the port is still bound.
                return True
            except socket.error:
                # An exception means that we couldn't connect, so a server probably
                # isn't still running on the port.
                pass
        finally:
            s.close()

        return False