# benchmarker.py

from toolset.utils.output_helper import log, FNULL
from toolset.utils.metadata_helper import gather_tests, gather_remaining_tests
from toolset.utils import docker_helper

import os
import subprocess
import traceback
import socket
import time
import json
import shlex

from pprint import pprint
from colorama import Fore


class Benchmarker:
    def __init__(self, config, results):
        '''
        Initialize the benchmarker.
        '''
        self.config = config
        self.results = results

    ##########################################################################################
    # Public methods
    ##########################################################################################

    def run(self):
        '''
        Sets up the client/server machines with any necessary changes,
        then goes through each test: running its setup script, verifying
        its URLs, and running benchmarks against it.
        '''
        # Generate metadata
        self.__run_list_test_metadata()

        # Get a list of all known tests that we can run.
        all_tests = gather_remaining_tests(self.config, self.results)

        # Setup client/server
        log("Preparing Server, Database, and Client ...", border='=')
        with self.config.quiet_out.enable():
            self.__setup_server()
            self.__setup_database()
            self.__setup_client()

        # Run tests
        success = True
        log("Running Tests...", border='=')
        with open(os.path.join(self.results.directory, 'benchmark.log'),
                  'w') as benchmark_log:
            for test in all_tests:
                log("Running Test: %s" % test.name, border='-')
                with self.config.quiet_out.enable():
                    success = self.__run_test(test, benchmark_log) and success
                # Load intermediate results from the child process
                self.results.load()

        # Parse results
        if self.config.mode == "benchmark":
            log("Parsing Results ...", border='=')
            self.results.parse(all_tests)

        self.results.set_completion_time()
        self.results.upload()
        self.results.finish()

        return success

    ##########################################################################################
    # Private methods
    ##########################################################################################

    def __run_list_test_metadata(self):
        '''
        Writes the metadata for all the available tests to
        test_metadata.json in the results directory.
        '''
        all_tests = gather_tests(benchmarker_config=self.config)
        all_tests_json = json.dumps([{
            "name": test.name,
            "approach": test.approach,
            "classification": test.classification,
            "database": test.database,
            "framework": test.framework,
            "language": test.language,
            "orm": test.orm,
            "platform": test.platform,
            "webserver": test.webserver,
            "os": test.os,
            "database_os": test.database_os,
            "display_name": test.display_name,
            "notes": test.notes,
            "versus": test.versus
        } for test in all_tests])

        with open(
                os.path.join(self.results.directory, "test_metadata.json"),
                "w") as f:
            f.write(all_tests_json)
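
    # The file written above is a JSON array with one object per test; the
    # keys are exactly those listed in the comprehension above. A sketch of
    # the shape (values are purely illustrative, not from a real run):
    #
    #   [{"name": "...", "approach": "...", "classification": "...", ...}]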

    def __setup_server(self):
        '''
        Makes any necessary changes to the server that should be made
        before running the tests. This involves setting kernel parameters
        to allow for more connections and more file descriptors.

        http://redmine.lighttpd.net/projects/weighttp/wiki#Troubleshooting
        '''
        try:
            subprocess.call(
                ['sudo', 'sysctl', '-w', 'net.ipv4.tcp_max_syn_backlog=65535'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', '-w', 'net.core.somaxconn=65535'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', 'net.ipv4.tcp_tw_reuse=1'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', 'net.ipv4.tcp_tw_recycle=1'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', '-w', 'kernel.shmmax=134217728'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)
            subprocess.call(
                ['sudo', 'sysctl', '-w', 'kernel.shmall=2097152'],
                stdout=FNULL,
                stderr=subprocess.STDOUT)

            with open(os.path.join(self.results.directory, 'sysctl.txt'),
                      'w') as f:
                f.write(subprocess.check_output(['sudo', 'sysctl', '-a']))
        except subprocess.CalledProcessError:
            return False

    def __setup_database(self):
        '''
        Makes any necessary changes to the database machine that should be
        made before running the tests. It is very similar to the server
        setup, but may also include database-specific changes.

        Explanations:
        net.ipv4.tcp_max_syn_backlog, net.core.somaxconn, kernel.sched_autogroup_enabled: http://tweaked.io/guide/kernel/
        ulimit -n: http://www.cyberciti.biz/faq/linux-increase-the-maximum-number-of-open-files/
        net.ipv4.tcp_tw_*: http://www.linuxbrigade.com/reduce-time_wait-socket-connections/
        kernel.shm*: http://seriousbirder.com/blogs/linux-understanding-shmmax-and-shmall-settings/
        kernel.sem: https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/5/html/Tuning_and_Optimizing_Red_Hat_Enterprise_Linux_for_Oracle_9i_and_10g_Databases/chap-Oracle_9i_and_10g_Tuning_Guide-Setting_Semaphores.html
        '''
        command = list(self.config.database_ssh_command)
        command.extend([
            """
            sudo sysctl -w net.ipv4.tcp_max_syn_backlog=65535
            sudo sysctl -w net.core.somaxconn=65535
            sudo sysctl -w kernel.sched_autogroup_enabled=0
            sudo -s ulimit -n 65535
            sudo sysctl net.ipv4.tcp_tw_reuse=1
            sudo sysctl net.ipv4.tcp_tw_recycle=1
            sudo sysctl -w kernel.shmmax=2147483648
            sudo sysctl -w kernel.shmall=2097152
            sudo sysctl -w kernel.sem="250 32000 256 512"
            """
        ])
        subprocess.check_call(command, stdout=FNULL, stderr=subprocess.STDOUT)
        # TODO - print kernel configuration to file
        # echo "Printing kernel configuration:" && sudo sysctl -a
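        # A minimal sketch of that TODO, assuming the same SSH-command and
        # results-directory conventions used elsewhere in this class (the
        # file name 'database-sysctl.txt' is hypothetical):
        #
        #   cfg_command = list(self.config.database_ssh_command)
        #   cfg_command.append('sudo sysctl -a')
        #   with open(os.path.join(self.results.directory,
        #                          'database-sysctl.txt'), 'w') as f:
        #       f.write(subprocess.check_output(
        #           cfg_command, stderr=subprocess.STDOUT))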

    def __setup_client(self):
        '''
        Makes any necessary changes to the client machine that should be
        made before running the tests. It is very similar to the server
        setup, but may also include client-specific changes.
        '''
        command = list(self.config.client_ssh_command)
        command.extend([
            """
            sudo sysctl -w net.ipv4.tcp_max_syn_backlog=65535
            sudo sysctl -w net.core.somaxconn=65535
            sudo -s ulimit -n 65535
            sudo sysctl net.ipv4.tcp_tw_reuse=1
            sudo sysctl net.ipv4.tcp_tw_recycle=1
            sudo sysctl -w kernel.shmmax=2147483648
            sudo sysctl -w kernel.shmall=2097152
            """
        ])
        subprocess.check_call(command, stdout=FNULL, stderr=subprocess.STDOUT)

    def __run_test(self, test, benchmark_log):
        '''
        Runs the given test, verifies that the webapp is accepting requests,
        optionally benchmarks the webapp, and ultimately stops all services
        started for this test.
        '''
        log_prefix = "%s: " % test.name

        if test.os.lower() != self.config.os.lower() or test.database_os.lower(
        ) != self.config.database_os.lower():
            log("OS or Database OS specified in benchmark_config.json does not match the current environment. Skipping.",
                prefix=log_prefix,
                file=benchmark_log)
            return False

        # If the test is in the excludes list, we skip it
        if self.config.exclude is not None and test.name in self.config.exclude:
            log("Test {name} has been added to the excludes list. Skipping.".
                format(name=test.name),
                prefix=log_prefix,
                file=benchmark_log)
            return False

        database_container_id = None
        try:
            if self.__is_port_bound(test.port):
                time.sleep(60)
                if self.__is_port_bound(test.port):
                    # We gave it our all
                    self.results.write_intermediate(test.name, "port " + str(
                        test.port) + " is not available before start")
                    log("Error: Port %s is not available, cannot start %s" %
                        (test.port, test.name),
                        prefix=log_prefix,
                        file=benchmark_log,
                        color=Fore.RED)
                    return False

            # Start database container
            if test.database.lower() != "none":
                database_container_id = docker_helper.start_database(
                    self.config, test.database.lower())
                if not database_container_id:
                    self.results.write_intermediate(test.name,
                                                    "ERROR: Problem starting")
                    log("ERROR: Problem building/running database container",
                        prefix=log_prefix,
                        file=benchmark_log,
                        color=Fore.RED)
                    return False

            # Start webapp
            result = test.start(database_container_id)
            if result != 0:
                docker_helper.stop(self.config, database_container_id, test)
                self.results.write_intermediate(test.name,
                                                "ERROR: Problem starting")
                log("ERROR: Problem starting {name}".format(name=test.name),
                    prefix=log_prefix,
                    file=benchmark_log,
                    color=Fore.RED)
                return False

            slept = 0
            max_sleep = 60
            while not test.is_running() and slept < max_sleep:
                if not docker_helper.successfully_running_containers(
                        test.get_docker_files(), benchmark_log):
                    docker_helper.stop(self.config, database_container_id,
                                       test)
                    log("ERROR: One or more expected docker containers exited early",
                        prefix=log_prefix,
                        file=benchmark_log,
                        color=Fore.RED)
                    return False
                time.sleep(1)
                slept += 1

            # Debug mode blocks execution here until ctrl+c
            if self.config.mode == "debug":
                log("Entering debug mode. Server has started. CTRL-c to stop.",
                    prefix=log_prefix,
                    file=benchmark_log,
                    color=Fore.YELLOW)
                while True:
                    time.sleep(1)

            # Verify URLs
            log("Verifying framework URLs", prefix=log_prefix)
            passed_verify = test.verify_urls()

            # Benchmark this test
            if self.config.mode == "benchmark":
                log("Benchmarking %s" % test.name,
                    file=benchmark_log,
                    border='-')
                self.__benchmark(test, benchmark_log)

            # Stop this test
            docker_helper.stop(self.config, database_container_id, test)

            # Remove contents of /tmp folder
            try:
                subprocess.check_call(
                    'sudo rm -rf /tmp/*',
                    shell=True,
                    stderr=benchmark_log,
                    stdout=benchmark_log)
            except Exception:
                log("Error: Could not empty /tmp",
                    file=benchmark_log,
                    color=Fore.RED)

            # Save results thus far into the latest results directory
            self.results.write_intermediate(test.name,
                                            time.strftime(
                                                "%Y%m%d%H%M%S",
                                                time.localtime()))

            # Upload the results thus far to another server (optional)
            self.results.upload()

            if self.config.mode == "verify" and not passed_verify:
                log("Failed verify!",
                    prefix=log_prefix,
                    file=benchmark_log,
                    color=Fore.RED)
                return False
        except KeyboardInterrupt:
            docker_helper.stop(self.config, database_container_id, test)
        except (OSError, IOError, subprocess.CalledProcessError) as e:
            tb = traceback.format_exc()
            self.results.write_intermediate(
                test.name, "error during test setup: " + str(e))
            log("Subprocess Error %s" % test.name,
                file=benchmark_log,
                border='-',
                color=Fore.RED)
            log(tb, prefix=log_prefix, file=benchmark_log)
            return False

        return True

    def __benchmark(self, framework_test, benchmark_log):
        '''
        Runs the benchmark for each type of test that it implements
        '''

        def benchmark_type(test_type):
            log("BENCHMARKING %s ... " % test_type.upper(), file=benchmark_log)

            test = framework_test.runTests[test_type]
            test.setup_out(benchmark_log)
            raw_file = self.results.get_raw_file(framework_test.name,
                                                 test_type)
            if not os.path.exists(raw_file):
                # Open to create the empty file
                with open(raw_file, 'w'):
                    pass

            if not test.failed:
                remote_script = self.config.types[test_type].get_remote_script(
                    self.config, test.name, test.get_url(),
                    framework_test.port)

                # Begin resource usage metrics collection
                self.__begin_logging(framework_test, test_type)

                # Run the benchmark
                with open(raw_file, 'w') as raw_file:
                    p = subprocess.Popen(
                        self.config.client_ssh_command,
                        stdin=subprocess.PIPE,
                        stdout=raw_file,
                        stderr=raw_file)
                    p.communicate(remote_script)

                # End resource usage metrics collection
                self.__end_logging()

            results = self.results.parse_test(framework_test, test_type)
            log("Benchmark results:", file=benchmark_log)
            # TODO move into log somehow
            pprint(results)

            self.results.report_benchmark_results(framework_test, test_type,
                                                  results['results'])
            log("Complete", file=benchmark_log)

        for test_type in framework_test.runTests:
            benchmark_type(test_type)

    def __begin_logging(self, framework_test, test_type):
        '''
        Starts a dstat subprocess to monitor resource usage, to be synced
        with the client's time.
        TODO: MySQL and InnoDB are possible. Figure out how to implement them.
        '''
        output_file = self.results.get_stats_file(framework_test.name,
                                                  test_type)
        dstat_string = "dstat -Tafilmprs --aio --fs --ipc --lock --raw \
            --socket --tcp --udp --unix --vm --disk-util \
            --rpc --rpcd --output {output_file}".format(
                output_file=output_file)
        cmd = shlex.split(dstat_string)
        self.subprocess_handle = subprocess.Popen(
            cmd, stdout=FNULL, stderr=subprocess.STDOUT)

    def __end_logging(self):
        '''
        Stops the dstat subprocess and blocks until shutdown is complete.
        '''
        self.subprocess_handle.terminate()
        self.subprocess_handle.communicate()
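
    # dstat's --output flag writes one CSV row per sample to the stats file.
    # A minimal sketch of reading it back (the toolset's real parsing happens
    # inside self.results; this is illustrative only):
    #
    #   import csv
    #   with open(output_file) as stats:
    #       for row in csv.reader(stats):
    #           pass  # one resource-usage sample per row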

    def __is_port_bound(self, port):
        '''
        Check if the requested port is available. If it isn't available, then
        a previous test probably didn't shut down properly.
        '''
        port = int(port)
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            # Try to bind to all IP addresses, this port
            s.bind(("", port))
            # If we get here, we were able to bind successfully,
            # which means the port is free.
        except socket.error:
            # If we get an exception, it might be because the port is still bound
            # which would be bad, or maybe it is a privileged port (<1024) and we
            # are not running as root, or maybe the server is gone, but sockets are
            # still in TIME_WAIT (SO_REUSEADDR). To determine which scenario, try to
            # connect.
            try:
                s.connect(("127.0.0.1", port))
                # If we get here, we were able to connect to something, which means
                # that the port is still bound.
                return True
            except socket.error:
                # An exception means that we couldn't connect, so a server probably
                # isn't still running on the port.
                pass
        finally:
            s.close()

        return False
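

# A typical invocation, as a sketch only: the real entry point lives in the
# toolset's run script, and the BenchmarkConfig/Results names below are
# assumptions, not verified against this codebase:
#
#   config = BenchmarkConfig(args)
#   results = Results(config)
#   benchmarker = Benchmarker(config, results)
#   success = benchmarker.run()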