docker_helper.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. import os
  2. import socket
  3. import json
  4. import docker
  5. import time
  6. import re
  7. import traceback
  8. from threading import Thread
  9. from colorama import Fore, Style
  10. from toolset.utils.output_helper import log
  11. from toolset.databases import databases
  12. from psutil import virtual_memory
  13. class DockerHelper:
  14. def __init__(self, benchmarker=None):
  15. self.benchmarker = benchmarker
  16. self.client = docker.DockerClient(
  17. base_url=self.benchmarker.config.client_docker_host)
  18. self.server = docker.DockerClient(
  19. base_url=self.benchmarker.config.server_docker_host)
  20. self.database = docker.DockerClient(
  21. base_url=self.benchmarker.config.database_docker_host)
  22. def __build(self, base_url, path, build_log_file, log_prefix, dockerfile,
  23. tag, buildargs={}):
  24. '''
  25. Builds docker containers using docker-py low-level api
  26. '''
  27. self.benchmarker.time_logger.mark_build_start()
  28. with open(build_log_file, 'w') as build_log:
  29. try:
  30. client = docker.APIClient(base_url=base_url)
  31. output = client.build(
  32. path=path,
  33. dockerfile=dockerfile,
  34. tag=tag,
  35. forcerm=True,
  36. timeout=3600,
  37. pull=True,
  38. buildargs=buildargs,
  39. decode=True
  40. )
  41. buffer = ""
  42. for token in output:
  43. if 'stream' in token:
  44. buffer += token[list(token.keys())[0]]
  45. elif 'errorDetail' in token:
  46. raise Exception(token['errorDetail']['message'])
  47. while "\n" in buffer:
  48. index = buffer.index("\n")
  49. line = buffer[:index]
  50. buffer = buffer[index + 1:]
  51. log(line,
  52. prefix=log_prefix,
  53. file=build_log,
  54. color=Fore.WHITE + Style.BRIGHT \
  55. if re.match(r'^Step \d+\/\d+', line) else '')
  56. # Kill docker builds if they exceed 60 mins. This will only
  57. # catch builds that are still printing output.
  58. if self.benchmarker.time_logger.time_since_start() > 3600:
  59. log("Build time exceeded 60 minutes",
  60. prefix=log_prefix,
  61. file=build_log,
  62. color=Fore.RED)
  63. raise Exception
  64. if buffer:
  65. log(buffer,
  66. prefix=log_prefix,
  67. file=build_log,
  68. color=Fore.WHITE + Style.BRIGHT \
  69. if re.match(r'^Step \d+\/\d+', buffer) else '')
  70. except Exception:
  71. tb = traceback.format_exc()
  72. log("Docker build failed; terminating",
  73. prefix=log_prefix,
  74. file=build_log,
  75. color=Fore.RED)
  76. log(tb, prefix=log_prefix, file=build_log)
  77. self.benchmarker.time_logger.log_build_end(
  78. log_prefix=log_prefix, file=build_log)
  79. raise
  80. self.benchmarker.time_logger.log_build_end(
  81. log_prefix=log_prefix, file=build_log)
  82. def clean(self):
  83. '''
  84. Cleans all the docker test images from the system and prunes
  85. '''
  86. for image in self.server.images.list():
  87. if len(image.tags) > 0:
  88. if 'tfb.test.' in image.tags[0]:
  89. try:
  90. self.server.images.remove(image.id, force=True)
  91. except Exception:
  92. pass
  93. self.server.images.prune()
  94. self.database.images.prune()
  95. def build(self, test, build_log_dir=os.devnull):
  96. '''
  97. Builds the test docker containers
  98. '''
  99. log_prefix = "%s: " % test.name
  100. # Build the test image
  101. test_docker_file = '%s.dockerfile' % test.name
  102. if hasattr(test, 'dockerfile'):
  103. test_docker_file = test.dockerfile
  104. test_database = ''
  105. if hasattr(test, 'database'):
  106. test_database = test.database
  107. build_log_file = build_log_dir
  108. if build_log_dir is not os.devnull:
  109. build_log_file = os.path.join(
  110. build_log_dir,
  111. "%s.log" % test_docker_file.replace(".dockerfile", "").lower())
  112. try:
  113. self.__build(
  114. base_url=self.benchmarker.config.server_docker_host,
  115. build_log_file=build_log_file,
  116. log_prefix=log_prefix,
  117. path=test.directory,
  118. dockerfile=test_docker_file,
  119. buildargs=({
  120. 'BENCHMARK_ENV':
  121. self.benchmarker.config.results_environment,
  122. 'TFB_TEST_NAME': test.name,
  123. 'TFB_TEST_DATABASE': test_database
  124. }),
  125. tag="techempower/tfb.test.%s" % test.name)
  126. except Exception:
  127. return 1
  128. return 0
  129. def run(self, test, run_log_dir):
  130. '''
  131. Run the given Docker container(s)
  132. '''
  133. log_prefix = "%s: " % test.name
  134. container = None
  135. try:
  136. def watch_container(docker_container, docker_file):
  137. with open(
  138. os.path.join(
  139. run_log_dir, "%s.log" % docker_file.replace(
  140. ".dockerfile", "").lower()), 'w') as run_log:
  141. for line in docker_container.logs(stream=True):
  142. log(line.decode(), prefix=log_prefix, file=run_log)
  143. extra_hosts = None
  144. name = "tfb-server"
  145. if self.benchmarker.config.network is None:
  146. extra_hosts = {
  147. socket.gethostname():
  148. str(self.benchmarker.config.server_host),
  149. 'tfb-server':
  150. str(self.benchmarker.config.server_host),
  151. 'tfb-database':
  152. str(self.benchmarker.config.database_host)
  153. }
  154. name = None
  155. if self.benchmarker.config.network_mode is None:
  156. sysctl = {'net.core.somaxconn': 65535}
  157. else:
  158. # Do not pass `net.*` kernel params when using host network mode
  159. sysctl = None
  160. ulimit = [{
  161. 'name': 'nofile',
  162. 'hard': 200000,
  163. 'soft': 200000
  164. }, {
  165. 'name': 'rtprio',
  166. 'hard': 99,
  167. 'soft': 99
  168. }]
  169. cpuset_cpus = ''
  170. if self.benchmarker.config.cpuset_cpus is not None:
  171. cpuset_cpus = self.benchmarker.config.cpuset_cpus
  172. log("Running docker container with cpu set: %s" %cpuset_cpus)
  173. docker_cmd = ''
  174. if hasattr(test, 'docker_cmd'):
  175. docker_cmd = test.docker_cmd
  176. # Expose ports in debugging mode
  177. ports = {}
  178. environment = {}
  179. if self.benchmarker.config.mode == "debug":
  180. environment['DEBUG'] = 'true'
  181. ports = {test.port: test.port}
  182. # This allows to expose a debugger port to attach
  183. # to the webserver from IDE
  184. if hasattr(test, 'debug_port'):
  185. ports[test.debug_port] = test.debug_port
  186. # Total memory limit allocated for the test container
  187. if self.benchmarker.config.test_container_memory is not None:
  188. mem_limit = self.benchmarker.config.test_container_memory
  189. else:
  190. mem_limit = int(round(virtual_memory().total * .95))
  191. # Convert extra docker runtime args to a dictionary
  192. extra_docker_args = {}
  193. if self.benchmarker.config.extra_docker_runtime_args is not None:
  194. extra_docker_args = {key: int(value) if value.isdigit() else value for key, value in (pair.split(":", 1) for pair in self.benchmarker.config.extra_docker_runtime_args)}
  195. container = self.server.containers.run(
  196. "techempower/tfb.test.%s" % test.name,
  197. name=name,
  198. command=docker_cmd,
  199. network=self.benchmarker.config.network,
  200. network_mode=self.benchmarker.config.network_mode,
  201. ports=ports,
  202. environment=environment,
  203. stderr=True,
  204. detach=True,
  205. init=True,
  206. extra_hosts=extra_hosts,
  207. privileged=True,
  208. ulimits=ulimit,
  209. mem_limit=mem_limit,
  210. sysctls=sysctl,
  211. remove=True,
  212. log_config={'type': None},
  213. cpuset_cpus=cpuset_cpus,
  214. **extra_docker_args
  215. )
  216. watch_thread = Thread(
  217. target=watch_container,
  218. args=(
  219. container,
  220. "%s.dockerfile" % test.name,
  221. ))
  222. watch_thread.daemon = True
  223. watch_thread.start()
  224. except Exception:
  225. with open(
  226. os.path.join(run_log_dir, "%s.log" % test.name.lower()),
  227. 'w') as run_log:
  228. tb = traceback.format_exc()
  229. log("Running docker container: %s.dockerfile failed" %
  230. test.name,
  231. prefix=log_prefix,
  232. file=run_log)
  233. log(tb, prefix=log_prefix, file=run_log)
  234. return container
  235. @staticmethod
  236. def __stop_container(container):
  237. try:
  238. container.stop(timeout=2)
  239. time.sleep(2)
  240. except:
  241. # container has already been killed
  242. pass
  243. @staticmethod
  244. def __stop_all(docker_client):
  245. for container in docker_client.containers.list():
  246. if len(container.image.tags) > 0 \
  247. and 'techempower' in container.image.tags[0] \
  248. and 'tfb:latest' not in container.image.tags[0]:
  249. DockerHelper.__stop_container(container)
  250. def stop(self, containers=None):
  251. '''
  252. Attempts to stop a container or list of containers.
  253. If no containers are passed, stops all running containers.
  254. '''
  255. is_multi_setup = self.benchmarker.config.server_docker_host != \
  256. self.benchmarker.config.database_docker_host
  257. if containers:
  258. if not isinstance(containers, list):
  259. containers = [containers]
  260. for container in containers:
  261. DockerHelper.__stop_container(container)
  262. else:
  263. DockerHelper.__stop_all(self.server)
  264. if is_multi_setup:
  265. DockerHelper.__stop_all(self.database)
  266. DockerHelper.__stop_all(self.client)
  267. self.database.containers.prune()
  268. if is_multi_setup:
  269. # Then we're on a 3 machine set up
  270. self.server.containers.prune()
  271. self.client.containers.prune()
  272. def build_databases(self):
  273. '''
  274. Builds all the databases necessary to run the list of benchmarker tests
  275. '''
  276. built = []
  277. for test in self.benchmarker.tests:
  278. db = test.database.lower()
  279. if db not in built and db != "none":
  280. image_name = "techempower/%s:latest" % db
  281. log_prefix = image_name + ": "
  282. database_dir = os.path.join(self.benchmarker.config.db_root,
  283. db)
  284. docker_file = "%s.dockerfile" % db
  285. self.__build(
  286. base_url=self.benchmarker.config.database_docker_host,
  287. path=database_dir,
  288. dockerfile=docker_file,
  289. log_prefix=log_prefix,
  290. build_log_file=os.devnull,
  291. tag="techempower/%s" % db)
  292. built.append(db)
  293. def start_database(self, database):
  294. '''
  295. Sets up a container for the given database and port, and starts said docker
  296. container.
  297. '''
  298. image_name = "techempower/%s:latest" % database
  299. log_prefix = image_name + ": "
  300. if self.benchmarker.config.network_mode is None:
  301. sysctl = {
  302. 'net.core.somaxconn': 65535,
  303. 'kernel.sem': "250 32000 256 512"
  304. }
  305. else:
  306. # Do not pass `net.*` kernel params when using host network mode
  307. sysctl = {
  308. 'kernel.sem': "250 32000 256 512"
  309. }
  310. ulimit = [{'name': 'nofile', 'hard': 65535, 'soft': 65535}]
  311. container = self.database.containers.run(
  312. "techempower/%s" % database,
  313. name="tfb-database",
  314. network=self.benchmarker.config.network,
  315. network_mode=self.benchmarker.config.network_mode,
  316. detach=True,
  317. ulimits=ulimit,
  318. sysctls=sysctl,
  319. remove=True,
  320. log_config={'type': None})
  321. # Sleep until the database accepts connections
  322. slept = 0
  323. max_sleep = 60
  324. database_ready = False
  325. while not database_ready and slept < max_sleep:
  326. time.sleep(1)
  327. slept += 1
  328. database_ready = databases[database].test_connection(self.benchmarker.config)
  329. if not database_ready:
  330. log("Database was not ready after startup", prefix=log_prefix)
  331. return container
  332. def build_wrk(self):
  333. '''
  334. Builds the techempower/tfb.wrk container
  335. '''
  336. self.__build(
  337. base_url=self.benchmarker.config.client_docker_host,
  338. path=self.benchmarker.config.wrk_root,
  339. dockerfile="wrk.dockerfile",
  340. log_prefix="wrk: ",
  341. build_log_file=os.devnull,
  342. tag="techempower/tfb.wrk")
  343. def test_client_connection(self, url):
  344. '''
  345. Tests that the app server at the given url responds successfully to a
  346. request.
  347. '''
  348. try:
  349. self.client.containers.run(
  350. 'techempower/tfb.wrk',
  351. 'curl --fail --max-time 5 %s' % url,
  352. remove=True,
  353. log_config={'type': None},
  354. network=self.benchmarker.config.network,
  355. network_mode=self.benchmarker.config.network_mode)
  356. except Exception:
  357. return False
  358. return True
  359. def server_container_exists(self, container_id_or_name):
  360. '''
  361. Returns True if the container still exists on the server.
  362. '''
  363. try:
  364. self.server.containers.get(container_id_or_name)
  365. return True
  366. except:
  367. return False
  368. def benchmark(self, script, variables):
  369. '''
  370. Runs the given remote_script on the wrk container on the client machine.
  371. '''
  372. if self.benchmarker.config.network_mode is None:
  373. sysctl = {'net.core.somaxconn': 65535}
  374. else:
  375. # Do not pass `net.*` kernel params when using host network mode
  376. sysctl = None
  377. ulimit = [{'name': 'nofile', 'hard': 65535, 'soft': 65535}]
  378. return self.client.containers.run(
  379. "techempower/tfb.wrk",
  380. "/bin/bash /%s" % script,
  381. environment=variables,
  382. network=self.benchmarker.config.network,
  383. network_mode=self.benchmarker.config.network_mode,
  384. detach=True,
  385. stderr=True,
  386. ulimits=ulimit,
  387. sysctls=sysctl,
  388. remove=True,
  389. log_config={'type': None})