# docker_helper.py
  1. import os
  2. import socket
  3. import json
  4. import docker
  5. import time
  6. import re
  7. import traceback
  8. from threading import Thread
  9. from colorama import Fore, Style
  10. from toolset.utils.database_helper import test_database
  11. from psutil import virtual_memory
  12. # total memory limit allocated for the test container
  13. mem_limit = int(round(virtual_memory().total * .95))
  14. class DockerHelper:
  15. def __init__(self, benchmarker=None):
  16. self.benchmarker = benchmarker
  17. self.log = benchmarker.log
  18. self.client = docker.DockerClient(
  19. base_url=self.benchmarker.config.client_docker_host)
  20. self.server = docker.DockerClient(
  21. base_url=self.benchmarker.config.server_docker_host)
  22. self.database = docker.DockerClient(
  23. base_url=self.benchmarker.config.database_docker_host)
  24. def __build(self, base_url, path, build_log_file, log_prefix, dockerfile,
  25. tag, buildargs={}):
  26. '''
  27. Builds docker containers using docker-py low-level api
  28. '''
  29. self.benchmarker.time_logger.mark_build_start()
  30. with open(build_log_file, 'w') as build_log:
  31. try:
  32. client = docker.APIClient(base_url=base_url)
  33. output = client.build(
  34. path=path,
  35. dockerfile=dockerfile,
  36. tag=tag,
  37. forcerm=True,
  38. timeout=3600,
  39. pull=True,
  40. buildargs=buildargs
  41. )
  42. buffer = ""
  43. for token in output:
  44. if token.startswith('{"stream":'):
  45. token = json.loads(token)
  46. token = token[token.keys()[0]].encode('utf-8')
  47. buffer += token
  48. elif token.startswith('{"errorDetail":'):
  49. token = json.loads(token)
  50. raise Exception(token['errorDetail']['message'])
  51. while "\n" in buffer:
  52. index = buffer.index("\n")
  53. line = buffer[:index]
  54. buffer = buffer[index + 1:]
  55. self.log(line,
  56. prefix=log_prefix,
  57. file=build_log,
  58. color=Fore.WHITE + Style.BRIGHT \
  59. if re.match(r'^Step \d+\/\d+', line) else '')
  60. # Kill docker builds if they exceed 60 mins. This will only
  61. # catch builds that are still printing output.
  62. if self.benchmarker.time_logger.time_since_start() > 3600:
  63. self.log("Build time exceeded 60 minutes",
  64. prefix=log_prefix,
  65. file=build_log,
  66. color=Fore.RED)
  67. raise Exception
  68. if buffer:
  69. self.log(buffer,
  70. prefix=log_prefix,
  71. file=build_log,
  72. color=Fore.WHITE + Style.BRIGHT \
  73. if re.match(r'^Step \d+\/\d+', buffer) else '')
  74. except Exception:
  75. tb = traceback.format_exc()
  76. self.log("Docker build failed; terminating",
  77. prefix=log_prefix,
  78. file=build_log,
  79. color=Fore.RED)
  80. self.log(tb,
  81. squash=False,
  82. prefix=log_prefix,
  83. file=build_log)
  84. self.benchmarker.time_logger.log_build_end(
  85. log_prefix=log_prefix, file=build_log)
  86. raise
  87. self.benchmarker.time_logger.log_build_end(
  88. log_prefix=log_prefix, file=build_log)
  89. def clean(self):
  90. '''
  91. Cleans all the docker images from the system
  92. '''
  93. self.server.images.prune()
  94. for image in self.server.images.list():
  95. if len(image.tags) > 0:
  96. # 'techempower/tfb.test.gemini:0.1' -> 'techempower/tfb.test.gemini'
  97. image_tag = image.tags[0].split(':')[0]
  98. if image_tag != 'techempower/tfb' and 'techempower' in image_tag:
  99. self.server.images.remove(image.id, force=True)
  100. self.server.images.prune()
  101. self.database.images.prune()
  102. for image in self.database.images.list():
  103. if len(image.tags) > 0:
  104. # 'techempower/tfb.test.gemini:0.1' -> 'techempower/tfb.test.gemini'
  105. image_tag = image.tags[0].split(':')[0]
  106. if image_tag != 'techempower/tfb' and 'techempower' in image_tag:
  107. self.database.images.remove(image.id, force=True)
  108. self.database.images.prune()
  109. def build(self, test, build_log_dir=os.devnull):
  110. '''
  111. Builds the test docker containers
  112. '''
  113. log_prefix = "%s: " % test.name
  114. # Build the test image
  115. test_docker_file = '%s.dockerfile' % test.name
  116. if hasattr(test, 'dockerfile'):
  117. test_docker_file = test.dockerfile
  118. test_database = ''
  119. if hasattr(test, 'database'):
  120. test_database = test.database
  121. build_log_file = build_log_dir
  122. if build_log_dir is not os.devnull:
  123. build_log_file = os.path.join(
  124. build_log_dir,
  125. "%s.log" % test_docker_file.replace(".dockerfile", "").lower())
  126. try:
  127. self.__build(
  128. base_url=self.benchmarker.config.server_docker_host,
  129. build_log_file=build_log_file,
  130. log_prefix=log_prefix,
  131. path=test.directory,
  132. dockerfile=test_docker_file,
  133. buildargs=({
  134. 'BENCHMARK_ENV':
  135. self.benchmarker.config.results_environment,
  136. 'TFB_TEST_NAME': test.name,
  137. 'TFB_TEST_DATABASE': test_database
  138. }),
  139. tag="techempower/tfb.test.%s" % test.name)
  140. except Exception:
  141. return 1
  142. return 0
  143. def run(self, test, run_log_dir):
  144. '''
  145. Run the given Docker container(s)
  146. '''
  147. log_prefix = "%s: " % test.name
  148. container = None
  149. try:
  150. def watch_container(docker_container, docker_file):
  151. with open(
  152. os.path.join(
  153. run_log_dir, "%s.log" % docker_file.replace(
  154. ".dockerfile", "").lower()), 'w') as run_log:
  155. for line in docker_container.logs(stream=True):
  156. self.log(line, prefix=log_prefix, file=run_log)
  157. extra_hosts = None
  158. name = "tfb-server"
  159. if self.benchmarker.config.network is None:
  160. extra_hosts = {
  161. socket.gethostname():
  162. str(self.benchmarker.config.server_host),
  163. 'tfb-server':
  164. str(self.benchmarker.config.server_host),
  165. 'tfb-database':
  166. str(self.benchmarker.config.database_host)
  167. }
  168. name = None
  169. sysctl = {'net.core.somaxconn': 65535}
  170. ulimit = [{
  171. 'name': 'nofile',
  172. 'hard': 200000,
  173. 'soft': 200000
  174. }, {
  175. 'name': 'rtprio',
  176. 'hard': 99,
  177. 'soft': 99
  178. }]
  179. docker_cmd = ''
  180. if hasattr(test, 'docker_cmd'):
  181. docker_cmd = test.docker_cmd
  182. container = self.server.containers.run(
  183. "techempower/tfb.test.%s" % test.name,
  184. name=name,
  185. command=docker_cmd,
  186. network=self.benchmarker.config.network,
  187. network_mode=self.benchmarker.config.network_mode,
  188. stderr=True,
  189. detach=True,
  190. init=True,
  191. extra_hosts=extra_hosts,
  192. privileged=True,
  193. ulimits=ulimit,
  194. mem_limit=mem_limit,
  195. sysctls=sysctl,
  196. remove=True,
  197. log_config={'type': None})
  198. watch_thread = Thread(
  199. target=watch_container,
  200. args=(
  201. container,
  202. "%s.dockerfile" % test.name,
  203. ))
  204. watch_thread.daemon = True
  205. watch_thread.start()
  206. except Exception:
  207. with open(
  208. os.path.join(run_log_dir, "%s.log" % test.name.lower()),
  209. 'w') as run_log:
  210. tb = traceback.format_exc()
  211. self.log("Running docker container: %s.dockerfile failed" %
  212. test.name,
  213. prefix=log_prefix,
  214. file=run_log)
  215. self.log(tb,
  216. squash=False,
  217. prefix=log_prefix,
  218. file=run_log)
  219. return container
  220. @staticmethod
  221. def __stop_container(container):
  222. try:
  223. container.kill()
  224. time.sleep(2)
  225. except:
  226. # container has already been killed
  227. pass
  228. @staticmethod
  229. def __stop_all(docker_client):
  230. for container in docker_client.containers.list():
  231. if len(container.image.tags) > 0 \
  232. and 'techempower' in container.image.tags[0] \
  233. and 'tfb:latest' not in container.image.tags[0]:
  234. DockerHelper.__stop_container(container)
  235. def stop(self, containers=None):
  236. '''
  237. Attempts to stop a container or list of containers.
  238. If no containers are passed, stops all running containers.
  239. '''
  240. is_multi_setup = self.benchmarker.config.server_docker_host != \
  241. self.benchmarker.config.database_docker_host
  242. if containers:
  243. if not isinstance(containers, list):
  244. containers = [containers]
  245. for container in containers:
  246. DockerHelper.__stop_container(container)
  247. else:
  248. self.__stop_all(self.server)
  249. if is_multi_setup:
  250. self.__stop_all(self.database)
  251. self.database.containers.prune()
  252. if is_multi_setup:
  253. # Then we're on a 3 machine set up
  254. self.server.containers.prune()
  255. self.client.containers.prune()
  256. def build_databases(self):
  257. '''
  258. Builds all the databases necessary to run the list of benchmarker tests
  259. '''
  260. built = []
  261. for test in self.benchmarker.tests:
  262. db = test.database.lower()
  263. if db not in built and db != "none":
  264. image_name = "techempower/%s:latest" % db
  265. log_prefix = image_name + ": "
  266. database_dir = os.path.join(self.benchmarker.config.db_root,
  267. db)
  268. docker_file = "%s.dockerfile" % db
  269. self.__build(
  270. base_url=self.benchmarker.config.database_docker_host,
  271. path=database_dir,
  272. dockerfile=docker_file,
  273. log_prefix=log_prefix,
  274. build_log_file=os.devnull,
  275. tag="techempower/%s" % db)
  276. built.append(db)
  277. def start_database(self, database):
  278. '''
  279. Sets up a container for the given database and port, and starts said docker
  280. container.
  281. '''
  282. image_name = "techempower/%s:latest" % database
  283. log_prefix = image_name + ": "
  284. sysctl = {
  285. 'net.core.somaxconn': 65535,
  286. 'kernel.sem': "250 32000 256 512"
  287. }
  288. ulimit = [{'name': 'nofile', 'hard': 65535, 'soft': 65535}]
  289. container = self.database.containers.run(
  290. "techempower/%s" % database,
  291. name="tfb-database",
  292. network=self.benchmarker.config.network,
  293. network_mode=self.benchmarker.config.network_mode,
  294. detach=True,
  295. ulimits=ulimit,
  296. sysctls=sysctl,
  297. remove=True,
  298. log_config={'type': None})
  299. # Sleep until the database accepts connections
  300. slept = 0
  301. max_sleep = 60
  302. database_ready = False
  303. while not database_ready and slept < max_sleep:
  304. time.sleep(1)
  305. slept += 1
  306. database_ready = test_database(self.benchmarker.config, database)
  307. if not database_ready:
  308. self.log("Database was not ready after startup", prefix=log_prefix)
  309. return container
  310. def build_wrk(self):
  311. '''
  312. Builds the techempower/tfb.wrk container
  313. '''
  314. self.__build(
  315. base_url=self.benchmarker.config.client_docker_host,
  316. path=self.benchmarker.config.wrk_root,
  317. dockerfile="wrk.dockerfile",
  318. log_prefix="wrk: ",
  319. build_log_file=os.devnull,
  320. tag="techempower/tfb.wrk")
  321. def test_client_connection(self, url):
  322. '''
  323. Tests that the app server at the given url responds successfully to a
  324. request.
  325. '''
  326. try:
  327. self.client.containers.run(
  328. 'techempower/tfb.wrk',
  329. 'curl --fail --max-time 5 %s' % url,
  330. remove=True,
  331. log_config={'type': None},
  332. network=self.benchmarker.config.network,
  333. network_mode=self.benchmarker.config.network_mode)
  334. except Exception:
  335. return False
  336. return True
  337. def server_container_exists(self, container_id_or_name):
  338. '''
  339. Returns True if the container still exists on the server.
  340. '''
  341. try:
  342. self.server.containers.get(container_id_or_name)
  343. return True
  344. except:
  345. return False
  346. def benchmark(self, script, variables, raw_file):
  347. '''
  348. Runs the given remote_script on the wrk container on the client machine.
  349. '''
  350. def watch_container(container):
  351. with open(raw_file, 'w') as benchmark_file:
  352. for line in container.logs(stream=True):
  353. self.log(line, file=benchmark_file)
  354. sysctl = {'net.core.somaxconn': 65535}
  355. ulimit = [{'name': 'nofile', 'hard': 65535, 'soft': 65535}]
  356. watch_container(
  357. self.client.containers.run(
  358. "techempower/tfb.wrk",
  359. "/bin/bash /%s" % script,
  360. environment=variables,
  361. network=self.benchmarker.config.network,
  362. network_mode=self.benchmarker.config.network_mode,
  363. detach=True,
  364. stderr=True,
  365. ulimits=ulimit,
  366. sysctls=sysctl,
  367. remove=True,
  368. log_config={'type': None}))