run-ci.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. #!/usr/bin/env python
  2. import subprocess
  3. import os
  4. import sys
  5. from benchmark import framework_test
  6. from benchmark.utils import gather_tests
  7. import glob
  8. import json
  9. import traceback
  10. import re
  11. import logging
  12. log = logging.getLogger('run-ci')
  13. import time
  14. import threading
  15. # Needed for various imports
  16. sys.path.append('.')
  17. sys.path.append('toolset/setup/linux')
  18. sys.path.append('toolset/benchmark')
  19. class CIRunnner:
  20. '''
  21. Manages running TFB on the Travis Continuous Integration system.
  22. Makes a best effort to avoid wasting time and resources by running
  23. useless jobs.
  24. Only verifies the first test in each directory
  25. '''
  26. def __init__(self, mode, testdir=None):
  27. '''
  28. mode = [cisetup|jobcleaner|prereq|install|verify] for what we want to do
  29. testdir = framework directory we are running
  30. '''
  31. logging.basicConfig(level=logging.INFO)
  32. self.directory = testdir
  33. self.name = testdir # Temporary value, reset below
  34. self.mode = mode
  35. self.should_run_cache = None
  36. self.travis = Travis()
  37. try:
  38. # See http://git.io/hs_qRQ
  39. # TRAVIS_COMMIT_RANGE is empty for pull requests
  40. if self.travis.is_pull_req:
  41. self.commit_range = "%s..FETCH_HEAD" % os.environ['TRAVIS_BRANCH'].rstrip('\n')
  42. else:
  43. self.commit_range = os.environ['TRAVIS_COMMIT_RANGE']
  44. except KeyError:
  45. log.warning("I should only be used for automated integration tests e.g. Travis-CI")
  46. log.warning("Were you looking for run-tests.py?")
  47. last_commit = subprocess.check_output("git rev-parse HEAD^", shell=True).rstrip('\n')
  48. self.commit_range = "%s...HEAD" % last_commit
  49. log.info("Using commit range %s", self.commit_range)
  50. log.info("Running `git diff --name-only %s`" % self.commit_range)
  51. changes = subprocess.check_output("git diff --name-only %s" % self.commit_range, shell=True)
  52. log.info(changes)
  53. # Nothing else to setup
  54. if mode == 'cisetup' or mode == 'jobcleaner' or mode == 'prereq':
  55. return
  56. # Should we bother to continue
  57. if not self._should_run():
  58. return
  59. #
  60. # Find the one test from benchmark_config that we are going to run
  61. #
  62. tests = gather_tests()
  63. dirtests = [t for t in tests if t.directory == testdir]
  64. # Travis-CI is linux only
  65. osvalidtests = [t for t in dirtests if t.os.lower() == "linux"
  66. and (t.database_os.lower() == "linux" or t.database_os.lower() == "none")]
  67. # Travis-CI only has some supported databases
  68. validtests = [t for t in osvalidtests if t.database.lower() == "mysql"
  69. or t.database.lower() == "postgres"
  70. or t.database.lower() == "none"]
  71. log.info("Found %s tests (%s for linux, %s for linux and mysql) in directory '%s'",
  72. len(dirtests), len(osvalidtests), len(validtests), testdir)
  73. if len(validtests) == 0:
  74. log.critical("Found no test that is possible to run in Travis-CI! Aborting!")
  75. if len(osvalidtests) != 0:
  76. log.critical("Note: Found these tests that could run in Travis-CI if more databases were supported")
  77. log.criticat("Note: %s", osvalidtests)
  78. sys.exit(1)
  79. # Prefer database tests over 'none' if we have both
  80. preferred = [t for t in validtests if t.database.lower() != "none"]
  81. if len(preferred) > 0:
  82. self.test = preferred[0]
  83. else:
  84. self.test = validtests[0]
  85. self.name = self.test.name
  86. log.info("Choosing to run test %s in %s", self.name, testdir)
  87. def _should_run(self):
  88. '''
  89. Decides if the current framework test should be tested or if we can cancel it.
  90. Examines git commits included in the latest push to see if any files relevant to
  91. this framework were changed.
  92. This is a rather primitive strategy for things like pull requests, where
  93. we probably want to examine the entire branch of commits. Also, this cannot handle
  94. history re-writing very well, so avoid rebasing onto any published history
  95. '''
  96. # Don't use git diff twice, it's mega slow sometimes
  97. if self.should_run_cache is not None:
  98. return self.should_run_cache
  99. # Look for changes to core TFB framework code
  100. find_tool_changes = "git diff --name-only %s | grep '^toolset/' | wc -l" % self.commit_range
  101. changes = subprocess.check_output(find_tool_changes, shell=True)
  102. if int(changes) != 0:
  103. log.info("Found changes to core framework code")
  104. self.should_run_cache = True
  105. return True
  106. # Look for changes relevant to this test
  107. find_test_changes = "git diff --name-only %s | grep '^%s/' | wc -l" % (self.commit_range, self.directory)
  108. changes = subprocess.check_output(find_test_changes, shell=True)
  109. if int(changes) == 0:
  110. log.info("No changes found for %s", self.name)
  111. self.should_run_cache = False
  112. return False
  113. log.info("Changes found for %s", self.name)
  114. self.should_run_cache = True
  115. return True
  116. def run(self):
  117. ''' Do the requested command using TFB '''
  118. if self.mode == 'jobcleaner':
  119. self.cancel_unneeded_jobs()
  120. return 0
  121. if self.mode == 'cisetup' and self._should_run():
  122. self.run_travis_setup()
  123. return 0
  124. if not self._should_run():
  125. log.info("Not running %s", self.name)
  126. # Cancel ourselves
  127. self.travis.cancel(self.travis.jobid)
  128. return 0
  129. command = 'toolset/run-tests.py '
  130. if self.mode == 'prereq':
  131. command = command + "--install server --install-only --test ''"
  132. elif self.mode == 'install':
  133. command = command + "--install server --install-only --test %s" % self.name
  134. elif self.mode == 'verify':
  135. command = command + "--mode verify --test %s" % self.name
  136. else:
  137. log.critical('Unknown mode passed')
  138. return 1
  139. # Run the command
  140. log.info("Running mode %s with commmand %s", self.mode, command)
  141. try:
  142. p = subprocess.Popen(command, shell=True)
  143. p.wait()
  144. return p.returncode
  145. except subprocess.CalledProcessError:
  146. log.critical("Subprocess Error")
  147. print traceback.format_exc()
  148. return 1
  149. except Exception as err:
  150. log.critical("Subprocess Error")
  151. log.error(err.child_traceback)
  152. return 1
  153. def run_travis_setup(self):
  154. log.info("Setting up Travis-CI")
  155. script = '''
  156. sudo apt-get update
  157. sudo apt-get install openssh-server
  158. # Run as travis user (who already has passwordless sudo)
  159. ssh-keygen -f /home/travis/.ssh/id_rsa -N '' -t rsa
  160. cat /home/travis/.ssh/id_rsa.pub > /home/travis/.ssh/authorized_keys
  161. chmod 600 /home/travis/.ssh/authorized_keys
  162. # Setup database manually
  163. # NOTE: Do not run database installation! It restarts mysql with a different
  164. # configuration and will break travis's mysql setup
  165. mysql -uroot < config/create.sql
  166. # Setup Postgres
  167. psql --version
  168. sudo useradd benchmarkdbuser -p benchmarkdbpass
  169. sudo -u postgres psql template1 < config/create-postgres-database.sql
  170. sudo -u benchmarkdbuser psql hello_world < config/create-postgres.sql
  171. '''
  172. def sh(command):
  173. log.info("Running `%s`", command)
  174. subprocess.check_call(command, shell=True)
  175. for command in script.split('\n'):
  176. command = command.lstrip()
  177. if command != "" and command[0] != '#':
  178. sh(command.lstrip())
  179. # Needed to cancel build jobs from run-ci.py
  180. if not self.travis.is_pull_req:
  181. sh('time gem install travis -v 1.6.16 --no-rdoc --no-ri')
  182. def cancel_unneeded_jobs(self):
  183. log.info("I am jobcleaner")
  184. log.info("Sleeping to ensure Travis-CI has queued all jobs")
  185. time.sleep(20)
  186. # Look for changes to core TFB framework code
  187. find_tool_changes = "git diff --name-only %s | grep toolset | wc -l" % self.commit_range
  188. changes = subprocess.check_output(find_tool_changes, shell=True)
  189. if int(changes) != 0:
  190. log.info("Found changes to core framework code. Running all tests")
  191. self.travis.cancel(self.travis.jobid) # Cancel ourselves
  192. return 0
  193. build = self.travis.build_details()
  194. log.info("Build details:\n%s", build)
  195. def parse_job_id(directory):
  196. for line in build.split('\n'):
  197. if "TESTDIR=%s" % directory in line:
  198. job = re.findall("\d+.\d+", line)[0]
  199. return job
  200. # Build a list of modified directories
  201. changes = subprocess.check_output("git diff --name-only %s" % self.commit_range, shell=True)
  202. dirchanges = []
  203. for line in changes.split('\n'):
  204. dirchanges.append(line[0:line.find('/')])
  205. # For each test, launch a Thread to cancel it's job if
  206. # it's directory has not been modified
  207. cancelled_testdirs = []
  208. threads = []
  209. for test in self.gather_tests():
  210. if test.directory not in dirchanges:
  211. job = parse_job_id(test.directory)
  212. log.info("No changes found for %s (job=%s) (dir=%s)", test.name, job, test.directory)
  213. if job and test.directory not in cancelled_testdirs:
  214. cancelled_testdirs.append(test.directory)
  215. t = threading.Thread(target=self.travis.cancel, args=(job,),
  216. name="%s (%s)" % (job, test.name))
  217. t.start()
  218. threads.append(t)
  219. # Wait for all threads
  220. for t in threads:
  221. t.join()
  222. # Cancel ourselves
  223. self.travis.cancel(self.travis.jobid)
  224. class Travis():
  225. '''Integrates the travis-ci build environment and the travis command line'''
  226. def __init__(self):
  227. self.jobid = os.environ['TRAVIS_JOB_NUMBER']
  228. self.buildid = os.environ['TRAVIS_BUILD_NUMBER']
  229. self.is_pull_req = (os.environ['TRAVIS_PULL_REQUEST'] != "false")
  230. self.logged_in = False
  231. def _login(self):
  232. if self.logged_in:
  233. return
  234. # If this is a PR, we cannot access the secure variable
  235. # GH_TOKEN, and instead must return success for all jobs
  236. if not self.is_pull_req:
  237. self.token = os.environ['GH_TOKEN']
  238. subprocess.check_call("travis login --skip-version-check --no-interactive --github-token %s" % self.token, shell=True)
  239. log.info("Logged into travis") # NEVER PRINT OUTPUT, GH_TOKEN MIGHT BE REVEALED
  240. else:
  241. log.info("Pull Request Detected. Non-necessary jobs will return pass instead of being canceled")
  242. self.logged_in = True
  243. def cancel(self, job):
  244. self._login()
  245. # If this is a pull request, we cannot interact with the CLI
  246. if self.is_pull_req:
  247. log.info("Thread %s: Return pass for job %s", threading.current_thread().name, job)
  248. return
  249. # Ignore errors in case job is already cancelled
  250. try:
  251. subprocess.check_call("travis cancel %s --skip-version-check --no-interactive" % job, shell=True)
  252. log.info("Thread %s: Canceled job %s", threading.current_thread().name, job)
  253. except subprocess.CalledProcessError:
  254. log.exception("Error halting job %s. Report:", job)
  255. subprocess.call("travis report --skip-version-check --no-interactive --org", shell=True)
  256. log.error("Trying to halt %s one more time", job)
  257. subprocess.call("travis cancel %s --skip-version-check --no-interactive" % job, shell=True)
  258. def build_details(self):
  259. self._login()
  260. # If this is a pull request, we cannot interact with the CLI
  261. if self.is_pull_req:
  262. return "No details available"
  263. build = subprocess.check_output("travis show %s --skip-version-check" % self.buildid, shell=True)
  264. return build
  265. if __name__ == "__main__":
  266. args = sys.argv[1:]
  267. usage = '''Usage: toolset/run-ci.py [cisetup|prereq]
  268. OR toolset/run-ci.py [install|verify] <framework-directory>
  269. run-ci.py selects one test from <framework-directory>/benchark_config, and
  270. automates a number of calls into run-tests.py specific to the selected test.
  271. It is guaranteed to always select the same test from the benchark_config, so
  272. multiple runs with the same <framework-directory> reference the same test.
  273. The name of the selected test will be printed to standard output.
  274. cisetup - configure the Travis-CI environment for our test suite
  275. prereq - trigger standard prerequisite installation
  276. install - trigger server installation for the selected test_directory
  277. verify - run a verification on the selected test using `--mode verify`
  278. run-ci.py expects to be run inside the Travis-CI build environment, and
  279. will expect environment variables such as $TRAVIS_BUILD'''
  280. mode = args[0]
  281. if mode == 'cisetup' or mode == 'prereq':
  282. runner = CIRunnner(mode)
  283. elif len(args) == 2 and args[1] == 'jobcleaner':
  284. # Only run jobcleaner once
  285. if mode != 'verify':
  286. sys.exit(0)
  287. # Translate jobcleaner from a directory name to a mode
  288. mode = 'jobcleaner'
  289. runner = CIRunnner(args[1])
  290. elif len(args) == 2 and (mode == "install"
  291. or mode == "verify"):
  292. runner = CIRunnner(mode, args[1])
  293. else:
  294. print usage
  295. sys.exit(1)
  296. retcode = 0
  297. try:
  298. retcode = runner.run()
  299. except KeyError as ke:
  300. log.warning("Environment key missing, are you running inside Travis-CI?")
  301. print traceback.format_exc()
  302. except:
  303. log.critical("Unknown error")
  304. print traceback.format_exc()
  305. finally: # Ensure that logs are printed
  306. # Only print logs if we are not jobcleaner and we ran a verify
  307. if mode == 'jobcleaner' or mode != 'verify':
  308. sys.exit(retcode)
  309. log.error("Running inside travis, so I will print err and out to console")
  310. try:
  311. log.error("Here is ERR:")
  312. with open("results/ec2/latest/logs/%s/err.txt" % runner.test.name, 'r') as err:
  313. for line in err:
  314. log.info(line)
  315. except IOError:
  316. log.error("No ERR file found")
  317. try:
  318. log.error("Here is OUT:")
  319. with open("results/ec2/latest/logs/%s/out.txt" % runner.test.name, 'r') as out:
  320. for line in out:
  321. log.info(line)
  322. except IOError:
  323. log.error("No OUT file found")
  324. sys.exit(retcode)