# metadata_helper.py

import ConfigParser
import os
import glob
import json
import logging

from ast import literal_eval
from collections import OrderedDict


def gather_langauges():
    '''
    Gathers all the known languages in the suite via the folder names
    beneath FWROOT.
    '''
    # Avoid setting up a circular import
    from toolset.utils import setup_util

    lang_dir = os.path.join(setup_util.get_fwroot(), "frameworks")
    langs = []
    for dir in glob.glob(os.path.join(lang_dir, "*")):
        langs.append(dir.replace(lang_dir, "")[1:])
    return langs
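
# Illustrative usage (a sketch, not part of the toolset): with FWROOT
# pointing at a checkout whose frameworks/ directory is populated, this
# returns the language folder names.
#
#   langs = gather_langauges()
#   # e.g. ['C', 'Go', 'Java', 'Python', ...]
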

def gather_tests(include=[], exclude=[], benchmarker_config=None,
                 results=None):
    '''
    Given test names as strings, returns a list of FrameworkTest objects.
    For example, 'aspnet-mysql-raw' turns into a FrameworkTest object with
    variables for checking the test directory, the test database os, and
    other useful items.

    With no arguments, every test in this framework suite is returned.
    With include, only tests whose names are listed are returned.
    With exclude, all tests except those excluded are returned.

    A config is needed to construct full FrameworkTest objects. If
    one is not provided, a default config will be created.
    '''
    # Avoid setting up a circular import
    from toolset.utils.benchmark_config import BenchmarkConfig
    from toolset.utils import setup_util

    # Help callers out a bit
    if include is None:
        include = []
    if exclude is None:
        exclude = []

    # The old, hacky method to exclude all tests was to request a test known
    # not to exist, such as ''. If test '' was requested, short-circuit and
    # return nothing immediately.
    if len(include) == 1 and '' in include:
        return []

    # Set up a default BenchmarkConfig using the example configuration
    if benchmarker_config is None:
        default_config = setup_util.get_fwroot() + "/benchmark.cfg"
        config = ConfigParser.SafeConfigParser()
        config.readfp(open(default_config))
        defaults = dict(config.items("Defaults"))

        # Convert strings into proper python types
        for k, v in defaults.items():
            try:
                defaults[k] = literal_eval(v)
            except Exception:
                pass

        defaults['results_name'] = "(unspecified, datetime = %Y-%m-%d %H:%M:%S)"
        defaults['results_environment'] = "My Server Environment"
        defaults['test_dir'] = None
        defaults['test_lang'] = None
        defaults['quiet'] = True

        benchmarker_config = BenchmarkConfig(defaults)

    # Search for configuration files
    config_files = []

    if benchmarker_config.test_lang:
        benchmarker_config.test_dir = []
        for lang in benchmarker_config.test_lang:
            if os.path.exists("{!s}/frameworks/{!s}".format(
                    benchmarker_config.fwroot, lang)):
                for test_dir in os.listdir("{!s}/frameworks/{!s}".format(
                        benchmarker_config.fwroot, lang)):
                    benchmarker_config.test_dir.append("{!s}/{!s}".format(
                        lang, test_dir))
            else:
                raise Exception(
                    "Unable to locate language directory: {!s}".format(lang))

    if benchmarker_config.test_dir:
        for test_dir in benchmarker_config.test_dir:
            dir_config_files = glob.glob(
                "{!s}/frameworks/{!s}/benchmark_config.json".format(
                    benchmarker_config.fwroot, test_dir))
            if len(dir_config_files):
                config_files.extend(dir_config_files)
            else:
                raise Exception(
                    "Unable to locate tests in test-dir: {!s}".format(
                        test_dir))
    else:
        config_files.extend(
            glob.glob("{!s}/frameworks/*/*/benchmark_config.json".format(
                benchmarker_config.fwroot)))

    tests = []
    for config_file_name in config_files:
        config = None
        with open(config_file_name, 'r') as config_file:
            try:
                config = json.load(config_file)
            except ValueError:
                raise Exception(
                    "Error loading '{!s}'.".format(config_file_name))

        # Find all tests in the config file
        config_tests = parse_config(config, os.path.dirname(config_file_name),
                                    benchmarker_config, results)

        # Filter
        for test in config_tests:
            if len(include) == 0 and len(exclude) == 0:
                # No filters, we are running everything
                tests.append(test)
            elif test.name in exclude:
                continue
            elif test.name in include:
                tests.append(test)
            else:
                # An include list exists, but this test is
                # not listed there, so we ignore it
                pass

    # Ensure we were able to locate everything that was explicitly included
    if len(include) != 0:
        names = {test.name for test in tests}
        if len(set(include) - set(names)) != 0:
            missing = list(set(include) - set(names))
            raise Exception("Unable to locate tests %s" % missing)

    tests.sort(key=lambda x: x.name)
    return tests
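
# Illustrative usage (a sketch; the test names below are hypothetical):
#
#   all_tests = gather_tests()                       # every discovered test
#   some = gather_tests(include=['gemini', 'gemini-postgres'])
#   most = gather_tests(exclude=['aspnet-mysql-raw'])
#   # Passing include=[''] short-circuits and returns [] (legacy behaviour).
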

def gather_remaining_tests(config, results):
    '''
    Gathers the tests remaining in a current benchmark run.
    '''
    tests = gather_tests(config.test, config.exclude, config, results)

    # If the tests have been interrupted somehow, then we want to resume
    # them where we left off rather than starting from the beginning
    if os.path.isfile(config.current_benchmark):
        with open(config.current_benchmark, 'r') as interrupted_benchmark:
            interrupt_bench = interrupted_benchmark.read().strip()
            for index, atest in enumerate(tests):
                if atest.name == interrupt_bench:
                    tests = tests[index:]
                    break
    return tests
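
# Illustrative usage (a sketch; assumes config.current_benchmark names a file
# containing the test that was running when the previous run stopped):
#
#   remaining = gather_remaining_tests(config, results)
#   # If the interrupt file names 'gemini', the returned list starts at
#   # 'gemini' and keeps every test after it in sorted order.
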

def gather_frameworks(include=[], exclude=[], config=None):
    '''
    Returns a dictionary mapping framework -> [test1, test2, test3]
    for quickly grabbing all tests in a grouped manner.
    Args have the same meaning as in gather_tests.
    '''
    tests = gather_tests(include, exclude, config)
    frameworks = dict()

    for test in tests:
        if test.framework not in frameworks:
            frameworks[test.framework] = []
        frameworks[test.framework].append(test)
    return frameworks
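
# Illustrative usage (a sketch; framework and test names are hypothetical):
#
#   frameworks = gather_frameworks(include=['gemini', 'gemini-postgres'])
#   # {'gemini': [<FrameworkTest gemini>, <FrameworkTest gemini-postgres>]}
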

def test_order(type_name):
    """
    This sort ordering is set up specifically to return the length
    of the test name. There were SO many problems involved with
    'plaintext' being run first (rather, just not last) that we
    needed to ensure that it was run last for every framework.
    """
    return len(type_name)
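
# Illustrative example: sorting the discovered test types by name length
# pushes longer-named types (such as 'plaintext') towards the end of the
# run order.
#
#   sorted(['json', 'plaintext', 'db'], key=test_order)
#   # -> ['db', 'json', 'plaintext']
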

def parse_config(config, directory, benchmarker_config, results):
    """
    Parses a config file into a list of FrameworkTest objects
    """
    from toolset.benchmark.framework_test import FrameworkTest
    tests = []

    # The config object can specify multiple tests.
    # Loop over them and parse each into a FrameworkTest.
    for test in config['tests']:

        tests_to_run = [name for (name, keys) in test.iteritems()]
        if "default" not in tests_to_run:
            logging.warn(
                "Framework %s does not define a default test in benchmark_config.json",
                config['framework'])

        # Check that each test configuration is acceptable. Throw an
        # exception if a field is missing or explain how to improve it.
        for test_name, test_keys in test.iteritems():
            # Validate the benchmark_config entry
            validate_test(test_name, test_keys, directory)

            # Map each test type to a parsed FrameworkTestType object
            runTests = dict()
            for type_name, type_obj in benchmarker_config.types.iteritems():
                try:
                    # Makes a FrameworkTestType object using some of the keys
                    # in config, e.g. JsonTestType uses "json_url"
                    runTests[type_name] = type_obj.copy().parse(test_keys)
                except AttributeError:
                    # This is quite common - most tests don't support all
                    # types. Quietly log it and move on (debug logging is on
                    # in travis and this causes ~1500 lines of debug output,
                    # so it is ignored entirely for now).
                    # logging.debug("Missing arguments for test type %s for framework test %s", type_name, test_name)
                    pass

            # We need to sort by test_type to run
            sortedTestKeys = sorted(runTests.keys(), key=test_order)
            sortedRunTests = OrderedDict()
            for sortedTestKey in sortedTestKeys:
                sortedRunTests[sortedTestKey] = runTests[sortedTestKey]

            # Prefix all test names with the framework name, except the
            # 'default' test. Done at the end so we may still refer to the
            # primary test as `default` in benchmark_config error messages.
            if test_name == 'default':
                test_name = config['framework']
            else:
                test_name = "%s-%s" % (config['framework'], test_name)

            # By passing the entire set of keys, each FrameworkTest will have
            # a member for each key
            tests.append(
                FrameworkTest(test_name, directory, benchmarker_config,
                              results, sortedRunTests, test_keys))

    return tests
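
# Illustrative usage (a sketch; the config dict mirrors the shape of a
# minimal benchmark_config.json and the paths are hypothetical):
#
#   config = {
#       'framework': 'gemini',
#       'tests': [{'default': {'json_url': '/json', ...}}]
#   }
#   tests = parse_config(config, '/path/to/frameworks/Java/gemini',
#                        benchmarker_config, results)
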

def validate_test(test_name, test_keys, directory):
    """
    Validates benchmark config values for this test based on a schema
    """
    recommended_lang = directory.split('/')[-2]
    windows_url = "https://github.com/TechEmpower/FrameworkBenchmarks/issues/1038"
    schema = {
        'language': {
            'help':
            ('language',
             'The language of the framework used, suggestion: %s' %
             recommended_lang)
        },
        'webserver': {
            'help':
            ('webserver',
             'Name of the webserver, also referred to as the "front-end server"')
        },
        'classification': {
            'allowed': [('Fullstack', '...'), ('Micro', '...'),
                        ('Platform', '...')]
        },
        'database': {
            'allowed':
            [('MySQL',
              'One of the most popular databases around the web and in TFB'),
             ('Postgres',
              'An advanced SQL database with a larger feature set than MySQL'),
             ('MongoDB', 'A popular document-store database'),
             ('Cassandra', 'A highly performant and scalable NoSQL database'),
             ('Elasticsearch',
              'A distributed RESTful search engine that is used as a database for TFB tests'),
             ('Redis',
              'An open-sourced, BSD licensed, advanced key-value cache and store'),
             ('SQLite',
              'A network-less database, still supported for backwards compatibility'),
             ('SQLServer', 'Microsoft\'s SQL implementation'),
             ('None',
              'No database was used for these tests, as is the case with JSON Serialization and Plaintext')]
        },
        'approach': {
            'allowed': [('Realistic', '...'), ('Stripped', '...')]
        },
        'orm': {
            'allowed':
            [('Full',
              'Has a full suite of features like lazy loading, caching, multiple language support, sometimes pre-configured with scripts.'),
             ('Micro',
              'Has basic database driver capabilities such as establishing a connection and sending queries.'),
             ('Raw',
              'Tests that do not use an ORM will be classified as "raw", meaning they use the platform\'s raw database connectivity.')]
        },
        'platform': {
            'help':
            ('platform',
             'Name of the platform this framework runs on, e.g. Node.js, PyPy, hhvm, JRuby ...')
        },
        'framework': {
            # Guaranteed to be here and correct at this point.
            # The key is left here to produce the set of required keys.
        },
        'os': {
            'allowed':
            [('Linux',
              'Our best-supported host OS; it is recommended that you build your tests for Linux hosts'),
             ('Windows',
              'TFB is not fully compatible on Windows; contribute towards our work on compatibility: %s'
              % windows_url)]
        },
        'database_os': {
            'allowed':
            [('Linux',
              'Our best-supported host OS; it is recommended that you build your tests for Linux hosts'),
             ('Windows',
              'TFB is not fully compatible on Windows; contribute towards our work on compatibility: %s'
              % windows_url)]
        }
    }

    # Confirm required keys are present
    required_keys = schema.keys()
    missing = list(set(required_keys) - set(test_keys))

    if len(missing) > 0:
        missingstr = ", ".join(map(str, missing))
        raise Exception(
            "benchmark_config.json for test %s is invalid, please amend by adding the following required keys: [%s]"
            % (test_name, missingstr))

    # Check the (all optional) test urls
    validate_urls(test_name, test_keys)

    # Check values of keys against the schema
    for key in required_keys:
        val = test_keys.get(key, "").lower()
        has_predefined_acceptables = 'allowed' in schema[key]

        if has_predefined_acceptables:
            allowed = schema[key].get('allowed', [])
            acceptable_values, descriptors = zip(*allowed)
            acceptable_values = [a.lower() for a in acceptable_values]

            if val not in acceptable_values:
                msg = (
                    "Invalid `%s` value specified for test \"%s\" in framework \"%s\"; suggestions:\n"
                    % (key, test_name, test_keys['framework']))
                helpinfo = "\n".join([
                    "  `%s` -- %s" % (v, desc)
                    for (v, desc) in zip(acceptable_values, descriptors)
                ])
                fullerr = msg + helpinfo + "\n"
                raise Exception(fullerr)

        elif not has_predefined_acceptables and val == "":
            msg = (
                "Value for `%s` in test \"%s\" in framework \"%s\" was missing:\n"
                % (key, test_name, test_keys['framework']))
            helpinfo = "  %s -- %s" % schema[key]['help']
            fullerr = msg + helpinfo + '\n'
            raise Exception(fullerr)
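
# Illustrative usage (a sketch; the keys shown are the schema's required
# keys and the values are hypothetical):
#
#   validate_test('default', {
#       'language': 'Java', 'webserver': 'None', 'classification': 'Fullstack',
#       'database': 'Postgres', 'approach': 'Realistic', 'orm': 'Full',
#       'platform': 'Servlet', 'framework': 'gemini', 'os': 'Linux',
#       'database_os': 'Linux'
#   }, '/path/to/frameworks/Java/gemini')
#   # Raises an Exception if a required key is missing or holds a value
#   # outside the schema's allowed set.
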

def validate_urls(test_name, test_keys):
    """
    Separated from validate_test because urls are not required anywhere. We
    know a url is incorrect if it is empty or does not start with a "/"
    character. There is no validation done to ensure the url conforms to the
    suggested url specifications, although those suggestions are presented if
    a url fails validation here.
    """
    example_urls = {
        "json_url": "/json",
        "db_url": "/mysql/db",
        "query_url": "/mysql/queries?queries= or /mysql/queries/",
        "fortune_url": "/mysql/fortunes",
        "update_url": "/mysql/updates?queries= or /mysql/updates/",
        "plaintext_url": "/plaintext",
        "cached_query_url": "/mysql/cached_queries?queries= or /mysql/cached_queries"
    }

    for test_url in [
            "json_url", "db_url", "query_url", "fortune_url", "update_url",
            "plaintext_url", "cached_query_url"
    ]:
        key_value = test_keys.get(test_url, None)
        if key_value is not None and not key_value.startswith('/'):
            errmsg = """`%s` field in test \"%s\" does not appear to be a valid url: \"%s\"\n
            Example `%s` url: \"%s\"
            """ % (test_url, test_name, key_value, test_url,
                   example_urls[test_url])
            raise Exception(errmsg)
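
# Illustrative usage (a sketch; values are hypothetical):
#
#   validate_urls('default', {'json_url': '/json', 'db_url': 'mysql/db'})
#   # Raises an Exception for `db_url` because it does not start with '/'.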