metadata.py

import os
import glob
import json
from collections import OrderedDict

from colorama import Fore

from toolset.databases import databases
from toolset.utils.output_helper import log


class Metadata:

    supported_dbs = []
    for name in databases:
        supported_dbs.append((name, '...'))

    def __init__(self, benchmarker=None):
        self.benchmarker = benchmarker

    def gather_languages(self):
        '''
        Gathers all the known languages in the suite via the folder names
        beneath FWROOT.
        '''
        lang_dir = os.path.join(self.benchmarker.config.lang_root)
        langs = []
        for directory in glob.glob(os.path.join(lang_dir, "*")):
            langs.append(directory.replace(lang_dir, "")[1:])
        return langs
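
    # Illustration (directory names hypothetical): if lang_root contains
    # 'Java' and 'Python' subdirectories, gather_languages() returns
    # ['Java', 'Python'].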

    def gather_language_tests(self, language):
        '''
        Gathers all the test names from a known language
        '''
        try:
            lang_dir = os.path.join(self.benchmarker.config.lang_root,
                                    language)
            tests = [os.path.join(language, x) for x in os.listdir(lang_dir)]
            return [
                x for x in tests if os.path.isdir(
                    os.path.join(self.benchmarker.config.lang_root, x))
            ]
        except Exception:
            raise Exception(
                "Unable to locate language directory: {!s}".format(language))

    def get_framework_config(self, test_dir):
        '''
        Gets a framework's benchmark_config from the given
        test directory
        '''
        dir_config_files = glob.glob("{!s}/{!s}/benchmark_config.json".format(
            self.benchmarker.config.lang_root, test_dir))
        if dir_config_files:
            return dir_config_files[0]
        else:
            raise Exception(
                "Unable to locate tests in test-dir: {!s}".format(test_dir))

    def gather_tests(self, include=None, exclude=None):
        '''
        Given test names as strings, returns a list of FrameworkTest objects.
        For example, 'aspnet-mysql-raw' turns into a FrameworkTest object with
        variables for checking the test directory, the test database os, and
        other useful items.

        With no arguments, every test in this framework will be returned.
        With include, only tests with those exact names will be returned.
        With exclude, all tests but those excluded will be returned.
        '''
        # Help callers out a bit
        include = include or []
        exclude = exclude or []

        # Search for configuration files
        config_files = []

        if self.benchmarker.config.test_lang:
            self.benchmarker.config.test_dir = []
            for lang in self.benchmarker.config.test_lang:
                self.benchmarker.config.test_dir.extend(
                    self.gather_language_tests(lang))

        if self.benchmarker.config.test_dir:
            for test_dir in self.benchmarker.config.test_dir:
                config_files.append(self.get_framework_config(test_dir))
        else:
            config_files.extend(
                glob.glob("{!s}/*/*/benchmark_config.json".format(
                    self.benchmarker.config.lang_root)))

        tests = []
        for config_file_name in config_files:
            config = None
            with open(config_file_name, 'r') as config_file:
                try:
                    config = json.load(config_file)
                except ValueError:
                    log("Error loading config: {!s}".format(config_file_name),
                        color=Fore.RED)
                    raise Exception("Error loading config file")

            # Find all tests in the config file
            config_tests = self.parse_config(config,
                                             os.path.dirname(config_file_name))

            # Filter
            for test in config_tests:
                if hasattr(test, "tags"):
                    if "broken" in test.tags:
                        continue
                    if self.benchmarker.config.tag:
                        for t in self.benchmarker.config.tag:
                            if t in test.tags and test.name not in exclude:
                                tests.append(test)
                                break
                if include:
                    if test.name in include:
                        tests.append(test)
                elif test.name not in exclude and not self.benchmarker.config.tag:
                    tests.append(test)

        # Ensure we were able to locate everything that was
        # explicitly included
        if include:
            names = {test.name for test in tests}
            missing = list(set(include) - names)
            if missing:
                raise Exception("Unable to locate tests %s" % missing)

        tests = list(set(tests))
        tests.sort(key=lambda x: x.name)
        return tests
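
    # Usage sketch (test names hypothetical): with a configured benchmarker,
    #   metadata.gather_tests(include=['gemini', 'gemini-mysql'])
    # returns only those two FrameworkTest objects, sorted by name, and
    # raises if either name cannot be located.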

    def tests_to_run(self):
        '''
        Gathers all tests for the current benchmark run.
        '''
        return self.gather_tests(self.benchmarker.config.test,
                                 self.benchmarker.config.exclude)

    def gather_frameworks(self, include=None, exclude=None):
        '''
        Returns a dictionary mapping framework -> [test1, test2, test3]
        for quickly grabbing all tests in a grouped manner.
        Args have the same meaning as gather_tests.
        '''
        tests = self.gather_tests(include, exclude)
        frameworks = dict()

        for test in tests:
            if test.framework not in frameworks:
                frameworks[test.framework] = []
            frameworks[test.framework].append(test)
        return frameworks
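
    # Illustration (names hypothetical): the returned mapping might look like
    #   {'gemini': [<gemini test>, <gemini-mysql test>]}
    # so callers can iterate per framework rather than per test.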

    def has_file(self, test_dir, filename):
        '''
        Returns True if the file exists in the test dir
        '''
        path = test_dir
        if self.benchmarker.config.lang_root not in path:
            path = os.path.join(self.benchmarker.config.lang_root, path)
        return os.path.isfile("{!s}/{!s}".format(path, filename))

    @staticmethod
    def test_order(type_name):
        """
        This sort ordering is set up specifically to return the length
        of the test name. There were SO many problems involved with
        'plaintext' being run first (rather, just not last) that we
        needed to ensure that it was run last for every framework.
        """
        return len(type_name)
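
    # Example: sorted(['plaintext', 'json', 'db'], key=Metadata.test_order)
    # yields ['db', 'json', 'plaintext'], pushing 'plaintext' after the
    # shorter type names.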

    def parse_config(self, config, directory):
        """
        Parses a config file into a list of FrameworkTest objects
        """
        from toolset.benchmark.framework_test import FrameworkTest
        tests = []

        # The config object can specify multiple tests.
        # Loop over them and parse each into a FrameworkTest.
        for test in config['tests']:

            tests_to_run = list(test.keys())
            if "default" not in tests_to_run:
                log("Framework %s does not define a default test in benchmark_config.json"
                    % config['framework'],
                    color=Fore.YELLOW)

            # Check that each test configuration is acceptable; throw
            # exceptions if a field is missing or invalid, with suggestions
            # on how to improve the field.
            for test_name, test_keys in test.items():
                # Validates and normalizes the benchmark_config entry
                test_keys = Metadata.validate_test(test_name, test_keys,
                                                   config['framework'],
                                                   directory)

                # Map test type to a parsed FrameworkTestType object
                runTests = dict()
                # TODO: remove self.benchmarker.config.types
                for type_name, type_obj in self.benchmarker.config.types.items():
                    try:
                        # Makes a FrameworkTestType object using some of the
                        # keys in config, e.g. JsonTestType uses "json_url"
                        runTests[type_name] = type_obj.copy().parse(test_keys)
                    except AttributeError:
                        # This is quite common - most tests don't support all
                        # types. Quietly log it and move on. (Debug logging is
                        # on in travis, and this would cause ~1500 lines of
                        # debug output, so it is ignored entirely for now.)
                        # log("Missing arguments for test type %s for framework test %s" % (type_name, test_name))
                        pass

                # We need to sort by test_type to run
                sortedTestKeys = sorted(runTests.keys(), key=Metadata.test_order)
                sortedRunTests = OrderedDict()
                for sortedTestKey in sortedTestKeys:
                    sortedRunTests[sortedTestKey] = runTests[sortedTestKey]

                # Prefix all test names with the framework name, except the
                # 'default' test. Done at the end so we may still refer to the
                # primary test as 'default' in benchmark_config error messages.
                if test_name == 'default':
                    test_name = config['framework']
                else:
                    test_name = "%s-%s" % (config['framework'], test_name)

                # By passing the entire set of keys, each FrameworkTest will
                # have a member for each key
                tests.append(
                    FrameworkTest(test_name, directory, self.benchmarker,
                                  sortedRunTests, test_keys))
        return tests
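
    # Illustration (framework name hypothetical): for framework 'gemini', the
    # test entry named 'default' becomes test 'gemini', while an entry named
    # 'mysql' becomes 'gemini-mysql'.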

    def to_jsonable(self):
        '''
        Returns a list suitable for jsonification
        '''
        all_tests = self.gather_tests()
        return [{
            "project_name": test.project_name,
            "name": test.name,
            "approach": test.approach,
            "classification": test.classification,
            "database": test.database,
            "framework": test.framework,
            "language": test.language,
            "orm": test.orm,
            "platform": test.platform,
            "webserver": test.webserver,
            "os": test.os,
            "database_os": test.database_os,
            "display_name": test.display_name,
            "notes": test.notes,
            "versus": test.versus,
            "tags": getattr(test, "tags", [])
        } for test in all_tests]

    def list_test_metadata(self):
        '''
        Writes the metadata for all the available tests to
        test_metadata.json in the results directory
        '''
        all_tests_json = json.dumps(self.to_jsonable())
        with open(
                os.path.join(self.benchmarker.results.directory,
                             "test_metadata.json"), "w") as f:
            f.write(all_tests_json)

    @staticmethod
    def validate_test(test_name, test_keys, project_name, directory):
        """
        Validates and normalizes benchmark config values for this test
        based on a schema
        """
        recommended_lang = directory.split('/')[-2]
        windows_url = "https://github.com/TechEmpower/FrameworkBenchmarks/issues/1038"

        schema = {
            'language': {
                # Language is the only key right now with no 'allowed' list
                # that can't have a "None" value
                'required': True,
                'help': ('language',
                         'The language of the framework used, suggestion: %s'
                         % recommended_lang)
            },
            'webserver': {
                'help': ('webserver',
                         'Name of the webserver, also referred to as the "front-end server"')
            },
            'classification': {
                'allowed': [('Fullstack', '...'), ('Micro', '...'),
                            ('Platform', '...')]
            },
            'database': {
                'allowed': Metadata.supported_dbs + [
                    ('None',
                     'No database was used for these tests, as is the case with JSON Serialization and Plaintext')
                ]
            },
            'approach': {
                'allowed': [('Realistic', '...'), ('Stripped', '...')]
            },
            'orm': {
                'required_with': 'database',
                'allowed': [
                    ('Full',
                     'Has a full suite of features like lazy loading, caching, multiple language support, sometimes pre-configured with scripts.'),
                    ('Micro',
                     'Has basic database driver capabilities such as establishing a connection and sending queries.'),
                    ('Raw',
                     'Tests that do not use an ORM will be classified as "raw", meaning they use the platform\'s raw database connectivity.')
                ]
            },
            'platform': {
                'help': ('platform',
                         'Name of the platform this framework runs on, e.g. Node.js, PyPy, hhvm, JRuby ...')
            },
            'framework': {
                # Guaranteed to be here and correct at this point; the key is
                # left here to produce the set of required keys
            },
            'os': {
                'allowed': [
                    ('Linux',
                     'Our best-supported host OS; it is recommended that you build your tests for Linux hosts'),
                    ('Windows',
                     'TFB is not fully compatible on Windows; contribute towards our work on compatibility: %s' % windows_url)
                ]
            },
            'database_os': {
                'required_with': 'database',
                'allowed': [
                    ('Linux',
                     'Our best-supported host OS; it is recommended that you build your tests for Linux hosts'),
                    ('Windows',
                     'TFB is not fully compatible on Windows; contribute towards our work on compatibility: %s' % windows_url)
                ]
            }
        }

        # Check the (all optional) test urls
        Metadata.validate_urls(test_name, test_keys)

        def get_test_val(k):
            return test_keys.get(k, "none").lower()

        def throw_incorrect_key(k, acceptable_values, descriptors):
            msg = ("`%s` is a required key for test \"%s\" in framework \"%s\"\n"
                   % (k, test_name, project_name))
            if acceptable_values:
                msg = ("Invalid `%s` value specified for test \"%s\" in framework \"%s\"; suggestions:\n"
                       % (k, test_name, project_name))
                helpinfo = '\n'.join([
                    "  `%s` -- %s" % (v, desc)
                    for (v, desc) in zip(acceptable_values, descriptors)
                ])
                msg = msg + helpinfo + "\n"
            raise Exception(msg)

        # Check values of keys against the schema
        for key in schema.keys():
            val = get_test_val(key)
            test_keys[key] = val
            acceptable_values = None
            descriptors = None
            if 'allowed' in schema[key]:
                acceptable_values, descriptors = zip(*schema[key]['allowed'])
                acceptable_values = [a.lower() for a in acceptable_values]

            if val == "none":
                # Incorrect if this key requires a value other than "none"
                if schema[key].get('required', False):
                    throw_incorrect_key(key, acceptable_values, descriptors)
                # Certain keys are only required if another key is not "none"
                if 'required_with' in schema[key]:
                    if get_test_val(schema[key]['required_with']) != "none":
                        throw_incorrect_key(key, acceptable_values,
                                            descriptors)

            # If we're here, the key must be one of the "allowed" values
            elif acceptable_values and val not in acceptable_values:
                throw_incorrect_key(key, acceptable_values, descriptors)

        test_keys['project_name'] = project_name

        return test_keys
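
    # A minimal, hypothetical benchmark_config.json test entry that would
    # pass this validation; values are purely illustrative:
    #   "default": {
    #     "json_url": "/json",
    #     "plaintext_url": "/plaintext",
    #     "language": "Java",
    #     "platform": "Servlet",
    #     "webserver": "None",
    #     "os": "Linux",
    #     "database": "None",
    #     "approach": "Realistic",
    #     "classification": "Fullstack",
    #     "framework": "gemini"
    #   }
    # With "database": "None", the 'orm' and 'database_os' keys may be
    # omitted, since they are only required_with a database.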

    @staticmethod
    def validate_urls(test_name, test_keys):
        """
        Separated from validate_test because urls are not required anywhere.
        We know a url is incorrect if it is empty or does not start with a
        "/" character. There is no validation done to ensure the url conforms
        to the suggested url specifications, although those suggestions are
        presented if a url fails validation here.
        """
        example_urls = {
            "json_url": "/json",
            "db_url": "/mysql/db",
            "query_url": "/mysql/queries?queries= or /mysql/queries/",
            "fortune_url": "/mysql/fortunes",
            "update_url": "/mysql/updates?queries= or /mysql/updates/",
            "plaintext_url": "/plaintext",
            "cached_query_url": "/mysql/cached_queries?queries= or /mysql/cached_queries"
        }

        for test_url in [
                "json_url", "db_url", "query_url", "fortune_url",
                "update_url", "plaintext_url", "cached_query_url"
        ]:
            key_value = test_keys.get(test_url, None)
            if key_value is not None and not key_value.startswith('/'):
                errmsg = """`%s` field in test \"%s\" does not appear to be a valid url: \"%s\"\n
Example `%s` url: \"%s\"
""" % (test_url, test_name, key_value, test_url, example_urls[test_url])
                raise Exception(errmsg)
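
    # Illustration: "json_url": "json" (missing the leading slash) raises
    # here with the suggested example "/json", while "json_url": "/json"
    # passes.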