- from benchmark.fortune_html_parser import FortuneHTMLParser
- from setup.linux import setup_util
- from benchmark.test_types import *
- import importlib
- import os
- import subprocess
- import time
- import re
- from pprint import pprint
- import sys
- import traceback
- import json
- import logging
- import csv
- import shlex
- import math
- from collections import OrderedDict
- from requests import ConnectionError
- from threading import Thread
- from threading import Event
- from utils import header
- # Cross-platform colored text
- from colorama import Fore, Back, Style
- from datetime import datetime
- from datetime import timedelta
- class FrameworkTest:
- headers_template = "-H 'Host: localhost' -H '{accept}' -H 'Connection: keep-alive'"
-
- # Used for test types that require no pipelining or query string params.
- concurrency_template = """
-
- echo ""
- echo "---------------------------------------------------------"
- echo " Running Primer {name}"
- echo " {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}\""
- echo "---------------------------------------------------------"
- echo ""
- {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}"
- sleep 5
-
- echo ""
- echo "---------------------------------------------------------"
- echo " Running Warmup {name}"
- echo " {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} \"http://{server_host}:{port}{url}\""
- echo "---------------------------------------------------------"
- echo ""
- {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} "http://{server_host}:{port}{url}"
- sleep 5
- echo ""
- echo "---------------------------------------------------------"
- echo " Synchronizing time"
- echo "---------------------------------------------------------"
- echo ""
- ntpdate -s pool.ntp.org
- for c in {levels}
- do
- echo ""
- echo "---------------------------------------------------------"
- echo " Concurrency: $c for {name}"
- echo " {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t $(($c>{max_threads}?{max_threads}:$c)) \"http://{server_host}:{port}{url}\""
- echo "---------------------------------------------------------"
- echo ""
- STARTTIME=$(date +"%s")
- {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t "$(($c>{max_threads}?{max_threads}:$c))" http://{server_host}:{port}{url}
- echo "STARTTIME $STARTTIME"
- echo "ENDTIME $(date +"%s")"
- sleep 2
- done
- """
- # Used for test types that require pipelining.
- pipeline_template = """
-
- echo ""
- echo "---------------------------------------------------------"
- echo " Running Primer {name}"
- echo " {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}\""
- echo "---------------------------------------------------------"
- echo ""
- {wrk} {headers} --latency -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}"
- sleep 5
-
- echo ""
- echo "---------------------------------------------------------"
- echo " Running Warmup {name}"
- echo " {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} \"http://{server_host}:{port}{url}\""
- echo "---------------------------------------------------------"
- echo ""
- {wrk} {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} "http://{server_host}:{port}{url}"
- sleep 5
- echo ""
- echo "---------------------------------------------------------"
- echo " Synchronizing time"
- echo "---------------------------------------------------------"
- echo ""
- ntpdate -s pool.ntp.org
- for c in {levels}
- do
- echo ""
- echo "---------------------------------------------------------"
- echo " Concurrency: $c for {name}"
- echo " {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t $(($c>{max_threads}?{max_threads}:$c)) \"http://{server_host}:{port}{url}\" -s ~/pipeline.lua -- {pipeline}"
- echo "---------------------------------------------------------"
- echo ""
- STARTTIME=$(date +"%s")
- {wrk} {headers} --latency -d {duration} -c $c --timeout 8 -t "$(($c>{max_threads}?{max_threads}:$c))" http://{server_host}:{port}{url} -s ~/pipeline.lua -- {pipeline}
- echo "STARTTIME $STARTTIME"
- echo "ENDTIME $(date +"%s")"
- sleep 2
- done
- """
- # Used for test types that require a database -
- # These tests run at a static concurrency level and vary the number
- # of queries made with each request
- query_template = """
-
- echo ""
- echo "---------------------------------------------------------"
- echo " Running Primer {name}"
- echo " wrk {headers} --latency -d 5 -c 8 --timeout 8 -t 8 \"http://{server_host}:{port}{url}2\""
- echo "---------------------------------------------------------"
- echo ""
- wrk {headers} --latency -d 5 -c 8 --timeout 8 -t 8 "http://{server_host}:{port}{url}2"
- sleep 5
-
- echo ""
- echo "---------------------------------------------------------"
- echo " Running Warmup {name}"
- echo " wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} \"http://{server_host}:{port}{url}2\""
- echo "---------------------------------------------------------"
- echo ""
- wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} "http://{server_host}:{port}{url}2"
- sleep 5
- echo ""
- echo "---------------------------------------------------------"
- echo " Synchronizing time"
- echo "---------------------------------------------------------"
- echo ""
- ntpdate -s pool.ntp.org
- for c in {levels}
- do
- echo ""
- echo "---------------------------------------------------------"
- echo " Queries: $c for {name}"
- echo " wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} \"http://{server_host}:{port}{url}$c\""
- echo "---------------------------------------------------------"
- echo ""
- STARTTIME=$(date +"%s")
- wrk {headers} --latency -d {duration} -c {max_concurrency} --timeout 8 -t {max_threads} "http://{server_host}:{port}{url}$c"
- echo "STARTTIME $STARTTIME"
- echo "ENDTIME $(date +"%s")"
- sleep 2
- done
- """
- ############################################################
- # start(out)
- # Start the test using its setup file
- ############################################################
- def start(self, out):
- # Setup environment variables
- logDir = os.path.join(self.fwroot, self.benchmarker.latest_results_directory, 'logs', self.name.lower())
- bash_functions_path = os.path.join(self.fwroot, 'toolset/setup/linux/bash_functions.sh')
- setup_util.replace_environ(config='$FWROOT/config/benchmark_profile',
- command='''\
- export TROOT=%s && \
- export IROOT=%s && \
- export DBHOST=%s && \
- export LOGDIR=%s && \
- export MAX_THREADS=%s && \
- export MAX_CONCURRENCY=%s \
- ''' % (
- self.directory,
- self.install_root,
- self.database_host,
- logDir,
- self.benchmarker.threads,
- max(self.benchmarker.concurrency_levels)))
- # Always ensure that IROOT belongs to the runner_user
- if not os.path.exists(self.install_root):
- os.mkdir(self.install_root)
- chown = "sudo chown -R %s:%s %s" % (self.benchmarker.runner_user,
- self.benchmarker.runner_user, os.path.join(self.fwroot, self.install_root))
- subprocess.check_call(chown, shell=True, cwd=self.fwroot, executable='/bin/bash')
- # Run the module start inside the parent of TROOT
- # - we use the parent as a historical accident; a number of tests
- # still refer to their TROOT manually
- previousDir = os.getcwd()
- os.chdir(os.path.dirname(self.troot))
- logging.info("Running setup module start (cwd=%s)", self.directory)
-
- # Run the start script for the test as the "testrunner" user
- #
- # `sudo` - Switching user requires superuser privs
- # -u [username] The username
- # -E Preserves the current environment variables
- # -H Forces the home var (~) to be reset to the user specified
- # `stdbuf` - Disable buffering, send output to python ASAP
- # -o0 zero-sized buffer for stdout
- # -e0 zero-sized buffer for stderr
- # `bash` - Run the setup.sh script using bash
- # -e Force bash to exit on first error
- # -x Turn on bash tracing e.g. print commands before running
- #
- # Most servers do not output to stdout/stderr while serving
- # requests so there is no performance hit from disabling
- # output buffering. This disabling is necessary to
- # a) allow TFB to show output in real time and b) avoid losing
- # output in the buffer when the testrunner processes are forcibly
- # killed
- #
- # See http://www.pixelbeat.org/programming/stdio_buffering/
- # See https://blogs.gnome.org/markmc/2013/06/04/async-io-and-python/
- # See http://eyalarubas.com/python-subproc-nonblock.html
- command = 'sudo -u %s -E -H stdbuf -o0 -e0 bash -exc "source %s && source %s.sh"' % (
- self.benchmarker.runner_user,
- bash_functions_path,
- os.path.join(self.troot, self.setup_file))
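- # Illustrative example of the composed command (the runner user and paths are
- # hypothetical):
- #   sudo -u testrunner -E -H stdbuf -o0 -e0 bash -exc \
- #     "source $FWROOT/toolset/setup/linux/bash_functions.sh && source $TROOT/setup.sh"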
-
- debug_command = '''\
- export FWROOT=%s && \\
- export TROOT=%s && \\
- export IROOT=%s && \\
- export DBHOST=%s && \\
- export LOGDIR=%s && \\
- export MAX_THREADS=%s && \\
- export MAX_CONCURRENCY=%s && \\
- cd %s && \\
- %s''' % (self.fwroot,
- self.directory,
- self.install_root,
- self.database_host,
- logDir,
- self.benchmarker.threads,
- max(self.benchmarker.concurrency_levels),
- self.directory,
- command)
- logging.info("To run %s manually, copy/paste this:\n%s", self.name, debug_command)
- def tee_output(prefix, line):
- # Needs to be one atomic write
- # Explicitly use UTF-8 as it's the most common framework output
- # TODO improve encoding handling
- line = prefix.encode('utf-8') + line
- # Log to current terminal
- sys.stdout.write(line)
- sys.stdout.flush()
- # logging.error("".join([prefix, line]))
- out.write(line)
- out.flush()
- # Start the setup.sh command
- p = subprocess.Popen(command, cwd=self.directory,
- shell=True, stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT)
- nbsr = setup_util.NonBlockingStreamReader(p.stdout,
- "%s: %s.sh and framework processes have terminated" % (self.name, self.setup_file))
- # Set a limit on total execution time of setup.sh
- timeout = datetime.now() + timedelta(minutes = 105)
- time_remaining = timeout - datetime.now()
- # Need to print to stdout once every 10 minutes or Travis-CI will abort
- travis_timeout = datetime.now() + timedelta(minutes = 5)
- # Flush output until setup.sh work is finished. This is
- # either a) when setup.sh exits b) when the port is bound
- # c) when we run out of time. Note that 'finished' doesn't
- # guarantee setup.sh process is dead - the OS may choose to make
- # setup.sh a zombie process if it still has living children
- #
- # Note: child processes forked (using &) will remain alive
- # after setup.sh has exited. They will have inherited the
- # stdout/stderr descriptors and will be directing their
- # output to the pipes.
- #
- prefix = "Setup %s: " % self.name
- while (p.poll() is None
- and not self.benchmarker.is_port_bound(self.port)
- and not time_remaining.total_seconds() < 0):
-
- # The conditions above are slow to check, so
- # we will delay output substantially if we only
- # print one line per condition check.
- # Adding a tight loop here mitigates the effect,
- # ensuring that most of the output directly from
- # setup.sh is sent to tee_output before the outer
- # loop exits and prints things like "setup.sh exited"
- #
- for i in xrange(10):
- try:
- line = nbsr.readline(0.05)
- if line:
- tee_output(prefix, line)
- # Reset Travis-CI timer
- travis_timeout = datetime.now() + timedelta(minutes = 5)
- except setup_util.EndOfStream:
- tee_output(prefix, "Setup has terminated\n")
- break
- time_remaining = timeout - datetime.now()
- if (travis_timeout - datetime.now()).total_seconds() < 0:
- sys.stdout.write(prefix + 'Printing so Travis-CI does not time out\n')
- sys.stdout.write(prefix + "Status: Poll: %s, Port %s bound: %s, Time Left: %s\n" % (
- p.poll(), self.port, self.benchmarker.is_port_bound(self.port), time_remaining))
- sys.stdout.flush()
- travis_timeout = datetime.now() + timedelta(minutes = 5)
- # Did we time out?
- if time_remaining.total_seconds() < 0:
- tee_output(prefix, "%s.sh timed out!! Aborting...\n" % self.setup_file)
- p.kill()
- return 1
- # What's our return code?
- # If setup.sh has terminated, use that code
- # Otherwise, detect if the port was bound
- tee_output(prefix, "Status: Poll: %s, Port %s bound: %s, Time Left: %s\n" % (
- p.poll(), self.port, self.benchmarker.is_port_bound(self.port), time_remaining))
- retcode = (p.poll() if p.poll() is not None else 0 if self.benchmarker.is_port_bound(self.port) else 1)
- if p.poll() is not None:
- tee_output(prefix, "%s.sh process exited naturally with %s\n" % (self.setup_file, p.poll()))
- elif self.benchmarker.is_port_bound(self.port):
- tee_output(prefix, "Bound port detected on %s\n" % self.port)
- # Before we return control to the benchmarker, spin up a
- # thread to keep an eye on the pipes in case the running
- # framework uses stdout/stderr. Once all processes accessing
- # the subprocess.PIPEs are dead, this thread will terminate.
- # Use a different prefix to indicate this is the framework
- # speaking
- prefix = "Server %s: " % self.name
- def watch_child_pipes(nbsr, prefix):
- while True:
- try:
- line = nbsr.readline(60)
- if line:
- tee_output(prefix, line)
- except setup_util.EndOfStream:
- tee_output(prefix, "Framework processes have terminated\n")
- return
- watch_thread = Thread(target = watch_child_pipes,
- args = (nbsr, prefix))
- watch_thread.daemon = True
- watch_thread.start()
- logging.info("Executed %s.sh, returning %s", self.setup_file, retcode)
- os.chdir(previousDir)
- return retcode
- ############################################################
- # End start
- ############################################################
- ############################################################
- # verify_urls
- # Verifies each of the URLs for this test. This will simply
- # curl the URL and check its return status.
- # For each url, a flag will be set on this object indicating
- # whether or not it passed
- # Returns True if all verifications succeeded
- ############################################################
- def verify_urls(self, verificationPath):
- result = True
-
- def verify_type(test_type):
- with open(os.path.join(verificationPath, (test_type + '.txt')), 'w') as verification:
- test = self.runTests[test_type]
- test.setup_out(verification)
- verification.write(header("VERIFYING %s" % test_type.upper()))
- base_url = "http://%s:%s" % (self.benchmarker.server_host, self.port)
- try:
- results = test.verify(base_url)
- except ConnectionError as e:
- results = [('fail',"Server did not respond to request", base_url)]
- logging.warning("Verifying test %s for %s caused an exception: %s", test_type, self.name, e)
- except Exception as e:
- results = [('fail',"""Caused Exception in TFB
- This almost certainly means your return value is incorrect,
- but also that you have found a bug. Please submit an issue
- including this message: %s\n%s""" % (e, traceback.format_exc()),
- base_url)]
- logging.warning("Verifying test %s for %s caused an exception: %s", test_type, self.name, e)
- traceback.format_exc()
- test.failed = any(result == 'fail' for (result, reason, url) in results)
- test.warned = any(result == 'warn' for (result, reason, url) in results)
- test.passed = all(result == 'pass' for (result, reason, url) in results)
- def output_result(result, reason, url):
- specific_rules_url = "http://frameworkbenchmarks.readthedocs.org/en/latest/Project-Information/Framework-Tests/#specific-test-requirements"
- color = Fore.GREEN
- if result.upper() == "WARN":
- color = Fore.YELLOW
- elif result.upper() == "FAIL":
- color = Fore.RED
- verification.write((" " + color + "%s" + Style.RESET_ALL + " for %s\n") % (result.upper(), url))
- print (" " + color + "%s" + Style.RESET_ALL + " for %s\n") % (result.upper(), url)
- if reason is not None and len(reason) != 0:
- for line in reason.splitlines():
- verification.write(" " + line + '\n')
- print " " + line
- if not test.passed:
- verification.write(" See %s\n" % specific_rules_url)
- print " See %s\n" % specific_rules_url
- [output_result(r1,r2,url) for (r1, r2, url) in results]
- if test.failed:
- self.benchmarker.report_verify_results(self, test_type, 'fail')
- elif test.warned:
- self.benchmarker.report_verify_results(self, test_type, 'warn')
- elif test.passed:
- self.benchmarker.report_verify_results(self, test_type, 'pass')
- else:
- raise Exception("Unknown error - test did not pass,warn,or fail")
- verification.flush()
- result = True
- for test_type in self.runTests:
- verify_type(test_type)
- if self.runTests[test_type].failed:
- result = False
-
- return result
- ############################################################
- # End verify_urls
- ############################################################
- ############################################################
- # benchmark
- # Runs the benchmark for each type of test that it implements
- # JSON/DB/Query.
- ############################################################
- def benchmark(self, benchmarkPath):
- def benchmark_type(test_type):
- with open(os.path.join(benchmarkPath, (test_type + '.txt')), 'w') as out:
- out.write("BENCHMARKING %s ... " % test_type.upper())
- test = self.runTests[test_type]
- test.setup_out(out)
- output_file = self.benchmarker.output_file(self.name, test_type)
- if not os.path.exists(output_file):
- # Open to create the empty file
- with open(output_file, 'w'):
- pass
- if not test.failed:
- if test_type == 'plaintext': # One special case
- remote_script = self.__generate_pipeline_script(test.get_url(), self.port, test.accept_header)
- elif test_type == 'query' or test_type == 'update':
- remote_script = self.__generate_query_script(test.get_url(), self.port, test.accept_header)
- else:
- remote_script = self.__generate_concurrency_script(test.get_url(), self.port, test.accept_header)
- # Begin resource usage metrics collection
- self.__begin_logging(test_type)
- # Run the benchmark
- with open(output_file, 'w') as raw_file:
- p = subprocess.Popen(self.benchmarker.client_ssh_string.split(" "), stdin=subprocess.PIPE, stdout=raw_file, stderr=raw_file)
- p.communicate(remote_script)
- out.flush()
- # End resource usage metrics collection
- self.__end_logging()
- results = self.__parse_test(test_type)
- print "Benchmark results:"
- pprint(results)
- self.benchmarker.report_benchmark_results(framework=self, test=test_type, results=results['results'])
- out.write( "Complete\n" )
- out.flush()
-
- for test_type in self.runTests:
- benchmark_type(test_type)
- ############################################################
- # End benchmark
- ############################################################
-
- ############################################################
- # parse_all
- # Method meant to be run for a given timestamp
- ############################################################
- def parse_all(self):
- for test_type in self.runTests:
- if os.path.exists(self.benchmarker.get_output_file(self.name, test_type)):
- results = self.__parse_test(test_type)
- self.benchmarker.report_benchmark_results(framework=self, test=test_type, results=results['results'])
- ##########################################################################################
- # Private Methods
- ##########################################################################################
- ############################################################
- # __parse_test(test_type)
- ############################################################
- def __parse_test(self, test_type):
- try:
- results = dict()
- results['results'] = []
- stats = []
-
- if os.path.exists(self.benchmarker.get_output_file(self.name, test_type)):
- with open(self.benchmarker.output_file(self.name, test_type)) as raw_data:
- is_warmup = True
- rawData = None
- for line in raw_data:
- if "Queries:" in line or "Concurrency:" in line:
- is_warmup = False
- rawData = None
- continue
- if "Warmup" in line or "Primer" in line:
- is_warmup = True
- continue
- if not is_warmup:
- if rawData == None:
- rawData = dict()
- results['results'].append(rawData)
- #if "Requests/sec:" in line:
- # m = re.search("Requests/sec:\s+([0-9]+)", line)
- # rawData['reportedResults'] = m.group(1)
-
- # search for the wrk latency summary (avg/stdev/max)
- if "Latency" in line:
- m = re.findall("([0-9]+\.*[0-9]*[us|ms|s|m|%]+)", line)
- if len(m) == 4:
- rawData['latencyAvg'] = m[0]
- rawData['latencyStdev'] = m[1]
- rawData['latencyMax'] = m[2]
- # rawData['latencyStdevPercent'] = m[3]
-
- #if "Req/Sec" in line:
- # m = re.findall("([0-9]+\.*[0-9]*[k|%]*)", line)
- # if len(m) == 4:
- # rawData['requestsAvg'] = m[0]
- # rawData['requestsStdev'] = m[1]
- # rawData['requestsMax'] = m[2]
- # rawData['requestsStdevPercent'] = m[3]
-
- #if "requests in" in line:
- # m = re.search("requests in ([0-9]+\.*[0-9]*[ms|s|m|h]+)", line)
- # if m != None:
- # # parse out the raw time, which may be in minutes or seconds
- # raw_time = m.group(1)
- # if "ms" in raw_time:
- # rawData['total_time'] = float(raw_time[:len(raw_time)-2]) / 1000.0
- # elif "s" in raw_time:
- # rawData['total_time'] = float(raw_time[:len(raw_time)-1])
- # elif "m" in raw_time:
- # rawData['total_time'] = float(raw_time[:len(raw_time)-1]) * 60.0
- # elif "h" in raw_time:
- # rawData['total_time'] = float(raw_time[:len(raw_time)-1]) * 3600.0
-
- if "requests in" in line:
- m = re.search("([0-9]+) requests in", line)
- if m != None:
- rawData['totalRequests'] = int(m.group(1))
-
- if "Socket errors" in line:
- if "connect" in line:
- m = re.search("connect ([0-9]+)", line)
- rawData['connect'] = int(m.group(1))
- if "read" in line:
- m = re.search("read ([0-9]+)", line)
- rawData['read'] = int(m.group(1))
- if "write" in line:
- m = re.search("write ([0-9]+)", line)
- rawData['write'] = int(m.group(1))
- if "timeout" in line:
- m = re.search("timeout ([0-9]+)", line)
- rawData['timeout'] = int(m.group(1))
-
- if "Non-2xx" in line:
- m = re.search("Non-2xx or 3xx responses: ([0-9]+)", line)
- if m != None:
- rawData['5xx'] = int(m.group(1))
- if "STARTTIME" in line:
- m = re.search("[0-9]+", line)
- rawData["startTime"] = int(m.group(0))
- if "ENDTIME" in line:
- m = re.search("[0-9]+", line)
- rawData["endTime"] = int(m.group(0))
- test_stats = self.__parse_stats(test_type, rawData["startTime"], rawData["endTime"], 1)
- # rawData["averageStats"] = self.__calculate_average_stats(test_stats)
- stats.append(test_stats)
- with open(self.benchmarker.stats_file(self.name, test_type) + ".json", "w") as stats_file:
- json.dump(stats, stats_file, indent=2)
- return results
- except IOError:
- return None
- ############################################################
- # End __parse_test
- ############################################################
- ############################################################
- # __generate_concurrency_script(url, port)
- # Generates the string containing the bash script that will
- # be run on the client to benchmark a single test. This
- # specifically works for the variable concurrency tests (JSON
- # and DB)
- ############################################################
- def __generate_concurrency_script(self, url, port, accept_header, wrk_command="wrk"):
- headers = self.headers_template.format(accept=accept_header)
- return self.concurrency_template.format(max_concurrency=max(self.benchmarker.concurrency_levels),
- max_threads=self.benchmarker.threads, name=self.name, duration=self.benchmarker.duration,
- levels=" ".join("{}".format(item) for item in self.benchmarker.concurrency_levels),
- server_host=self.benchmarker.server_host, port=port, url=url, headers=headers, wrk=wrk_command)
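- # Illustrative example of a single wrk invocation produced by the template above
- # (host, port, url, duration and concurrency level are hypothetical):
- #   wrk -H 'Host: localhost' -H '<accept header>' -H 'Connection: keep-alive' \
- #     --latency -d 15 -c 256 --timeout 8 -t 8 "http://10.0.0.1:8080/json"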
- ############################################################
- # __generate_pipeline_script(url, port)
- # Generates the string containing the bash script that will
- # be run on the client to benchmark a single pipeline test.
- ############################################################
- def __generate_pipeline_script(self, url, port, accept_header, wrk_command="wrk"):
- headers = self.headers_template.format(accept=accept_header)
- return self.pipeline_template.format(max_concurrency=16384,
- max_threads=self.benchmarker.threads, name=self.name, duration=self.benchmarker.duration,
- levels=" ".join("{}".format(item) for item in [256,1024,4096,16384]),
- server_host=self.benchmarker.server_host, port=port, url=url, headers=headers, wrk=wrk_command,
- pipeline=16)
- ############################################################
- # __generate_query_script(url, port)
- # Generates the string containing the bash script that will
- # be run on the client to benchmark a single test. This
- # specifically works for the variable query tests (Query)
- ############################################################
- def __generate_query_script(self, url, port, accept_header):
- headers = self.headers_template.format(accept=accept_header)
- return self.query_template.format(max_concurrency=max(self.benchmarker.concurrency_levels),
- max_threads=self.benchmarker.threads, name=self.name, duration=self.benchmarker.duration,
- levels=" ".join("{}".format(item) for item in self.benchmarker.query_levels),
- server_host=self.benchmarker.server_host, port=port, url=url, headers=headers)
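- # Illustrative example of a single generated query invocation; note the query
- # count ($c, taken from benchmarker.query_levels) is appended to the url
- # (host, port, url and duration are hypothetical):
- #   wrk ... --latency -d 15 -c 256 --timeout 8 -t 8 "http://10.0.0.1:8080/queries?queries=20"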
- ############################################################
- # Returns True if any test type in this framework test requires a database
- ############################################################
- def requires_database(self):
- '''Returns True/False if this test requires a database'''
- return any(tobj.requires_db for (ttype,tobj) in self.runTests.iteritems())
- ############################################################
- # __begin_logging
- # Starts a dstat subprocess to monitor resource usage, to be synced with the client's time
- # TODO: MySQL and InnoDB are possible. Figure out how to implement them.
- ############################################################
- def __begin_logging(self, test_type):
- output_file = "{file_name}".format(file_name=self.benchmarker.get_stats_file(self.name, test_type))
- dstat_string = "dstat -afilmprsT --aio --fs --ipc --lock --raw --socket --tcp \
- --raw --socket --tcp --udp --unix --vm --disk-util \
- --rpc --rpcd --output {output_file}".format(output_file=output_file)
- cmd = shlex.split(dstat_string)
- dev_null = open(os.devnull, "w")
- self.subprocess_handle = subprocess.Popen(cmd, stdout=dev_null)
- ##############################################################
- # Begin __end_logging
- # Stops the dstat subprocess and blocks until shutdown is complete.
- ##############################################################
- def __end_logging(self):
- self.subprocess_handle.terminate()
- self.subprocess_handle.communicate()
- ##############################################################
- # Begin __parse_stats
- # For each test type, process all the statistics, and return a multi-layered dictionary
- # that has a structure as follows:
- # (timestamp)
- # | (main header) - group that the stat is in
- # | | (sub header) - title of the stat
- # | | | (stat) - the stat itself, usually a floating point number
- ##############################################################
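- # An illustrative slice of the returned structure (header names from dstat,
- # values hypothetical):
- #   { 1415133536.0: { "total cpu usage": {"usr": 1.2, "sys": 0.4, "idl": 98.0},
- #                     "memory usage":    {"used": 1048576.0, "free": 2097152.0} } }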
- def __parse_stats(self, test_type, start_time, end_time, interval):
- stats_dict = dict()
- stats_file = self.benchmarker.stats_file(self.name, test_type)
- with open(stats_file) as stats:
- while(stats.next() != "\n"): # dstat doesn't output a completely compliant CSV file - we need to strip the header
- pass
- stats_reader = csv.reader(stats)
- main_header = stats_reader.next()
- sub_header = stats_reader.next()
- time_row = sub_header.index("epoch")
- int_counter = 0
- for row in stats_reader:
- time = float(row[time_row])
- int_counter+=1
- if time < start_time:
- continue
- elif time > end_time:
- return stats_dict
- if int_counter % interval != 0:
- continue
- row_dict = dict()
- for nextheader in main_header:
- if nextheader != "":
- row_dict[nextheader] = dict()
- header = ""
- for item_num, column in enumerate(row):
- if(len(main_header[item_num]) != 0):
- header = main_header[item_num]
- row_dict[header][sub_header[item_num]] = float(column) # all the stats are numbers, so we want to make sure that they stay that way in json
- stats_dict[time] = row_dict
- return stats_dict
- ##############################################################
- # End __parse_stats
- ##############################################################
- def __getattr__(self, name):
- """For backwards compatibility, we used to pass benchmarker
- as the argument to the setup.sh files"""
- try:
- x = getattr(self.benchmarker, name)
- except AttributeError:
- print "AttributeError: %s not a member of FrameworkTest or Benchmarker" % name
- print "This is probably a bug"
- raise
- return x
- ##############################################################
- # Begin __calculate_average_stats
- # We have a large amount of raw data for the statistics that
- # may be useful for the stats nerds, but most people care about
- # a couple of numbers. For now, we're only going to supply:
- # * Average CPU
- # * Average Memory
- # * Total network use
- # * Total disk use
- # More may be added in the future. If they are, please update
- # the above list.
- # Note: raw_stats is directly from the __parse_stats method.
- # Recall that this consists of a dictionary of timestamps,
- # each of which contain a dictionary of stat categories which
- # contain a dictionary of stats
- ##############################################################
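- # Illustrative shape of the aggregated result (header names and values are
- # hypothetical):
- #   { "memory usage": "512.0MB",
- #     "net/total":    {"receive": "1.2GB", "send": "350.0MB"},
- #     "dsk/total":    {"read": "10.0MB", "write": "120.0MB"} }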
- def __calculate_average_stats(self, raw_stats):
- raw_stat_collection = dict()
-
- for timestamp, time_dict in raw_stats.items():
- for main_header, sub_headers in time_dict.items():
- item_to_append = None
- if 'cpu' in main_header:
- # We want to take the idl stat and subtract it from 100
- # to get the time that the CPU is NOT idle.
- item_to_append = 100.0 - sub_headers['idl']
- elif main_header == 'memory usage':
- item_to_append = sub_headers['used']
- elif 'net' in main_header:
- # Network stats have two parts - receive and send. We'll use a tuple of
- # style (receive, send)
- item_to_append = (sub_headers['recv'], sub_headers['send'])
- elif 'dsk' in main_header or 'io' in main_header:
- # Similar for network, except our tuple looks like (read, write)
- item_to_append = (sub_headers['read'], sub_headers['writ'])
- if item_to_append is not None:
- if main_header not in raw_stat_collection:
- raw_stat_collection[main_header] = list()
- raw_stat_collection[main_header].append(item_to_append)
- # Simple function to determine human readable size
- # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
- def sizeof_fmt(num):
- # We'll assume that any number we get is convertible to a float, just in case
- num = float(num)
- for x in ['bytes','KB','MB','GB']:
- if num < 1024.0 and num > -1024.0:
- return "%3.1f%s" % (num, x)
- num /= 1024.0
- return "%3.1f%s" % (num, 'TB')
- # Now we have our raw stats in a readable format - we need to format it for display
- # We need a floating point sum, so the built in sum doesn't cut it
- display_stat_collection = dict()
- for header, values in raw_stat_collection.items():
- display_stat = None
- if 'cpu' in header:
- display_stat = sizeof_fmt(math.fsum(values) / len(values))
- elif header == 'memory usage':
- display_stat = sizeof_fmt(math.fsum(values) / len(values))
- elif 'net' in header:
- receive, send = zip(*values) # unzip
- display_stat = {'receive': sizeof_fmt(math.fsum(receive)), 'send': sizeof_fmt(math.fsum(send))}
- else: # if 'dsk' or 'io' in header:
- read, write = zip(*values) # unzip
- display_stat = {'read': sizeof_fmt(math.fsum(read)), 'write': sizeof_fmt(math.fsum(write))}
- display_stat_collection[header] = display_stat
- return display_stat_collection
- ###########################################################################################
- # End __calculate_average_stats
- #########################################################################################
-
- ##########################################################################################
- # Constructor
- ##########################################################################################
- def __init__(self, name, directory, benchmarker, runTests, args):
- self.name = name
- self.directory = directory
- self.benchmarker = benchmarker
- self.runTests = runTests
- self.fwroot = benchmarker.fwroot
- self.approach = ""
- self.classification = ""
- self.database = ""
- self.framework = ""
- self.language = ""
- self.orm = ""
- self.platform = ""
- self.webserver = ""
- self.os = ""
- self.database_os = ""
- self.display_name = ""
- self.notes = ""
- self.versus = ""
-
- # setup logging
- logging.basicConfig(stream=sys.stderr, level=logging.INFO)
-
- self.install_root="%s/%s" % (self.fwroot, "installs")
- if benchmarker.install_strategy == 'pertest':
- self.install_root="%s/pertest/%s" % (self.install_root, name)
- # Used in setup.sh scripts for consistency with
- # the bash environment variables
- self.troot = self.directory
- self.iroot = self.install_root
- self.__dict__.update(args)
- ############################################################
- # End __init__
- ############################################################
- ############################################################
- # End FrameworkTest
- ############################################################
- # Static methods
- def test_order(type_name):
- """
- This sort ordering is set up specifically to return the length
- of the test name. There were SO many problems involved with
- 'plaintext' being run first (rather, just not last) that we
- needed to ensure that it was run last for every framework.
- """
- return len(type_name)
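- # e.g. sorted(['plaintext', 'json', 'db'], key=test_order) gives
- # ['db', 'json', 'plaintext'], so 'plaintext' always runs last.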
- def validate_urls(test_name, test_keys):
- """
- Separated from validate_test because urls are not required anywhere. We know a url is incorrect if it is
- empty or does not start with a "/" character. There is no validation done to ensure the url conforms to
- the suggested url specifications, although those suggestions are presented if a url fails validation here.
- """
- example_urls = {
- "json_url": "/json",
- "db_url": "/mysql/db",
- "query_url": "/mysql/queries?queries= or /mysql/queries/",
- "fortune_url": "/mysql/fortunes",
- "update_url": "/mysql/updates?queries= or /mysql/updates/",
- "plaintext_url": "/plaintext"
- }
- for test_url in ["json_url","db_url","query_url","fortune_url","update_url","plaintext_url"]:
- key_value = test_keys.get(test_url, None)
- if key_value != None and not key_value.startswith('/'):
- errmsg = """`%s` field in test \"%s\" does not appear to be a valid url: \"%s\"\n
- Example `%s` url: \"%s\"
- """ % (test_url, test_name, key_value, test_url, example_urls[test_url])
- raise Exception(errmsg)
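- # e.g. "db_url": "/mysql/db" passes, while "db_url": "mysql/db" (missing the
- # leading slash) raises the exception above.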
-
- def validate_test(test_name, test_keys, directory):
- """
- Validate benchmark config values for this test based on a schema
- """
- # Ensure that each FrameworkTest has a framework property, inheriting from top-level if not
- if not test_keys['framework']:
- test_keys['framework'] = config['framework']
- recommended_lang = directory.split('/')[-2]
- windows_url = "https://github.com/TechEmpower/FrameworkBenchmarks/milestones/Windows%%20Compatibility"
- schema = {
- 'language': {
- 'help': ('language', 'The language of the framework used, suggestion: %s' % recommended_lang)
- },
- 'webserver': {
- 'help': ('webserver', 'Name of the webserver also referred to as the "front-end server"')
- },
- 'classification': {
- 'allowed': [
- ('Fullstack', '...'),
- ('Micro', '...'),
- ('Platform', '...')
- ]
- },
- 'database': {
- 'allowed': [
- ('MySQL', 'One of the most popular databases around the web and in TFB'),
- ('Postgres', 'An advanced SQL database with a larger feature set than MySQL'),
- ('MongoDB', 'A popular document-store database'),
- ('Cassandra', 'A highly performant and scalable NoSQL database'),
- ('Elasticsearch', 'A distributed RESTful search engine that is used as a database for TFB tests'),
- ('Redis', 'An open-sourced, BSD licensed, advanced key-value cache and store'),
- ('SQLite', 'A network-less database, still supported for backwards compatibility'),
- ('SQLServer', 'Microsoft\'s SQL implementation'),
- ('None', 'No database was used for these tests, as is the case with Json Serialization and Plaintext')
- ]
- },
- 'approach': {
- 'allowed': [
- ('Realistic', '...'),
- ('Stripped', '...')
- ]
- },
- 'orm': {
- 'allowed': [
- ('Full', 'Has a full suite of features like lazy loading, caching, multiple language support, sometimes pre-configured with scripts.'),
- ('Micro', 'Has basic database driver capabilities such as establishing a connection and sending queries.'),
- ('Raw', 'Tests that do not use an ORM will be classified as "raw" meaning they use the platform\'s raw database connectivity.')
- ]
- },
- 'platform': {
- 'help': ('platform', 'Name of the platform this framework runs on, e.g. Node.js, Pypy, hhvm, JRuby ...')
- },
- 'framework': {
- # Guaranteed to be here and correct at this point
- # key is left here to produce the set of required keys
- },
- 'os': {
- 'allowed': [
- ('Linux', 'Our best-supported host OS, it is recommended that you build your tests for Linux hosts'),
- ('Windows', 'TFB is not fully-compatible on windows, contribute towards our work on compatibility: %s' % windows_url)
- ]
- },
- 'database_os': {
- 'allowed': [
- ('Linux', 'Our best-supported host OS, it is recommended that you build your tests for Linux hosts'),
- ('Windows', 'TFB is not fully-compatible on windows, contribute towards our work on compatibility: %s' % windows_url)
- ]
- }
- }
- # Confirm required keys are present
- required_keys = schema.keys()
- missing = list(set(required_keys) - set(test_keys))
- if len(missing) > 0:
- missingstr = (", ").join(map(str, missing))
- raise Exception("benchmark_config.json for test %s is invalid, please amend by adding the following required keys: [%s]"
- % (test_name, missingstr))
- # Check the (all optional) test urls
- validate_urls(test_name, test_keys)
- # Check values of keys against schema
- for key in required_keys:
- val = test_keys.get(key, "").lower()
- has_predefined_acceptables = 'allowed' in schema[key]
- if has_predefined_acceptables:
- allowed = schema[key].get('allowed', [])
- acceptable_values, descriptors = zip(*allowed)
- acceptable_values = [a.lower() for a in acceptable_values]
-
- if val not in acceptable_values:
- msg = ("Invalid `%s` value specified for test \"%s\" in framework \"%s\"; suggestions:\n"
- % (key, test_name, test_keys['framework']))
- helpinfo = ('\n').join([" `%s` -- %s" % (v, desc) for (v, desc) in zip(acceptable_values, descriptors)])
- fullerr = msg + helpinfo + "\n"
- raise Exception(fullerr)
-
- elif not has_predefined_acceptables and val == "":
- msg = ("Value for `%s` in test \"%s\" in framework \"%s\" was missing:\n"
- % (key, test_name, test_keys['framework']))
- helpinfo = " %s -- %s" % schema[key]['help']
- fullerr = msg + helpinfo + '\n'
- raise Exception(fullerr)
- def parse_config(config, directory, benchmarker):
- """
- Parses a config file into a list of FrameworkTest objects
- """
- tests = []
- # The config object can specify multiple tests
- # Loop over them and parse each into a FrameworkTest
- for test in config['tests']:
- tests_to_run = [name for (name,keys) in test.iteritems()]
- if "default" not in tests_to_run:
- logging.warn("Framework %s does not define a default test in benchmark_config.json", config['framework'])
-
- # Check that each test configuration is acceptable
- # Throw exceptions if a field is missing, or how to improve the field
- for test_name, test_keys in test.iteritems():
- # Validates the benchmark_config entry
- validate_test(test_name, test_keys, directory)
-
- # Map test type to a parsed FrameworkTestType object
- runTests = dict()
- for type_name, type_obj in benchmarker.types.iteritems():
- try:
- # Makes a FrameworkTestType object using some of the keys in config
- # e.g. JsonTestType uses "json_url"
- runTests[type_name] = type_obj.copy().parse(test_keys)
- except AttributeError as ae:
- # This is quite common - most tests don't support all types
- # Quietly log it and move on (debug logging is on in travis and this causes
- # ~1500 lines of debug, so I'm totally ignoring it for now)
- # logging.debug("Missing arguments for test type %s for framework test %s", type_name, test_name)
- pass
- # Sort the test types so they run in the order defined by test_order (plaintext last)
- sortedTestKeys = sorted(runTests.keys(), key=test_order)
- sortedRunTests = OrderedDict()
- for sortedTestKey in sortedTestKeys:
- sortedRunTests[sortedTestKey] = runTests[sortedTestKey]
- # Prefix all test names with framework except 'default' test
- # Done at the end so we may still refer to the primary test as `default` in benchmark config error messages
- if test_name == 'default':
- test_name = config['framework']
- else:
- test_name = "%s-%s" % (config['framework'], test_name)
- # By passing the entire set of keys, each FrameworkTest will have a member for each key
- tests.append(FrameworkTest(test_name, directory, benchmarker, sortedRunTests, test_keys))
- return tests
|