DEPRECATED-test-zstd-speed.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. #! /usr/bin/env python3
  2. # THIS BENCHMARK IS BEING REPLACED BY automated-benchmarking.py
  3. # ################################################################
  4. # Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  5. # All rights reserved.
  6. #
  7. # This source code is licensed under both the BSD-style license (found in the
  8. # LICENSE file in the root directory of this source tree) and the GPLv2 (found
  9. # in the COPYING file in the root directory of this source tree).
  10. # You may select, at your option, one of the above-listed licenses.
  11. # ##########################################################################
  12. # Limitations:
  13. # - doesn't support filenames with spaces
  14. # - dir1/zstd and dir2/zstd will be merged in a single results file
  15. import argparse
  16. import os # getloadavg
  17. import string
  18. import subprocess
  19. import time # strftime
  20. import traceback
  21. import hashlib
  22. import platform # system
  23. script_version = 'v1.1.2 (2017-03-26)'
  24. default_repo_url = 'https://github.com/facebook/zstd.git'
  25. working_dir_name = 'speedTest'
  26. working_path = os.getcwd() + '/' + working_dir_name # /path/to/zstd/tests/speedTest
  27. clone_path = working_path + '/' + 'zstd' # /path/to/zstd/tests/speedTest/zstd
  28. email_header = 'ZSTD_speedTest'
  29. pid = str(os.getpid())
  30. verbose = False
  31. clang_version = "unknown"
  32. gcc_version = "unknown"
  33. args = None
  34. def hashfile(hasher, fname, blocksize=65536):
  35. with open(fname, "rb") as f:
  36. for chunk in iter(lambda: f.read(blocksize), b""):
  37. hasher.update(chunk)
  38. return hasher.hexdigest()
  39. def log(text):
  40. print(time.strftime("%Y/%m/%d %H:%M:%S") + ' - ' + text)
  41. def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True):
  42. if print_command:
  43. log("> " + command)
  44. popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell, cwd=execute.cwd)
  45. stdout_lines, stderr_lines = popen.communicate(timeout=args.timeout)
  46. stderr_lines = stderr_lines.decode("utf-8")
  47. stdout_lines = stdout_lines.decode("utf-8")
  48. if print_output:
  49. if stdout_lines:
  50. print(stdout_lines)
  51. if stderr_lines:
  52. print(stderr_lines)
  53. if popen.returncode is not None and popen.returncode != 0:
  54. if stderr_lines and not print_output and print_error:
  55. print(stderr_lines)
  56. raise RuntimeError(stdout_lines + stderr_lines)
  57. return (stdout_lines + stderr_lines).splitlines()
  58. execute.cwd = None
  59. def does_command_exist(command):
  60. try:
  61. execute(command, verbose, False, False)
  62. except Exception:
  63. return False
  64. return True
  65. def send_email(emails, topic, text, have_mutt, have_mail):
  66. logFileName = working_path + '/' + 'tmpEmailContent'
  67. with open(logFileName, "w") as myfile:
  68. myfile.writelines(text)
  69. myfile.close()
  70. if have_mutt:
  71. execute('mutt -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose)
  72. elif have_mail:
  73. execute('mail -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose)
  74. else:
  75. log("e-mail cannot be sent (mail or mutt not found)")
  76. def send_email_with_attachments(branch, commit, last_commit, args, text, results_files,
  77. logFileName, have_mutt, have_mail):
  78. with open(logFileName, "w") as myfile:
  79. myfile.writelines(text)
  80. myfile.close()
  81. email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \
  82. % (email_header, pid, branch, commit, last_commit,
  83. args.lowerLimit, args.ratioLimit)
  84. if have_mutt:
  85. execute('mutt -s "' + email_topic + '" ' + args.emails + ' -a ' + results_files
  86. + ' < ' + logFileName)
  87. elif have_mail:
  88. execute('mail -s "' + email_topic + '" ' + args.emails + ' < ' + logFileName)
  89. else:
  90. log("e-mail cannot be sent (mail or mutt not found)")
  91. def git_get_branches():
  92. execute('git fetch -p', verbose)
  93. branches = execute('git branch -rl', verbose)
  94. output = []
  95. for line in branches:
  96. if ("HEAD" not in line) and ("coverity_scan" not in line) and ("gh-pages" not in line):
  97. output.append(line.strip())
  98. return output
  99. def git_get_changes(branch, commit, last_commit):
  100. fmt = '--format="%h: (%an) %s, %ar"'
  101. if last_commit is None:
  102. commits = execute('git log -n 10 %s %s' % (fmt, commit))
  103. else:
  104. commits = execute('git --no-pager log %s %s..%s' % (fmt, last_commit, commit))
  105. return str('Changes in %s since %s:\n' % (branch, last_commit)) + '\n'.join(commits)
  106. def get_last_results(resultsFileName):
  107. if not os.path.isfile(resultsFileName):
  108. return None, None, None, None
  109. commit = None
  110. csize = []
  111. cspeed = []
  112. dspeed = []
  113. with open(resultsFileName, 'r') as f:
  114. for line in f:
  115. words = line.split()
  116. if len(words) <= 4: # branch + commit + compilerVer + md5
  117. commit = words[1]
  118. csize = []
  119. cspeed = []
  120. dspeed = []
  121. if (len(words) == 8) or (len(words) == 9): # results: "filename" or "XX files"
  122. csize.append(int(words[1]))
  123. cspeed.append(float(words[3]))
  124. dspeed.append(float(words[5]))
  125. return commit, csize, cspeed, dspeed
  126. def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName,
  127. testFilePath, fileName, last_csize, last_cspeed, last_dspeed):
  128. sleepTime = 30
  129. while os.getloadavg()[0] > args.maxLoadAvg:
  130. log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds"
  131. % (os.getloadavg()[0], args.maxLoadAvg, sleepTime))
  132. time.sleep(sleepTime)
  133. start_load = str(os.getloadavg())
  134. osType = platform.system()
  135. if osType == 'Linux':
  136. cpuSelector = "taskset --cpu-list 0"
  137. else:
  138. cpuSelector = ""
  139. if args.dictionary:
  140. result = execute('%s programs/%s -rqi5b1e%s -D %s %s' % (cpuSelector, executableName, args.lastCLevel, args.dictionary, testFilePath), print_output=True)
  141. else:
  142. result = execute('%s programs/%s -rqi5b1e%s %s' % (cpuSelector, executableName, args.lastCLevel, testFilePath), print_output=True)
  143. end_load = str(os.getloadavg())
  144. linesExpected = args.lastCLevel + 1
  145. if len(result) != linesExpected:
  146. raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result)))
  147. with open(resultsFileName, "a") as myfile:
  148. myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum))
  149. myfile.write('\n'.join(result) + '\n')
  150. myfile.close()
  151. if (last_cspeed == None):
  152. log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName))
  153. return ""
  154. commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
  155. text = ""
  156. for i in range(0, min(len(cspeed), len(last_cspeed))):
  157. print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], float(last_csize[i])/csize[i], fileName))
  158. if (cspeed[i]/last_cspeed[i] < args.lowerLimit):
  159. text += "WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], fileName)
  160. if (dspeed[i]/last_dspeed[i] < args.lowerLimit):
  161. text += "WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, i+1, dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], fileName)
  162. if (float(last_csize[i])/csize[i] < args.ratioLimit):
  163. text += "WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, i+1, csize[i], last_csize[i], float(last_csize[i])/csize[i], fileName)
  164. if text:
  165. text = args.message + ("\nmaxLoadAvg=%s load average at start=%s end=%s\n%s last_commit=%s md5=%s\n" % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)) + text
  166. return text
  167. def update_config_file(branch, commit):
  168. last_commit = None
  169. commitFileName = working_path + "/commit_" + branch.replace("/", "_") + ".txt"
  170. if os.path.isfile(commitFileName):
  171. with open(commitFileName, 'r') as infile:
  172. last_commit = infile.read()
  173. with open(commitFileName, 'w') as outfile:
  174. outfile.write(commit)
  175. return last_commit
  176. def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName):
  177. last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
  178. if not args.dry_run:
  179. text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
  180. if text:
  181. log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit))
  182. text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
  183. return text
  184. def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail):
  185. local_branch = branch.split('/')[1]
  186. version = local_branch.rpartition('-')[2] + '_' + commit
  187. if not args.dry_run:
  188. execute('make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT=%s" && ' % version +
  189. 'mv programs/zstd programs/zstd_clang && ' +
  190. 'make -C programs clean zstd zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % version)
  191. md5_zstd = hashfile(hashlib.md5(), clone_path + '/programs/zstd')
  192. md5_zstd32 = hashfile(hashlib.md5(), clone_path + '/programs/zstd32')
  193. md5_zstd_clang = hashfile(hashlib.md5(), clone_path + '/programs/zstd_clang')
  194. print("md5(zstd)=%s\nmd5(zstd32)=%s\nmd5(zstd_clang)=%s" % (md5_zstd, md5_zstd32, md5_zstd_clang))
  195. print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version))
  196. logFileName = working_path + "/log_" + branch.replace("/", "_") + ".txt"
  197. text_to_send = []
  198. results_files = ""
  199. if args.dictionary:
  200. dictName = args.dictionary.rpartition('/')[2]
  201. else:
  202. dictName = None
  203. for filePath in testFilePaths:
  204. fileName = filePath.rpartition('/')[2]
  205. if dictName:
  206. resultsFileName = working_path + "/" + dictName.replace(".", "_") + "_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
  207. else:
  208. resultsFileName = working_path + "/results_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
  209. text = double_check(branch, commit, args, 'zstd', md5_zstd, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName)
  210. if text:
  211. text_to_send.append(text)
  212. results_files += resultsFileName + " "
  213. resultsFileName = working_path + "/results32_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
  214. text = double_check(branch, commit, args, 'zstd32', md5_zstd32, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName)
  215. if text:
  216. text_to_send.append(text)
  217. results_files += resultsFileName + " "
  218. resultsFileName = working_path + "/resultsClang_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt"
  219. text = double_check(branch, commit, args, 'zstd_clang', md5_zstd_clang, 'clang_version='+clang_version, resultsFileName, filePath, fileName)
  220. if text:
  221. text_to_send.append(text)
  222. results_files += resultsFileName + " "
  223. if text_to_send:
  224. send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail)
  225. if __name__ == '__main__':
  226. parser = argparse.ArgumentParser()
  227. parser.add_argument('testFileNames', help='file or directory names list for speed benchmark')
  228. parser.add_argument('emails', help='list of e-mail addresses to send warnings')
  229. parser.add_argument('--dictionary', '-D', help='path to the dictionary')
  230. parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="")
  231. parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url)
  232. parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98)
  233. parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999)
  234. parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75)
  235. parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5)
  236. parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300)
  237. parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800)
  238. parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False)
  239. parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False)
  240. args = parser.parse_args()
  241. verbose = args.verbose
  242. # check if test files are accessible
  243. testFileNames = args.testFileNames.split()
  244. testFilePaths = []
  245. for fileName in testFileNames:
  246. fileName = os.path.expanduser(fileName)
  247. if os.path.isfile(fileName) or os.path.isdir(fileName):
  248. testFilePaths.append(os.path.abspath(fileName))
  249. else:
  250. log("ERROR: File/directory not found: " + fileName)
  251. exit(1)
  252. # check if dictionary is accessible
  253. if args.dictionary:
  254. args.dictionary = os.path.abspath(os.path.expanduser(args.dictionary))
  255. if not os.path.isfile(args.dictionary):
  256. log("ERROR: Dictionary not found: " + args.dictionary)
  257. exit(1)
  258. # check availability of e-mail senders
  259. have_mutt = does_command_exist("mutt -h")
  260. have_mail = does_command_exist("mail -V")
  261. if not have_mutt and not have_mail:
  262. log("ERROR: e-mail senders 'mail' or 'mutt' not found")
  263. exit(1)
  264. clang_version = execute("clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0];
  265. gcc_version = execute("gcc -dumpversion", verbose)[0];
  266. if verbose:
  267. print("PARAMETERS:\nrepoURL=%s" % args.repoURL)
  268. print("working_path=%s" % working_path)
  269. print("clone_path=%s" % clone_path)
  270. print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths))
  271. print("message=%s" % args.message)
  272. print("emails=%s" % args.emails)
  273. print("dictionary=%s" % args.dictionary)
  274. print("maxLoadAvg=%s" % args.maxLoadAvg)
  275. print("lowerLimit=%s" % args.lowerLimit)
  276. print("ratioLimit=%s" % args.ratioLimit)
  277. print("lastCLevel=%s" % args.lastCLevel)
  278. print("sleepTime=%s" % args.sleepTime)
  279. print("timeout=%s" % args.timeout)
  280. print("dry_run=%s" % args.dry_run)
  281. print("verbose=%s" % args.verbose)
  282. print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail))
  283. # clone ZSTD repo if needed
  284. if not os.path.isdir(working_path):
  285. os.mkdir(working_path)
  286. if not os.path.isdir(clone_path):
  287. execute.cwd = working_path
  288. execute('git clone ' + args.repoURL)
  289. if not os.path.isdir(clone_path):
  290. log("ERROR: ZSTD clone not found: " + clone_path)
  291. exit(1)
  292. execute.cwd = clone_path
  293. # check if speedTest.pid already exists
  294. pidfile = "./speedTest.pid"
  295. if os.path.isfile(pidfile):
  296. log("ERROR: %s already exists, exiting" % pidfile)
  297. exit(1)
  298. send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
  299. with open(pidfile, 'w') as the_file:
  300. the_file.write(pid)
  301. branch = ""
  302. commit = ""
  303. first_time = True
  304. while True:
  305. try:
  306. if first_time:
  307. first_time = False
  308. else:
  309. time.sleep(args.sleepTime)
  310. loadavg = os.getloadavg()[0]
  311. if (loadavg <= args.maxLoadAvg):
  312. branches = git_get_branches()
  313. for branch in branches:
  314. commit = execute('git show -s --format=%h ' + branch, verbose)[0]
  315. last_commit = update_config_file(branch, commit)
  316. if commit == last_commit:
  317. log("skipping branch %s: head %s already processed" % (branch, commit))
  318. else:
  319. log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit))
  320. execute('git checkout -- . && git checkout ' + branch)
  321. print(git_get_changes(branch, commit, last_commit))
  322. test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail)
  323. else:
  324. log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg))
  325. if verbose:
  326. log("sleep for %s seconds" % args.sleepTime)
  327. except Exception as e:
  328. stack = traceback.format_exc()
  329. email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit)
  330. send_email(args.emails, email_topic, stack, have_mutt, have_mail)
  331. print(stack)
  332. except KeyboardInterrupt:
  333. os.unlink(pidfile)
  334. send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
  335. exit(0)