asset_load_benchmark_test.py

  1. """
  2. Copyright (c) Contributors to the Open 3D Engine Project.
  3. For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. SPDX-License-Identifier: Apache-2.0 OR MIT
  5. """
  6. """
  7. This file generates automated asset load benchmark timings for different asset loading scenarios. It uses the launcher
  8. so that it can be run on any platform. The test works as follows:
  9. - Starts with a series of predefined "benchmarksettings" asset files that are used by the Asset Processor to generate
  10. "benchmark" files for loading
  11. - These get copied to the current project, where AssetProcessor will process them
  12. - After they're processed, the Launcher is launched and tries to load each set of benchmark assets N times in a row
  13. - Once that's done, the results are gathered and summarized into a CSV
  14. """

import os
import shutil
import pytest
import logging
import re
from statistics import mean, median
# clear_windows_file_cache() references ctypes.Structure by its qualified name, so import the module
# itself in addition to the star imports below.
import ctypes
from ctypes import *
from ctypes.wintypes import *

pytest.importorskip("ly_test_tools")

import time
import ly_test_tools.launchers.launcher_helper as launcher_helper
import ly_remote_console.remote_console_commands as remote_console_commands
from ly_remote_console.remote_console_commands import (
    send_command_and_expect_response as send_command_and_expect_response,
)
import ly_test_tools.environment.waiter as waiter

from automatedtesting_shared.network_utils import check_for_listening_port
from automatedtesting_shared.file_utils import delete_check

remote_console_port = 4600
remote_command_timeout = 20
listener_timeout = 120

logger = logging.getLogger(__name__)

# Number of times to run each benchmark test.
benchmark_trials = 5

class Benchmark:
    """
    Data structure used to define the list of benchmark tests we want to run.

    :attr base_name: The base name of the .benchmarksettings test file. This is also used as
        a tag when logging.
    :attr num_copies: Number of copies to make of the .benchmarksettings test file; the
        generated .benchmark files will all get loaded separately in parallel.
    :attr load_times: The set of load times that have been captured.
    """

    def __init__(self, base_name, num_copies):
        self.base_name = base_name
        self.num_copies = num_copies
        self.load_times = []

@pytest.mark.parametrize("project", ["AutomatedTesting"])
@pytest.mark.parametrize("level", ["Simple"])
class TestBenchmarkAssetLoads(object):

    @pytest.fixture
    def launcher_instance(self, request, workspace, level):
        self.launcher = launcher_helper.create_game_launcher(workspace)

    @pytest.fixture
    def remote_console_instance(self, request):
        console = remote_console_commands.RemoteConsole()

        def teardown():
            if console.connected:
                console.stop()

        request.addfinalizer(teardown)

        return console

    @pytest.fixture(autouse=True)
    def setup_teardown(self, request, workspace, project):
        def teardown():
            # Clean up in the following order:
            # 1) Kill the interactive AP
            # 2) Remove our source .benchmarksettings files that we put in the project
            # 3) Run APBatch to process the missing source files to clean up our generated cached files
            # By going in this order, we guarantee that we won't hit any file locks and we can
            # easily detect when AP is done cleaning everything up for us.
            workspace.asset_processor.stop()
            logger.debug('Cleaning up the benchmark asset artifacts.')
            if self.remove_benchmark_assets(self.get_temp_benchmark_asset_directory(workspace)):
                self.run_ap_batch(workspace.asset_processor)

        request.addfinalizer(teardown)

        # On setup, make sure to clean up any benchmark assets that stuck around from a previous run.
        logger.debug('Cleaning up any previously-existing benchmark asset artifacts.')
        if self.remove_benchmark_assets(self.get_temp_benchmark_asset_directory(workspace)):
            self.run_ap_batch(workspace.asset_processor)

    def run_ap_batch(self, asset_processor):
        """
        Run AssetProcessorBatch to process any changed assets.

        :param asset_processor: The asset processor object
        """
        process_assets_success = asset_processor.batch_process(timeout=600, fastscan=True)
        assert process_assets_success, 'Assets did not process correctly'

    def get_temp_benchmark_asset_directory(self, workspace):
        return os.path.join(workspace.paths.engine_root(), workspace.project, 'benchmark')

    def create_benchmark_assets(self, dest_directory, benchmarks):
        """
        Create a directory of benchmarksettings assets in the selected project.

        :param dest_directory: The path of the temporary directory to create the benchmark assets in
        :param benchmarks: The list of Benchmark objects to process and create
        """
        source_directory = os.path.join(os.path.dirname(__file__), 'assets')
        logger.debug(f'Creating directory: {dest_directory}')
        os.makedirs(dest_directory, exist_ok=True)

        # For each Benchmark object, either just copy the .benchmarksettings file as-is with the same name
        # if we only want one copy, or else append _<num> to the name for each copy if we want multiples.
        for benchmark in benchmarks:
            if benchmark.num_copies > 1:
                for asset_num in range(0, benchmark.num_copies):
                    shutil.copy(os.path.join(source_directory, f'{benchmark.base_name}.benchmarksettings'),
                                os.path.join(dest_directory, f'{benchmark.base_name}_{asset_num}.benchmarksettings'))
            else:
                shutil.copy(os.path.join(source_directory, f'{benchmark.base_name}.benchmarksettings'), dest_directory)

    def remove_benchmark_assets(self, dest_directory):
        """
        Remove the project/benchmark directory that we created.

        :param dest_directory: The path to the temp directory that we created the source benchmark assets in
        :return: True if assets were found and removed, False if they weren't
        """
        return delete_check(dest_directory)

    def set_cvar(self, remote_console_instance, cvar_name, cvar_value):
        """
        Helper function to set a CVar value and wait for confirmation.
        """
        send_command_and_expect_response(remote_console_instance, f"{cvar_name} {cvar_value}",
                                         f"{cvar_name} : {cvar_value}", timeout=remote_command_timeout)

    def run_benchmark(self, remote_console_instance, num_trials, benchmark):
        """
        Run a series of benchmark trials.

        :param remote_console_instance: The remote console instance
        :param num_trials: The number of times to run the same benchmark in a row
        :param benchmark: The Benchmark object describing the set of assets to try loading
        """
        # The root benchmark asset is the one with sub ID 1, so it has '_00000001' at the end of the base name.
        name_suffix = '_00000001.benchmark'

        for trial in range(0, num_trials):
            # Set a unique log label to make it easier to parse for the results
            self.set_cvar(remote_console_instance, 'benchmarkLoadAssetLogLabel', f"{benchmark.base_name}_{trial}")

            # Clear the list of assets to load for the benchmark
            send_command_and_expect_response(remote_console_instance,
                                             "benchmarkClearAssetList",
                                             f"({benchmark.base_name}_{trial}) - Benchmark asset list cleared.",
                                             timeout=remote_command_timeout)

            # Build up the load asset command.
            # Note that there is a limit to the number of parameters we can send in each console command,
            # so we need to split it up if we have too many assets.
            max_params_per_line = 50
            if benchmark.num_copies > 1:
                benchmark_command = "benchmarkAddAssetsToList"
                for asset_num in range(0, benchmark.num_copies):
                    benchmark_command += f" benchmark/{benchmark.base_name}_{asset_num}{name_suffix}"
                    if (asset_num % max_params_per_line == (max_params_per_line - 1)) or (asset_num == benchmark.num_copies - 1):
                        send_command_and_expect_response(remote_console_instance,
                                                         benchmark_command,
                                                         f"({benchmark.base_name}_{trial}) - All requested assets added.",
                                                         timeout=remote_command_timeout)
                        benchmark_command = "benchmarkAddAssetsToList"
            else:
                send_command_and_expect_response(remote_console_instance,
                                                 f"benchmarkAddAssetsToList benchmark/{benchmark.base_name}{name_suffix}",
                                                 f"({benchmark.base_name}_{trial}) - All requested assets added.",
                                                 timeout=remote_command_timeout)

            # Attempt to clear the Windows file cache between each run
            self.clear_windows_file_cache()

            # Send the load command and wait for the benchmark to complete
            send_command_and_expect_response(remote_console_instance,
                                             "benchmarkLoadAssetList",
                                             f"({benchmark.base_name}_{trial}) - Benchmark run complete.",
                                             timeout=remote_command_timeout)

    def move_artifact(self, artifact_manager, artifact_name):
        """
        Move the artifact to our TestResults.
        """
        # Save the artifact to be examined later
        logger.debug('Preserving artifact: {}'.format(artifact_name))
        artifact_manager.save_artifact(artifact_name)
        os.remove(artifact_name)

    def extract_benchmark_results(self, launcher_log, extracted_log, benchmarks):
        """
        Extract the benchmark timing results, save them in a summary file, and put the timings into our benchmarks structure.

        :param launcher_log: the path to the game.log to parse
        :param extracted_log: the output path to save the summarized log
        :param benchmarks: the benchmarks structure to fill in with our timing results
        """
        logger.debug('Parsing benchmark results.')
        with open(extracted_log, 'w') as summary_log:
            with open(launcher_log) as log:
                for line in log:
                    for benchmark in benchmarks:
                        for trial in range(0, benchmark_trials):
                            if f'({benchmark.base_name}_{trial}) - Results:' in line:
                                # For each timing result we find, write it as-is into our summary log.
                                summary_log.write(line)
                                # Pull out the timing value and save it in the appropriate benchmark entry.
                                match = re.search("Time=([\\d]+)", line)
                                if match:
                                    benchmark.load_times.append(int(match.groups()[0]))
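
    # For reference, extract_benchmark_results only relies on two markers in each Game.log line:
    # the "(<label>) - Results:" tag and a "Time=<ms>" value. An illustrative matching line
    # (the surrounding text is not part of this file and may differ) looks like:
    #   ... (650mb_0) - Results: ... Time=1234 ...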

    def clear_windows_file_cache(self):
        """
        Clear the Windows file cache, using the undocumented NtSetSystemInformation call.
        For this to work, the process needs to be run with Administrator privileges.

        :return: True if file cache cleared, False if not.
        """

        # Define the LUID Windows class
        class LUID(ctypes.Structure):
            _fields_ = [
                ('low_part', c_uint32),
                ('high_part', c_int32),
            ]

        # Define an abbreviated version of the TOKEN_PRIVILEGES Windows class.
        # The correct version would use an array of dynamic size for LUID_AND_ATTRIBUTES inside of itself,
        # but since we're only ever using one entry, we can simplify the code and hard-code the definition at
        # a single entry.
        class TOKEN_PRIVILEGES(ctypes.Structure):
            _fields_ = [
                ('count', c_uint32),
                ('luid', LUID),
                ('attributes', c_uint32),
            ]

            # Force the construction to always fill in the array count with 1, since we've hard-coded a single
            # array definition into our structure definition.
            def __init__(self, luid, attributes):
                super(TOKEN_PRIVILEGES, self).__init__(1, luid, attributes)

        # Windows constants that we'll need
        TOKEN_QUERY = 0x0008
        TOKEN_ADJUST_PRIVILEGES = 0x0020
        SE_PRIVILEGE_ENABLED = 0x0002
        STATUS_PRIVILEGE_NOT_HELD = 0xc0000061
        ERROR_NOT_ALL_ASSIGNED = 1300
        SystemMemoryListInformation = 0x0050
        MemoryPurgeStandbyList = 4

        logger.info('Attempting to clear the Windows file cache')

        # Get the token for the currently-running process
        token_handle = HANDLE()
        result = windll.advapi32.OpenProcessToken(
            windll.kernel32.GetCurrentProcess(),
            TOKEN_QUERY | TOKEN_ADJUST_PRIVILEGES,
            byref(token_handle))
        if result == 0:
            logger.error('Failed on the OpenProcessToken call')

        # Get the LUID for the privilege we need to escalate
        luid = LUID()
        if result > 0:
            result = windll.advapi32.LookupPrivilegeValueW(None, 'SeProfileSingleProcessPrivilege', byref(luid))
            if result == 0:
                logger.error('Failed on the LookupPrivilegeValueW call')

        # Adjust the privilege for the current process so that we have access to clear the Windows file cache.
        # This will only succeed if we're running with Administrator privileges.
        if result > 0:
            tp = TOKEN_PRIVILEGES(luid, SE_PRIVILEGE_ENABLED)
            result = windll.advapi32.AdjustTokenPrivileges(token_handle, False, byref(tp), 0, None, None)
            if result > 0:
                # The AdjustTokenPrivileges call itself succeeded, but we need to separately check to see if the privilege was escalated
                errCode = windll.kernel32.GetLastError()
                if errCode == ERROR_NOT_ALL_ASSIGNED:
                    # This is an acceptable error, but it means we won't be able to clear the file cache.
                    # Don't fail the benchmark run due to it though.
                    windll.kernel32.CloseHandle(token_handle)
                    logger.info('Windows file cache will not be cleared, insufficient privileges.')
                    logger.info('If you wish to clear the cache when running this benchmark, run with Administrator privileges.')
                    return False
                elif errCode > 0:
                    # Flag that something unexpected happened.
                    result = 0
            if result == 0:
                logger.error('Failed on the AdjustTokenPrivileges call')

        # Privileges have been escalated, so clear the Windows file cache.
        if result > 0:
            command = c_uint32(MemoryPurgeStandbyList)
            status = c_int32(windll.ntdll.NtSetSystemInformation(SystemMemoryListInformation, byref(command), sizeof(command)))
            if status.value == c_int32(STATUS_PRIVILEGE_NOT_HELD).value:
                # This is an acceptable error, but it means we won't be able to clear the file cache.
                # Don't fail the benchmark run due to it though.
                windll.kernel32.CloseHandle(token_handle)
                logger.info('Windows file cache will not be cleared, insufficient privileges.')
                logger.info('If you wish to clear the cache when running this benchmark, run with Administrator privileges.')
                return False
            elif status.value < 0:
                # Flag that something unexpected happened.
                result = 0
                logger.error('Failed on the NtSetSystemInformation call')

        # Print the final results
        if result > 0:
            logger.info('Successfully cleared the Windows file cache.')
        else:
            errCode = windll.kernel32.GetLastError()
            logger.error(f'Unexpected error while attempting to clear the Windows file cache: {errCode}')

        windll.kernel32.CloseHandle(token_handle)

        return result > 0

    def base_test_BenchmarkAssetLoads(self, workspace, launcher, level, remote_console_instance, use_dev_mode):
        benchmarks = [
            # The choice of 650 MB as the benchmark size is arbitrary. It was picked as large enough to get reasonably stable
            # and measurable results even when running on an SSD, and because the "balanced tree" test causes a total size
            # of (2^N + 1)*AssetSize. Working with that constraint, and a base asset size of 10 MB, the main contenders were
            # either 330 MB, 650 MB, or 1290 MB.

            # The first set of benchmarks measures the load time effects of different asset dependency hierarchies.

            # Load 650 MB from a single root 10 MB asset where each asset has 2 dependent 10 MB assets 6 levels deep
            Benchmark('10mb_2x6', 1),
            # Load 650 MB from a single root 10 MB asset where each asset has 4 dependent 10 MB assets 3 levels deep
            Benchmark('10mb_4x3', 1),
            # Load 650 MB from a single root 10 MB asset that has 64 dependent 10 MB assets
            Benchmark('10mb_64x1', 1),
            # Load 650 MB from a single root 10 MB asset where each asset has 1 dependent 10 MB asset 64 levels deep
            # (Currently removed because it crashes Open 3D Engine, re-enable once LY can handle it - SPEC-1314)
            # Benchmark('10mb_1x64', 1),

            # The second set of benchmarks measures the load time effects of different quantities of parallel asset loads.

            # Load a single 650 MB asset
            Benchmark('650mb', 1),
            # Load 2 x 325 MB assets in parallel
            Benchmark('325mb', 2),
            # Load 3 x 216 MB assets in parallel
            Benchmark('216mb', 3),
            # Load 4 x 162 MB assets in parallel
            Benchmark('162mb', 4),
            # Load 65 x 10 MB assets in parallel
            Benchmark('10mb', 65),
            # Load 650 x 1 MB assets in parallel
            Benchmark('1mb', 650),
        ]

        # Create test assets
        logger.debug('Generating new benchmark asset artifacts.')
        self.create_benchmark_assets(self.get_temp_benchmark_asset_directory(workspace), benchmarks)
        self.run_ap_batch(workspace.asset_processor)

        # Set the launcher arguments to choose either dev mode or optimized mode for the streamer.
        if use_dev_mode:
            launcher.args = ['-NullRenderer', '-cl_streamerDevMode=true']
        else:
            launcher.args = ['-NullRenderer', '-cl_streamerDevMode=false']
        streamer_mode = 'devMode' if use_dev_mode else 'optMode'

        # Run the benchmarks in the launcher
        logger.debug(f'Running benchmark in launcher with args={launcher.args}')
        with launcher.start():
            waiter.wait_for(
                lambda: check_for_listening_port(remote_console_port),
                listener_timeout,
                exc=AssertionError("Port {} not listening.".format(remote_console_port)),
            )
            remote_console_instance.start(timeout=remote_command_timeout)

            # Set up the benchmarking parameters to use across all the tests
            self.set_cvar(remote_console_instance, "benchmarkLoadAssetTimeoutMs", "30000")
            self.set_cvar(remote_console_instance, "benchmarkLoadAssetPollMs", "20")
            self.set_cvar(remote_console_instance, "benchmarkLoadAssetDisplayProgress", "false")
            self.set_cvar(remote_console_instance, "cl_assetLoadWarningEnable", "false")

            # Run all our benchmarks
            for benchmark in benchmarks:
                self.run_benchmark(remote_console_instance, benchmark_trials, benchmark)

            # Give the launcher a chance to quit before forcibly stopping it.
            # This way our game.log will contain all the information we want instead of potentially truncating early.
            send_command_and_expect_response(remote_console_instance, "quit", "Executing console command 'quit'",
                                             timeout=remote_command_timeout)

            launcher.stop()
            remote_console_instance.stop()

        # Extract all the benchmark timings out of the game.log, save it into a summary log file,
        # and put the timings into the benchmarks list so we can gather summarized metrics
        launcher_log = os.path.join(launcher.workspace.paths.project_log(), 'Game.log')
        summary_log = f'load_benchmark_{level}_{streamer_mode}.log'
        self.extract_benchmark_results(launcher_log, summary_log, benchmarks)
        self.move_artifact(workspace.artifact_manager, summary_log)

        # Generate a CSV of the min/max/avg results for each test.
        results_csv = f'load_benchmark_{level}_{streamer_mode}.csv'
        with open(results_csv, 'w') as csv_file:
            csv_file.write('"Test Name","Num Assets Loaded","First Time (ms)","Last Time (ms)","Max Time (ms)","Min Time (ms)",' +
                           '"Avg Time (ms)","Median Time (ms)"\n')
            for benchmark in benchmarks:
                csv_file.write(f'{benchmark.base_name},{benchmark.num_copies},{benchmark.load_times[0]},{benchmark.load_times[-1]},' +
                               f'{max(benchmark.load_times)},{min(benchmark.load_times)},{mean(benchmark.load_times)},' +
                               f'{median(benchmark.load_times)}\n')
        self.move_artifact(workspace.artifact_manager, results_csv)

    @pytest.mark.SUITE_benchmark
    def test_BenchmarkAssetLoads_optMode(self, workspace, launcher, level, remote_console_instance):
        self.base_test_BenchmarkAssetLoads(workspace, launcher, level, remote_console_instance, use_dev_mode=False)

    @pytest.mark.SUITE_benchmark
    def test_BenchmarkAssetLoads_devMode(self, workspace, launcher, level, remote_console_instance):
        self.base_test_BenchmarkAssetLoads(workspace, launcher, level, remote_console_instance, use_dev_mode=True)