Browse Source

Merge pull request #71472 from EIREXE/nvapi-threaded-optimization

Disable NVIDIA's threaded optimization on Windows
Rémi Verschelde 2 năm trước cách đây
mục cha
commit
5288a69a22

+ 5 - 0
COPYRIGHT.txt

@@ -397,6 +397,11 @@ Comment: Multi-channel signed distance field generator
 Copyright: 2016-2022, Viktor Chlumsky
 License: MIT
 
+Files: ./thirdparty/nvapi/nvapi_minimal.h
+Comment: Stripped down version of "nvapi.h" from the NVIDIA NVAPI SDK
+Copyright: 2019-2022, NVIDIA Corporation
+License: Expat
+
 Files: ./thirdparty/oidn/
 Comment: Intel Open Image Denoise
 Copyright: 2009-2019, Intel Corporation

+ 4 - 0
doc/classes/ProjectSettings.xml

@@ -2324,6 +2324,10 @@
 		<member name="rendering/gl_compatibility/item_buffer_size" type="int" setter="" getter="" default="16384">
 			Maximum number of canvas items commands that can be drawn in a single viewport update. If more render commands are issued they will be ignored. Decreasing this limit may improve performance on bandwidth limited devices. Increase this limit if you find that not all objects are being drawn in a frame.
 		</member>
+		<member name="rendering/gl_compatibility/nvidia_disable_threaded_optimization" type="bool" setter="" getter="" default="true">
+			If [code]true[/code], disables the threaded optimization feature from the NVIDIA drivers, which is known to cause stuttering in most OpenGL applications.
+			[b]Note:[/b] This setting only works on Windows, as threaded optimization is disabled by default on other platforms.
+		</member>
 		<member name="rendering/global_illumination/gi/use_half_resolution" type="bool" setter="" getter="" default="false">
 			If [code]true[/code], renders [VoxelGI] and SDFGI ([member Environment.sdfgi_enabled]) buffers at halved resolution (e.g. 960×540 when the viewport size is 1920×1080). This improves performance significantly when VoxelGI or SDFGI is enabled, at the cost of artifacts that may be visible on polygon edges. The loss in quality becomes less noticeable as the viewport resolution increases. [LightmapGI] rendering is not affected by this setting.
 			[b]Note:[/b] This property is only read when the project starts. To set half-resolution GI at run-time, call [method RenderingServer.gi_set_use_half_resolution] instead.

+ 1 - 0
main/main.cpp

@@ -1647,6 +1647,7 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph
 		GLOBAL_DEF(PropertyInfo(Variant::STRING, "rendering/gl_compatibility/driver.android", PROPERTY_HINT_ENUM, driver_hints), default_driver);
 		GLOBAL_DEF(PropertyInfo(Variant::STRING, "rendering/gl_compatibility/driver.ios", PROPERTY_HINT_ENUM, driver_hints), default_driver);
 		GLOBAL_DEF(PropertyInfo(Variant::STRING, "rendering/gl_compatibility/driver.macos", PROPERTY_HINT_ENUM, driver_hints), default_driver);
+		GLOBAL_DEF_RST("rendering/gl_compatibility/nvidia_disable_threaded_optimization", true);
 	}
 
 	// Start with RenderingDevice-based backends. Should be included if any RD driver present.

+ 171 - 0
platform/windows/gl_manager_windows.cpp

@@ -32,6 +32,11 @@
 
 #if defined(WINDOWS_ENABLED) && defined(GLES3_ENABLED)
 
+#include "core/config/project_settings.h"
+#include "core/version.h"
+
+#include "thirdparty/nvapi/nvapi_minimal.h"
+
 #include <dwmapi.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -64,6 +69,171 @@ static String format_error_message(DWORD id) {
 	return msg;
 }
 
+// ID and values of the NVIDIA driver setting that is written by _nvapi_disable_threaded_optimization().
+constexpr int OGL_THREAD_CONTROL_ID = 0x20C1221E;
+constexpr int OGL_THREAD_CONTROL_ENABLE = 0x00000001;
+constexpr int OGL_THREAD_CONTROL_DISABLE = 0x00000002;
+
+// Signatures of the NVAPI entry points, which are resolved at runtime through nvapi_QueryInterface.
+using NvAPI_Initialize_t = int(__cdecl *)();
+using NvAPI_Unload_t = int(__cdecl *)();
+using NvAPI_GetErrorMessage_t = int(__cdecl *)(unsigned int, NvAPI_ShortString);
+using NvAPI_DRS_CreateSession_t = int(__cdecl *)(NvDRSSessionHandle *);
+using NvAPI_DRS_DestroySession_t = int(__cdecl *)(NvDRSSessionHandle);
+using NvAPI_DRS_LoadSettings_t = int(__cdecl *)(NvDRSSessionHandle);
+using NvAPI_DRS_CreateProfile_t = int(__cdecl *)(NvDRSSessionHandle, NVDRS_PROFILE *, NvDRSProfileHandle *);
+using NvAPI_DRS_CreateApplication_t = int(__cdecl *)(NvDRSSessionHandle, NvDRSProfileHandle, NVDRS_APPLICATION *);
+using NvAPI_DRS_SaveSettings_t = int(__cdecl *)(NvDRSSessionHandle);
+using NvAPI_DRS_SetSetting_t = int(__cdecl *)(NvDRSSessionHandle, NvDRSProfileHandle, NVDRS_SETTING *);
+using NvAPI_DRS_FindProfileByName_t = int(__cdecl *)(NvDRSSessionHandle, NvAPI_UnicodeString, NvDRSProfileHandle *);
+
+// Kept at file scope so that nvapi_err_check() can turn status codes into readable messages.
+NvAPI_GetErrorMessage_t NvAPI_GetErrorMessage__;
+
+// Checks the status code returned by an NVAPI call.
+//
+// Returns `true` when `status` is 0. Otherwise returns `false` and, if verbose output is
+// enabled, prints `msg` followed by the error description reported by NVAPI.
+// `msg` is `const` because callers pass string literals.
+static bool nvapi_err_check(const char *msg, int status) {
+	if (status == 0) {
+		return true;
+	}
+
+	if (OS::get_singleton()->is_stdout_verbose()) {
+		NvAPI_ShortString err_desc = { 0 };
+		// The error message entry point is resolved at runtime, so it may not be available.
+		// In that case the description is left empty and only the code is meaningful.
+		if (NvAPI_GetErrorMessage__ != nullptr) {
+			NvAPI_GetErrorMessage__(status, err_desc);
+		}
+		print_verbose(vformat("%s: %s(code %d)", msg, err_desc, status));
+	}
+	return false;
+}
+
+// Applies the "rendering/gl_compatibility/nvidia_disable_threaded_optimization" project setting
+// to the NVIDIA driver through the NVAPI driver settings (DRS) interface.
+//
+// On Windows we have to disable threaded optimization when using NVIDIA graphics cards
+// to avoid stuttering, see https://github.com/microsoft/vscode-cpptools/issues/6592
+// also see https://github.com/Ryujinx/Ryujinx/blob/master/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs
+//
+// A driver profile named "<application name> Nvidia Profile" is looked up, and created together
+// with an application entry for the current executable if it can't be found. The OpenGL thread
+// control setting is then written to that profile and saved. When the project setting is
+// `false`, threaded optimization is explicitly enabled instead of being left untouched.
+//
+// This is best-effort: failures are only reported through verbose output.
+void GLManager_Windows::_nvapi_disable_threaded_optimization() {
+	HMODULE nvapi = nullptr;
+#ifdef _WIN64
+	nvapi = LoadLibraryA("nvapi64.dll");
+#else
+	nvapi = LoadLibraryA("nvapi.dll");
+#endif
+
+	if (nvapi == nullptr) {
+		// NVAPI could not be loaded (presumably no NVIDIA driver is installed), nothing to do.
+		return;
+	}
+
+	void *(__cdecl * NvAPI_QueryInterface)(unsigned int interface_id) = nullptr;
+
+	NvAPI_QueryInterface = (void *(__cdecl *)(unsigned int))GetProcAddress(nvapi, "nvapi_QueryInterface");
+
+	if (NvAPI_QueryInterface == nullptr) {
+		print_verbose("Error getting NVAPI NvAPI_QueryInterface");
+		return;
+	}
+
+	// Setup NVAPI function pointers
+	NvAPI_Initialize_t NvAPI_Initialize = (NvAPI_Initialize_t)NvAPI_QueryInterface(0x0150E828);
+	NvAPI_GetErrorMessage__ = (NvAPI_GetErrorMessage_t)NvAPI_QueryInterface(0x6C2D048C);
+	NvAPI_DRS_CreateSession_t NvAPI_DRS_CreateSession = (NvAPI_DRS_CreateSession_t)NvAPI_QueryInterface(0x0694D52E);
+	NvAPI_DRS_DestroySession_t NvAPI_DRS_DestroySession = (NvAPI_DRS_DestroySession_t)NvAPI_QueryInterface(0xDAD9CFF8);
+	NvAPI_Unload_t NvAPI_Unload = (NvAPI_Unload_t)NvAPI_QueryInterface(0xD22BDD7E);
+	NvAPI_DRS_LoadSettings_t NvAPI_DRS_LoadSettings = (NvAPI_DRS_LoadSettings_t)NvAPI_QueryInterface(0x375DBD6B);
+	NvAPI_DRS_CreateProfile_t NvAPI_DRS_CreateProfile = (NvAPI_DRS_CreateProfile_t)NvAPI_QueryInterface(0xCC176068);
+	NvAPI_DRS_CreateApplication_t NvAPI_DRS_CreateApplication = (NvAPI_DRS_CreateApplication_t)NvAPI_QueryInterface(0x4347A9DE);
+	NvAPI_DRS_SaveSettings_t NvAPI_DRS_SaveSettings = (NvAPI_DRS_SaveSettings_t)NvAPI_QueryInterface(0xFCBC7E14);
+	NvAPI_DRS_SetSetting_t NvAPI_DRS_SetSetting = (NvAPI_DRS_SetSetting_t)NvAPI_QueryInterface(0x577DD202);
+	NvAPI_DRS_FindProfileByName_t NvAPI_DRS_FindProfileByName = (NvAPI_DRS_FindProfileByName_t)NvAPI_QueryInterface(0x7E4A9A0B);
+
+	// Every entry point is called unconditionally below, so bail out instead of
+	// calling through a null pointer if the driver doesn't expose one of them.
+	if (NvAPI_Initialize == nullptr || NvAPI_GetErrorMessage__ == nullptr || NvAPI_DRS_CreateSession == nullptr ||
+			NvAPI_DRS_DestroySession == nullptr || NvAPI_Unload == nullptr || NvAPI_DRS_LoadSettings == nullptr ||
+			NvAPI_DRS_CreateProfile == nullptr || NvAPI_DRS_CreateApplication == nullptr || NvAPI_DRS_SaveSettings == nullptr ||
+			NvAPI_DRS_SetSetting == nullptr || NvAPI_DRS_FindProfileByName == nullptr) {
+		print_verbose("NVAPI: Error getting one or more NVAPI entry points");
+		return;
+	}
+
+	if (!nvapi_err_check("NVAPI: Init failed", NvAPI_Initialize())) {
+		return;
+	}
+
+	print_verbose("NVAPI: Init OK!");
+
+	NvDRSSessionHandle session_handle;
+
+	if (!nvapi_err_check("NVAPI: Error creating DRS session", NvAPI_DRS_CreateSession(&session_handle))) {
+		NvAPI_Unload();
+		return;
+	}
+
+	if (!nvapi_err_check("NVAPI: Error loading DRS settings", NvAPI_DRS_LoadSettings(session_handle))) {
+		NvAPI_DRS_DestroySession(session_handle);
+		NvAPI_Unload();
+		return;
+	}
+
+	String app_executable_name = OS::get_singleton()->get_executable_path().get_file();
+	String app_friendly_name = GLOBAL_GET("application/config/name");
+	// We need a name anyways, so let's use the engine name if an application name is not available
+	// (this is used mostly by the Project Manager)
+	if (app_friendly_name.is_empty()) {
+		app_friendly_name = VERSION_NAME;
+	}
+	String app_profile_name = app_friendly_name + " Nvidia Profile";
+	Char16String app_profile_name_u16 = app_profile_name.utf16();
+	Char16String app_executable_name_u16 = app_executable_name.utf16();
+	Char16String app_friendly_name_u16 = app_friendly_name.utf16();
+
+	// Copies a UTF-16 string into a zero-initialized NVAPI string buffer. Overlong strings are
+	// truncated so that the copy never overruns the buffer and the last element stays zero.
+	auto copy_to_nvapi_string = [](NvU16 *r_dest, const Char16String &p_src) {
+		const size_t max_chars = NVAPI_UNICODE_STRING_MAX - 1;
+		size_t char_count = (size_t)p_src.size();
+		if (char_count > max_chars) {
+			char_count = max_chars;
+		}
+		memcpy(r_dest, p_src.get_data(), sizeof(char16_t) * char_count);
+	};
+
+	NvDRSProfileHandle profile_handle = nullptr;
+
+	int status = NvAPI_DRS_FindProfileByName(session_handle, (NvU16 *)(app_profile_name_u16.ptrw()), &profile_handle);
+
+	if (status != 0) {
+		print_verbose("NVAPI: Profile not found, creating....");
+
+		// Zero-initialize so that the fields which are not set below don't contain stack garbage.
+		NVDRS_PROFILE profile_info = {};
+		profile_info.version = NVDRS_PROFILE_VER;
+		profile_info.isPredefined = 0;
+		copy_to_nvapi_string(profile_info.profileName, app_profile_name_u16);
+
+		if (!nvapi_err_check("NVAPI: Error creating profile", NvAPI_DRS_CreateProfile(session_handle, &profile_info, &profile_handle))) {
+			NvAPI_DRS_DestroySession(session_handle);
+			NvAPI_Unload();
+			return;
+		}
+
+		// Zero-initialize: `launcher`, `fileInFolder` and `commandLine` are meant to be empty
+		// strings and `reserved` has to be 0.
+		NVDRS_APPLICATION_V4 app = {};
+		app.version = NVDRS_APPLICATION_VER_V4;
+		app.isPredefined = 0;
+		// NOTE(review): `isCommandLine` is set while `commandLine` is left empty; confirm that this is intended.
+		app.isMetro = 1;
+		app.isCommandLine = 1;
+		copy_to_nvapi_string(app.appName, app_executable_name_u16);
+		copy_to_nvapi_string(app.userFriendlyName, app_friendly_name_u16);
+
+		if (!nvapi_err_check("NVAPI: Error creating application", NvAPI_DRS_CreateApplication(session_handle, profile_handle, &app))) {
+			NvAPI_DRS_DestroySession(session_handle);
+			NvAPI_Unload();
+			return;
+		}
+	}
+
+	// Zero-initialize so that `settingName` is an empty string instead of stack garbage.
+	NVDRS_SETTING setting = {};
+	setting.version = NVDRS_SETTING_VER;
+	setting.settingId = OGL_THREAD_CONTROL_ID;
+	setting.settingType = NVDRS_DWORD_TYPE;
+	setting.settingLocation = NVDRS_CURRENT_PROFILE_LOCATION;
+	setting.isCurrentPredefined = 0;
+	setting.isPredefinedValid = 0;
+	// The value is always written, so turning the project setting off re-enables
+	// threaded optimization for a profile that was modified by a previous run.
+	int thread_control_val = OGL_THREAD_CONTROL_DISABLE;
+	if (!GLOBAL_GET("rendering/gl_compatibility/nvidia_disable_threaded_optimization")) {
+		thread_control_val = OGL_THREAD_CONTROL_ENABLE;
+	}
+	setting.u32CurrentValue = thread_control_val;
+	setting.u32PredefinedValue = thread_control_val;
+
+	if (!nvapi_err_check("NVAPI: Error calling NvAPI_DRS_SetSetting", NvAPI_DRS_SetSetting(session_handle, profile_handle, &setting))) {
+		NvAPI_DRS_DestroySession(session_handle);
+		NvAPI_Unload();
+		return;
+	}
+
+	if (!nvapi_err_check("NVAPI: Error saving settings", NvAPI_DRS_SaveSettings(session_handle))) {
+		NvAPI_DRS_DestroySession(session_handle);
+		NvAPI_Unload();
+		return;
+	}
+	if (thread_control_val == OGL_THREAD_CONTROL_DISABLE) {
+		print_verbose("NVAPI: Disabled OpenGL threaded optimization successfully");
+	} else {
+		print_verbose("NVAPI: Enabled OpenGL threaded optimization successfully");
+	}
+	// NOTE(review): unlike the error paths, NvAPI_Unload() is not called on success; confirm that this is intended.
+	NvAPI_DRS_DestroySession(session_handle);
+}
+
 int GLManager_Windows::_find_or_create_display(GLWindow &win) {
 	// find display NYI, only 1 supported so far
 	if (_displays.size()) {
@@ -295,6 +465,7 @@ void GLManager_Windows::swap_buffers() {
 }
 
 Error GLManager_Windows::initialize() {
+	_nvapi_disable_threaded_optimization(); // Returns nothing, so a failure in the NVIDIA driver tweak never changes the result below.
 	return OK;
 }
 

+ 1 - 0
platform/windows/gl_manager_windows.h

@@ -89,6 +89,7 @@ private:
 	ContextType context_type;
 
 private:
+	void _nvapi_disable_threaded_optimization();
 	int _find_or_create_display(GLWindow &win);
 	Error _create_context(GLWindow &win, GLDisplay &gl_display);
 

+ 9 - 0
thirdparty/README.md

@@ -539,6 +539,15 @@ Files extracted from the upstream source:
 - `LICENSE.txt`
 
 
+## nvapi
+
+- Upstream: http://download.nvidia.com/XFree86/nvapi-open-source-sdk
+- Version: R525
+- License: MIT
+
+- `nvapi_minimal.h` was created by using `nvapi.h` from upstream and removing unnecessary code.
+
+
 ## oidn
 
 - Upstream: https://github.com/OpenImageDenoise/oidn

+ 175 - 0
thirdparty/nvapi/nvapi_minimal.h

@@ -0,0 +1,175 @@
+#ifndef NVAPI_MINIMAL_H
+#define NVAPI_MINIMAL_H
+
+// Provides the fixed-width integer types used below, so that this header
+// doesn't depend on what was included before it.
+#include <stdint.h>
+
+typedef uint32_t NvU32;
+typedef uint16_t NvU16;
+typedef uint8_t NvU8;
+
+// Builds a structure version value: the structure size in the low 16 bits, `ver` shifted above them.
+#define MAKE_NVAPI_VERSION(typeName,ver) (NvU32)(sizeof(typeName) | ((ver)<<16))
+
+// Declares `name` as a pointer to a distinct dummy struct, so that handle types can't be mixed up.
+#define NV_DECLARE_HANDLE(name) struct name##__ { int unused; }; typedef struct name##__ *name
+
+NV_DECLARE_HANDLE(NvDRSSessionHandle);
+NV_DECLARE_HANDLE(NvDRSProfileHandle);
+
+#define NVAPI_UNICODE_STRING_MAX                             2048
+#define NVAPI_BINARY_DATA_MAX                                4096
+typedef NvU16 NvAPI_UnicodeString[NVAPI_UNICODE_STRING_MAX];
+typedef char NvAPI_ShortString[64];
+
+#define NVAPI_SETTING_MAX_VALUES                             100
+
+typedef enum _NVDRS_SETTING_TYPE // Kind of value held by a setting (used by the settingType fields of the structures below).
+{
+     NVDRS_DWORD_TYPE,   //!< 32-bit value, accessed through the u32* union members.
+     NVDRS_BINARY_TYPE,  //!< NVDRS_BINARY_SETTING value, accessed through the binary* union members.
+     NVDRS_STRING_TYPE,
+     NVDRS_WSTRING_TYPE  //!< Unicode string value (presumably the wsz* union members).
+} NVDRS_SETTING_TYPE;
+
+typedef enum _NVDRS_SETTING_LOCATION // Where a setting's current value comes from (see NVDRS_SETTING_V1::settingLocation).
+{
+     NVDRS_CURRENT_PROFILE_LOCATION,
+     NVDRS_GLOBAL_PROFILE_LOCATION,
+     NVDRS_BASE_PROFILE_LOCATION,
+     NVDRS_DEFAULT_PROFILE_LOCATION
+} NVDRS_SETTING_LOCATION;
+
+typedef struct _NVDRS_GPU_SUPPORT // 32 one-bit flags; used as the read-only NVDRS_PROFILE_V1::gpuSupport field.
+{
+    NvU32 geforce    :  1;
+    NvU32 quadro     :  1;
+    NvU32 nvs        :  1;
+    NvU32 reserved4  :  1;
+    NvU32 reserved5  :  1;
+    NvU32 reserved6  :  1;
+    NvU32 reserved7  :  1;
+    NvU32 reserved8  :  1;
+    NvU32 reserved9  :  1;
+    NvU32 reserved10 :  1;
+    NvU32 reserved11 :  1;
+    NvU32 reserved12 :  1;
+    NvU32 reserved13 :  1;
+    NvU32 reserved14 :  1;
+    NvU32 reserved15 :  1;
+    NvU32 reserved16 :  1;
+    NvU32 reserved17 :  1;
+    NvU32 reserved18 :  1;
+    NvU32 reserved19 :  1;
+    NvU32 reserved20 :  1;
+    NvU32 reserved21 :  1;
+    NvU32 reserved22 :  1;
+    NvU32 reserved23 :  1;
+    NvU32 reserved24 :  1;
+    NvU32 reserved25 :  1;
+    NvU32 reserved26 :  1;
+    NvU32 reserved27 :  1;
+    NvU32 reserved28 :  1;
+    NvU32 reserved29 :  1;
+    NvU32 reserved30 :  1;
+    NvU32 reserved31 :  1;
+    NvU32 reserved32 :  1;
+} NVDRS_GPU_SUPPORT;
+
+//! Binary setting value: a length in bytes followed by a fixed-size data buffer.
+typedef struct _NVDRS_BINARY_SETTING 
+{
+     NvU32                valueLength;               //!< valueLength should always be in number of bytes.
+     NvU8                 valueData[NVAPI_BINARY_DATA_MAX];
+} NVDRS_BINARY_SETTING;
+
+typedef struct _NVDRS_SETTING_VALUES // Default value of a setting plus up to NVAPI_SETTING_MAX_VALUES possible values.
+{
+     NvU32                      version;                //!< Structure Version
+     NvU32                      numSettingValues;       //!< Total number of values available in a setting.
+     NVDRS_SETTING_TYPE         settingType;            //!< Type of setting value.  
+     union                                              //!< Setting can hold either DWORD or Binary value or string. Not mixed types.
+     {
+         NvU32                      u32DefaultValue;    //!< Accessing default DWORD value of this setting.
+         NVDRS_BINARY_SETTING       binaryDefaultValue; //!< Accessing default Binary value of this setting.
+                                                        //!< Must be allocated by caller with valueLength specifying buffer size, or only valueLength will be filled in.
+         NvAPI_UnicodeString        wszDefaultValue;    //!< Accessing default unicode string value of this setting.
+     };
+     union                                                //!< Setting values can be of either DWORD, Binary values or String type,
+     {                                                    //!< NOT mixed types.
+         NvU32                      u32Value;           //!< All possible DWORD values for a setting
+         NVDRS_BINARY_SETTING       binaryValue;        //!< All possible Binary values for a setting
+         NvAPI_UnicodeString        wszValue;           //!< Accessing current unicode string value of this setting.
+     }settingValues[NVAPI_SETTING_MAX_VALUES];
+} NVDRS_SETTING_VALUES;
+
+//! Macro for constructing the version field of ::_NVDRS_SETTING_VALUES
+#define NVDRS_SETTING_VALUES_VER    MAKE_NVAPI_VERSION(NVDRS_SETTING_VALUES,1)
+     
+typedef struct _NVDRS_SETTING_V1 // One driver setting: its name and ID, value type, and predefined/current values.
+{
+     NvU32                      version;                //!< Structure Version
+     NvAPI_UnicodeString        settingName;            //!< String name of setting
+     NvU32                      settingId;              //!< 32 bit setting Id
+     NVDRS_SETTING_TYPE         settingType;            //!< Type of setting value.  
+     NVDRS_SETTING_LOCATION     settingLocation;        //!< Describes where the value in CurrentValue comes from. 
+     NvU32                      isCurrentPredefined;    //!< It is different than 0 if the currentValue is a predefined Value, 
+                                                        //!< 0 if the currentValue is a user value. 
+     NvU32                      isPredefinedValid;      //!< It is different than 0 if the PredefinedValue union contains a valid value. 
+     union                                              //!< Setting can hold either DWORD or Binary value or string. Not mixed types.
+     {
+         NvU32                      u32PredefinedValue;    //!< Accessing default DWORD value of this setting.
+         NVDRS_BINARY_SETTING       binaryPredefinedValue; //!< Accessing default Binary value of this setting.
+                                                           //!< Must be allocated by caller with valueLength specifying buffer size, 
+                                                           //!< or only valueLength will be filled in.
+         NvAPI_UnicodeString        wszPredefinedValue;    //!< Accessing default unicode string value of this setting.
+     };
+     union                                              //!< Setting can hold either DWORD or Binary value or string. Not mixed types.
+     {
+         NvU32                      u32CurrentValue;    //!< Accessing current DWORD value of this setting.
+         NVDRS_BINARY_SETTING       binaryCurrentValue; //!< Accessing current Binary value of this setting.
+                                                        //!< Must be allocated by caller with valueLength specifying buffer size, 
+                                                        //!< or only valueLength will be filled in.
+         NvAPI_UnicodeString        wszCurrentValue;    //!< Accessing current unicode string value of this setting.
+     };                                                 
+} NVDRS_SETTING_V1;
+
+//! Macro for constructing the version field of ::_NVDRS_SETTING
+#define NVDRS_SETTING_VER1        MAKE_NVAPI_VERSION(NVDRS_SETTING_V1, 1)
+
+typedef NVDRS_SETTING_V1          NVDRS_SETTING; // Unversioned alias for the setting structure.
+#define NVDRS_SETTING_VER         NVDRS_SETTING_VER1
+
+typedef struct _NVDRS_APPLICATION_V4 // Describes an application entry that belongs to a driver profile.
+{
+     NvU32                      version;            //!< Structure Version
+     NvU32                      isPredefined;       //!< Is the application userdefined/predefined
+     NvAPI_UnicodeString        appName;            //!< String name of the Application
+     NvAPI_UnicodeString        userFriendlyName;   //!< UserFriendly name of the Application
+     NvAPI_UnicodeString        launcher;           //!< Indicates the name (if any) of the launcher that starts the Application
+     NvAPI_UnicodeString        fileInFolder;       //!< Select this application only if this file is found.
+                                                    //!< When specifying multiple files, separate them using the ':' character.
+     NvU32                      isMetro:1;          //!< Windows 8 style app
+     NvU32                      isCommandLine:1;    //!< Command line parsing for the application name
+     NvU32                      reserved:30;        //!< Reserved. Should be 0.
+     NvAPI_UnicodeString        commandLine;        //!< If isCommandLine is set to 0 this must be an empty. If isCommandLine is set to 1 
+                                                    //!< this contains application's command line as if it was returned by GetCommandLineW.
+} NVDRS_APPLICATION_V4;
+
+#define NVDRS_APPLICATION_VER_V4        MAKE_NVAPI_VERSION(NVDRS_APPLICATION_V4,4) // Version field value for NVDRS_APPLICATION_V4.
+
+typedef NVDRS_APPLICATION_V4 NVDRS_APPLICATION; // Unversioned alias for the application structure.
+#define NVDRS_APPLICATION_VER NVDRS_APPLICATION_VER_V4
+
+typedef struct _NVDRS_PROFILE_V1 // Describes a driver profile: its name plus read-only support flags and counters.
+{
+     NvU32                      version;            //!< Structure Version
+     NvAPI_UnicodeString        profileName;        //!< String name of the Profile
+     NVDRS_GPU_SUPPORT          gpuSupport;         //!< This read-only flag indicates the profile support on either
+                                                    //!< Quadro, or Geforce, or both.
+     NvU32                      isPredefined;       //!< Is the Profile user-defined, or predefined
+     NvU32                      numOfApps;          //!< Total number of applications that belong to this profile. Read-only
+     NvU32                      numOfSettings;      //!< Total number of settings applied for this Profile. Read-only
+} NVDRS_PROFILE_V1;
+
+typedef NVDRS_PROFILE_V1         NVDRS_PROFILE; // Unversioned alias for the profile structure.
+
+//! Macro for constructing the version field of ::NVDRS_PROFILE
+#define NVDRS_PROFILE_VER1       MAKE_NVAPI_VERSION(NVDRS_PROFILE_V1,1)
+#define NVDRS_PROFILE_VER        NVDRS_PROFILE_VER1
+
+#endif