浏览代码

Merge pull request #69957 from akien-mga/3.x-cherrypicks

Cherry-picks for the 3.x branch (future 3.6) - 10th batch
Rémi Verschelde 2 年之前
父节点
当前提交
3a2f2d99f6
共有 100 个文件被更改,包括 5690 次插入2121 次删除
  1. 2 2
      COPYRIGHT.txt
  2. 4 1
      SConstruct
  3. 0 4
      core/os/os.cpp
  4. 0 2
      core/os/os.h
  5. 0 8
      core/pool_allocator.cpp
  6. 0 5
      drivers/unix/os_unix.cpp
  7. 0 1
      drivers/unix/os_unix.h
  8. 2 0
      editor/editor_properties_array_dict.cpp
  9. 88 32
      main/gamecontrollerdb.txt
  10. 1 0
      main/godotcontrollerdb.txt
  11. 21 10
      methods.py
  12. 0 1
      misc/dist/ios_xcode/godot_ios.xcodeproj/project.pbxproj
  13. 1 1
      modules/denoise/SCsub
  14. 6 1
      modules/raycast/SCsub
  15. 25 11
      modules/raycast/godot_update_embree.py
  16. 2 2
      modules/upnp/SCsub
  17. 2 2
      modules/upnp/upnp.cpp
  18. 1 1
      modules/upnp/upnp.h
  19. 1 1
      modules/upnp/upnp_device.cpp
  20. 1 4
      platform/android/detect.py
  21. 4 4
      platform/android/java/app/config.gradle
  22. 1 1
      platform/android/java/gradle/wrapper/gradle-wrapper.properties
  23. 0 3
      platform/android/java/lib/AndroidManifest.xml
  24. 6 7
      platform/android/java/lib/build.gradle
  25. 2 3
      platform/iphone/detect.py
  26. 14 0
      platform/javascript/SCsub
  27. 1 722
      platform/javascript/package-lock.json
  28. 2 4
      platform/javascript/package.json
  29. 0 21
      platform/javascript/serve.json
  30. 55 0
      platform/javascript/serve.py
  31. 0 1
      platform/windows/detect.py
  32. 6 0
      scene/gui/control.cpp
  33. 3 1
      scene/gui/tree.cpp
  34. 7 10
      thirdparty/README.md
  35. 0 1
      thirdparty/embree/common/algorithms/parallel_for.h
  36. 17 6
      thirdparty/embree/common/algorithms/parallel_for_for.h
  37. 40 10
      thirdparty/embree/common/algorithms/parallel_for_for_prefix_sum.h
  38. 1 1
      thirdparty/embree/common/algorithms/parallel_reduce.h
  39. 5 5
      thirdparty/embree/common/math/bbox.h
  40. 18 1
      thirdparty/embree/common/math/color.h
  41. 0 19
      thirdparty/embree/common/math/constants.cpp
  42. 33 21
      thirdparty/embree/common/math/constants.h
  43. 95 9
      thirdparty/embree/common/math/math.h
  44. 8 4
      thirdparty/embree/common/math/quaternion.h
  45. 3 3
      thirdparty/embree/common/math/transcendental.h
  46. 4 4
      thirdparty/embree/common/math/vec2.h
  47. 23 6
      thirdparty/embree/common/math/vec2fa.h
  48. 5 6
      thirdparty/embree/common/math/vec3.h
  49. 75 17
      thirdparty/embree/common/math/vec3fa.h
  50. 28 18
      thirdparty/embree/common/math/vec3ia.h
  51. 3 3
      thirdparty/embree/common/math/vec4.h
  52. 1196 0
      thirdparty/embree/common/simd/arm/avx2neon.h
  53. 53 18
      thirdparty/embree/common/simd/arm/emulation.h
  54. 2215 779
      thirdparty/embree/common/simd/arm/sse2neon.h
  55. 1 1
      thirdparty/embree/common/simd/simd.h
  56. 1 1
      thirdparty/embree/common/simd/sse.h
  57. 6 1
      thirdparty/embree/common/simd/vboold4_avx.h
  58. 1 1
      thirdparty/embree/common/simd/vboolf16_avx512.h
  59. 20 4
      thirdparty/embree/common/simd/vboolf4_sse2.h
  60. 1 1
      thirdparty/embree/common/simd/vboolf8_avx.h
  61. 8 1
      thirdparty/embree/common/simd/vdouble4_avx.h
  62. 3 2
      thirdparty/embree/common/simd/vfloat16_avx512.h
  63. 108 27
      thirdparty/embree/common/simd/vfloat4_sse2.h
  64. 75 12
      thirdparty/embree/common/simd/vfloat8_avx.h
  65. 77 23
      thirdparty/embree/common/simd/vint4_sse2.h
  66. 2 2
      thirdparty/embree/common/simd/vint8_avx.h
  67. 4 0
      thirdparty/embree/common/simd/vint8_avx2.h
  68. 28 10
      thirdparty/embree/common/simd/vuint4_sse2.h
  69. 2 2
      thirdparty/embree/common/simd/vuint8_avx.h
  70. 2 0
      thirdparty/embree/common/simd/vuint8_avx2.h
  71. 13 0
      thirdparty/embree/common/simd/wasm/emulation.h
  72. 6 6
      thirdparty/embree/common/sys/array.h
  73. 2 2
      thirdparty/embree/common/sys/barrier.h
  74. 74 50
      thirdparty/embree/common/sys/intrinsics.h
  75. 1 0
      thirdparty/embree/common/sys/mutex.cpp
  76. 6 0
      thirdparty/embree/common/sys/mutex.h
  77. 10 10
      thirdparty/embree/common/sys/platform.h
  78. 52 10
      thirdparty/embree/common/sys/sysinfo.cpp
  79. 11 2
      thirdparty/embree/common/sys/sysinfo.h
  80. 23 12
      thirdparty/embree/common/sys/thread.cpp
  81. 8 6
      thirdparty/embree/common/sys/vector.h
  82. 1 1
      thirdparty/embree/common/tasking/taskschedulerinternal.h
  83. 1 7
      thirdparty/embree/common/tasking/taskschedulertbb.h
  84. 1 3
      thirdparty/embree/include/embree3/rtcore_common.h
  85. 9 8
      thirdparty/embree/include/embree3/rtcore_config.h
  86. 1 1
      thirdparty/embree/include/embree3/rtcore_quaternion.h
  87. 4 1
      thirdparty/embree/include/embree3/rtcore_scene.h
  88. 1 1
      thirdparty/embree/kernels/builders/bvh_builder_morton.h
  89. 2 2
      thirdparty/embree/kernels/builders/bvh_builder_msmblur.h
  90. 1 1
      thirdparty/embree/kernels/builders/bvh_builder_sah.h
  91. 2 4
      thirdparty/embree/kernels/builders/heuristic_binning.h
  92. 1 1
      thirdparty/embree/kernels/builders/heuristic_openmerge_array.h
  93. 32 79
      thirdparty/embree/kernels/builders/heuristic_spatial.h
  94. 8 7
      thirdparty/embree/kernels/builders/heuristic_spatial_array.h
  95. 0 4
      thirdparty/embree/kernels/builders/primrefgen.cpp
  96. 9 4
      thirdparty/embree/kernels/builders/primrefgen_presplit.h
  97. 15 13
      thirdparty/embree/kernels/builders/splitter.h
  98. 1 1
      thirdparty/embree/kernels/bvh/bvh.cpp
  99. 917 0
      thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
  100. 59 0
      thirdparty/embree/kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp

+ 2 - 2
COPYRIGHT.txt

@@ -268,8 +268,8 @@ Copyright: lieff
 License: CC0-1.0
 
 Files: ./thirdparty/miniupnpc/
-Comment: MiniUPnPc
-Copyright: 2005-2021, Thomas Bernard
+Comment: MiniUPnP Project
+Copyright: 2005-2022, Thomas Bernard
 License: BSD-3-clause
 
 Files: ./thirdparty/minizip/

+ 4 - 1
SConstruct

@@ -94,6 +94,7 @@ env_base.__class__.add_library = methods.add_library
 env_base.__class__.add_program = methods.add_program
 env_base.__class__.CommandNoCache = methods.CommandNoCache
 env_base.__class__.disable_warnings = methods.disable_warnings
+env_base.__class__.force_optimization_on_debug = methods.force_optimization_on_debug
 
 env_base["x86_libtheora_opt_gcc"] = False
 env_base["x86_libtheora_opt_vc"] = False
@@ -330,6 +331,9 @@ if env_base["target"] == "debug":
     # DEV_ENABLED enables *engine developer* code which should only be compiled for those
     # working on the engine itself.
     env_base.Append(CPPDEFINES=["DEV_ENABLED"])
+else:
+    # Disable assert() for production targets (only used in thirdparty code).
+    env_base.Append(CPPDEFINES=["NDEBUG"])
 
 # SCons speed optimization controlled by the `fast_unsafe` option, which provide
 # more than 10 s speed up for incremental rebuilds.
@@ -542,7 +546,6 @@ if selected_platform in platform_list:
             print("       Use `tools=no target=release` to build a release export template.")
             Exit(255)
         suffix += ".opt"
-        env.Append(CPPDEFINES=["NDEBUG"])
     elif env["target"] == "release_debug":
         if env["tools"]:
             suffix += ".opt.tools"

+ 0 - 4
core/os/os.cpp

@@ -90,10 +90,6 @@ uint64_t OS::get_system_time_msecs() const {
 double OS::get_subsecond_unix_time() const {
 	return 0.0;
 }
-void OS::debug_break(){
-
-	// something
-};
 
 void OS::_set_logger(CompositeLogger *p_logger) {
 	if (_logger) {

+ 0 - 2
core/os/os.h

@@ -569,8 +569,6 @@ public:
 	virtual void enable_for_stealing_focus(ProcessID pid) {}
 	virtual void move_window_to_foreground() {}
 
-	virtual void debug_break();
-
 	virtual void release_rendering_thread();
 	virtual void make_rendering_thread();
 	virtual void swap_buffers();

+ 0 - 8
core/pool_allocator.cpp

@@ -35,8 +35,6 @@
 #include "core/os/os.h"
 #include "core/print_string.h"
 
-#include <assert.h>
-
 #define COMPACT_CHUNK(m_entry, m_to_pos)                      \
 	do {                                                      \
 		void *_dst = &((unsigned char *)pool)[m_to_pos];      \
@@ -169,11 +167,6 @@ bool PoolAllocator::find_entry_index(EntryIndicesPos *p_map_pos, Entry *p_entry)
 
 PoolAllocator::ID PoolAllocator::alloc(int p_size) {
 	ERR_FAIL_COND_V(p_size < 1, POOL_ALLOCATOR_INVALID_ID);
-#ifdef DEBUG_ENABLED
-	if (p_size > free_mem) {
-		OS::get_singleton()->debug_break();
-	}
-#endif
 	ERR_FAIL_COND_V(p_size > free_mem, POOL_ALLOCATOR_INVALID_ID);
 
 	mt_lock();
@@ -482,7 +475,6 @@ void *PoolAllocator::get(ID p_mem) {
 		ERR_FAIL_COND_V(!e, nullptr);
 	}
 	if (e->lock == 0) {
-		//assert(0);
 		mt_unlock();
 		ERR_PRINT("e->lock == 0");
 		return nullptr;

+ 0 - 5
drivers/unix/os_unix.cpp

@@ -49,7 +49,6 @@
 #include <sys/sysctl.h>
 #endif
 
-#include <assert.h>
 #include <dlfcn.h>
 #include <errno.h>
 #include <poll.h>
@@ -87,10 +86,6 @@ static void _setup_clock() {
 }
 #endif
 
-void OS_Unix::debug_break() {
-	assert(false);
-};
-
 static void handle_interrupt(int sig) {
 	if (ScriptDebugger::get_singleton() == nullptr) {
 		return;

+ 0 - 1
drivers/unix/os_unix.h

@@ -95,7 +95,6 @@ public:
 	virtual bool set_environment(const String &p_var, const String &p_value) const;
 	virtual String get_locale() const;
 
-	virtual void debug_break();
 	virtual void initialize_debugging();
 
 	virtual String get_executable_path() const;

+ 2 - 0
editor/editor_properties_array_dict.cpp

@@ -675,6 +675,8 @@ void EditorPropertyArray::_reorder_button_up() {
 	reorder_mouse_y_delta = 0.0f;
 
 	Input::get_singleton()->set_mouse_mode(Input::MOUSE_MODE_VISIBLE);
+
+	ERR_FAIL_NULL(reorder_selected_button);
 	reorder_selected_button->warp_mouse(reorder_selected_button->get_size() / 2.0f);
 
 	reorder_selected_element_hbox = nullptr;

+ 88 - 32
main/gamecontrollerdb.txt

@@ -67,9 +67,11 @@
 03000000c82d00000160000000000000,8BitDo SN30 Pro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Windows,
 03000000c82d00000161000000000000,8BitDo SN30 Pro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Windows,
 03000000c82d00000121000000000000,8BitDo SN30 Pro for Android,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
-03000000c82d00000260000000000000,8BitDo SN30 Pro+,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Windows,
-03000000c82d00000261000000000000,8BitDo SN30 Pro+,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Windows,
-03000000c82d00001330000000000000,8BitDo Ultimate Wireless,a:b0,b:b1,x:b3,y:b4,back:b10,guide:b12,start:b11,leftstick:b13,rightstick:b14,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:b8,righttrigger:b9,paddle1:b23,paddle2:b19,platform:Windows,
+03000000c82d00000260000000000000,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Windows,
+03000000c82d00000261000000000000,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Windows,
+03000000c82d00001130000000000000,8BitDo Ultimate Wired,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
+03000000c82d00001230000000000000,8BitDo Ultimate Wireless,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
+03000000c82d00001330000000000000,8BitDo Ultimate Wireless,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b26,paddle1:b23,paddle2:b19,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Windows,
 03000000a00500003232000000000000,8BitDo Zero,a:b0,b:b1,back:b10,dpdown:+a2,dpleft:-a0,dpright:+a0,dpup:-a2,leftshoulder:b6,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Windows,
 03000000c82d00001890000000000000,8BitDo Zero 2,a:b1,b:b0,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b4,y:b3,platform:Windows,
 03000000c82d00003032000000000000,8BitDo Zero 2,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Windows,
@@ -201,6 +203,7 @@
 03000000ac0500005b05000000000000,GameSir G3w,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Windows,
 03000000ac0500002d02000000000000,GameSir G4,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Windows,
 03000000ac0500004d04000000000000,GameSir G4,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
+03000000ac0500001a06000000000000,GameSir-T3 2.02,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
 030000004c0e00001035000000000000,Gamester,a:b0,b:b1,back:b7,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:b10,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:b11,rightx:a3,righty:a4,start:b6,x:b2,y:b3,platform:Windows,
 030000000d0f00001110000000000000,GameStick Controller,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Windows,
 0300000047530000616d000000000000,GameStop,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Windows,
@@ -296,7 +299,8 @@
 030000000d0f0000ee00000000000000,Horipad Mini 4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
 030000000d0f00006700000000000000,Horipad One,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Windows,
 030000000d0f0000dc00000000000000,Horipad Switch,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Windows,
-030000008f0e00001330000000000000,HuiJia SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b9,x:b3,y:b0,platform:Windows,
+03000000242e00000b20000000000000,Hyperkin Admiral N64 Controller,+rightx:b11,+righty:b13,-rightx:b8,-righty:b12,a:b1,b:b0,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b14,leftx:a0,lefty:a1,rightshoulder:b5,start:b9,platform:Windows,
+03000000242e0000ff0b000000000000,Hyperkin N64 Adapter,a:b1,b:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a2,righty:a3,start:b9,platform:Windows,
 03000000790000004e95000000000000,Hyperkin N64 Controller Adapter,a:b1,b:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b7,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a5,righty:a2,start:b9,platform:Windows,
 03000000d81d00000e00000000000000,iBuffalo AC02 Arcade Joystick,a:b0,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b7,leftx:a0,lefty:a1,rightshoulder:b2,rightstick:b11,righttrigger:b3,rightx:a2,righty:a5,start:b8,x:b4,y:b5,platform:Windows,
 03000000d81d00000f00000000000000,iBuffalo BSGP1204 Series,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Windows,
@@ -369,6 +373,7 @@
 030000002a0600001024000000000000,Matricom,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a3,righty:a4,start:b9,x:b2,y:b3,platform:Windows,
 030000009f000000adbb000000000000,MaxJoypad Virtual Controller,a:b1,b:b2,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Windows,
 03000000250900000128000000000000,Mayflash Arcade Stick,a:b1,b:b2,back:b8,leftshoulder:b0,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b3,righttrigger:b7,start:b9,x:b5,y:b6,platform:Windows,
+030000008f0e00001330000000000000,Mayflash Controller Adapter,a:b1,b:b2,back:b8,dpdown:h0.8,dpleft:h0.2,dpright:h0.1,dpup:h0.4,leftshoulder:b6,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightx:a3~,righty:a2,start:b9,x:b0,y:b3,platform:Windows,
 03000000242f00003700000000000000,Mayflash F101,a:b1,b:b2,back:b8,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Windows,
 03000000790000003018000000000000,Mayflash F300 Arcade Joystick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Windows,
 03000000242f00003900000000000000,Mayflash F300 Elite Arcade Joystick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
@@ -377,8 +382,9 @@
 03000000242f00007300000000000000,Mayflash Magic NS,a:b1,b:b4,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b0,y:b3,platform:Windows,
 0300000079000000d218000000000000,Mayflash Magic NS,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Windows,
 03000000d620000010a7000000000000,Mayflash Magic NS,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
+03000000790000007918000000000000,Mayflash N64 Controller Adapter,a:b1,b:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b9,leftx:a0,lefty:a1,righttrigger:b7,rightx:a3,righty:a2,start:b8,platform:Windows,
 030000008f0e00001030000000000000,Mayflash Sega Saturn Adapter,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b5,rightshoulder:b2,righttrigger:b7,start:b9,x:b3,y:b4,platform:Windows,
-0300000025090000e803000000000000,Mayflash Wii Classic Adapter,a:b1,b:b0,back:b8,dpdown:b13,dpleft:b12,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b2,platform:Windows,
+0300000025090000e803000000000000,Mayflash Wii Classic Adapter,a:b1,b:b0,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:a4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:a5,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b2,platform:Windows,
 03000000790000000318000000000000,Mayflash Wii DolphinBar,a:b2,b:b3,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b11,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b1,platform:Windows,
 03000000790000000018000000000000,Mayflash Wii U Pro Adapter,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
 03000000790000002418000000000000,Mega Drive Controller,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,rightshoulder:b2,start:b9,x:b3,y:b4,platform:Windows,
@@ -443,6 +449,7 @@
 030000006f0e00008501000000000000,PDP Fightpad Pro,a:b2,b:b3,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b1,y:b0,platform:Windows,
 030000006f0e00000901000000000000,PDP Versus Fighting,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Windows,
 030000008f0e00004100000000000000,PlaySega,a:b1,b:b0,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,lefttrigger:b7,rightshoulder:b5,righttrigger:b2,start:b8,x:b4,y:b3,platform:Windows,
+03000000666600006706000000000000,PlayStation Adapter,a:b2,b:b1,back:b8,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b6,leftstick:b9,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b10,righttrigger:b5,rightx:a2,righty:a3,start:b11,x:b3,y:b0,platform:Windows,
 03000000e30500009605000000000000,PlayStation Adapter,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Windows,
 030000004c050000da0c000000000000,PlayStation Classic Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,start:b9,x:b3,y:b0,platform:Windows,
 03000000632500002306000000000000,PlayStation Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Windows,
@@ -464,7 +471,6 @@
 03000000250900000088000000000000,PS2 Controller,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Windows,
 03000000250900006888000000000000,PS2 Controller,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b5,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b6,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Windows,
 03000000250900008888000000000000,PS2 Controller,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Windows,
-03000000666600006706000000000000,PS2 Controller,a:b2,b:b1,back:b8,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b6,leftstick:b9,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b10,righttrigger:b5,rightx:a2,righty:a3,start:b11,x:b3,y:b0,platform:Windows,
 030000006b1400000303000000000000,PS2 Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Windows,
 030000009d0d00001330000000000000,PS2 Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b6,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Windows,
 03000000151a00006222000000000000,PS2 Dual Plus Adapter,a:b2,b:b1,back:b9,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Windows,
@@ -505,7 +511,8 @@
 030000004c050000a00b000000000000,PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
 030000004c050000c405000000000000,PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Windows,
 030000004c050000cc09000000000000,PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
-030000004c050000e60c000000000000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Windows,
+030000004c050000e60c000000000000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b14,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Windows,
+030000004c050000f20d000000000000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b14,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Windows,
 03000000830500005020000000000000,PSX,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3,start:b11,x:b2,y:b3,platform:Windows,
 03000000300f00000111000000000000,Qanba 2,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
 03000000300f00000211000000000000,Qanba 2P,a:b1,b:b0,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Windows,
@@ -554,6 +561,7 @@
 03000000830500006020000000000000,Retro Controller,a:b0,b:b1,back:b6,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,lefttrigger:b5,rightshoulder:b8,righttrigger:b9,start:b7,x:b2,y:b3,platform:Windows,
 03000000bd12000013d0000000000000,Retrolink Sega Saturn Classic Controller,a:b0,b:b1,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b5,lefttrigger:b6,rightshoulder:b2,righttrigger:b7,start:b8,x:b3,y:b4,platform:Windows,
 03000000bd12000015d0000000000000,Retrolink SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,start:b9,x:b3,y:b0,platform:Windows,
+03000000341200000400000000000000,RetroUSB N64 RetroPort,+rightx:b8,+righty:b10,-rightx:b9,-righty:b11,a:b7,b:b6,dpdown:b2,dpleft:b1,dpright:b0,dpup:b3,leftshoulder:b13,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b12,start:b4,platform:Windows,
 0300000000f000000300000000000000,RetroUSB RetroPad,a:b1,b:b5,back:b2,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b4,platform:Windows,
 0300000000f00000f100000000000000,RetroUSB Super RetroPort,a:b1,b:b5,back:b2,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b4,platform:Windows,
 03000000830500000960000000000000,Revenger,a:b0,b:b1,leftshoulder:b6,lefttrigger:b7,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:b3,x:b4,y:b5,platform:Windows,
@@ -741,9 +749,8 @@
 03000000450c00002043000000000000,Xeox SL6556BK,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Windows,
 030000006f0e00000300000000000000,XGear,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a5,righty:a2,start:b9,x:b3,y:b0,platform:Windows,
 03000000172700004431000000000000,Xiaomi Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b20,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a7,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Windows,
-03000000bc2000005060000000000000,Xiaomi XMGP01YM,a:b0,b:b1,x:b3,y:b4,back:b10,guide:b12,start:b11,leftstick:b13,rightstick:b14,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,-lefty:-a1,+lefty:+a2,rightx:a3,-righty:-a4,+righty:+a5,lefttrigger:b8,righttrigger:b9,platform:Windows,
-03000000172700003350000000000000,Xiaomi XMGP01YM,a:b0,b:b1,x:b3,y:b4,back:b10,guide:b12,start:b11,leftstick:b13,rightstick:b14,leftshoulder:b6,rightshoulder:b7,dpup:h0.1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,leftx:a0,lefty:a1,rightx:a3,righty:a4,platform:Windows,
-03000000786901006e70000000000000,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Windows,
+03000000172700003350000000000000,Xiaomi XMGP01YM,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
+03000000bc2000005060000000000000,Xiaomi XMGP01YM,+lefty:+a2,+righty:+a5,-lefty:-a1,-righty:-a4,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,start:b11,x:b3,y:b4,platform:Windows,
 xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Windows,
 030000007d0400000340000000000000,Xterminator Digital Gamepad,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:-a4,lefttrigger:+a4,leftx:a0,lefty:a1,paddle1:b7,paddle2:b6,rightshoulder:b5,rightstick:b9,righttrigger:b2,rightx:a3,righty:a5,start:b8,x:b3,y:b4,platform:Windows,
 03000000790000004f18000000000000,ZDT Android Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
@@ -789,8 +796,11 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c82d00004028000000010000,8BitDo SN30,a:b1,b:b0,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000c82d00000160000001000000,8BitDo SN30 Pro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a5,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000c82d00000161000000010000,8BitDo SN30 Pro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a5,start:b11,x:b4,y:b3,platform:Mac OS X,
-03000000c82d00000260000001000000,8BitDo SN30 Pro+,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
-03000000c82d00000261000000010000,8BitDo SN30 Pro+,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
+03000000c82d00000260000001000000,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
+03000000c82d00000261000000010000,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
+03000000c82d00001130000000020000,8BitDo Ultimate Wired,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
+03000000c82d00001330000001000000,8BitDo Ultimate Wireless,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b26,paddle1:b23,paddle2:b19,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
+03000000c82d00001330000000020000,8BitDo Ultimate Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b26,paddle1:b23,paddle2:b19,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000a00500003232000008010000,8BitDo Zero,a:b0,b:b1,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000a00500003232000009010000,8BitDo Zero,a:b0,b:b1,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000c82d00001890000001000000,8BitDo Zero 2,a:b1,b:b0,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b4,y:b3,platform:Mac OS X,
@@ -817,10 +827,12 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000151900004000000001000000,Flydigi Vader 2,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Mac OS X,
 03000000b40400001124000000000000,Flydigi Vader 2,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b12,lefttrigger:b8,leftx:a0,lefty:a1,paddle1:b4,paddle2:b5,paddle3:b17,rightshoulder:b7,rightstick:b13,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b2,y:b3,platform:Mac OS X,
 03000000790000004618000000010000,GameCube Controller Adapter,a:b4,b:b0,dpdown:b56,dpleft:b60,dpright:b52,dpup:b48,lefttrigger:a12,leftx:a0,lefty:a4,rightshoulder:b28,righttrigger:a16,rightx:a20,righty:a8,start:b36,x:b8,y:b12,platform:Mac OS X,
+03000000ac0500001a06000002020000,GameSir-T3 2.02,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000ad1b000001f9000000000000,Gamestop BB070 X360 Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 0500000047532047616d657061640000,GameStop Gamepad,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Mac OS X,
 03000000c01100000140000000010000,GameStop PS4 Fun Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000006f0e00000102000000000000,GameStop Xbox 360 Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
+03000000ff1100003133000007010000,GameWare PC Control Pad,a:b2,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a3,righty:a4,start:b11,x:b3,y:b0,platform:Mac OS X,
 030000007d0400000540000001010000,Gravis Eliminator Pro,a:b1,b:b2,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000280400000140000000020000,Gravis GamePad Pro,a:b1,b:b2,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000008f0e00000300000007010000,GreenAsia Joystick,a:b2,b:b3,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,rightx:a3,righty:a2,start:b9,x:b0,y:b1,platform:Mac OS X,
@@ -836,11 +848,12 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000000d0f00004d00000000000000,Hori Gem Pad 3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000000d0f00003801000008010000,Hori PC Engine Mini Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,start:b9,platform:Mac OS X,
 030000000d0f00009200000000010000,Hori Pokken Tournament DX Pro,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Mac OS X,
+030000000d0f0000aa00000072050000,Hori Real Arcade Pro,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Mac OS X,
 030000000d0f00006e00000000010000,Horipad 4 PS3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000000d0f00006600000000010000,Horipad 4 PS4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000000d0f00006600000000000000,Horipad FPS Plus 4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000000d0f0000ee00000000010000,Horipad Mini 4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
-030000008f0e00001330000011010000,HuiJia SNES Controller,a:b4,b:b2,back:b16,dpdown:+a2,dpleft:-a0,dpright:+a0,dpup:-a2,leftshoulder:b12,rightshoulder:b14,start:b18,x:b6,y:b0,platform:Mac OS X,
+03000000242e0000ff0b000000010000,Hyperkin N64 Adapter,a:b1,b:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a2,righty:a3,start:b9,platform:Mac OS X,
 03000000790000004e95000000010000,Hyperkin N64 Controller Adapter,a:b1,b:b2,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b7,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a5,righty:a2,start:b9,platform:Mac OS X,
 03000000830500006020000000000000,iBuffalo Gamepad,a:b1,b:b0,back:b6,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,start:b7,x:b3,y:b2,platform:Mac OS X,
 03000000ef0500000300000000020000,InterAct AxisPad,a:b2,b:b3,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a3,righty:a2,start:b11,x:b0,y:b1,platform:Mac OS X,
@@ -858,10 +871,11 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000006d0400001fc2000000000000,Logitech F710,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 030000006d04000018c2000000010000,Logitech RumblePad 2,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3~,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000380700005032000000010000,Mad Catz PS3 Fightpad Pro,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
-03000000380700008433000000010000,Mad Catz PS3 Fightstick TE S+,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
+03000000380700008433000000010000,Mad Catz PS3 Fightstick TE S Plus,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000380700005082000000010000,Mad Catz PS4 Fightpad Pro,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
-03000000380700008483000000010000,Mad Catz PS4 Fightstick TE S+,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
+03000000380700008483000000010000,Mad Catz PS4 Fightstick TE S Plus,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000790000000600000007010000,Marvo GT-004,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Mac OS X,
+030000008f0e00001330000011010000,Mayflash Controller Adapter,a:b2,b:b4,back:b16,dpdown:h0.8,dpleft:h0.2,dpright:h0.1,dpup:h0.4,leftshoulder:b12,lefttrigger:b16,leftx:a0,lefty:a2,rightshoulder:b14,rightx:a6~,righty:a4,start:b18,x:b0,y:b6,platform:Mac OS X,
 03000000790000004318000000010000,Mayflash GameCube Adapter,a:b4,b:b0,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,lefttrigger:a12,leftx:a0,lefty:a4,rightshoulder:b28,righttrigger:a16,rightx:a20,righty:a8,start:b36,x:b8,y:b12,platform:Mac OS X,
 03000000790000004418000000010000,Mayflash GameCube Controller,a:b1,b:b2,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:a4,rightx:a5,righty:a2,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000242f00007300000000020000,Mayflash Magic NS,a:b1,b:b4,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b0,y:b3,platform:Mac OS X,
@@ -886,8 +900,10 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000007e0500001920000001000000,NSO N64 Controller,+rightx:b8,+righty:b7,-rightx:b3,-righty:b2,a:b1,b:b0,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,righttrigger:b10,start:b9,platform:Mac OS X,
 030000007e0500001720000001000000,NSO SNES Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b15,start:b9,x:b2,y:b3,platform:Mac OS X,
 03000000550900001472000025050000,NVIDIA Controller,a:b0,b:b1,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b4,leftstick:b7,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b8,righttrigger:a4,rightx:a2,righty:a5,start:b6,x:b2,y:b3,platform:Mac OS X,
+030000004b120000014d000000010000,Nyko Airflo EX,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b11,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b12,righttrigger:b7,rightx:a3,righty:a2,start:b9,x:b2,y:b3,platform:Mac OS X,
 030000006f0e00000901000002010000,PDP Versus Fighting,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000008f0e00000300000000000000,Piranha Xtreme PS3 Controller,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b9,x:b3,y:b0,platform:Mac OS X,
+03000000666600006706000088020000,PlayStation Adapter,a:b2,b:b1,back:b8,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b6,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b5,rightx:a2,righty:a3,start:b11,x:b3,y:b0,platform:Mac OS X,
 030000004c050000da0c000000010000,PlayStation Classic Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,start:b9,x:b3,y:b0,platform:Mac OS X,
 030000004c0500003713000000010000,PlayStation Vita,a:b1,b:b2,back:b8,dpdown:b13,dpleft:b15,dpright:b14,dpup:b12,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a3,righty:a4,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000d62000006dca000000010000,PowerA Pro Ex,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
@@ -899,8 +915,10 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000004c050000c405000000000000,PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000004c050000c405000000010000,PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000004c050000cc09000000010000,PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
-030000004c050000e60c000000010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
+030000004c050000e60c000000010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b14,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Mac OS X,
+030000004c050000f20d000000010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b14,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Mac OS X,
 050000004c050000e60c000000010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Mac OS X,
+050000004c050000f20d000000010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Mac OS X,
 03000000222c00000225000000010000,Qanba Dragon Arcade Joystick (PS3),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000222c00000020000000010000,Qanba Drone Arcade Stick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000008916000000fd000000000000,Razer Onza TE,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
@@ -919,6 +937,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000790000001100000005010000,Retro Controller,a:b1,b:b2,back:b8,dpdown:+a4,dpleft:-a3,dpright:+a3,dpup:-a4,leftshoulder:b6,lefttrigger:b7,rightshoulder:b5,righttrigger:b4,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000830500006020000000010000,Retro Controller,a:b0,b:b1,back:b6,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,lefttrigger:b5,rightshoulder:b8,righttrigger:b9,start:b7,x:b2,y:b3,platform:Mac OS X,
 03000000790000001100000006010000,Retrolink SNES Controller,a:b2,b:b1,back:b8,dpdown:+a4,dpleft:-a3,dpright:+a3,dpup:-a4,leftshoulder:b4,rightshoulder:b5,start:b9,x:b3,y:b0,platform:Mac OS X,
+03000000341200000400000000000000,RetroUSB N64 RetroPort,+rightx:b8,+righty:b10,-rightx:b9,-righty:b11,a:b7,b:b6,dpdown:b2,dpleft:b1,dpright:b0,dpup:b3,leftshoulder:b13,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b12,start:b4,platform:Mac OS X,
 030000006b140000010d000000010000,Revolution Pro Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000006b140000130d000000010000,Revolution Pro Controller 3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000c6240000fefa000000000000,Rock Candy PS3,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
@@ -937,14 +956,13 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000381000002014000001000000,SteelSeries Nimbus,a:b0,b:b1,dpdown:b9,dpleft:b11,dpright:b10,dpup:b8,guide:b12,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3~,x:b2,y:b3,platform:Mac OS X,
 05000000484944204465766963650000,SteelSeries Nimbus Plus,a:b0,b:b1,back:b15,dpdown:b11,dpleft:b13,dpright:b12,dpup:b10,guide:b16,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3~,start:b14,x:b2,y:b3,platform:Mac OS X,
 050000004e696d6275732b0000000000,SteelSeries Nimbus Plus,a:b0,b:b1,back:b15,dpdown:b11,dpleft:b13,dpright:b12,dpup:b10,guide:b16,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3~,start:b14,x:b2,y:b3,platform:Mac OS X,
-050000004e696d6275732b008b000000,SteelSeries Nimbus Plus,a:b0,b:b1,back:b15,dpdown:b11,dpleft:b13,dpright:b12,dpup:b10,guide:b16,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3~,start:b14,x:b2,y:b3,platform:Mac OS X,
-05000000556e6b6e6f776e2048494400,SteelSeries Nimbus Plus,a:b0,b:b1,back:b15,dpdown:b11,dpleft:b13,dpright:b12,dpup:b10,guide:b16,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3~,start:b14,x:b2,y:b3,platform:Mac OS X,
 03000000381000003014000000000000,SteelSeries Stratus Duo,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 03000000381000003114000000000000,SteelSeries Stratus Duo,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 03000000110100001714000000000000,SteelSeries Stratus XL,a:b0,b:b1,dpdown:b9,dpleft:b11,dpright:b10,dpup:b8,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3~,start:b12,x:b2,y:b3,platform:Mac OS X,
 03000000110100001714000020010000,SteelSeries Stratus XL,a:b0,b:b1,dpdown:b9,dpleft:b11,dpright:b10,dpup:b8,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3~,start:b12,x:b2,y:b3,platform:Mac OS X,
 030000000d0f0000f600000000010000,Switch Hori Pad,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Mac OS X,
 03000000457500002211000000010000,SZMY Power PC Gamepad,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
+03000000790000001c18000003100000,TGZ Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000004f04000015b3000000000000,Thrustmaster Dual Analog 3.2,a:b0,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b6,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b1,y:b3,platform:Mac OS X,
 030000004f0400000ed0000000020000,ThrustMaster eSwap Pro Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000004f04000000b3000000000000,Thrustmaster Firestorm Dual Power,a:b0,b:b2,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b11,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,rightx:a2,righty:a3,start:b10,x:b1,y:b3,platform:Mac OS X,
@@ -954,6 +972,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000571d00002000000021000000,Tomee SNES Controller Adapter,a:b0,b:b1,back:b6,dpdown:+a4,dpleft:-a0,dpright:+a0,dpup:-a4,leftshoulder:b4,rightshoulder:b5,start:b7,x:b2,y:b3,platform:Mac OS X,
 030000005f140000c501000000020000,Trust Gamepad,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Mac OS X,
 03000000100800000100000000000000,Twin USB Joystick,a:b4,b:b2,back:b16,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b12,leftstick:b20,lefttrigger:b8,leftx:a0,lefty:a2,rightshoulder:b14,rightstick:b22,righttrigger:b10,rightx:a6,righty:a4,start:b18,x:b6,y:b0,platform:Mac OS X,
+03000000632500002605000000010000,Uberwith Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000006f0e00000302000025040000,Victrix PS4 Pro Fightstick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000006f0e00000702000003060000,Victrix PS4 Pro Fightstick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Mac OS X,
 050000005769696d6f74652028303000,Wii Remote,a:b4,b:b5,back:b7,dpdown:b3,dpleft:b0,dpright:b1,dpup:b2,guide:b8,leftshoulder:b11,lefttrigger:b12,leftx:a0,lefty:a1,start:b6,x:b10,y:b9,platform:Mac OS X,
@@ -965,6 +984,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000005e040000050b000003090000,Xbox Elite Controller Series 2,a:b0,b:b1,back:b31,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b53,leftshoulder:b6,leftstick:b13,lefttrigger:a6,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a5,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000130b000011050000,Xbox One Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000200b000011050000,Xbox One Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
+030000005e040000200b000013050000,Xbox One Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000d102000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 030000005e040000dd02000000000000,Xbox One Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 030000005e040000e002000000000000,Xbox One Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Mac OS X,
@@ -975,8 +995,9 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c62400003a54000000000000,Xbox One PowerA Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 030000005e040000130b000001050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000130b000005050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
-030000005e040000130b000009050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
+030000005e040000130b000009050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b15,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000130b000013050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
+030000005e040000130b000015050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000172700004431000029010000,XiaoMi Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a6,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000120c0000100e000000010000,Zeroplus P4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000120c0000101e000000010000,Zeroplus P4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
@@ -1008,6 +1029,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c82d00000751000000010000,8BitDo P30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:a8,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 05000000c82d00000851000000010000,8BitDo P30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:a8,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000c82d00000660000011010000,8BitDo Pro 2,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
+03000000c82d00001030000011010000,8BitDo Pro 2,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
 05000000c82d00000660000000010000,8BitDo Pro 2,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
 03000000c82d00000131000011010000,8BitDo Receiver,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
 03000000c82d00000231000011010000,8BitDo Receiver,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
@@ -1028,9 +1050,13 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c82d00001290000011010000,8BitDo SN30 Pro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b4,y:b3,platform:Linux,
 05000000c82d00000161000000010000,8BitDo SN30 Pro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
 05000000c82d00006228000000010000,8BitDo SN30 Pro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
-03000000c82d00000260000011010000,8BitDo SN30 Pro+,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
-05000000c82d00000261000000010000,8BitDo SN30 Pro+,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
+03000000c82d00000260000011010000,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
+05000000c82d00000261000000010000,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
 05000000202800000900000000010000,8BitDo SNES30,a:b1,b:b0,back:b10,dpdown:b122,dpleft:b119,dpright:b120,dpup:b117,leftshoulder:b6,rightshoulder:b7,start:b11,x:b4,y:b3,platform:Linux,
+03000000c82d00001130000011010000,8BitDo Ultimate Wired,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
+03000000c82d00000760000011010000,8BitDo Ultimate Wireless,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
+03000000c82d00001230000011010000,8BitDo Ultimate Wireless,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
+03000000c82d00001330000011010000,8BitDo Ultimate Wireless,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b26,paddle1:b23,paddle2:b19,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 05000000a00500003232000001000000,8BitDo Zero,a:b0,b:b1,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Linux,
 05000000a00500003232000008010000,8BitDo Zero,a:b0,b:b1,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Linux,
 03000000c82d00001890000011010000,8BitDo Zero 2,a:b1,b:b0,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b4,y:b3,platform:Linux,
@@ -1069,10 +1095,10 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c21100000791000011010000,Be1 GC101 Controller 1.03,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
 03000000c31100000791000011010000,Be1 GC101 Controller 1.03,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 030000005e0400008e02000003030000,Be1 GC101 Xbox 360,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
+03000000bc2000004d50000011010000,BEITONG A1T2 BFM,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 05000000bc2000000055000001000000,BETOP AX1 BFM,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000bc2000006412000011010000,Betop Controller,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b30,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
 030000006b1400000209000011010000,Bigben,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
-03000000666600006706000000010000,Boom PSX to PC Converter,a:b2,b:b1,back:b8,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b6,leftstick:b9,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b10,righttrigger:b5,rightx:a2,righty:a3,start:b11,x:b3,y:b0,platform:Linux,
 03000000120c0000200e000011010000,Brook Mars PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 03000000120c0000210e000011010000,Brook Mars PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 03000000120c0000f70e000011010000,Brook Universal Fighting Board,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,rightshoulder:b5,rightstick:b11,righttrigger:b7,start:b9,x:b0,y:b3,platform:Linux,
@@ -1090,6 +1116,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000430b00000300000000010000,EMS Production PS2 Adapter,a:b2,b:b1,back:b8,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a5,righty:a2,start:b9,x:b3,y:b0,platform:Linux,
 03000000b40400001124000011010000,Flydigi Vader 2,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b12,lefttrigger:a5,leftx:a0,lefty:a1,paddle1:b2,paddle2:b5,paddle4:b17,rightshoulder:b7,rightstick:b13,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 05000000151900004000000001000000,Flydigi Vader 2,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
+19000000030000000300000002030000,GameForce Controller,a:b1,b:b0,back:b8,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,guide:b16,leftshoulder:b4,leftstick:b14,lefttrigger:b6,leftx:a1,lefty:a0,rightshoulder:b5,rightstick:b15,righttrigger:b7,rightx:a3,righty:a2,start:b9,x:b2,y:b3,platform:Linux,
 03000000ac0500005b05000010010000,GameSir G3w,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
 03000000bc2000000055000011010000,GameSir G3w,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000558500001b06000010010000,GameSir G4 Pro,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
@@ -1099,6 +1126,9 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000006f0e00000104000000010000,Gamestop Logic3 Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 030000008f0e00000800000010010000,Gasia PlayStation Gamepad,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
 03000000451300000010000010010000,Genius Maxfire Grandias 12,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
+190000004b4800000010000000010000,GO-Advance Controller,a:b1,b:b0,back:b10,dpdown:b7,dpleft:b8,dpright:b9,dpup:b6,leftshoulder:b4,lefttrigger:b12,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b13,start:b15,x:b2,y:b3,platform:Linux,
+190000004b4800000010000001010000,GO-Advance Controller,a:b1,b:b0,back:b12,dpdown:b9,dpleft:b10,dpright:b11,dpup:b8,leftshoulder:b4,leftstick:b13,lefttrigger:b14,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b16,righttrigger:b15,start:b17,x:b2,y:b3,platform:Linux,
+190000004b4800000011000000010000,GO-Super Controller,a:b1,b:b0,back:b12,dpdown:b9,dpleft:b10,dpright:b11,dpup:b8,guide:b16,leftshoulder:b4,leftstick:b14,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b15,righttrigger:b7,rightx:a2,righty:a3,start:b13,x:b2,y:b3,platform:Linux,
 03000000f0250000c183000010010000,Goodbetterbest Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 0300000079000000d418000000010000,GPD Win 2 Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 030000007d0400000540000000010000,Gravis Eliminator Pro,a:b1,b:b2,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Linux,
@@ -1113,6 +1143,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000632500002605000010010000,HJDX,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a5,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 030000000d0f00000d00000000010000,Hori,a:b0,b:b6,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b3,rightshoulder:b7,start:b9,x:b1,y:b2,platform:Linux,
 030000000d0f00006d00000020010000,Hori EDGE 301,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:+a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:+a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
+030000000d0f00008400000011010000,Hori Fighting Commander,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 030000000d0f00005f00000011010000,Hori Fighting Commander 4 PS3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 030000000d0f00005e00000011010000,Hori Fighting Commander 4 PS4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 030000000d0f00005001000009040000,Hori Fighting Commander OCTA Xbox One,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
@@ -1139,7 +1170,8 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000000d0f0000c100000011010000,Horipad S,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b13,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 050000000d0f0000f600000001000000,Horipad Switch Pro Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
 03000000341a000005f7000010010000,HuiJia GameCube Controller Adapter,a:b1,b:b2,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:a4,rightx:a5,righty:a2,start:b9,x:b0,y:b3,platform:Linux,
-030000008f0e00001330000010010000,HuiJia SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b9,x:b3,y:b0,platform:Linux,
+05000000242e00000b20000001000000,Hyperkin Admiral N64 Controller,+rightx:b11,+righty:b13,-rightx:b8,-righty:b12,a:b1,b:b0,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b14,leftx:a0,lefty:a1,rightshoulder:b5,start:b9,platform:Linux,
+03000000242e0000ff0b000011010000,Hyperkin N64 Adapter,a:b1,b:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a2,righty:a3,start:b9,platform:Linux,
 03000000242e00008816000001010000,Hyperkin X91,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 03000000830500006020000010010000,iBuffalo SNES Controller,a:b1,b:b0,back:b6,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,start:b7,x:b3,y:b2,platform:Linux,
 050000006964726f69643a636f6e0000,idroidcon Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
@@ -1191,6 +1223,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000380700003847000090040000,Mad Catz Xbox 360 Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
 03000000ad1b000016f0000090040000,Mad Catz Xbox 360 Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 03000000120c00000500000000010000,Manta Dualshock 2,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a3,righty:a2,start:b9,x:b2,y:b3,platform:Linux,
+030000008f0e00001330000010010000,Mayflash Controller Adapter,a:b1,b:b2,back:b8,dpdown:h0.8,dpleft:h0.2,dpright:h0.1,dpup:h0.4,leftshoulder:b6,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightx:a3~,righty:a2,start:b9,x:b0,y:b3,platform:Linux,
 03000000790000004318000010010000,Mayflash GameCube Adapter,a:b1,b:b0,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:a4,rightx:a5,righty:a2,start:b9,x:b2,y:b3,platform:Linux,
 03000000790000004418000010010000,Mayflash GameCube Controller,a:b1,b:b0,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:a4,rightx:a5,righty:a2,start:b9,x:b2,y:b3,platform:Linux,
 03000000242f00007300000011010000,Mayflash Magic NS,a:b1,b:b4,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b0,y:b3,platform:Linux,
@@ -1240,6 +1273,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000790000004518000010010000,Nexilux GameCube Controller Adapter,a:b1,b:b0,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:a4,rightx:a5,righty:a2,start:b9,x:b2,y:b3,platform:Linux,
 030000001008000001e5000010010000,NEXT SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,righttrigger:b6,start:b9,x:b3,y:b0,platform:Linux,
 060000007e0500003713000000000000,Nintendo 3DS,a:b0,b:b1,back:b8,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a2,righty:a3,start:b9,x:b3,y:b2,platform:Linux,
+030000009b2800008000000020020000,Nintendo Classic Controller,a:b1,b:b4,back:b2,dpdown:b13,dpleft:b14,dpright:b15,dpup:b12,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b5,platform:Linux,
 030000007e0500003703000000016800,Nintendo GameCube Controller,a:b0,b:b2,dpdown:b6,dpleft:b4,dpright:b5,dpup:b7,lefttrigger:a4,leftx:a0,lefty:a1~,rightshoulder:b9,righttrigger:a5,rightx:a2,righty:a3~,start:b8,x:b1,y:b3,platform:Linux,
 03000000790000004618000010010000,Nintendo GameCube Controller Adapter,a:b1,b:b0,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b5,rightx:a5~,righty:a2~,start:b9,x:b2,y:b3,platform:Linux,
 060000007e0500000620000000000000,Nintendo Switch Combined Joy-Cons,a:b0,b:b1,back:b9,dpdown:b15,dpleft:b16,dpright:b17,dpup:b14,guide:b11,leftshoulder:b5,leftstick:b12,lefttrigger:b7,leftx:a0,lefty:a1,misc1:b4,rightshoulder:b6,rightstick:b13,righttrigger:b8,rightx:a2,righty:a3,start:b10,x:b3,y:b2,platform:Linux,
@@ -1268,7 +1302,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 05000000362800000100000003010000,OUYA Controller,a:b0,b:b3,dpdown:b9,dpleft:b10,dpright:b11,dpup:b8,guide:b14,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,x:b1,y:b2,platform:Linux,
 05000000362800000100000004010000,OUYA Controller,a:b0,b:b3,back:b14,dpdown:b9,dpleft:b10,dpright:b11,dpup:b8,leftshoulder:b4,leftstick:b6,lefttrigger:b12,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:b13,rightx:a3,righty:a4,start:b16,x:b1,y:b2,platform:Linux,
 03000000830500005020000010010000,Padix Rockfire PlayStation Bridge,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3,start:b11,x:b2,y:b3,platform:Linux,
-03000000790000001c18000011010000,PC Controller,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
 03000000ff1100003133000010010000,PC Controller,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
 030000006f0e0000b802000001010000,PDP Afterglow Xbox One Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 030000006f0e0000b802000013020000,PDP Afterglow Xbox One Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
@@ -1283,6 +1316,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000006f0e00000901000011010000,PDP Versus Fighting,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Linux,
 030000006f0e0000a802000023020000,PDP Xbox One Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b11,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b12,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
 030000006f0e0000a702000023020000,PDP Xbox One Raven Black,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
+03000000666600006706000000010000,PlayStation Adapter,a:b2,b:b1,back:b8,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b6,leftstick:b9,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b10,righttrigger:b5,rightx:a2,righty:a3,start:b11,x:b3,y:b0,platform:Linux,
 030000004c050000da0c000011010000,PlayStation Controller,a:b2,b:b1,back:b8,leftshoulder:b6,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b5,start:b9,x:b3,y:b0,platform:Linux,
 03000000d9040000160f000000010000,PlayStation Controller Adapter,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b9,x:b3,y:b0,platform:Linux,
 030000004c0500003713000011010000,PlayStation Vita,a:b1,b:b2,back:b8,dpdown:b13,dpleft:b15,dpright:b14,dpup:b12,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a3,righty:a4,start:b9,x:b0,y:b3,platform:Linux,
@@ -1326,10 +1360,12 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 050000004c050000cc09000000010000,PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 050000004c050000cc09000000810000,PS4 Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b11,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b12,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b3,y:b2,platform:Linux,
 050000004c050000cc09000001800000,PS4 Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b11,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b12,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b3,y:b2,platform:Linux,
-030000004c050000e60c000011010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Linux,
+030000004c050000e60c000011010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b14,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Linux,
 030000004c050000e60c000011810000,PS5 Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b11,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b12,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b3,y:b2,platform:Linux,
+030000004c050000f20d000011010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b14,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Linux,
 050000004c050000e60c000000010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Linux,
 050000004c050000e60c000000810000,PS5 Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b11,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b12,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b3,y:b2,platform:Linux,
+050000004c050000f20d000000010000,PS5 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,touchpad:b13,x:b0,y:b3,platform:Linux,
 03000000300f00001211000011010000,Qanba Arcade Joystick,a:b2,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b5,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b6,start:b9,x:b1,y:b3,platform:Linux,
 03000000222c00000225000011010000,Qanba Dragon Arcade Joystick (PS3),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 03000000222c00000025000011010000,Qanba Dragon Arcade Joystick (PS4),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
@@ -1341,7 +1377,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000009b2800003200000001010000,Raphnet GC and N64 Adapter,a:b0,b:b7,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:b5,rightx:a3,righty:a4,start:b3,x:b1,y:b8,platform:Linux,
 030000009b2800006000000001010000,Raphnet GC and N64 Adapter,a:b0,b:b7,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:b5,rightx:a3,righty:a4,start:b3,x:b1,y:b8,platform:Linux,
 030000008916000001fd000024010000,Razer Onza Classic Edition,a:b0,b:b1,back:b6,dpdown:b14,dpleft:b11,dpright:b12,dpup:b13,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
-030000008916000000fd000024010000,Razer Onza Tournament Edition,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 03000000321500000204000011010000,Razer Panthera PS3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 03000000321500000104000011010000,Razer Panthera PS4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 03000000321500000810000011010000,Razer Panthera PS4 Evo Arcade Stick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b13,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
@@ -1356,9 +1391,11 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 050000003215000000090000163a0000,Razer Serval,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a4,rightx:a2,righty:a3,start:b7,x:b2,y:b3,platform:Linux,
 0300000032150000030a000001010000,Razer Wildcat,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 03000000790000001100000010010000,Retro Controller,a:b1,b:b2,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,lefttrigger:b7,rightshoulder:b4,righttrigger:b5,start:b9,x:b0,y:b3,platform:Linux,
+190000004b4800000111000000010000,RetroGame Joypad,a:b1,b:b0,back:b8,dpdown:b14,dpleft:b15,dpright:b16,dpup:b13,leftshoulder:b4,leftstick:b11,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b12,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
 0300000081170000990a000001010000,Retronic Adapter,a:b0,leftx:a0,lefty:a1,platform:Linux,
 0300000000f000000300000000010000,RetroPad,a:b1,b:b5,back:b2,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,start:b3,x:b0,y:b4,platform:Linux,
 00000000526574726f53746f6e653200,RetroStone 2 Controller,a:b1,b:b0,back:b10,dpdown:b15,dpleft:b16,dpright:b17,dpup:b14,leftshoulder:b6,lefttrigger:b8,rightshoulder:b7,righttrigger:b9,start:b11,x:b4,y:b3,platform:Linux,
+03000000341200000400000000010000,RetroUSB N64 RetroPort,+rightx:b8,+righty:b10,-rightx:b9,-righty:b11,a:b7,b:b6,dpdown:b2,dpleft:b1,dpright:b0,dpup:b3,leftshoulder:b13,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b12,start:b4,platform:Linux,
 030000006b140000010d000011010000,Revolution Pro Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 030000006b140000130d000011010000,Revolution Pro Controller 3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 030000006f0e00001f01000000010000,Rock Candy,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
@@ -1414,9 +1451,11 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000666600000488000000010000,Super Joy Box 5 Pro,a:b2,b:b1,back:b9,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Linux,
 0300000000f00000f100000000010000,Super RetroPort,a:b1,b:b5,back:b2,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b4,platform:Linux,
 030000008f0e00000d31000010010000,SZMY Power 3 Turbo,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
+03000000457500000401000011010000,SZMY Power DS4 Wired Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 03000000457500002211000010010000,SZMY Power Gamepad,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
 030000008f0e00001431000010010000,SZMY Power PS3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
 03000000ba2200000701000001010000,Technology Innovation PS2 Adapter,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a5,righty:a2,start:b9,x:b3,y:b2,platform:Linux,
+03000000790000001c18000011010000,TGZ Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 030000004f04000015b3000001010000,Thrustmaster Dual Analog 3.2,a:b0,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b6,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b1,y:b3,platform:Linux,
 030000004f04000015b3000010010000,Thrustmaster Dual Analog 4,a:b0,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b6,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b1,y:b3,platform:Linux,
 030000004f04000020b3000010010000,Thrustmaster Dual Trigger,a:b0,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b6,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b1,y:b3,platform:Linux,
@@ -1449,6 +1488,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000006f0e00000302000011010000,Victrix Pro Fightstick PS4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Linux,
 030000006f0e00000702000011010000,Victrix Pro Fightstick PS4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Linux,
 05000000ac0500003232000001000000,VR Box Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b9,x:b2,y:b3,platform:Linux,
+05000000434f4d4d414e440000000000,VX Gaming Command Series,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
 0000000058626f782033363020576900,Xbox 360 Controller,a:b0,b:b1,back:b14,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,guide:b7,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b6,x:b2,y:b3,platform:Linux,
 030000005e0400001907000000010000,Xbox 360 Controller,a:b0,b:b1,back:b6,dpdown:b14,dpleft:b11,dpright:b12,dpup:b13,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 030000005e0400008e02000010010000,Xbox 360 Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
@@ -1486,8 +1526,8 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 050000005e040000130b000013050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b15,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 060000005e040000120b00000b050000,Xbox Series Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 030000005e040000120b000007050000,Xbox Series X Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,misc1:b11,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
+050000005e040000130b000007050000,Xbox Series X Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b15,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 050000005e040000130b000011050000,Xbox Series X Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b15,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
-050000005e040000130b000007050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 050000005e040000200b000013050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000450c00002043000010010000,XEOX SL6556 BK,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
 05000000172700004431000029010000,XiaoMi Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b20,leftshoulder:b6,leftstick:b13,lefttrigger:a7,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a6,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Linux,
@@ -1551,13 +1591,13 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 65323563303231646531383162646335,8BitDo SN30,a:b1,b:b0,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftx:a0,lefty:a1,rightshoulder:b10,rightx:a2,righty:a3,start:b6,x:b3,y:b2,platform:Android,
 35383531346263653330306238353131,8BitDo SN30 PP,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 05000000c82d000001600000ffff3f00,8BitDo SN30 Pro,a:b1,b:b0,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,leftshoulder:b9,leftstick:b7,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a4,rightx:a2,righty:a3,start:b6,x:b3,y:b2,platform:Android,
+05000000c82d000002600000ffff0f00,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b17,leftshoulder:b9,leftstick:b7,lefttrigger:b15,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b16,rightx:a2,righty:a3,start:b6,x:b3,y:b2,platform:Android,
 36653638656632326235346264663661,8BitDo SN30 Pro Plus,a:b0,b:b1,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b15,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b6,righttrigger:b10,rightx:a2,righty:a3,start:b18,x:b19,y:b2,platform:Android,
 38303232393133383836366330346462,8BitDo SN30 Pro Plus,a:b0,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b17,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b18,righttrigger:a5,rightx:a3,righty:a4,start:b10,x:b19,y:b2,platform:Android,
 38346630346135363335366265656666,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:b17,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b18,rightx:a2,righty:a3,start:b6,x:b3,y:b2,platform:Android,
 38426974446f20534e33302050726f2b,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b19,leftshoulder:b9,leftstick:b7,lefttrigger:b17,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b18,rightx:a2,righty:a3,start:b6,x:b3,y:b2,platform:Android,
 536f6e7920436f6d707574657220456e,8BitDo SN30 Pro Plus,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 66306331643531333230306437353936,8BitDo SN30 Pro Plus,a:b1,b:b0,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:b17,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b18,rightx:a2,righty:a3,start:b6,x:b3,y:b2,platform:Android,
-05000000c82d000002600000ffff0f00,8BitDo SN30 Pro+,a:b1,b:b0,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b17,leftshoulder:b9,leftstick:b7,lefttrigger:b15,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b16,rightx:a2,righty:a3,start:b6,x:b3,y:b2,platform:Android,
 050000002028000009000000ffff3f00,8BitDo SNES30,a:b1,b:b0,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b3,y:b2,platform:Android,
 050000003512000020ab000000780f00,8BitDo SNES30,a:b21,b:b20,back:b30,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b26,rightshoulder:b27,start:b31,x:b24,y:b23,platform:Android,
 33666663316164653937326237613331,8BitDo Zero,a:b0,b:b1,back:b15,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b9,rightshoulder:b10,start:b6,x:b2,y:b3,platform:Android,
@@ -1580,7 +1620,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 34323662653333636330306631326233,Google Nexus,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 35383633353935396534393230616564,Google Stadia Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 05000000d6020000e5890000dfff3f00,GPD XD Plus,a:b0,b:b1,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,leftshoulder:b9,leftstick:b7,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a4,rightx:a2,righty:a5,start:b6,x:b2,y:b3,platform:Android,
-0500000031366332860c44aadfff0f00,GS Gamepad,a:b0,b:b1,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,leftshoulder:b9,leftstick:b7,lefttrigger:b15,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b16,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 66633030656131663837396562323935,Hori Battle,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a3,righty:a4,start:b6,x:b2,y:b3,platform:Android,
 35623466343433653739346434636330,Hori Fighting Commander 3 Pro,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,righttrigger:b10,rightx:a2,righty:a3,start:b18,x:b0,y:b2,platform:Android,
 484f524920434f2e2c4c54442e203130,Hori Fighting Commander 3 Pro,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b20,lefttrigger:b10,leftx:a0,lefty:a1,rightshoulder:b3,righttrigger:b9,rightx:a2,righty:a3,start:b18,x:b0,y:b2,platform:Android,
@@ -1588,6 +1627,10 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 65656436646661313232656661616130,Hori PC Engine Mini Controller,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,start:b18,platform:Android,
 31303433326562636431653534636633,Hori Real Arcade Pro 3,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,righttrigger:b10,rightx:a2,righty:a3,start:b18,x:b0,y:b2,platform:Android,
 30306539356238653637313730656134,HORIPAD Switch Pro Controller,a:b0,b:b1,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b3,leftstick:b15,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b6,righttrigger:b10,rightx:a2,righty:a3,start:b18,x:b19,y:b2,platform:Android,
+48797065726b696e2050616400000000,Hyperkin Admiral N64 Controller,+rightx:b6,+righty:b7,-rightx:b17,-righty:b5,a:b1,b:b0,leftshoulder:b3,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b20,start:b18,platform:Android,
+62333331353131353034386136626636,Hyperkin Admiral N64 Controller,+rightx:b6,+righty:b7,-rightx:b17,-righty:b5,a:b1,b:b0,leftshoulder:b3,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b20,start:b18,platform:Android,
+31306635363562663834633739396333,Hyperkin N64 Adapter,a:b1,b:b19,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightx:a2,righty:a3,start:b18,platform:Android,
+5368616e57616e202020202048797065,Hyperkin N64 Adapter,a:b1,b:b19,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightx:a2,righty:a3,start:b18,platform:Android,
 0500000083050000602000000ffe0000,iBuffalo SNES Controller,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,leftshoulder:b15,rightshoulder:b16,start:b10,x:b2,y:b3,platform:Android,
 64306137363261396266353433303531,InterAct GoPad,a:b24,b:b25,leftshoulder:b23,lefttrigger:b27,leftx:a0,lefty:a1,rightshoulder:b26,righttrigger:b28,x:b21,y:b22,platform:Android,
 532e542e442e20496e74657261637420,InterAct HammerHead FX,a:b23,b:b24,back:b30,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b26,leftstick:b22,lefttrigger:b28,leftx:a0,lefty:a1,rightshoulder:b27,rightstick:b25,righttrigger:b29,rightx:a2,righty:a3,start:b31,x:b20,y:b21,platform:Android,
@@ -1604,6 +1647,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 35623364393661626231343866613337,Logitech F710,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:b17,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b18,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 4c6f6769746563682047616d65706164,Logitech F710,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 64396331333230326333313330336533,Logitech F710,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
+39653365373864633935383236363438,Logitech G Cloud,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4~,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5~,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 416d617a6f6e2047616d6520436f6e74,Luna Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a3,righty:a4,start:b6,x:b2,y:b3,platform:Android,
 4c756e612047616d6570616400000000,Luna Controller,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 30363066623539323534363639323363,Magic NS,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b15,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b6,righttrigger:b10,rightx:a2,righty:a3,start:b18,x:b0,y:b2,platform:Android,
@@ -1642,6 +1686,8 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 4f5559412047616d6520436f6e74726f,OUYA Controller,a:b0,b:b2,dpdown:b18,dpleft:b15,dpright:b6,dpup:b17,leftshoulder:b3,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,x:b1,y:b19,platform:Android,
 506572666f726d616e63652044657369,PDP PS3 Rock Candy Controller,a:b1,b:b17,back:h0.2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b4,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b18,rightstick:b6,righttrigger:b10,rightx:a2,righty:a3,start:b16,x:b0,y:b2,platform:Android,
 62653335326261303663356263626339,PlayStation Classic Controller,a:b19,b:b1,back:b17,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b9,lefttrigger:b3,rightshoulder:b10,righttrigger:b20,start:b18,x:b2,y:b0,platform:Android,
+536f6e7920496e746572616374697665,PlayStation Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,misc1:b8,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
+576972656c65737320436f6e74726f6c,PlayStation Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 61653962353232366130326530363061,Pokken,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,lefttrigger:b9,rightshoulder:b20,righttrigger:b10,start:b18,x:b0,y:b2,platform:Android,
 32666633663735353234363064386132,PS2,a:b23,b:b22,back:b29,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b27,lefttrigger:b25,leftx:a0,lefty:a1,rightshoulder:b28,righttrigger:b26,rightx:a3,righty:a2,start:b30,x:b24,y:b21,platform:Android,
 050000004c05000068020000dfff3f00,PS3 Controller,a:b0,b:b1,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
@@ -1661,20 +1707,21 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 34613139376634626133336530386430,PS4 Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 35643031303033326130316330353564,PS4 Controller,a:b1,b:b17,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b3,leftstick:b4,lefttrigger:+a3,leftx:a0,lefty:a1,rightshoulder:b18,rightstick:b6,righttrigger:+a4,rightx:a2,righty:a5,start:b16,x:b0,y:b2,platform:Android,
 37626233336235343937333961353732,PS4 Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
+37626464343430636562316661643863,PS4 Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 38393161636261653636653532386639,PS4 Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
-536f6e7920496e746572616374697665,PS4 Controller,a:b0,b:b0,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
-576972656c65737320436f6e74726f6c,PS4 Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 63313733393535663339656564343962,PS4 Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 63393662363836383439353064663939,PS4 Controller,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 65366465656364636137653363376531,PS4 Controller,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b15,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b6,righttrigger:b10,rightx:a2,righty:a3,start:b18,x:b0,y:b2,platform:Android,
 66613532303965383534396638613230,PS4 Controller,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b15,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b6,righttrigger:b10,rightx:a2,righty:a5,start:b18,x:b0,y:b2,platform:Android,
 050000004c050000e60c0000fffe3f00,PS5 Controller,a:b1,b:b17,back:b15,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b5,leftshoulder:b3,leftstick:b4,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b18,rightstick:b6,righttrigger:a4,rightx:a2,righty:a5,start:b16,x:b0,y:b2,platform:Android,
+050000004c050000e60c0000ffff3f00,PS5 Controller,a:b0,b:b1,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 32346465346533616263386539323932,PS5 Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 32633532643734376632656664383733,PS5 Controller,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b15,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b6,righttrigger:b10,rightx:a2,righty:a5,start:b18,x:b0,y:b2,platform:Android,
 37363764353731323963323639666565,PS5 Controller,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b15,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b6,righttrigger:b10,rightx:a2,righty:a5,start:b18,x:b0,y:b2,platform:Android,
-61303162353165316365336436343139,PS5 Controller,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b15,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,rightstick:b6,righttrigger:b10,rightx:a2,righty:a5,start:b18,x:b0,y:b2,platform:Android,
+61303162353165316365336436343139,PS5 Controller,a:b1,b:b19,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,leftstick:b15,lefttrigger:b9,leftx:a0,lefty:a1,misc1:b8,rightshoulder:b20,rightstick:b6,righttrigger:b10,rightx:a2,righty:a5,start:b18,x:b0,y:b2,platform:Android,
 64336263393933626535303339616332,Qanba 4RAF,a:b0,b:b1,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b20,lefttrigger:b10,leftx:a0,lefty:a1,rightshoulder:b3,righttrigger:b9,rightx:a2,righty:a3,start:b18,x:b19,y:b2,platform:Android,
 36626666353861663864336130363137,Razer Junglecat,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
+05000000f8270000bf0b0000ffff3f00,Razer Kishi,a:b0,b:b1,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 62653861643333663663383332396665,Razer Kishi,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a4,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 050000003215000005070000ffff3f00,Razer Raiju Mobile,a:b0,b:b1,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 050000003215000007070000ffff3f00,Razer Raiju Mobile,a:b0,b:b1,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
@@ -1686,6 +1733,10 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 61343739353764363165343237303336,Retro Controller,a:b0,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b17,lefttrigger:b18,leftx:a0,lefty:a1,start:b10,x:b2,y:b3,platform:Android,
 38653130373365613538333235303036,Retroid Pocket 2,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,lefttrigger:b17,leftx:a0,lefty:a1,rightshoulder:b10,righttrigger:b18,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 64363363336633363736393038313463,Retrolink,a:b1,b:b0,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,start:b6,platform:Android,
+37393234373533633333323633646531,RetroUSB N64 RetroPort,+rightx:b17,+righty:b15,-rightx:b18,-righty:b6,a:b10,b:b9,dpdown:b19,dpleft:b1,dpright:b0,dpup:b2,leftshoulder:b7,lefttrigger:b20,leftx:a0,lefty:a1,rightshoulder:b5,start:b3,platform:Android,
+5365616c6965436f6d707574696e6720,RetroUSB N64 RetroPort,+rightx:b17,+righty:b15,-rightx:b18,-righty:b6,a:b10,b:b9,dpdown:b19,dpleft:b1,dpright:b0,dpup:b2,leftshoulder:b7,lefttrigger:b20,leftx:a0,lefty:a1,rightshoulder:b5,start:b3,platform:Android,
+526574726f5553422e636f6d20534e45,RetroUSB SNES RetroPort,a:b1,b:b20,back:b19,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b9,rightshoulder:b10,start:b2,x:b0,y:b3,platform:Android,
+64643037633038386238303966376137,RetroUSB SNES RetroPort,a:b1,b:b20,back:b19,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b9,rightshoulder:b10,start:b2,x:b0,y:b3,platform:Android,
 33373336396634316434323337666361,RumblePad 2,a:b22,b:b23,back:b29,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b25,lefttrigger:b27,leftx:a0,lefty:a1,rightshoulder:b26,righttrigger:b28,rightx:a2,righty:a3,start:b30,x:b21,y:b24,platform:Android,
 66386565396238363534313863353065,Sanwa PlayOnline Mobile,a:b21,b:b22,back:b23,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,start:b24,platform:Android,
 32383165316333383766336338373261,Saturn,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,lefttrigger:b10,leftx:a0,lefty:a1,rightshoulder:a4,righttrigger:a5,x:b2,y:b3,platform:Android,
@@ -1706,6 +1757,8 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 05000000de2800000611000001000000,Steam Controller,a:b0,b:b1,back:b6,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:a3,start:b7,x:b2,y:b3,platform:Android,
 0500000011010000201400000f7e0f00,SteelSeries Nimbus,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b3,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b20,righttrigger:b10,rightx:a2,righty:a3,x:b19,y:b2,platform:Android,
 35306436396437373135383665646464,SteelSeries Nimbus Plus,a:b0,b:b1,leftshoulder:b3,leftstick:b17,lefttrigger:b9,leftx:a0,rightshoulder:b20,rightstick:b18,righttrigger:b10,rightx:a2,x:b19,y:b2,platform:Android,
+54475a20436f6e74726f6c6c65720000,TGZ Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:b17,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b18,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
+62363434353532386238336663643836,TGZ Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:b17,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b18,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 050000004f0400000ed00000fffe3f00,ThrustMaster eSwap Pro Controller,a:b0,b:b1,back:b4,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
 5477696e20555342204a6f7973746963,Twin Joystick,a:b22,b:b21,back:b28,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b26,leftstick:b30,lefttrigger:b24,leftx:a0,lefty:a1,rightshoulder:b27,rightstick:b31,righttrigger:b25,rightx:a3,righty:a2,start:b29,x:b23,y:b20,platform:Android,
 30623739343039643830333266346439,Valve Steam Controller,a:b0,b:b1,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,leftx:a0,lefty:a1,paddle1:b24,paddle2:b23,rightshoulder:b10,rightstick:b8,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
@@ -1749,9 +1802,12 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 05000000ac050000020000004f066d02,*,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b6,leftshoulder:b4,rightshoulder:b5,x:b2,y:b3,platform:iOS,
 4d466947616d65706164010000000000,MFi Extended Gamepad,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:a5,rightx:a3,righty:a4,start:b6,x:b2,y:b3,platform:iOS,
 4d466947616d65706164020000000000,MFi Gamepad,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,rightshoulder:b5,start:b6,x:b2,y:b3,platform:iOS,
+050000007e050000062000000f060000,Nintendo Switch Joy-Con (L),+leftx:h0.2,+lefty:h0.4,-leftx:h0.8,-lefty:h0.1,a:b0,b:b2,leftshoulder:b4,rightshoulder:b5,x:b1,y:b3,platform:iOS,
 050000007e050000062000004f060000,Nintendo Switch Joy-Con (L),+leftx:h0.1,+lefty:h0.2,-leftx:h0.4,-lefty:h0.8,dpdown:b2,dpleft:b0,dpright:b3,dpup:b1,leftshoulder:b4,misc1:b6,rightshoulder:b5,platform:iOS,
+050000007e05000008200000df070000,Nintendo Switch Joy-Con (L/R),a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b2,y:b3,platform:iOS,
 050000007e0500000e200000df070000,Nintendo Switch Joy-Con (L/R),a:b1,b:b0,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b3,y:b2,platform:iOS,
 050000007e050000072000004f060000,Nintendo Switch Joy-Con (R),+rightx:h0.4,+righty:h0.8,-rightx:h0.1,-righty:h0.2,a:b1,b:b0,guide:b6,leftshoulder:b4,rightshoulder:b5,x:b3,y:b2,platform:iOS,
+050000007e05000009200000df870000,Nintendo Switch Pro Controller,a:b1,b:b0,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,misc1:b10,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b3,y:b2,platform:iOS,
 050000007e05000009200000ff870000,Nintendo Switch Pro Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b9,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,misc1:b11,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b10,x:b2,y:b3,platform:iOS,
 050000004c050000cc090000df070000,PS4 Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b2,y:b3,platform:iOS,
 050000004c050000cc090000df870001,PS4 Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b2,y:b3,platform:iOS,

+ 1 - 0
main/godotcontrollerdb.txt

@@ -32,6 +32,7 @@ Linux046dc216,046d-c216-Logitech Logitech Dual Action,a:b1,b:b2,y:b3,x:b0,start:
 Linux20d6a713,Bensussen Deutsch & Associates Inc.(BDA) NSW Wired controller,a:b1,b:b2,y:b3,x:b0,start:b9,back:b8,leftstick:b10,rightstick:b11,leftshoulder:b4,rightshoulder:b5,dpup:-a5,dpleft:-a4,dpdown:+a5,dpright:+a4,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:b6,righttrigger:b7,platform:Javascript
 Linux054c05c4,Sony Computer Entertainment Wireless Controller,a:b0,b:b1,y:b2,x:b3,start:b9,back:b8,leftstick:b11,rightstick:b12,leftshoulder:b4,rightshoulder:b5,dpup:-a7,dpleft:-a6,dpdown:+a7,dpright:+a6,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:a2,righttrigger:a5,platform:Javascript
 Linux18d19400,18d1-9400-Google LLC Stadia Controller rev. A,a:b0,b:b1,y:b3,x:b2,start:b7,back:b6,leftstick:b9,rightstick:b10,leftshoulder:b4,rightshoulder:b5,dpup:-a7,dpleft:-a6,dpdown:+a7,dpright:+a6,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:a5,righttrigger:a4,platform:Javascript
+Linux054c0268,054c-0268-Sony PLAYSTATION(R)3 Controller,a:b0,b:b1,y:b2,x:b3,start:b9,back:b8,leftstick:b11,rightstick:b12,leftshoulder:b4,rightshoulder:b5,dpup:b13,dpleft:b15,dpdown:b14,dpright:b16,leftx:a0,lefty:a1,rightx:a3,righty:a4,lefttrigger:a2,righttrigger:a5,platform:Web
 
 # UWP
 __UWP_GAMEPAD__,Xbox Controller,a:b2,b:b3,x:b4,y:b5,start:b0,back:b1,leftstick:b12,rightstick:b13,leftshoulder:b10,rightshoulder:b11,dpup:b6,dpdown:b7,dpleft:b8,dpright:b9,leftx:a0,lefty:a1,rightx:a2,righty:a3,lefttrigger:a4,righttrigger:a5,platform:UWP,

+ 21 - 10
methods.py

@@ -46,17 +46,28 @@ def disable_warnings(self):
     if self.msvc:
         # We have to remove existing warning level defines before appending /w,
         # otherwise we get: "warning D9025 : overriding '/W3' with '/w'"
-        warn_flags = ["/Wall", "/W4", "/W3", "/W2", "/W1", "/WX"]
-        self.Append(CCFLAGS=["/w"])
-        self.Append(CFLAGS=["/w"])
-        self.Append(CXXFLAGS=["/w"])
-        self["CCFLAGS"] = [x for x in self["CCFLAGS"] if not x in warn_flags]
-        self["CFLAGS"] = [x for x in self["CFLAGS"] if not x in warn_flags]
-        self["CXXFLAGS"] = [x for x in self["CXXFLAGS"] if not x in warn_flags]
+        self["CCFLAGS"] = [x for x in self["CCFLAGS"] if not (x.startswith("/W") or x.startswith("/w"))]
+        self["CFLAGS"] = [x for x in self["CFLAGS"] if not (x.startswith("/W") or x.startswith("/w"))]
+        self["CXXFLAGS"] = [x for x in self["CXXFLAGS"] if not (x.startswith("/W") or x.startswith("/w"))]
+        self.AppendUnique(CCFLAGS=["/w"])
     else:
-        self.Append(CCFLAGS=["-w"])
-        self.Append(CFLAGS=["-w"])
-        self.Append(CXXFLAGS=["-w"])
+        self.AppendUnique(CCFLAGS=["-w"])
+
+
+def force_optimization_on_debug(self):
+    # 'self' is the environment
+    if self["target"] != "debug":
+        return
+
+    if self.msvc:
+        # We have to remove existing optimization level defines before appending /O2,
+        # otherwise we get: "warning D9025 : overriding '/0d' with '/02'"
+        self["CCFLAGS"] = [x for x in self["CCFLAGS"] if not x.startswith("/O")]
+        self["CFLAGS"] = [x for x in self["CFLAGS"] if not x.startswith("/O")]
+        self["CXXFLAGS"] = [x for x in self["CXXFLAGS"] if not x.startswith("/O")]
+        self.AppendUnique(CCFLAGS=["/O2"])
+    else:
+        self.AppendUnique(CCFLAGS=["-O3"])
 
 
 def add_module_version_string(self, s):

+ 0 - 1
misc/dist/ios_xcode/godot_ios.xcodeproj/project.pbxproj

@@ -241,7 +241,6 @@
 				GCC_DYNAMIC_NO_PIC = NO;
 				GCC_OPTIMIZATION_LEVEL = 0;
 				GCC_PREPROCESSOR_DEFINITIONS = (
-					"DEBUG=1",
 					"$(inherited)",
 				);
 				GCC_SYMBOLS_PRIVATE_EXTERN = NO;

+ 1 - 1
modules/denoise/SCsub

@@ -103,9 +103,9 @@ env_oidn.Append(
         "__STDC_LIMIT_MACROS",
         "DISABLE_VERBOSE",
         "MKLDNN_ENABLE_CONCURRENT_EXEC",
-        "NDEBUG",
     ]
 )
+env_oidn.AppendUnique(CPPDEFINES=["NDEBUG"])  # No assert() even in debug builds.
 
 env_thirdparty = env_oidn.Clone()
 env_thirdparty.disable_warnings()

+ 6 - 1
modules/raycast/SCsub

@@ -55,12 +55,16 @@ if env["builtin_embree"]:
         "kernels/bvh/bvh_builder_sah_mb.cpp",
         "kernels/bvh/bvh_builder_twolevel.cpp",
         "kernels/bvh/bvh_intersector1_bvh4.cpp",
+        "kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
+        "kernels/bvh/bvh_intersector_stream_bvh4.cpp",
+        "kernels/bvh/bvh_intersector_stream_filters.cpp",
     ]
 
     thirdparty_sources = [thirdparty_dir + file for file in embree_src]
 
     env_raycast.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "include"])
-    env_raycast.Append(CPPDEFINES=["EMBREE_TARGET_SSE2", "EMBREE_LOWEST_ISA", "TASKING_INTERNAL", "NDEBUG"])
+    env_raycast.Append(CPPDEFINES=["EMBREE_TARGET_SSE2", "EMBREE_LOWEST_ISA", "TASKING_INTERNAL"])
+    env_raycast.AppendUnique(CPPDEFINES=["NDEBUG"])  # No assert() even in debug builds.
 
     if not env.msvc:
         if env["arch"] in ["x86", "x86_64"]:
@@ -76,6 +80,7 @@ if env["builtin_embree"]:
             env.Append(LIBS=["psapi"])
 
     env_thirdparty = env_raycast.Clone()
+    env_thirdparty.force_optimization_on_debug()
     env_thirdparty.disable_warnings()
     env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
 

+ 25 - 11
modules/raycast/godot_update_embree.py

@@ -1,5 +1,7 @@
 import glob, os, shutil, subprocess, re
 
+git_tag = "v3.13.5"
+
 include_dirs = [
     "common/tasking",
     "kernels/bvh",
@@ -12,6 +14,7 @@ include_dirs = [
     "common/lexers",
     "common/simd",
     "common/simd/arm",
+    "common/simd/wasm",
     "include/embree3",
     "kernels/subdiv",
     "kernels/geometry",
@@ -61,6 +64,11 @@ cpp_files = [
     "kernels/bvh/bvh_builder_twolevel.cpp",
     "kernels/bvh/bvh_intersector1.cpp",
     "kernels/bvh/bvh_intersector1_bvh4.cpp",
+    "kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
+    "kernels/bvh/bvh_intersector_stream_bvh4.cpp",
+    "kernels/bvh/bvh_intersector_stream_filters.cpp",
+    "kernels/bvh/bvh_intersector_hybrid.cpp",
+    "kernels/bvh/bvh_intersector_stream.cpp",
 ]
 
 os.chdir("../../thirdparty")
@@ -71,6 +79,7 @@ if os.path.exists(dir_name):
 
 subprocess.run(["git", "clone", "https://github.com/embree/embree.git", "embree-tmp"])
 os.chdir("embree-tmp")
+subprocess.run(["git", "checkout", git_tag])
 
 commit_hash = str(subprocess.check_output(["git", "rev-parse", "HEAD"], universal_newlines=True)).strip()
 
@@ -89,8 +98,7 @@ for f in all_files:
 
 with open(os.path.join(dest_dir, "kernels/hash.h"), "w") as hash_file:
     hash_file.write(
-        f"""
-// Copyright 2009-2020 Intel Corporation
+        f"""// Copyright 2009-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
 #define RTC_HASH "{commit_hash}"
@@ -99,8 +107,7 @@ with open(os.path.join(dest_dir, "kernels/hash.h"), "w") as hash_file:
 
 with open(os.path.join(dest_dir, "kernels/config.h"), "w") as config_file:
     config_file.write(
-        """
-// Copyright 2009-2020 Intel Corporation
+        """// Copyright 2009-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
 /* #undef EMBREE_RAY_MASK */
@@ -117,10 +124,11 @@ with open(os.path.join(dest_dir, "kernels/config.h"), "w") as config_file:
 /* #undef EMBREE_GEOMETRY_INSTANCE */
 /* #undef EMBREE_GEOMETRY_GRID */
 /* #undef EMBREE_GEOMETRY_POINT */
-/* #undef EMBREE_RAY_PACKETS */
+#define EMBREE_RAY_PACKETS
 /* #undef EMBREE_COMPACT_POLYS */
 
 #define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0
+#define EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE
 
 #if defined(EMBREE_GEOMETRY_TRIANGLE)
   #define IF_ENABLED_TRIS(x) x
@@ -187,8 +195,7 @@ with open("CMakeLists.txt", "r") as cmake_file:
 
 with open(os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w") as config_file:
     config_file.write(
-        f"""
-// Copyright 2009-2021 Intel Corporation
+        f"""// Copyright 2009-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
 #pragma once
@@ -204,14 +211,16 @@ with open(os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w") as con
 #define EMBREE_MIN_WIDTH 0
 #define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
 
-#define EMBREE_STATIC_LIB
-/* #undef EMBREE_API_NAMESPACE */
+#if !defined(EMBREE_STATIC_LIB)
+#   define EMBREE_STATIC_LIB
+#endif
+/* #undef EMBREE_API_NAMESPACE*/
 
 #if defined(EMBREE_API_NAMESPACE)
 #  define RTC_NAMESPACE
-#  define RTC_NAMESPACE_BEGIN namespace  {{
+#  define RTC_NAMESPACE_BEGIN namespace {{
 #  define RTC_NAMESPACE_END }}
-#  define RTC_NAMESPACE_USE using namespace ;
+#  define RTC_NAMESPACE_USE using namespace;
 #  define RTC_API_EXTERN_C
 #  undef EMBREE_API_NAMESPACE
 #else
@@ -249,3 +258,8 @@ with open(os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w") as con
 
 os.chdir("..")
 shutil.rmtree("embree-tmp")
+
+subprocess.run(["git", "restore", "embree/patches"])
+
+for patch in os.listdir("embree/patches"):
+    subprocess.run(["git", "apply", "embree/patches/" + patch])

+ 2 - 2
modules/upnp/SCsub

@@ -26,9 +26,9 @@ if env["builtin_miniupnpc"]:
         "receivedata.c",
         "addr_is_reserved.c",
     ]
-    thirdparty_sources = [thirdparty_dir + "miniupnpc/" + file for file in thirdparty_sources]
+    thirdparty_sources = [thirdparty_dir + "src/" + file for file in thirdparty_sources]
 
-    env_upnp.Prepend(CPPPATH=[thirdparty_dir])
+    env_upnp.Prepend(CPPPATH=[thirdparty_dir + "include"])
     env_upnp.Append(CPPDEFINES=["MINIUPNP_STATICLIB"])
     env_upnp.Append(CPPDEFINES=["MINIUPNPC_SET_SOCKET_TIMEOUT"])
 

+ 2 - 2
modules/upnp/upnp.cpp

@@ -30,8 +30,8 @@
 
 #include "upnp.h"
 
-#include <miniupnpc/miniwget.h>
-#include <miniupnpc/upnpcommands.h>
+#include <miniwget.h>
+#include <upnpcommands.h>
 
 #include <stdlib.h>
 

+ 1 - 1
modules/upnp/upnp.h

@@ -35,7 +35,7 @@
 
 #include "upnp_device.h"
 
-#include <miniupnpc/miniupnpc.h>
+#include <miniupnpc.h>
 
 class UPNP : public Reference {
 	GDCLASS(UPNP, Reference);

+ 1 - 1
modules/upnp/upnp_device.cpp

@@ -32,7 +32,7 @@
 
 #include "upnp.h"
 
-#include <miniupnpc/upnpcommands.h>
+#include <upnpcommands.h>
 
 String UPNPDevice::query_external_address() const {
 	ERR_FAIL_COND_V(!is_valid_gateway(), "");

+ 1 - 4
platform/android/detect.py

@@ -129,12 +129,9 @@ def configure(env):
             env.Append(CCFLAGS=[opt])
         elif env["optimize"] == "size":  # optimize for size
             env.Append(CCFLAGS=["-Oz"])
-        env.Append(CPPDEFINES=["NDEBUG"])
     elif env["target"] == "debug":
         env.Append(LINKFLAGS=["-O0"])
-        env.Append(CCFLAGS=["-O0", "-g", "-fno-limit-debug-info"])
-        env.Append(CPPDEFINES=["_DEBUG"])
-        env.Append(CPPFLAGS=["-UNDEBUG"])
+        env.Append(CCFLAGS=["-O0", "-g"])
 
     # LTO
 

+ 4 - 4
platform/android/java/app/config.gradle

@@ -1,10 +1,10 @@
 ext.versions = [
-    androidGradlePlugin: '7.0.3',
+    androidGradlePlugin: '7.2.1',
     compileSdk         : 32,
-    minSdk             : 19, // Also update 'platform/android/java/lib/AndroidManifest.xml#minSdkVersion' & 'platform/android/export/export_plugin.cpp#DEFAULT_MIN_SDK_VERSION'
-    targetSdk          : 32, // Also update 'platform/android/java/lib/AndroidManifest.xml#targetSdkVersion' & 'platform/android/export/export_plugin.cpp#DEFAULT_TARGET_SDK_VERSION'
+    minSdk             : 19, // Also update 'platform/android/export/export_plugin.cpp#DEFAULT_MIN_SDK_VERSION'
+    targetSdk          : 32, // Also update 'platform/android/export/export_plugin.cpp#DEFAULT_TARGET_SDK_VERSION'
     buildTools         : '32.0.0',
-    kotlinVersion      : '1.6.21',
+    kotlinVersion      : '1.7.0',
     fragmentVersion    : '1.3.6',
     nexusPublishVersion: '1.1.0',
     javaVersion        : 11,

+ 1 - 1
platform/android/java/gradle/wrapper/gradle-wrapper.properties

@@ -1,5 +1,5 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists

+ 0 - 3
platform/android/java/lib/AndroidManifest.xml

@@ -4,9 +4,6 @@
     android:versionCode="1"
     android:versionName="1.0">
 
-    <!-- Should match the mindSdk and targetSdk values in platform/android/java/app/config.gradle -->
-    <uses-sdk android:minSdkVersion="19" android:targetSdkVersion="32" />
-
     <application>
 
         <!-- Records the version of the Godot library -->

+ 6 - 7
platform/android/java/lib/build.gradle

@@ -170,11 +170,10 @@ android {
         }
     }
 
-    // TODO: Enable when issues with AGP 7.1+ are resolved (https://github.com/GodotVR/godot_openxr/issues/187).
-//    publishing {
-//        singleVariant("templateRelease") {
-//            withSourcesJar()
-//            withJavadocJar()
-//        }
-//    }
+    publishing {
+        singleVariant("templateRelease") {
+            withSourcesJar()
+            withJavadocJar()
+        }
+    }
 }

+ 2 - 3
platform/iphone/detect.py

@@ -49,7 +49,7 @@ def configure(env):
     ## Build type
 
     if env["target"].startswith("release"):
-        env.Append(CPPDEFINES=["NDEBUG", ("NS_BLOCK_ASSERTIONS", 1)])
+        env.Append(CPPDEFINES=[("NS_BLOCK_ASSERTIONS", 1)])
         if env["optimize"] == "speed":  # optimize for speed (default)
             # `-O2` is more friendly to debuggers than `-O3`, leading to better crash backtraces
             # when using `target=release_debug`.
@@ -61,8 +61,7 @@ def configure(env):
             env.Append(LINKFLAGS=["-Os"])
 
     elif env["target"] == "debug":
-        env.Append(CCFLAGS=["-gdwarf-2", "-O0"])
-        env.Append(CPPDEFINES=["_DEBUG", ("DEBUG", 1)])
+        env.Append(CCFLAGS=["-g", "-O0"])
 
     ## LTO
 

+ 14 - 0
platform/javascript/SCsub

@@ -2,6 +2,20 @@
 
 Import("env")
 
+# The HTTP server "targets". Run with "scons p=javascript serve", or "scons p=javascript run"
+if "serve" in COMMAND_LINE_TARGETS or "run" in COMMAND_LINE_TARGETS:
+    from serve import serve
+    import os
+
+    port = os.environ.get("GODOT_WEB_TEST_PORT", 8060)
+    try:
+        port = int(port)
+    except Exception:
+        print("GODOT_WEB_TEST_PORT must be a valid integer")
+        sys.exit(255)
+    serve(env.Dir("#bin/.javascript_zip").abspath, port, "run" in COMMAND_LINE_TARGETS)
+    sys.exit(0)
+
 javascript_files = [
     "audio_driver_javascript.cpp",
     "godot_webgl2.cpp",

文件差异内容过多而无法显示
+ 1 - 722
platform/javascript/package-lock.json


+ 2 - 4
platform/javascript/package.json

@@ -14,8 +14,7 @@
     "format:engine": "npm run lint:engine -- --fix",
     "format:libs": "npm run lint:libs -- --fix",
     "format:modules": "npm run lint:modules -- --fix",
-    "format:tools": "npm run lint:tools -- --fix",
-    "serve": "serve"
+    "format:tools": "npm run lint:tools -- --fix"
   },
   "author": "Godot Engine contributors",
   "license": "MIT",
@@ -23,7 +22,6 @@
     "eslint": "^7.28.0",
     "eslint-config-airbnb-base": "^14.2.1",
     "eslint-plugin-import": "^2.23.4",
-    "jsdoc": "^3.6.7",
-    "serve": "^13.0.2"
+    "jsdoc": "^3.6.7"
   }
 }

+ 0 - 21
platform/javascript/serve.json

@@ -1,21 +0,0 @@
-{
-    "public": "../../bin",
-    "headers": [{
-        "source": "**/*",
-        "headers": [
-            {
-                "key": "Cross-Origin-Embedder-Policy",
-                "value": "require-corp"
-            }, {
-                "key": "Cross-Origin-Opener-Policy",
-                "value": "same-origin"
-            }, {
-                "key":  "Access-Control-Allow-Origin",
-                "value": "*"
-            }, {
-                "key": "Cache-Control",
-                "value": "no-store, max-age=0"
-            }
-        ]
-    }]
-}

+ 55 - 0
platform/javascript/serve.py

@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+from http.server import HTTPServer, SimpleHTTPRequestHandler, test  # type: ignore
+from pathlib import Path
+import os
+import sys
+import argparse
+import subprocess
+
+
+class CORSRequestHandler(SimpleHTTPRequestHandler):
+    def end_headers(self):
+        self.send_header("Cross-Origin-Opener-Policy", "same-origin")
+        self.send_header("Cross-Origin-Embedder-Policy", "require-corp")
+        self.send_header("Access-Control-Allow-Origin", "*")
+        super().end_headers()
+
+
+def shell_open(url):
+    if sys.platform == "win32":
+        os.startfile(url)
+    else:
+        opener = "open" if sys.platform == "darwin" else "xdg-open"
+        subprocess.call([opener, url])
+
+
+def serve(root, port, run_browser):
+    os.chdir(root)
+
+    if run_browser:
+        # Open the served page in the user's default browser.
+        print("Opening the served URL in the default browser (use `--no-browser` or `-n` to disable this).")
+        shell_open(f"http://127.0.0.1:{port}")
+
+    test(CORSRequestHandler, HTTPServer, port=port)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-p", "--port", help="port to listen on", default=8060, type=int)
+    parser.add_argument(
+        "-r", "--root", help="path to serve as root (relative to `platform/web/`)", default="../../bin", type=Path
+    )
+    browser_parser = parser.add_mutually_exclusive_group(required=False)
+    browser_parser.add_argument(
+        "-n", "--no-browser", help="don't open default web browser automatically", dest="browser", action="store_false"
+    )
+    parser.set_defaults(browser=True)
+    args = parser.parse_args()
+
+    # Change to the directory where the script is located,
+    # so that the script can be run from any location.
+    os.chdir(Path(__file__).resolve().parent)
+
+    serve(args.root, args.port, args.browser)

+ 0 - 1
platform/windows/detect.py

@@ -193,7 +193,6 @@ def configure_msvc(env, manual_msvc_config):
 
     elif env["target"] == "debug":
         env.AppendUnique(CCFLAGS=["/Zi", "/FS", "/Od", "/EHsc"])
-        # Allow big objects. Only needed for debug, see MinGW branch for rationale.
         env.Append(LINKFLAGS=["/DEBUG"])
 
     if env["windows_subsystem"] == "gui":

+ 6 - 0
scene/gui/control.cpp

@@ -172,6 +172,12 @@ void Control::set_custom_minimum_size(const Size2 &p_custom) {
 	if (p_custom == data.custom_minimum_size) {
 		return;
 	}
+
+	if (isnan(p_custom.x) || isnan(p_custom.y)) {
+		// Prevent infinite loop.
+		return;
+	}
+
 	data.custom_minimum_size = p_custom;
 	minimum_size_changed();
 }

+ 3 - 1
scene/gui/tree.cpp

@@ -3089,7 +3089,9 @@ Tree::SelectMode Tree::get_select_mode() const {
 void Tree::deselect_all() {
 	TreeItem *item = get_next_selected(get_root());
 	while (item) {
-		item->deselect(selected_col);
+		for (int i = 0; i < columns.size(); i++) {
+			item->deselect(i);
+		}
 		TreeItem *prev_item = item;
 		item = get_next_selected(get_root());
 		ERR_FAIL_COND(item == prev_item);

+ 7 - 10
thirdparty/README.md

@@ -53,7 +53,7 @@ Files extracted from upstream source:
 ## embree
 
 - Upstream: https://github.com/embree/embree
-- Version: 3.13.0 (7c53133eb21424f7f0ae1e25bf357e358feaf6ab, 2021)
+- Version: 3.13.5 (698442324ccddd11725fb8875275dc1384f7fb40, 2022)
 - License: Apache 2.0
 
 Files extracted from upstream:
@@ -308,22 +308,19 @@ Files extracted from upstream repository:
 ## miniupnpc
 
 - Upstream: https://github.com/miniupnp/miniupnp
-- Version: 2.2.2 (81029a860baf1f727903e5b85307903b3f40cbc8, 2021)
+- Version: 2.2.4 (7d1d8bc3868b08ad003bad235eee57562b95b76d, 2022)
 - License: BSD-3-Clause
 
 Files extracted from upstream source:
 
-- All `*.c` and `*.h` files from `miniupnpc` to `thirdparty/miniupnpc/miniupnpc`
+- Copy `miniupnpc/src` and `miniupnpc/include` to `thirdparty/miniupnpc`
 - Remove the following test or sample files:
-  `listdevices.c minihttptestserver.c miniupnpcmodule.c upnpc.c upnperrors.* test* wingenminiupnpcstrings.c`
+  `listdevices.c minihttptestserver.c miniupnpcmodule.c upnpc.c upnperrors.* test*`
 - `LICENSE`
 
-The only modified file is `miniupnpcstrings.h`, which was created for Godot
-(it is usually autogenerated by cmake). Bump the version number for miniupnpc in that
-file when upgrading.
-
-Note: The following upstream patch has been applied, remove this notice on next update.
-https://github.com/miniupnp/miniupnp/commit/3a08dd4b89af2e9effa22a136bac86f2f306fd79
+The only modified file is `src/miniupnpcstrings.h`, which was created for Godot
+(it is usually autogenerated by cmake). Bump the version number for miniupnpc in
+that file when upgrading.
 
 
 ## minizip

+ 0 - 1
thirdparty/embree/common/algorithms/parallel_for.h

@@ -26,7 +26,6 @@ namespace embree
         abort();
         // -- GODOT end --
     }
-    
 #elif defined(TASKING_TBB)
   #if TBB_INTERFACE_VERSION >= 12002
     tbb::task_group_context context;

+ 17 - 6
thirdparty/embree/common/algorithms/parallel_for_for.h

@@ -30,15 +30,20 @@ namespace embree
     template<typename ArrayArray>
       __forceinline ParallelForForState (ArrayArray& array2, const size_t minStepSize) {
       init(array2,minStepSize);
+    }
+
+    template<typename SizeFunc>
+    __forceinline ParallelForForState (const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize) {
+      init(numArrays,getSize,minStepSize);
     } 
 
-    template<typename ArrayArray>
-      __forceinline void init ( ArrayArray& array2, const size_t minStepSize )
+    template<typename SizeFunc>
+    __forceinline void init ( const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize )
     {
       /* first calculate total number of elements */
       size_t N = 0;
-      for (size_t i=0; i<array2.size(); i++) {
-	N += array2[i] ? array2[i]->size() : 0;
+      for (size_t i=0; i<numArrays; i++) {
+	N += getSize(i);
       }
       this->N = N;
 
@@ -54,8 +59,8 @@ namespace embree
       size_t k0 = (++taskIndex)*N/taskCount;
       for (size_t i=0, k=0; taskIndex < taskCount; i++) 
       {
-	assert(i<array2.size());
-	size_t j=0, M = array2[i] ? array2[i]->size() : 0;
+	assert(i<numArrays);
+	size_t j=0, M = getSize(i);
 	while (j<M && k+M-j >= k0 && taskIndex < taskCount) {
 	  assert(taskIndex<taskCount);
 	  i0[taskIndex] = i;
@@ -67,6 +72,12 @@ namespace embree
       }
     }
 
+    template<typename ArrayArray>
+      __forceinline void init ( ArrayArray& array2, const size_t minStepSize )
+    {
+      init(array2.size(),[&](size_t i) { return array2[i] ? array2[i]->size() : 0; },minStepSize);
+    }
+    
     __forceinline size_t size() const {
       return N;
     }

+ 40 - 10
thirdparty/embree/common/algorithms/parallel_for_for_prefix_sum.h

@@ -17,15 +17,20 @@ namespace embree
       __forceinline ParallelForForPrefixSumState (ArrayArray& array2, const size_t minStepSize)
       : ParallelForForState(array2,minStepSize) {}
 
+    template<typename SizeFunc>
+    __forceinline ParallelForForPrefixSumState (size_t numArrays, const SizeFunc& getSize, const size_t minStepSize)
+      : ParallelForForState(numArrays,getSize,minStepSize) {}
+
     ParallelPrefixSumState<Value> prefix_state;
   };
   
-  template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
-    __forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2, Index minStepSize, 
-                                                      const Value& identity, const Func& func, const Reduction& reduction)
+  template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
+    __forceinline Value parallel_for_for_prefix_sum0_( ParallelForForPrefixSumState<Value>& state, Index minStepSize, 
+                                                       const SizeFunc& getSize, const Value& identity, const Func& func, const Reduction& reduction)
   {
     /* calculate number of tasks to use */
     const size_t taskCount = state.taskCount;
+    
     /* perform parallel prefix sum */
     parallel_for(taskCount, [&](const size_t taskIndex)
     {
@@ -38,9 +43,9 @@ namespace embree
       size_t k=k0;
       Value N=identity;
       for (size_t i=i0; k<k1; i++) {
-	const size_t size = array2[i] ? array2[i]->size() : 0;
+	const size_t size = getSize(i);
         const size_t r0 = j0, r1 = min(size,r0+k1-k);
-        if (r1 > r0) N = reduction(N, func(array2[i],range<Index>((Index)r0,(Index)r1),(Index)k,(Index)i));
+        if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k));
         k+=r1-r0; j0 = 0;
       }
       state.prefix_state.counts[taskIndex] = N;
@@ -58,9 +63,10 @@ namespace embree
     return sum;
   }
 
-  template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
-    __forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2, Index minStepSize, 
-                                                      const Value& identity, const Func& func, const Reduction& reduction)
+  template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
+    __forceinline Value parallel_for_for_prefix_sum1_( ParallelForForPrefixSumState<Value>& state, Index minStepSize, 
+                                                       const SizeFunc& getSize, 
+                                                       const Value& identity, const Func& func, const Reduction& reduction)
   {
     /* calculate number of tasks to use */
     const size_t taskCount = state.taskCount;
@@ -76,9 +82,9 @@ namespace embree
       size_t k=k0;
       Value N=identity;
       for (size_t i=i0; k<k1; i++) {
-	const size_t size = array2[i] ? array2[i]->size() : 0;
+	const size_t size = getSize(i);
         const size_t r0 = j0, r1 = min(size,r0+k1-k);
-        if (r1 > r0) N = reduction(N, func(array2[i],range<Index>((Index)r0,(Index)r1),(Index)k,(Index)i,reduction(state.prefix_state.sums[taskIndex],N)));
+        if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k,reduction(state.prefix_state.sums[taskIndex],N)));
         k+=r1-r0; j0 = 0;
       }
       state.prefix_state.counts[taskIndex] = N;
@@ -96,6 +102,30 @@ namespace embree
     return sum;
   }
 
+  template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
+  __forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state,
+                                                    ArrayArray& array2, Index minStepSize, 
+                                                    const Value& identity, const Func& func, const Reduction& reduction)
+  {
+    return parallel_for_for_prefix_sum0_(state,minStepSize,
+                                        [&](Index i) { return array2[i] ? array2[i]->size() : 0; },
+                                        identity,
+                                        [&](Index i, const range<Index>& r, Index k) { return func(array2[i], r, k, i); },
+                                        reduction);
+  }
+
+  template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
+  __forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state,
+                                                    ArrayArray& array2, Index minStepSize, 
+                                                    const Value& identity, const Func& func, const Reduction& reduction)
+  {
+    return parallel_for_for_prefix_sum1_(state,minStepSize,
+                                        [&](Index i) { return array2[i] ? array2[i]->size() : 0; },
+                                        identity,
+                                        [&](Index i, const range<Index>& r, Index k, const Value& base) { return func(array2[i], r, k, i, base); },
+                                        reduction);
+  }                                       
+
   template<typename ArrayArray, typename Value, typename Func, typename Reduction>
     __forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2, 
 						     const Value& identity, const Func& func, const Reduction& reduction)

+ 1 - 1
thirdparty/embree/common/algorithms/parallel_reduce.h

@@ -26,7 +26,7 @@ namespace embree
     const Index threadCount = (Index) TaskScheduler::threadCount();
     taskCount = min(taskCount,threadCount,maxTasks);
 
-    /* parallel invokation of all tasks */
+    /* parallel invocation of all tasks */
     dynamic_large_stack_array(Value,values,taskCount,8192); // consumes at most 8192 bytes on the stack
     parallel_for(taskCount, [&](const Index taskIndex) {
         const Index k0 = first+(taskIndex+0)*(last-first)/taskCount;

+ 5 - 5
thirdparty/embree/common/math/bbox.h

@@ -77,7 +77,7 @@ namespace embree
     return lower > upper;
   }
 
-#if defined(__SSE__)
+#if defined(__SSE__) || defined(__ARM_NEON)
   template<> __forceinline bool BBox<Vec3fa>::empty() const {
     return !all(le_mask(lower,upper));
   }
@@ -196,11 +196,11 @@ namespace embree
   }
 
   template<> __inline bool subset( const BBox<Vec3fa>& a, const BBox<Vec3fa>& b ) {
-    return all(ge_mask(a.lower,b.lower)) & all(le_mask(a.upper,b.upper));
+    return all(ge_mask(a.lower,b.lower)) && all(le_mask(a.upper,b.upper));
   }
 
   template<> __inline bool subset( const BBox<Vec3fx>& a, const BBox<Vec3fx>& b ) {
-    return all(ge_mask(a.lower,b.lower)) & all(le_mask(a.upper,b.upper));
+    return all(ge_mask(a.lower,b.lower)) && all(le_mask(a.upper,b.upper));
   }
   
   /*! blending */
@@ -228,11 +228,11 @@ namespace embree
 /// SSE / AVX / MIC specializations
 ////////////////////////////////////////////////////////////////////////////////
 
-#if defined __SSE__
+#if defined (__SSE__) || defined(__ARM_NEON)
 #include "../simd/sse.h"
 #endif
 
-#if defined __AVX__
+#if defined (__AVX__)
 #include "../simd/avx.h"
 #endif
 

+ 18 - 1
thirdparty/embree/common/math/color.h

@@ -152,21 +152,38 @@ namespace embree
   }
   __forceinline const Color rcp  ( const Color& a )
   {
+#if defined(__aarch64__)
+    __m128 reciprocal = _mm_rcp_ps(a.m128);
+    reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
+    reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
+    return (const Color)reciprocal;
+#else
 #if defined(__AVX512VL__)
     const Color r = _mm_rcp14_ps(a.m128);
 #else
     const Color r = _mm_rcp_ps(a.m128);
 #endif
-    return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
+    return _mm_add_ps(r,_mm_mul_ps(r, _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(a, r))));   // computes r + r * (1 - a * r)
+
+#endif  //defined(__aarch64__)
   }
   __forceinline const Color rsqrt( const Color& a )
   {
+#if defined(__aarch64__)
+    __m128 r = _mm_rsqrt_ps(a.m128);
+    r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
+    r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
+    return r;
+#else
+
 #if defined(__AVX512VL__)
     __m128 r = _mm_rsqrt14_ps(a.m128);
 #else
     __m128 r = _mm_rsqrt_ps(a.m128);
 #endif
     return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+
+#endif  //defined(__aarch64__)
   }
   __forceinline const Color sqrt ( const Color& a ) { return _mm_sqrt_ps(a.m128); }
 

+ 0 - 19
thirdparty/embree/common/math/constants.cpp

@@ -5,23 +5,4 @@
 
 namespace embree
 {
-  TrueTy True;
-  FalseTy False;
-  ZeroTy zero;
-  OneTy one;
-  NegInfTy neg_inf;
-  PosInfTy inf;
-  PosInfTy pos_inf;
-  NaNTy nan;
-  UlpTy ulp;
-  PiTy pi;
-  OneOverPiTy one_over_pi;
-  TwoPiTy two_pi;
-  OneOverTwoPiTy one_over_two_pi;
-  FourPiTy four_pi;
-  OneOverFourPiTy one_over_four_pi;
-  StepTy step;
-  ReverseStepTy reverse_step;
-  EmptyTy empty;
-  UndefinedTy undefined;
 }

+ 33 - 21
thirdparty/embree/common/math/constants.h

@@ -24,13 +24,13 @@ namespace embree
     __forceinline operator bool( ) const { return true; }
   };
 
-  extern MAYBE_UNUSED TrueTy True;
+  const constexpr TrueTy True = TrueTy();
 
   struct FalseTy {
     __forceinline operator bool( ) const { return false; }
   };
 
-  extern MAYBE_UNUSED FalseTy False;
+  const constexpr FalseTy False = FalseTy();
   
   struct ZeroTy
   {
@@ -48,7 +48,7 @@ namespace embree
     __forceinline operator unsigned char     ( ) const { return 0; }
   }; 
 
-  extern MAYBE_UNUSED ZeroTy zero;
+  const constexpr ZeroTy zero = ZeroTy();
 
   struct OneTy
   {
@@ -66,7 +66,7 @@ namespace embree
     __forceinline operator unsigned char     ( ) const { return 1; }
   };
 
-  extern MAYBE_UNUSED OneTy one;
+  const constexpr OneTy one = OneTy();
 
   struct NegInfTy
   {
@@ -85,7 +85,7 @@ namespace embree
 
   };
 
-  extern MAYBE_UNUSED NegInfTy neg_inf;
+  const constexpr NegInfTy neg_inf = NegInfTy();
 
   struct PosInfTy
   {
@@ -103,8 +103,8 @@ namespace embree
     __forceinline operator unsigned char     ( ) const { return std::numeric_limits<unsigned char>::max(); }
   };
 
-  extern MAYBE_UNUSED PosInfTy inf;
-  extern MAYBE_UNUSED PosInfTy pos_inf;
+  const constexpr PosInfTy     inf = PosInfTy();
+  const constexpr PosInfTy pos_inf = PosInfTy();
 
   struct NaNTy
   {
@@ -112,15 +112,15 @@ namespace embree
     __forceinline operator float ( ) const { return std::numeric_limits<float>::quiet_NaN(); }
   };
 
-  extern MAYBE_UNUSED NaNTy nan;
+  const constexpr NaNTy nan = NaNTy();
 
   struct UlpTy
   {
     __forceinline operator double( ) const { return std::numeric_limits<double>::epsilon(); }
     __forceinline operator float ( ) const { return std::numeric_limits<float>::epsilon(); }
   };
-
-  extern MAYBE_UNUSED UlpTy ulp;
+  
+  const constexpr UlpTy ulp = UlpTy();
 
   struct PiTy
   {
@@ -128,7 +128,7 @@ namespace embree
     __forceinline operator float ( ) const { return float(M_PI); }
   };
 
-  extern MAYBE_UNUSED PiTy pi;
+  const constexpr PiTy pi = PiTy();
 
   struct OneOverPiTy
   {
@@ -136,7 +136,7 @@ namespace embree
     __forceinline operator float ( ) const { return float(M_1_PI); }
   };
 
-  extern MAYBE_UNUSED OneOverPiTy one_over_pi;
+  const constexpr OneOverPiTy one_over_pi = OneOverPiTy();
 
   struct TwoPiTy
   {
@@ -144,7 +144,7 @@ namespace embree
     __forceinline operator float ( ) const { return float(2.0*M_PI); }
   };
 
-  extern MAYBE_UNUSED TwoPiTy two_pi;
+  const constexpr TwoPiTy two_pi = TwoPiTy();
 
   struct OneOverTwoPiTy
   {
@@ -152,7 +152,7 @@ namespace embree
     __forceinline operator float ( ) const { return float(0.5*M_1_PI); }
   };
 
-  extern MAYBE_UNUSED OneOverTwoPiTy one_over_two_pi;
+  const constexpr OneOverTwoPiTy one_over_two_pi = OneOverTwoPiTy();
 
   struct FourPiTy
   {
@@ -160,7 +160,7 @@ namespace embree
     __forceinline operator float ( ) const { return float(4.0*M_PI); }
   };
 
-  extern MAYBE_UNUSED FourPiTy four_pi;
+  const constexpr FourPiTy four_pi = FourPiTy();
 
   struct OneOverFourPiTy
   {
@@ -168,30 +168,42 @@ namespace embree
     __forceinline operator float ( ) const { return float(0.25*M_1_PI); }
   };
 
-  extern MAYBE_UNUSED OneOverFourPiTy one_over_four_pi;
+  const constexpr OneOverFourPiTy one_over_four_pi = OneOverFourPiTy();
 
   struct StepTy {
+    __forceinline operator          double   ( ) const { return 0; }
+    __forceinline operator          float    ( ) const { return 0; }
+    __forceinline operator          long long( ) const { return 0; }
+    __forceinline operator unsigned long long( ) const { return 0; }
+    __forceinline operator          long     ( ) const { return 0; }
+    __forceinline operator unsigned long     ( ) const { return 0; }
+    __forceinline operator          int      ( ) const { return 0; }
+    __forceinline operator unsigned int      ( ) const { return 0; }
+    __forceinline operator          short    ( ) const { return 0; }
+    __forceinline operator unsigned short    ( ) const { return 0; }
+    __forceinline operator          char     ( ) const { return 0; }
+    __forceinline operator unsigned char     ( ) const { return 0; }
   };
 
-  extern MAYBE_UNUSED StepTy step;
+  const constexpr StepTy step = StepTy();
 
   struct ReverseStepTy {
   };
 
-  extern MAYBE_UNUSED ReverseStepTy reverse_step;
+  const constexpr ReverseStepTy reverse_step = ReverseStepTy();
 
   struct EmptyTy {
   };
 
-  extern MAYBE_UNUSED EmptyTy empty;
+  const constexpr EmptyTy empty = EmptyTy();
 
   struct FullTy {
   };
 
-  extern MAYBE_UNUSED FullTy full;
+  const constexpr FullTy full = FullTy();
 
   struct UndefinedTy {
   };
 
-  extern MAYBE_UNUSED UndefinedTy undefined;
+  const constexpr UndefinedTy undefined = UndefinedTy();
 }

+ 95 - 9
thirdparty/embree/common/math/math.h

@@ -53,6 +53,16 @@ namespace embree
 
   __forceinline float rcp  ( const float x )
   {
+#if defined(__aarch64__)
+      // Move scalar to vector register and do rcp.
+      __m128 a;
+      a[0] = x;
+      float32x4_t reciprocal = vrecpeq_f32(a);
+      reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
+      reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
+      return reciprocal[0];
+#else
+
     const __m128 a = _mm_set_ss(x);
 
 #if defined(__AVX512VL__)
@@ -66,30 +76,71 @@ namespace embree
 #else
     return _mm_cvtss_f32(_mm_mul_ss(r,_mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a))));
 #endif
+
+#endif  //defined(__aarch64__)
   }
 
   __forceinline float signmsk ( const float x ) {
+#if defined(__aarch64__)
+      // FP and Neon shares same vector register in arm64
+      __m128 a;
+      __m128i b;
+      a[0] = x;
+      b[0] = 0x80000000;
+      a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
+      return a[0];
+#else
     return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
+#endif
   }
   __forceinline float xorf( const float x, const float y ) {
+#if defined(__aarch64__)
+      // FP and Neon shares same vector register in arm64
+      __m128 a;
+      __m128 b;
+      a[0] = x;
+      b[0] = y;
+      a = _mm_xor_ps(a, b);
+      return a[0];
+#else
     return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
+#endif
   }
   __forceinline float andf( const float x, const unsigned y ) {
+#if defined(__aarch64__)
+      // FP and Neon shares same vector register in arm64
+      __m128 a;
+      __m128i b;
+      a[0] = x;
+      b[0] = y;
+      a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
+      return a[0];
+#else
     return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
+#endif
   }
   __forceinline float rsqrt( const float x )
   {
+#if defined(__aarch64__)
+      // FP and Neon shares same vector register in arm64
+      __m128 a;
+      a[0] = x;
+      __m128 value = _mm_rsqrt_ps(a);
+      value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
+      value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
+      return value[0];
+#else
+
     const __m128 a = _mm_set_ss(x);
 #if defined(__AVX512VL__)
     __m128 r = _mm_rsqrt14_ss(_mm_set_ss(0.0f),a);
 #else
     __m128 r = _mm_rsqrt_ss(a);
 #endif
-    r = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r)));
-#if defined(__ARM_NEON)
-    r = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r)));
+    const __m128 c = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r),
+                                _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r)));
+    return _mm_cvtss_f32(c);
 #endif
-    return _mm_cvtss_f32(r);
   }
 
 #if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
@@ -146,7 +197,17 @@ namespace embree
   __forceinline double floor( const double x ) { return ::floor (x); }
   __forceinline double ceil ( const double x ) { return ::ceil (x); }
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__)
+    __forceinline float mini(float a, float b) {
+        // FP and Neon shares same vector register in arm64
+        __m128 x;
+        __m128 y;
+        x[0] = a;
+        y[0] = b;
+        x = _mm_min_ps(x, y);
+        return x[0];
+    }
+#elif defined(__SSE4_1__)
   __forceinline float mini(float a, float b) {
     const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
     const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
@@ -155,7 +216,17 @@ namespace embree
   }
 #endif
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__)
+    __forceinline float maxi(float a, float b) {
+        // FP and Neon shares same vector register in arm64
+        __m128 x;
+        __m128 y;
+        x[0] = a;
+        y[0] = b;
+        x = _mm_max_ps(x, y);
+        return x[0];
+    }
+#elif defined(__SSE4_1__)
   __forceinline float maxi(float a, float b) {
     const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
     const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
@@ -172,9 +243,12 @@ namespace embree
   __forceinline  int64_t min(int64_t  a, int64_t  b) { return a<b ? a:b; }
   __forceinline    float min(float    a, float    b) { return a<b ? a:b; }
   __forceinline   double min(double   a, double   b) { return a<b ? a:b; }
-#if defined(__64BIT__)
+#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
   __forceinline   size_t min(size_t   a, size_t   b) { return a<b ? a:b; }
 #endif
+#if defined(__EMSCRIPTEN__)
+  __forceinline   long   min(long     a, long     b) { return a<b ? a:b; }
+#endif
 
   template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
   template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
@@ -189,9 +263,12 @@ namespace embree
   __forceinline  int64_t max(int64_t  a, int64_t  b) { return a<b ? b:a; }
   __forceinline    float max(float    a, float    b) { return a<b ? b:a; }
   __forceinline   double max(double   a, double   b) { return a<b ? b:a; }
-#if defined(__64BIT__)
+#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
   __forceinline   size_t max(size_t   a, size_t   b) { return a<b ? b:a; }
 #endif
+#if defined(__EMSCRIPTEN__)
+  __forceinline   long   max(long     a, long     b) { return a<b ? b:a; }
+#endif
 
   template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
   template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
@@ -231,6 +308,15 @@ namespace embree
   __forceinline float msub  ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
   __forceinline float nmadd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
   __forceinline float nmsub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
+  
+#elif defined (__aarch64__) && defined(__clang__)
+#pragma clang fp contract(fast)
+__forceinline float madd  ( const float a, const float b, const float c) { return a*b + c; }
+__forceinline float msub  ( const float a, const float b, const float c) { return a*b - c; }
+__forceinline float nmadd ( const float a, const float b, const float c) { return c - a*b; }
+__forceinline float nmsub ( const float a, const float b, const float c) { return -(c + a*b); }
+#pragma clang fp contract(on)
+  
 #else
   __forceinline float madd  ( const float a, const float b, const float c) { return a*b+c; }
   __forceinline float msub  ( const float a, const float b, const float c) { return a*b-c; }
@@ -326,7 +412,7 @@ namespace embree
     return x | (y << 1) | (z << 2);
   }
 
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
 
   template<>
     __forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)

+ 8 - 4
thirdparty/embree/common/math/quaternion.h

@@ -242,13 +242,17 @@ namespace embree
     T cosTheta = dot(q0, q1_);
     QuaternionT<T> q1 = select(cosTheta < 0.f, -q1_, q1_);
     cosTheta          = select(cosTheta < 0.f, -cosTheta, cosTheta);
-    if (unlikely(all(cosTheta > 0.9995f))) {
-      return normalize(lerp(q0, q1, t));
-    }
+
+    // spherical linear interpolation
     const T phi = t * fastapprox::acos(cosTheta);
     T sinPhi, cosPhi;
     fastapprox::sincos(phi, sinPhi, cosPhi);
     QuaternionT<T> qperp = sinPhi * normalize(msub(cosTheta, q0, q1));
-    return msub(cosPhi, q0, qperp);
+    QuaternionT<T> qslerp = msub(cosPhi, q0, qperp);
+
+    // regular linear interpolation as fallback
+    QuaternionT<T> qlerp = normalize(lerp(q0, q1, t));
+
+    return select(cosTheta > 0.9995f, qlerp, qslerp);
   }
 }

+ 3 - 3
thirdparty/embree/common/math/transcendental.h

@@ -27,7 +27,7 @@ __forceinline T sin(const T &v)
   // Reduced range version of x
   auto x = v - kReal * piOverTwoVec;
   auto kMod4 = k & 3;
-  auto sinUseCos = (kMod4 == 1 | kMod4 == 3);
+  auto sinUseCos = (kMod4 == 1) | (kMod4 == 3);
   auto flipSign = (kMod4 > 1);
 
   // These coefficients are from sollya with fpminimax(sin(x)/x, [|0, 2,
@@ -76,8 +76,8 @@ __forceinline T cos(const T &v)
   auto x = v - kReal * piOverTwoVec;
 
   auto kMod4 = k & 3;
-  auto cosUseCos = (kMod4 == 0 | kMod4 == 2);
-  auto flipSign = (kMod4 == 1 | kMod4 == 2);
+  auto cosUseCos = (kMod4 == 0) | (kMod4 == 2);
+  auto flipSign = (kMod4 == 1) | (kMod4 == 2);
 
   const float sinC2  = -0.16666667163372039794921875;
   const float sinC4  = +8.333347737789154052734375e-3;

+ 4 - 4
thirdparty/embree/common/math/vec2.h

@@ -144,7 +144,7 @@ namespace embree
   }
   
   ////////////////////////////////////////////////////////////////////////////////
-  /// Euclidian Space Operators
+  /// Euclidean Space Operators
   ////////////////////////////////////////////////////////////////////////////////
 
   template<typename T> __forceinline T       dot      ( const Vec2<T>& a, const Vec2<T>& b ) { return madd(a.x,b.x,a.y*b.y); }
@@ -205,11 +205,11 @@ namespace embree
 
 #include "vec2fa.h"
 
-#if defined __SSE__
+#if defined(__SSE__) || defined(__ARM_NEON)
 #include "../simd/sse.h"
 #endif
 
-#if defined __AVX__
+#if defined(__AVX__)
 #include "../simd/avx.h"
 #endif
 
@@ -221,7 +221,7 @@ namespace embree
 {
   template<> __forceinline Vec2<float>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
 
-#if defined(__SSE__)
+#if defined(__SSE__) || defined(__ARM_NEON)
   template<> __forceinline Vec2<vfloat4>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
 #endif
 

+ 23 - 6
thirdparty/embree/common/math/vec2fa.h

@@ -97,6 +97,12 @@ namespace embree
 
   __forceinline Vec2fa rcp  ( const Vec2fa& a )
   {
+#if defined(__aarch64__)
+        __m128 reciprocal = _mm_rcp_ps(a.m128);
+        reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
+        reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal);
+        return (const Vec2fa)reciprocal;
+#else
 #if defined(__AVX512VL__)
     const Vec2fa r = _mm_rcp14_ps(a.m128);
 #else
@@ -104,13 +110,15 @@ namespace embree
 #endif
 
 #if defined(__AVX2__)
-    const Vec2fa res = _mm_mul_ps(r,_mm_fnmadd_ps(r, a, vfloat4(2.0f)));
+    const Vec2fa h_n = _mm_fnmadd_ps(a, r, vfloat4(1.0));  // First, compute 1 - a * r (which will be very close to 0)
+    const Vec2fa res = _mm_fmadd_ps(r, h_n, r);            // Then compute r + r * h_n
 #else
-    const Vec2fa res = _mm_mul_ps(r,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r, a)));
-    //return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
+    const Vec2fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a, r));  // First, compute 1 - a * r (which will be very close to 0)
+    const Vec2fa res = _mm_add_ps(r,_mm_mul_ps(r, h_n));             // Then compute r + r * h_n  
 #endif
 
     return res;
+#endif  //defined(__aarch64__)
   }
 
   __forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps(a.m128); }
@@ -118,12 +126,21 @@ namespace embree
 
   __forceinline Vec2fa rsqrt( const Vec2fa& a )
   {
+#if defined(__aarch64__)
+        __m128 r = _mm_rsqrt_ps(a.m128);
+        r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
+        r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
+        return r;
+#else
+
 #if defined(__AVX512VL__)
     __m128 r = _mm_rsqrt14_ps(a.m128);
 #else
     __m128 r = _mm_rsqrt_ps(a.m128);
 #endif
     return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+
+#endif
   }
 
   __forceinline Vec2fa zero_fix(const Vec2fa& a) {
@@ -156,7 +173,7 @@ namespace embree
   __forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps(a.m128,b.m128); }
   __forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps(a.m128,b.m128); }
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
     __forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
       const vint4 ai = _mm_castps_si128(a);
       const vint4 bi = _mm_castps_si128(b);
@@ -165,7 +182,7 @@ namespace embree
     }
 #endif
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
     __forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
       const vint4 ai = _mm_castps_si128(a);
       const vint4 bi = _mm_castps_si128(b);
@@ -227,7 +244,7 @@ namespace embree
   __forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return (_mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128)) & 3) != 0; }
 
   ////////////////////////////////////////////////////////////////////////////////
-  /// Euclidian Space Operators
+  /// Euclidean Space Operators
   ////////////////////////////////////////////////////////////////////////////////
 
 #if defined(__SSE4_1__)

+ 5 - 6
thirdparty/embree/common/math/vec3.h

@@ -197,7 +197,7 @@ namespace embree
   template<typename T> __forceinline Vec3<bool> ge_mask( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<bool>(a.x>=b.x,a.y>=b.y,a.z>=b.z); }
 
   ////////////////////////////////////////////////////////////////////////////////
-  /// Euclidian Space Operators
+  /// Euclidean Space Operators
   ////////////////////////////////////////////////////////////////////////////////
 
   template<typename T> __forceinline T       sqr      ( const Vec3<T>& a )                   { return dot(a,a); }
@@ -207,7 +207,6 @@ namespace embree
   template<typename T> __forceinline Vec3<T> normalize( const Vec3<T>& a )                   { return a*rsqrt(sqr(a)); }
   template<typename T> __forceinline T       distance ( const Vec3<T>& a, const Vec3<T>& b ) { return length(a-b); }
   template<typename T> __forceinline Vec3<T> cross    ( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x)); }
-
   template<typename T> __forceinline Vec3<T> stable_triangle_normal( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c )
   {
     const T ab_x = a.z*b.y, ab_y = a.x*b.z, ab_z = a.y*b.x;
@@ -266,11 +265,11 @@ namespace embree
 /// SSE / AVX / MIC specializations
 ////////////////////////////////////////////////////////////////////////////////
 
-#if defined __SSE__
+#if defined(__SSE__) || defined(__ARM_NEON)
 #include "../simd/sse.h"
 #endif
 
-#if defined __AVX__
+#if defined(__AVX__)
 #include "../simd/avx.h"
 #endif
 
@@ -291,14 +290,14 @@ namespace embree
   template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
     x = a.x; y = a.y; z = a.z;
   }
-#elif defined(__SSE__)
+#elif defined(__SSE__) || defined(__ARM_NEON)
   template<>
   __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
     const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v);
   }
 #endif
 
-#if defined(__SSE__)
+#if defined(__SSE__) || defined(__ARM_NEON)
   template<>
   __forceinline Vec3<vfloat4> broadcast<vfloat4,vfloat4>(const Vec3<vfloat4>& a, const size_t k) {
     return Vec3<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k]));

+ 75 - 17
thirdparty/embree/common/math/vec3fa.h

@@ -55,7 +55,13 @@ namespace embree
     ////////////////////////////////////////////////////////////////////////////////
 
     static __forceinline Vec3fa load( const void* const a ) {
+#if defined(__aarch64__)
+        __m128 t = _mm_load_ps((float*)a);
+        t[3] = 0.0f;
+        return Vec3fa(t);
+#else
       return Vec3fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1))));
+#endif
     }
 
     static __forceinline Vec3fa loadu( const void* const a ) {
@@ -89,12 +95,20 @@ namespace embree
 
   __forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
   __forceinline Vec3fa operator -( const Vec3fa& a ) {
+#if defined(__aarch64__)
+    return vnegq_f32(a.m128);
+#else
     const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
     return _mm_xor_ps(a.m128, mask);
+#endif
   }
   __forceinline Vec3fa abs  ( const Vec3fa& a ) {
+#if defined(__aarch64__)
+    return _mm_abs_ps(a.m128);
+#else
     const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
     return _mm_and_ps(a.m128, mask);
+#endif
   }
   __forceinline Vec3fa sign ( const Vec3fa& a ) {
     return blendv_ps(Vec3fa(one).m128, (-Vec3fa(one)).m128, _mm_cmplt_ps (a.m128,Vec3fa(zero).m128));
@@ -102,6 +116,10 @@ namespace embree
 
   __forceinline Vec3fa rcp  ( const Vec3fa& a )
   {
+#if defined(__aarch64__)
+  return vdivq_f32(vdupq_n_f32(1.0f),a.m128);
+#else
+
 #if defined(__AVX512VL__)
     const Vec3fa r = _mm_rcp14_ps(a.m128);
 #else
@@ -109,13 +127,15 @@ namespace embree
 #endif
 
 #if defined(__AVX2__)
-    const Vec3fa res = _mm_mul_ps(r.m128,_mm_fnmadd_ps(r.m128, a.m128, vfloat4(2.0f)));
+    const Vec3fa h_n = _mm_fnmadd_ps(a.m128, r.m128, vfloat4(1.0));  // First, compute 1 - a * r (which will be very close to 0)
+    const Vec3fa res = _mm_fmadd_ps(r.m128, h_n.m128, r.m128);       // Then compute r + r * h_n
 #else
-    const Vec3fa res = _mm_mul_ps(r.m128,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r.m128, a.m128)));
-    //return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
+    const Vec3fa h_n = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a.m128, r.m128));  // First, compute 1 - a * r (which will be very close to 0)
+    const Vec3fa res = _mm_add_ps(r.m128,_mm_mul_ps(r.m128, h_n.m128));        // Then compute r + r * h_n  
 #endif
 
     return res;
+#endif  //defined(__aarch64__)
   }
 
   __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return _mm_sqrt_ps(a.m128); }
@@ -123,12 +143,20 @@ namespace embree
 
   __forceinline Vec3fa rsqrt( const Vec3fa& a )
   {
+#if defined(__aarch64__)
+        __m128 r = _mm_rsqrt_ps(a.m128);
+        r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
+        r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r));
+        return r;
+#else
+
 #if defined(__AVX512VL__)
     __m128 r = _mm_rsqrt14_ps(a.m128);
 #else
     __m128 r = _mm_rsqrt_ps(a.m128);
 #endif
     return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+#endif
   }
 
   __forceinline Vec3fa zero_fix(const Vec3fa& a) {
@@ -161,7 +189,7 @@ namespace embree
   __forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) { return _mm_min_ps(a.m128,b.m128); }
   __forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) { return _mm_max_ps(a.m128,b.m128); }
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
     __forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
       const vint4 ai = _mm_castps_si128(a.m128);
       const vint4 bi = _mm_castps_si128(b.m128);
@@ -170,7 +198,7 @@ namespace embree
     }
 #endif
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
     __forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
       const vint4 ai = _mm_castps_si128(a.m128);
       const vint4 bi = _mm_castps_si128(b.m128);
@@ -187,16 +215,16 @@ namespace embree
   /// Ternary Operators
   ////////////////////////////////////////////////////////////////////////////////
 
-#if defined(__AVX2__)
+#if defined(__AVX2__) || defined(__ARM_NEON)
   __forceinline Vec3fa madd  ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmadd_ps(a.m128,b.m128,c.m128); }
   __forceinline Vec3fa msub  ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fmsub_ps(a.m128,b.m128,c.m128); }
   __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); }
   __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); }
 #else
   __forceinline Vec3fa madd  ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b+c; }
-  __forceinline Vec3fa msub  ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; }
   __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b+c;}
   __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b-c; }
+  __forceinline Vec3fa msub  ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; }
 #endif
 
   __forceinline Vec3fa madd  ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
@@ -218,8 +246,26 @@ namespace embree
   ////////////////////////////////////////////////////////////////////////////////
   /// Reductions
   ////////////////////////////////////////////////////////////////////////////////
+#if defined(__aarch64__)
+  __forceinline float reduce_add(const Vec3fa& v) {
+    float32x4_t t = v.m128;
+    t[3] = 0.0f;
+    return vaddvq_f32(t);
+  }
 
-  __forceinline float reduce_add(const Vec3fa& v) { 
+  __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
+  __forceinline float reduce_min(const Vec3fa& v) {
+    float32x4_t t = v.m128;
+      t[3] = t[2];
+    return vminvq_f32(t);
+  }
+  __forceinline float reduce_max(const Vec3fa& v) {
+    float32x4_t t = v.m128;
+      t[3] = t[2];
+    return vmaxvq_f32(t);
+  }
+#else
+  __forceinline float reduce_add(const Vec3fa& v) {
     const vfloat4 a(v.m128);
     const vfloat4 b = shuffle<1>(a);
     const vfloat4 c = shuffle<2>(a);
@@ -229,6 +275,7 @@ namespace embree
   __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
   __forceinline float reduce_min(const Vec3fa& v) { return min(v.x,v.y,v.z); }
   __forceinline float reduce_max(const Vec3fa& v) { return max(v.x,v.y,v.z); }
+#endif
 
   ////////////////////////////////////////////////////////////////////////////////
   /// Comparison Operators
@@ -241,8 +288,13 @@ namespace embree
   __forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpneq_ps(a.m128, b.m128); }
   __forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmplt_ps (a.m128, b.m128); }
   __forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmple_ps (a.m128, b.m128); }
-  __forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpnle_ps(a.m128, b.m128); }
-  __forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); }
+ #if defined(__aarch64__)
+  __forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpgt_ps (a.m128, b.m128); }
+  __forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpge_ps (a.m128, b.m128); }
+#else
+  __forceinline Vec3ba gt_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnle_ps(a.m128, b.m128); }
+  __forceinline Vec3ba ge_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnlt_ps(a.m128, b.m128); }
+#endif
 
   __forceinline bool isvalid ( const Vec3fa& v ) {
     return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
@@ -261,7 +313,7 @@ namespace embree
   }
 
   ////////////////////////////////////////////////////////////////////////////////
-  /// Euclidian Space Operators
+  /// Euclidean Space Operators
   ////////////////////////////////////////////////////////////////////////////////
 
 #if defined(__SSE4_1__)
@@ -335,7 +387,11 @@ namespace embree
   /// Rounding Functions
   ////////////////////////////////////////////////////////////////////////////////
 
-#if defined (__SSE4_1__)
+#if defined(__aarch64__)
+  __forceinline Vec3fa floor(const Vec3fa& a) { return vrndmq_f32(a.m128); }
+  __forceinline Vec3fa ceil (const Vec3fa& a) { return vrndpq_f32(a.m128); }
+  __forceinline Vec3fa trunc(const Vec3fa& a) { return vrndq_f32(a.m128); }
+#elif defined (__SSE4_1__)
   __forceinline Vec3fa trunc( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEAREST_INT); }
   __forceinline Vec3fa floor( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF    ); }
   __forceinline Vec3fa ceil ( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF    ); }
@@ -393,8 +449,10 @@ namespace embree
 
     __forceinline Vec3fx( const Vec3fa& other, const int      a1) { m128 = other.m128; a = a1; }
     __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) { m128 = other.m128; u = a1; }
-    __forceinline Vec3fx( const Vec3fa& other, const float    w1) {      
-#if defined (__SSE4_1__)
+    __forceinline Vec3fx( const Vec3fa& other, const float    w1) {
+#if defined (__aarch64__)
+      m128 = other.m128; m128[3] = w1;
+#elif defined (__SSE4_1__)
       m128 = _mm_insert_ps(other.m128, _mm_set_ss(w1),3 << 4);
 #else
       const vint4 mask(-1,-1,-1,0);
@@ -526,7 +584,7 @@ namespace embree
   __forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) { return _mm_min_ps(a.m128,b.m128); }
   __forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) { return _mm_max_ps(a.m128,b.m128); }
 
-#if defined(__SSE4_1__)
+#if defined(__SSE4_1__) || defined(__aarch64__)
     __forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
       const vint4 ai = _mm_castps_si128(a.m128);
       const vint4 bi = _mm_castps_si128(b.m128);
@@ -535,7 +593,7 @@ namespace embree
     }
 #endif
 
-#if defined(__SSE4_1__)
+#if defined(__SSE4_1__) || defined(__aarch64__)
     __forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
       const vint4 ai = _mm_castps_si128(a.m128);
       const vint4 bi = _mm_castps_si128(b.m128);
@@ -626,7 +684,7 @@ namespace embree
   }
 
   ////////////////////////////////////////////////////////////////////////////////
-  /// Euclidian Space Operators
+  /// Euclidean Space Operators
   ////////////////////////////////////////////////////////////////////////////////
 
 #if defined(__SSE4_1__)

+ 28 - 18
thirdparty/embree/common/math/vec3ia.h

@@ -65,7 +65,9 @@ namespace embree
 
   __forceinline Vec3ia operator +( const Vec3ia& a ) { return a; }
   __forceinline Vec3ia operator -( const Vec3ia& a ) { return _mm_sub_epi32(_mm_setzero_si128(), a.m128); }
-#if defined(__SSSE3__)
+#if (defined(__aarch64__))
+  __forceinline Vec3ia abs       ( const Vec3ia& a ) { return vabsq_s32(a.m128); }
+#elif defined(__SSSE3__)
   __forceinline Vec3ia abs       ( const Vec3ia& a ) { return _mm_abs_epi32(a.m128); }
 #endif
 
@@ -81,7 +83,7 @@ namespace embree
   __forceinline Vec3ia operator -( const Vec3ia& a, const int     b ) { return a-Vec3ia(b); }
   __forceinline Vec3ia operator -( const int     a, const Vec3ia& b ) { return Vec3ia(a)-b; }
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
   __forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return _mm_mullo_epi32(a.m128, b.m128); }
   __forceinline Vec3ia operator *( const Vec3ia& a, const int     b ) { return a * Vec3ia(b); }
   __forceinline Vec3ia operator *( const int     a, const Vec3ia& b ) { return Vec3ia(a) * b; }
@@ -116,7 +118,7 @@ namespace embree
   __forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
   __forceinline Vec3ia& operator -=( Vec3ia& a, const int&   b ) { return a = a - b; }
   
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
   __forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
   __forceinline Vec3ia& operator *=( Vec3ia& a, const int&    b ) { return a = a * b; }
 #endif
@@ -127,18 +129,38 @@ namespace embree
   __forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
   __forceinline Vec3ia& operator |=( Vec3ia& a, const int&    b ) { return a = a | b; }
   
+#if !defined(__ARM_NEON)
   __forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
   __forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
+#endif
 
   ////////////////////////////////////////////////////////////////////////////////
-  /// Reductions
+  /// Select
   ////////////////////////////////////////////////////////////////////////////////
 
+  __forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
+#if defined(__aarch64__) || defined(__SSE4_1__)
+    return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m));
+#else
+    return _mm_or_si128(_mm_and_si128(_mm_castps_si128(m), t), _mm_andnot_si128(_mm_castps_si128(m), f)); 
+#endif
+  }
+
+  ////////////////////////////////////////////////////////////////////////////////
+  /// Reductions
+  ////////////////////////////////////////////////////////////////////////////////
+#if defined(__aarch64__)
+  __forceinline int reduce_add(const Vec3ia& v) { return vaddvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0))); }
+  __forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
+  __forceinline int reduce_min(const Vec3ia& v) { return vminvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0x7FFFFFFF))); }
+  __forceinline int reduce_max(const Vec3ia& v) { return vmaxvq_s32(select(Vec3ba(1,1,1),v,Vec3ia(0x80000000))); }
+#else
   __forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
   __forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
   __forceinline int reduce_min(const Vec3ia& v) { return min(v.x,v.y,v.z); }
   __forceinline int reduce_max(const Vec3ia& v) { return max(v.x,v.y,v.z); }
-
+#endif
+  
   ////////////////////////////////////////////////////////////////////////////////
   /// Comparison Operators
   ////////////////////////////////////////////////////////////////////////////////
@@ -156,19 +178,7 @@ namespace embree
   __forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmplt_epi32 (a.m128, b.m128)); }
   __forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return _mm_castsi128_ps(_mm_cmpgt_epi32 (a.m128, b.m128)); }
 
-  ////////////////////////////////////////////////////////////////////////////////
-  /// Select
-  ////////////////////////////////////////////////////////////////////////////////
-
-  __forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
-#if defined(__SSE4_1__)
-    return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m));
-#else
-    return _mm_or_si128(_mm_and_si128(_mm_castps_si128(m), t), _mm_andnot_si128(_mm_castps_si128(m), f)); 
-#endif
-  }
-
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
   __forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return _mm_min_epi32(a.m128,b.m128); }
   __forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return _mm_max_epi32(a.m128,b.m128); }
 #else

+ 3 - 3
thirdparty/embree/common/math/vec4.h

@@ -149,7 +149,7 @@ namespace embree
   }
 
   ////////////////////////////////////////////////////////////////////////////////
-  /// Euclidian Space Operators
+  /// Euclidean Space Operators
   ////////////////////////////////////////////////////////////////////////////////
 
   template<typename T> __forceinline T       dot      ( const Vec4<T>& a, const Vec4<T>& b ) { return madd(a.x,b.x,madd(a.y,b.y,madd(a.z,b.z,a.w*b.w))); }
@@ -205,7 +205,7 @@ namespace embree
 /// SSE / AVX / MIC specializations
 ////////////////////////////////////////////////////////////////////////////////
 
-#if defined __SSE__
+#if defined(__SSE__) || defined(__ARM_NEON)
 #include "../simd/sse.h"
 #endif
 
@@ -225,7 +225,7 @@ namespace embree
   template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
     x = a.x; y = a.y; z = a.z; w = a.w;
   }
-#elif defined(__SSE__)
+#elif defined(__SSE__) || defined(__ARM_NEON)
   template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
     const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v);
   }

+ 1196 - 0
thirdparty/embree/common/simd/arm/avx2neon.h

@@ -0,0 +1,1196 @@
+#pragma once
+
+#if !defined(__aarch64__)
+#error "avx2neon is only supported for AARCH64"
+#endif
+
+#include "sse2neon.h"
+
+#define AVX2NEON_ABI static inline  __attribute__((always_inline))
+
+
+struct __m256 {
+    __m128 lo,hi;
+    __m256() {}
+};
+
+
+
+
+struct __m256i {
+    __m128i lo,hi;
+    explicit __m256i(const __m256 a) : lo(__m128i(a.lo)),hi(__m128i(a.hi)) {}
+    operator __m256() const {__m256 res; res.lo = __m128(lo);res.hi = __m128(hi); return res;}
+    __m256i() {}
+};
+
+
+
+
+struct __m256d {
+    float64x2_t lo,hi;
+    __m256d() {}
+    __m256d(const __m256& a) : lo(float64x2_t(a.lo)),hi(float64x2_t(a.hi)) {}
+    __m256d(const __m256i& a) : lo(float64x2_t(a.lo)),hi(float64x2_t(a.hi)) {}
+};
+
+#define UNARY_AVX_OP(type,func,basic_func) AVX2NEON_ABI type func(const type& a) {type res;res.lo=basic_func(a.lo);res.hi=basic_func(a.hi);return res;}
+
+
+#define BINARY_AVX_OP(type,func,basic_func) AVX2NEON_ABI type func(const type& a,const type& b) {type res;res.lo=basic_func(a.lo,b.lo);res.hi=basic_func(a.hi,b.hi);return res;}
+#define BINARY_AVX_OP_CAST(type,func,basic_func,bdst,bsrc) AVX2NEON_ABI type func(const type& a,const type& b) {type res;res.lo=bdst(basic_func(bsrc(a.lo),bsrc(b.lo)));res.hi=bdst(basic_func(bsrc(a.hi),bsrc(b.hi)));return res;}
+
+#define TERNARY_AVX_OP(type,func,basic_func) AVX2NEON_ABI type func(const type& a,const type& b,const type& c) {type res;res.lo=basic_func(a.lo,b.lo,c.lo);res.hi=basic_func(a.hi,b.hi,c.hi);return res;}
+
+
+#define CAST_SIMD_TYPE(to,name,from,basic_dst) AVX2NEON_ABI to name(const from& a) { to res; res.lo = basic_dst(a.lo); res.hi=basic_dst(a.hi); return res;}
+
+
+
+#define _mm_stream_load_si128 _mm_load_si128
+#define _mm256_stream_load_si256 _mm256_load_si256
+
+
+AVX2NEON_ABI
+__m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8)
+{
+    __m128 af = _mm_castsi128_ps(a);
+    __m128 bf = _mm_castsi128_ps(b);
+    __m128 blendf = _mm_blend_ps(af, bf, imm8);
+    return _mm_castps_si128(blendf);
+}
+
+AVX2NEON_ABI
+int _mm_movemask_popcnt(__m128 a)
+{
+    return __builtin_popcount(_mm_movemask_ps(a));
+}
+
+AVX2NEON_ABI
+__m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
+{
+    float32x4_t res;
+    uint32x4_t mask_u32 = vreinterpretq_u32_m128i(mask);
+    for (int i=0;i<4;i++) {
+        if (mask_u32[i] & 0x80000000) res[i] = mem_addr[i]; else res[i] = 0;
+    }
+    return vreinterpretq_m128_f32(res);
+}
+
+AVX2NEON_ABI
+void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
+{
+    float32x4_t a_f32 = vreinterpretq_f32_m128(a);
+    uint32x4_t mask_u32 = vreinterpretq_u32_m128i(mask);
+    for (int i=0;i<4;i++) {
+        if (mask_u32[i] & 0x80000000) mem_addr[i] = a_f32[i];
+    }
+}
+
+AVX2NEON_ABI
+void _mm_maskstore_epi32 (int * mem_addr, __m128i mask, __m128i a)
+{
+    uint32x4_t mask_u32 = vreinterpretq_u32_m128i(mask);
+    int32x4_t a_s32 = vreinterpretq_s32_m128i(a);
+    for (int i=0;i<4;i++) {
+        if (mask_u32[i] & 0x80000000) mem_addr[i] = a_s32[i];
+    }
+}
+
+
+#define _mm_fmadd_ss _mm_fmadd_ps
+#define _mm_fmsub_ss _mm_fmsub_ps
+#define _mm_fnmsub_ss _mm_fnmsub_ps
+#define _mm_fnmadd_ss _mm_fnmadd_ps
+
+template<int code>
+AVX2NEON_ABI float32x4_t dpps_neon(const float32x4_t& a,const float32x4_t& b)
+{
+    float v;
+    v = 0;
+    v += (code & 0x10) ? a[0]*b[0] : 0;
+    v += (code & 0x20) ? a[1]*b[1] : 0;
+    v += (code & 0x40) ? a[2]*b[2] : 0;
+    v += (code & 0x80) ? a[3]*b[3] : 0;
+    float32x4_t res;
+    res[0] = (code & 0x1) ? v : 0;
+    res[1] = (code & 0x2) ? v : 0;
+    res[2] = (code & 0x4) ? v : 0;
+    res[3] = (code & 0x8) ? v : 0;
+    return res;
+}
+
+template<>
+inline float32x4_t dpps_neon<0x7f>(const float32x4_t& a,const float32x4_t& b)
+{
+    float v;
+    float32x4_t m = _mm_mul_ps(a,b);
+    m[3] = 0;
+    v = vaddvq_f32(m);
+    return _mm_set1_ps(v);
+}
+
+template<>
+inline float32x4_t dpps_neon<0xff>(const float32x4_t& a,const float32x4_t& b)
+{
+    float v;
+    float32x4_t m = _mm_mul_ps(a,b);
+    v = vaddvq_f32(m);
+    return _mm_set1_ps(v);
+}
+
+#define _mm_dp_ps(a,b,c) dpps_neon<c>((a),(b))
+
+
+AVX2NEON_ABI
+__m128 _mm_permutevar_ps (__m128 a, __m128i b)
+{
+    uint32x4_t b_u32 = vreinterpretq_u32_m128i(b);
+    float32x4_t x;
+    for (int i=0;i<4;i++)
+    {
+        x[i] = a[b_u32[i]];
+    }
+    return vreinterpretq_m128_f32(x);
+}
+
+AVX2NEON_ABI
+__m256i _mm256_setzero_si256()
+{
+    __m256i res;
+    res.lo = res.hi = vdupq_n_s32(0);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_setzero_ps()
+{
+    __m256 res;
+    res.lo = res.hi = vdupq_n_f32(0.0f);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_undefined_si256()
+{
+    return _mm256_setzero_si256();
+}
+
+AVX2NEON_ABI
+__m256 _mm256_undefined_ps()
+{
+    return _mm256_setzero_ps();
+}
+
+CAST_SIMD_TYPE(__m256d, _mm256_castps_pd,    __m256,  float64x2_t)
+CAST_SIMD_TYPE(__m256i, _mm256_castps_si256, __m256,  __m128i)
+CAST_SIMD_TYPE(__m256,  _mm256_castsi256_ps, __m256i, __m128)
+CAST_SIMD_TYPE(__m256,  _mm256_castpd_ps ,   __m256d, __m128)
+CAST_SIMD_TYPE(__m256d, _mm256_castsi256_pd, __m256i, float64x2_t)
+CAST_SIMD_TYPE(__m256i, _mm256_castpd_si256, __m256d, __m128i)
+
+
+
+
+AVX2NEON_ABI
+__m128 _mm256_castps256_ps128 (__m256 a)
+{
+    return a.lo;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_castsi128_si256 (__m128i a)
+{
+    __m256i res;
+    res.lo = a ;
+    res.hi = vdupq_n_s32(0);
+    return res;
+}
+
+AVX2NEON_ABI
+__m128i _mm256_castsi256_si128 (__m256i a)
+{
+    return a.lo;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_castps128_ps256 (__m128 a)
+{
+    __m256 res;
+    res.lo = a;
+    res.hi = vdupq_n_f32(0);
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256 _mm256_broadcast_ss (float const * mem_addr)
+{
+    __m256 res;
+    res.lo = res.hi = vdupq_n_f32(*mem_addr);
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_set_epi32 (int e7, int e6, int e5, int e4, int e3, int e2, int e1, int e0)
+{
+    __m256i res;
+    res.lo = _mm_set_epi32(e3,e2,e1,e0);
+    res.hi = _mm_set_epi32(e7,e6,e5,e4);
+    return res;
+
+}
+
+AVX2NEON_ABI
+__m256i _mm256_set1_epi32 (int a)
+{
+    __m256i res;
+    res.lo = res.hi = vdupq_n_s32(a);
+    return res;
+}
+AVX2NEON_ABI
+__m256i _mm256_set1_epi8 (int a)
+{
+    __m256i res;
+    res.lo = res.hi = vdupq_n_s8(a);
+    return res;
+}
+AVX2NEON_ABI
+__m256i _mm256_set1_epi16 (int a)
+{
+    __m256i res;
+    res.lo = res.hi = vdupq_n_s16(a);
+    return res;
+}
+
+
+
+
+AVX2NEON_ABI
+int _mm256_movemask_ps(const __m256& v)
+{
+    return (_mm_movemask_ps(v.hi) << 4) | _mm_movemask_ps(v.lo);
+}
+
+template<int imm8>
+AVX2NEON_ABI
+__m256 __mm256_permute_ps (const __m256& a)
+{
+    __m256 res;
+    res.lo = _mm_shuffle_ps(a.lo,a.lo,imm8);
+    res.hi = _mm_shuffle_ps(a.hi,a.hi,imm8);
+    return res;
+
+}
+
+#define _mm256_permute_ps(a,c) __mm256_permute_ps<c>(a)
+
+
+template<int imm8>
+AVX2NEON_ABI
+__m256 __mm256_shuffle_ps (const __m256 a,const __m256& b)
+{
+    __m256 res;
+    res.lo = _mm_shuffle_ps(a.lo,b.lo,imm8);
+    res.hi = _mm_shuffle_ps(a.hi,b.hi,imm8);
+    return res;
+
+}
+
+template<int imm8>
+AVX2NEON_ABI
+__m256i __mm256_shuffle_epi32 (const __m256i a)
+{
+    __m256i res;
+    res.lo = _mm_shuffle_epi32(a.lo,imm8);
+    res.hi = _mm_shuffle_epi32(a.hi,imm8);
+    return res;
+
+}
+
+template<int imm8>
+AVX2NEON_ABI
+__m256i __mm256_srli_si256 (__m256i a)
+{
+    __m256i res;
+    res.lo = _mm_srli_si128(a.lo,imm8);
+    res.hi = _mm_srli_si128(a.hi,imm8);
+    return res;
+}
+
+template<int imm8>
+AVX2NEON_ABI
+__m256i __mm256_slli_si256 (__m256i a)
+{
+    __m256i res;
+    res.lo = _mm_slli_si128(a.lo,imm8);
+    res.hi = _mm_slli_si128(a.hi,imm8);
+    return res;
+}
+
+
+#define _mm256_srli_si256(a,b) __mm256_srli_si256<b>(a)
+#define _mm256_slli_si256(a,b) __mm256_slli_si256<b>(a)
+
+
+
+#define _mm256_shuffle_ps(a,b,c) __mm256_shuffle_ps<c>(a,b)
+#define _mm256_shuffle_epi32(a,c) __mm256_shuffle_epi32<c>(a)
+
+
+AVX2NEON_ABI
+__m256i _mm256_set1_epi64x (long long a)
+{
+    __m256i res;
+    int64x2_t t = vdupq_n_s64(a);
+    res.lo = res.hi = __m128i(t);
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256 _mm256_permute2f128_ps (__m256 a, __m256 b, int imm8)
+{
+    __m256 res;
+    __m128 tmp;
+    switch (imm8 & 0x7)
+    {
+        case 0: tmp = a.lo; break;
+        case 1: tmp = a.hi; break;
+        case 2: tmp = b.lo; break;
+        case 3: tmp = b.hi; break;
+    }
+    if (imm8 & 0x8)
+        tmp = _mm_setzero_ps();
+
+
+
+    res.lo = tmp;
+    imm8 >>= 4;
+
+    switch (imm8 & 0x7)
+    {
+        case 0: tmp = a.lo; break;
+        case 1: tmp = a.hi; break;
+        case 2: tmp = b.lo; break;
+        case 3: tmp = b.hi; break;
+    }
+    if (imm8 & 0x8)
+        tmp = _mm_setzero_ps();
+
+    res.hi = tmp;
+
+    return res;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_moveldup_ps (__m256 a)
+{
+    __m256 res;
+    res.lo = _mm_moveldup_ps(a.lo);
+    res.hi = _mm_moveldup_ps(a.hi);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_movehdup_ps (__m256 a)
+{
+    __m256 res;
+    res.lo = _mm_movehdup_ps(a.lo);
+    res.hi = _mm_movehdup_ps(a.hi);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_insertf128_ps (__m256 a, __m128 b, int imm8)
+{
+    __m256 res = a;
+    if (imm8 & 1) res.hi = b;
+    else res.lo = b;
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m128 _mm256_extractf128_ps (__m256 a, const int imm8)
+{
+    if (imm8 & 1) return a.hi;
+    return a.lo;
+}
+
+
+AVX2NEON_ABI
+__m256d _mm256_movedup_pd (__m256d a)
+{
+    __m256d res;
+    res.lo = _mm_movedup_pd(a.lo);
+    res.hi = _mm_movedup_pd(a.hi);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_abs_epi32(__m256i a)
+{
+   __m256i res;
+   res.lo = vabsq_s32(a.lo);
+   res.hi = vabsq_s32(a.hi);
+   return res;
+}
+
+UNARY_AVX_OP(__m256,_mm256_sqrt_ps,_mm_sqrt_ps)
+UNARY_AVX_OP(__m256,_mm256_rsqrt_ps,_mm_rsqrt_ps)
+UNARY_AVX_OP(__m256,_mm256_rcp_ps,_mm_rcp_ps)
+UNARY_AVX_OP(__m256,_mm256_floor_ps,vrndmq_f32)
+UNARY_AVX_OP(__m256,_mm256_ceil_ps,vrndpq_f32)
+UNARY_AVX_OP(__m256i,_mm256_abs_epi16,_mm_abs_epi16)
+
+
+BINARY_AVX_OP(__m256i,_mm256_add_epi8,_mm_add_epi8)
+BINARY_AVX_OP(__m256i,_mm256_adds_epi8,_mm_adds_epi8)
+
+BINARY_AVX_OP(__m256i,_mm256_hadd_epi32,_mm_hadd_epi32)
+BINARY_AVX_OP(__m256i,_mm256_add_epi32,_mm_add_epi32)
+BINARY_AVX_OP(__m256i,_mm256_sub_epi32,_mm_sub_epi32)
+BINARY_AVX_OP(__m256i,_mm256_mullo_epi32,_mm_mullo_epi32)
+
+BINARY_AVX_OP(__m256i,_mm256_min_epi32,_mm_min_epi32)
+BINARY_AVX_OP(__m256i,_mm256_max_epi32,_mm_max_epi32)
+BINARY_AVX_OP(__m256i,_mm256_min_epi16,_mm_min_epi16)
+BINARY_AVX_OP(__m256i,_mm256_max_epi16,_mm_max_epi16)
+BINARY_AVX_OP(__m256i,_mm256_min_epi8,_mm_min_epi8)
+BINARY_AVX_OP(__m256i,_mm256_max_epi8,_mm_max_epi8)
+BINARY_AVX_OP(__m256i,_mm256_min_epu16,_mm_min_epu16)
+BINARY_AVX_OP(__m256i,_mm256_max_epu16,_mm_max_epu16)
+BINARY_AVX_OP(__m256i,_mm256_min_epu8,_mm_min_epu8)
+BINARY_AVX_OP(__m256i,_mm256_max_epu8,_mm_max_epu8)
+BINARY_AVX_OP(__m256i,_mm256_sign_epi16,_mm_sign_epi16)
+
+
+BINARY_AVX_OP_CAST(__m256i,_mm256_min_epu32,vminq_u32,__m128i,uint32x4_t)
+BINARY_AVX_OP_CAST(__m256i,_mm256_max_epu32,vmaxq_u32,__m128i,uint32x4_t)
+
+BINARY_AVX_OP(__m256,_mm256_min_ps,_mm_min_ps)
+BINARY_AVX_OP(__m256,_mm256_max_ps,_mm_max_ps)
+
+BINARY_AVX_OP(__m256,_mm256_add_ps,_mm_add_ps)
+BINARY_AVX_OP(__m256,_mm256_mul_ps,_mm_mul_ps)
+BINARY_AVX_OP(__m256,_mm256_sub_ps,_mm_sub_ps)
+BINARY_AVX_OP(__m256,_mm256_div_ps,_mm_div_ps)
+
+BINARY_AVX_OP(__m256,_mm256_and_ps,_mm_and_ps)
+BINARY_AVX_OP(__m256,_mm256_andnot_ps,_mm_andnot_ps)
+BINARY_AVX_OP(__m256,_mm256_or_ps,_mm_or_ps)
+BINARY_AVX_OP(__m256,_mm256_xor_ps,_mm_xor_ps)
+
+BINARY_AVX_OP_CAST(__m256d,_mm256_and_pd,vandq_s64,float64x2_t,int64x2_t)
+BINARY_AVX_OP_CAST(__m256d,_mm256_or_pd,vorrq_s64,float64x2_t,int64x2_t)
+BINARY_AVX_OP_CAST(__m256d,_mm256_xor_pd,veorq_s64,float64x2_t,int64x2_t)
+
+
+
+BINARY_AVX_OP(__m256i,_mm256_and_si256,_mm_and_si128)
+BINARY_AVX_OP(__m256i,_mm256_andnot_si256,_mm_andnot_si128)
+BINARY_AVX_OP(__m256i,_mm256_or_si256,_mm_or_si128)
+BINARY_AVX_OP(__m256i,_mm256_xor_si256,_mm_xor_si128)
+
+
+BINARY_AVX_OP(__m256,_mm256_unpackhi_ps,_mm_unpackhi_ps)
+BINARY_AVX_OP(__m256,_mm256_unpacklo_ps,_mm_unpacklo_ps)
+TERNARY_AVX_OP(__m256,_mm256_blendv_ps,_mm_blendv_ps)
+TERNARY_AVX_OP(__m256i,_mm256_blendv_epi8,_mm_blendv_epi8)
+
+
+TERNARY_AVX_OP(__m256,_mm256_fmadd_ps,_mm_fmadd_ps)
+TERNARY_AVX_OP(__m256,_mm256_fnmadd_ps,_mm_fnmadd_ps)
+TERNARY_AVX_OP(__m256,_mm256_fmsub_ps,_mm_fmsub_ps)
+TERNARY_AVX_OP(__m256,_mm256_fnmsub_ps,_mm_fnmsub_ps)
+
+
+
+BINARY_AVX_OP(__m256i,_mm256_packs_epi32,_mm_packs_epi32)
+BINARY_AVX_OP(__m256i,_mm256_packs_epi16,_mm_packs_epi16)
+BINARY_AVX_OP(__m256i,_mm256_packus_epi32,_mm_packus_epi32)
+BINARY_AVX_OP(__m256i,_mm256_packus_epi16,_mm_packus_epi16)
+
+
+BINARY_AVX_OP(__m256i,_mm256_unpackhi_epi64,_mm_unpackhi_epi64)
+BINARY_AVX_OP(__m256i,_mm256_unpackhi_epi32,_mm_unpackhi_epi32)
+BINARY_AVX_OP(__m256i,_mm256_unpackhi_epi16,_mm_unpackhi_epi16)
+BINARY_AVX_OP(__m256i,_mm256_unpackhi_epi8,_mm_unpackhi_epi8)
+
+BINARY_AVX_OP(__m256i,_mm256_unpacklo_epi64,_mm_unpacklo_epi64)
+BINARY_AVX_OP(__m256i,_mm256_unpacklo_epi32,_mm_unpacklo_epi32)
+BINARY_AVX_OP(__m256i,_mm256_unpacklo_epi16,_mm_unpacklo_epi16)
+BINARY_AVX_OP(__m256i,_mm256_unpacklo_epi8,_mm_unpacklo_epi8)
+
+BINARY_AVX_OP(__m256i,_mm256_mulhrs_epi16,_mm_mulhrs_epi16)
+BINARY_AVX_OP(__m256i,_mm256_mulhi_epu16,_mm_mulhi_epu16)
+BINARY_AVX_OP(__m256i,_mm256_mulhi_epi16,_mm_mulhi_epi16)
+//BINARY_AVX_OP(__m256i,_mm256_mullo_epu16,_mm_mullo_epu16)
+BINARY_AVX_OP(__m256i,_mm256_mullo_epi16,_mm_mullo_epi16)
+
+BINARY_AVX_OP(__m256i,_mm256_subs_epu16,_mm_subs_epu16)
+BINARY_AVX_OP(__m256i,_mm256_adds_epu16,_mm_adds_epu16)
+BINARY_AVX_OP(__m256i,_mm256_subs_epi16,_mm_subs_epi16)
+BINARY_AVX_OP(__m256i,_mm256_adds_epi16,_mm_adds_epi16)
+BINARY_AVX_OP(__m256i,_mm256_sub_epi16,_mm_sub_epi16)
+BINARY_AVX_OP(__m256i,_mm256_add_epi16,_mm_add_epi16)
+BINARY_AVX_OP(__m256i,_mm256_sub_epi8,_mm_sub_epi8)
+
+
+BINARY_AVX_OP(__m256i,_mm256_hadd_epi16,_mm_hadd_epi16)
+BINARY_AVX_OP(__m256i,_mm256_hadds_epi16,_mm_hadds_epi16)
+
+
+
+
+BINARY_AVX_OP(__m256i,_mm256_cmpeq_epi32,_mm_cmpeq_epi32)
+BINARY_AVX_OP(__m256i,_mm256_cmpgt_epi32,_mm_cmpgt_epi32)
+
+BINARY_AVX_OP(__m256i,_mm256_cmpeq_epi8,_mm_cmpeq_epi8)
+BINARY_AVX_OP(__m256i,_mm256_cmpgt_epi8,_mm_cmpgt_epi8)
+
+BINARY_AVX_OP(__m256i,_mm256_cmpeq_epi16,_mm_cmpeq_epi16)
+BINARY_AVX_OP(__m256i,_mm256_cmpgt_epi16,_mm_cmpgt_epi16)
+
+
+BINARY_AVX_OP(__m256i,_mm256_shuffle_epi8,_mm_shuffle_epi8)
+
+
+BINARY_AVX_OP(__m256,_mm256_cmpeq_ps,_mm_cmpeq_ps)
+BINARY_AVX_OP(__m256,_mm256_cmpneq_ps,_mm_cmpneq_ps)
+BINARY_AVX_OP(__m256,_mm256_cmpnlt_ps,_mm_cmpnlt_ps)
+BINARY_AVX_OP(__m256,_mm256_cmpngt_ps,_mm_cmpngt_ps)
+BINARY_AVX_OP(__m256,_mm256_cmpge_ps,_mm_cmpge_ps)
+BINARY_AVX_OP(__m256,_mm256_cmpnge_ps,_mm_cmpnge_ps)
+BINARY_AVX_OP(__m256,_mm256_cmplt_ps,_mm_cmplt_ps)
+BINARY_AVX_OP(__m256,_mm256_cmple_ps,_mm_cmple_ps)
+BINARY_AVX_OP(__m256,_mm256_cmpgt_ps,_mm_cmpgt_ps)
+BINARY_AVX_OP(__m256,_mm256_cmpnle_ps,_mm_cmpnle_ps)
+
+
+AVX2NEON_ABI
+__m256i _mm256_cvtps_epi32 (__m256 a)
+{
+    __m256i res;
+    res.lo = _mm_cvtps_epi32(a.lo);
+    res.hi = _mm_cvtps_epi32(a.hi);
+    return res;
+
+}
+
+AVX2NEON_ABI
+__m256i _mm256_cvttps_epi32 (__m256 a)
+{
+    __m256i res;
+    res.lo = _mm_cvttps_epi32(a.lo);
+    res.hi = _mm_cvttps_epi32(a.hi);
+    return res;
+
+}
+
+AVX2NEON_ABI
+__m256 _mm256_loadu_ps (float const * mem_addr)
+{
+    __m256 res;
+    res.lo = *(__m128 *)(mem_addr + 0);
+    res.hi = *(__m128 *)(mem_addr + 4);
+    return res;
+}
+#define _mm256_load_ps _mm256_loadu_ps
+
+
+AVX2NEON_ABI
+int _mm256_testz_ps (const __m256& a, const __m256& b)
+{
+    __m256 t = a;
+    if (&a != &b)
+        t = _mm256_and_ps(a,b);
+
+    int32x4_t l  = vshrq_n_s32(vreinterpretq_s32_m128(t.lo),31);
+    int32x4_t h  = vshrq_n_s32(vreinterpretq_s32_m128(t.hi),31);
+    return vaddvq_s32(vaddq_s32(l,h)) == 0;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0)
+{
+    __m256i res;
+    int64x2_t t0 = {e0,e1};
+    int64x2_t t1 = {e2,e3};
+    res.lo = __m128i(t0);
+    res.hi = __m128i(t1);
+    return res;
+}
+AVX2NEON_ABI
+__m256i _mm256_setr_epi64x (int64_t e0, int64_t e1, int64_t e2, int64_t e3)
+{
+    __m256i res;
+    int64x2_t t0 = {e0,e1};
+    int64x2_t t1 = {e2,e3};
+    res.lo = __m128i(t0);
+    res.hi = __m128i(t1);
+    return res;
+}
+
+
+
+AVX2NEON_ABI
+__m256i _mm256_set_epi8 (char e31, char e30, char e29, char e28, char e27, char e26, char e25, char e24, char e23, char e22, char e21, char e20, char e19, char e18, char e17, char e16, char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0)
+{
+    int8x16_t lo = {e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15};
+    int8x16_t hi = {e16,e17,e18,e19,e20,e21,e22,e23,e24,e25,e26,e27,e28,e29,e30,e31};
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_setr_epi8 (char e0, char e1, char e2, char e3, char e4, char e5, char e6, char e7, char e8, char e9, char e10, char e11, char e12, char e13, char e14, char e15, char e16, char e17, char e18, char e19, char e20, char e21, char e22, char e23, char e24, char e25, char e26, char e27, char e28, char e29, char e30, char e31)
+{
+    int8x16_t lo = {e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15};
+    int8x16_t hi = {e16,e17,e18,e19,e20,e21,e22,e23,e24,e25,e26,e27,e28,e29,e30,e31};
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_set_epi16 (short e15, short e14, short e13, short e12, short e11, short e10, short e9, short e8, short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0)
+{
+    int16x8_t lo = {e0,e1,e2,e3,e4,e5,e6,e7};
+    int16x8_t hi = {e8,e9,e10,e11,e12,e13,e14,e15};
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_setr_epi16 (short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7, short e8, short e9, short e10, short e11, short e12, short e13, short e14, short e15)
+{
+    int16x8_t lo = {e0,e1,e2,e3,e4,e5,e6,e7};
+    int16x8_t hi = {e8,e9,e10,e11,e12,e13,e14,e15};
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+
+
+
+AVX2NEON_ABI
+int _mm256_movemask_epi8(const __m256i& a)
+{
+    return (_mm_movemask_epi8(a.hi) << 16) | _mm_movemask_epi8(a.lo);
+}
+
+
+AVX2NEON_ABI
+int _mm256_testz_si256(const __m256i& a,const __m256i& b)
+{
+    uint32x4_t lo = vandq_u32(a.lo,b.lo);
+    uint32x4_t hi = vandq_u32(a.hi,b.hi);
+
+    return (vaddvq_u32(lo) + vaddvq_u32(hi)) == 0;
+}
+
+AVX2NEON_ABI
+__m256d _mm256_setzero_pd ()
+{
+    __m256d res;
+    res.lo = res.hi = vdupq_n_f64(0);
+    return res;
+}
+
+AVX2NEON_ABI
+int _mm256_movemask_pd (__m256d a)
+{
+    return (_mm_movemask_pd(a.hi) << 2) | _mm_movemask_pd(a.lo);
+}
+
+AVX2NEON_ABI
+__m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b)
+{
+    __m256i res;
+    res.lo = _mm_cmpeq_epi64(a.lo, b.lo);
+    res.hi = _mm_cmpeq_epi64(a.hi, b.hi);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256d _mm256_cmpeq_pd (__m256d a, __m256d b)
+{
+    __m256d res;
+    res.lo = _mm_cmpeq_pd(a.lo, b.lo);
+    res.hi = _mm_cmpeq_pd(a.hi, b.hi);
+    return res;
+}
+
+
+AVX2NEON_ABI
+int _mm256_testz_pd (const __m256d& a, const __m256d& b)
+{
+    __m256d t = a;
+
+    if (&a != &b)
+        t = _mm256_and_pd(a,b);
+
+    return _mm256_movemask_pd(t) == 0;
+}
+
+AVX2NEON_ABI
+__m256d _mm256_blendv_pd (__m256d a, __m256d b, __m256d mask)
+{
+    __m256d res;
+    res.lo = _mm_blendv_pd(a.lo, b.lo, mask.lo);
+    res.hi = _mm_blendv_pd(a.hi, b.hi, mask.hi);
+    return res;
+}
+
+template<int imm8>
+AVX2NEON_ABI
+__m256 __mm256_dp_ps (__m256 a, __m256 b)
+{
+    __m256 res;
+    res.lo = _mm_dp_ps(a.lo, b.lo, imm8);
+    res.hi = _mm_dp_ps(a.hi, b.hi, imm8);
+    return res;
+}
+
+#define _mm256_dp_ps(a,b,c) __mm256_dp_ps<c>(a,b)
+
+AVX2NEON_ABI
+double _mm256_permute4x64_pd_select(__m256d a, const int imm8)
+{
+    switch (imm8 & 3) {
+        case 0:
+            return ((float64x2_t)a.lo)[0];
+        case 1:
+            return ((float64x2_t)a.lo)[1];
+        case 2:
+            return ((float64x2_t)a.hi)[0];
+        case 3:
+            return ((float64x2_t)a.hi)[1];
+    }
+    __builtin_unreachable();
+    return 0;
+}
+
+AVX2NEON_ABI
+__m256d _mm256_permute4x64_pd (__m256d a, const int imm8)
+{
+    float64x2_t lo,hi;
+    lo[0] = _mm256_permute4x64_pd_select(a,imm8 >> 0);
+    lo[1] = _mm256_permute4x64_pd_select(a,imm8 >> 2);
+    hi[0] = _mm256_permute4x64_pd_select(a,imm8 >> 4);
+    hi[1] = _mm256_permute4x64_pd_select(a,imm8 >> 6);
+
+    __m256d res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
+{
+    return __m256i(_mm256_insertf128_ps((__m256)a,(__m128)b,imm8));
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_loadu_si256 (__m256i const * mem_addr)
+{
+    __m256i res;
+    res.lo = *(__m128i *)((int32_t *)mem_addr + 0);
+    res.hi = *(__m128i *)((int32_t *)mem_addr + 4);
+    return res;
+}
+
+#define _mm256_load_si256 _mm256_loadu_si256
+
+AVX2NEON_ABI
+void _mm256_storeu_ps (float * mem_addr, __m256 a)
+{
+    *(__m128 *)(mem_addr + 0) = a.lo;
+    *(__m128 *)(mem_addr + 4) = a.hi;
+}
+
+#define _mm256_store_ps _mm256_storeu_ps
+#define _mm256_stream_ps _mm256_storeu_ps
+
+
+AVX2NEON_ABI
+void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
+{
+    *(__m128i *)((int32_t *)mem_addr + 0) = a.lo;
+    *(__m128i *)((int32_t *)mem_addr + 4) = a.hi;
+}
+
+#define _mm256_store_si256 _mm256_storeu_si256
+
+
+
+AVX2NEON_ABI
+__m256i _mm256_permute4x64_epi64 (const __m256i a, const int imm8)
+{
+    uint8x16x2_t tbl = {a.lo, a.hi};
+
+    uint8_t sz = sizeof(uint64_t);
+    uint8_t u64[4] = {
+        (uint8_t)(((imm8 >> 0) & 0x3) * sz),
+        (uint8_t)(((imm8 >> 2) & 0x3) * sz),
+        (uint8_t)(((imm8 >> 4) & 0x3) * sz),
+        (uint8_t)(((imm8 >> 6) & 0x3) * sz),
+    };
+
+    uint8x16_t idx_lo = {
+        // lo[0] bytes
+        (uint8_t)(u64[0]+0), (uint8_t)(u64[0]+1), (uint8_t)(u64[0]+2), (uint8_t)(u64[0]+3),
+        (uint8_t)(u64[0]+4), (uint8_t)(u64[0]+5), (uint8_t)(u64[0]+6), (uint8_t)(u64[0]+7),
+
+        // lo[1] bytes
+        (uint8_t)(u64[1]+0), (uint8_t)(u64[1]+1), (uint8_t)(u64[1]+2), (uint8_t)(u64[1]+3),
+        (uint8_t)(u64[1]+4), (uint8_t)(u64[1]+5), (uint8_t)(u64[1]+6), (uint8_t)(u64[1]+7),
+    };
+    uint8x16_t idx_hi = {
+        // hi[0] bytes
+        (uint8_t)(u64[2]+0), (uint8_t)(u64[2]+1), (uint8_t)(u64[2]+2), (uint8_t)(u64[2]+3),
+        (uint8_t)(u64[2]+4), (uint8_t)(u64[2]+5), (uint8_t)(u64[2]+6), (uint8_t)(u64[2]+7),
+
+        // hi[1] bytes
+        (uint8_t)(u64[3]+0), (uint8_t)(u64[3]+1), (uint8_t)(u64[3]+2), (uint8_t)(u64[3]+3),
+        (uint8_t)(u64[3]+4), (uint8_t)(u64[3]+5), (uint8_t)(u64[3]+6), (uint8_t)(u64[3]+7),
+    };
+
+    uint8x16_t lo = vqtbl2q_u8(tbl, idx_lo);
+    uint8x16_t hi = vqtbl2q_u8(tbl, idx_hi);
+
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_permute2x128_si256(const __m256i a,const __m256i b, const int imm8)
+{
+    return __m256i(_mm256_permute2f128_ps(__m256(a),__m256(b),imm8));
+}
+
+
+
+AVX2NEON_ABI
+__m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
+{
+    __m256 res;
+    res.lo = _mm_maskload_ps(mem_addr,mask.lo);
+    res.hi = _mm_maskload_ps(mem_addr + 4,mask.hi);
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_cvtepu8_epi32 (__m128i a)
+{
+    uint8x16_t a_u8 = vreinterpretq_u8_m128i(a);     // xxxx xxxx xxxx xxxx HHGG FFEE DDCC BBAA
+    uint16x8_t u16x8 = vmovl_u8(vget_low_u8(a_u8));  // 00HH 00GG 00FF 00EE 00DD 00CC 00BB 00AA
+    uint32x4_t lo = vmovl_u16(vget_low_u16(u16x8));  // 0000 00DD 0000 00CC 0000 00BB 0000 00AA
+    uint32x4_t hi = vmovl_high_u16(u16x8);           // 0000 00HH 0000 00GG 0000 00FF 0000 00EE
+
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_cvtepi8_epi32 (__m128i a)
+{
+    int8x16_t a_s8 = vreinterpretq_s8_m128i(a);     // xxxx xxxx xxxx xxxx HHGG FFEE DDCC BBAA
+    int16x8_t s16x8 = vmovl_s8(vget_low_s8(a_s8));  // ssHH ssGG ssFF ssEE ssDD ssCC ssBB ssAA
+    int32x4_t lo = vmovl_s16(vget_low_s16(s16x8));  // ssss ssDD ssss ssCC ssss ssBB ssss ssAA
+    int32x4_t hi = vmovl_high_s16(s16x8);           // ssss ssHH ssss ssGG ssss ssFF ssss ssEE
+
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_cvtepi16_epi32 (__m128i a)
+{
+    int16x8_t a_s16 = vreinterpretq_s16_m128i(a);   // HHHH GGGG FFFF EEEE DDDD CCCC BBBB AAAA
+    int32x4_t lo = vmovl_s16(vget_low_s16(a_s16));  // ssss DDDD ssss CCCC ssss BBBB ssss AAAA
+    int32x4_t hi = vmovl_high_s16(a_s16);           // ssss HHHH ssss GGGG ssss FFFF ssss EEEE
+
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+
+
+AVX2NEON_ABI
+void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a)
+{
+    _mm_maskstore_epi32(mem_addr,mask.lo,a.lo);
+    _mm_maskstore_epi32(mem_addr + 4,mask.hi,a.hi);
+}
+
+AVX2NEON_ABI
+__m256i _mm256_slli_epi64 (__m256i a, int imm8)
+{
+    __m256i res;
+    res.lo = _mm_slli_epi64(a.lo,imm8);
+    res.hi = _mm_slli_epi64(a.hi,imm8);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_slli_epi32 (__m256i a, int imm8)
+{
+    __m256i res;
+    res.lo = _mm_slli_epi32(a.lo,imm8);
+    res.hi = _mm_slli_epi32(a.hi,imm8);
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i __mm256_slli_epi16 (__m256i a, int imm8)
+{
+    __m256i res;
+    res.lo = _mm_slli_epi16(a.lo,imm8);
+    res.hi = _mm_slli_epi16(a.hi,imm8);
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_srli_epi32 (__m256i a, int imm8)
+{
+    __m256i res;
+    res.lo = _mm_srli_epi32(a.lo,imm8);
+    res.hi = _mm_srli_epi32(a.hi,imm8);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i __mm256_srli_epi16 (__m256i a, int imm8)
+{
+    __m256i res;
+    res.lo = _mm_srli_epi16(a.lo,imm8);
+    res.hi = _mm_srli_epi16(a.hi,imm8);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_cvtepu16_epi32(__m128i a)
+{
+    __m256i res;
+    res.lo = vmovl_u16(vget_low_u16(a));
+    res.hi = vmovl_high_u16(a);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_cvtepu8_epi16(__m128i a)
+{
+    __m256i res;
+    res.lo = vmovl_u8(vget_low_u8(a));
+    res.hi = vmovl_high_u8(a);
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_srai_epi32 (__m256i a, int imm8)
+{
+    __m256i res;
+    res.lo = _mm_srai_epi32(a.lo,imm8);
+    res.hi = _mm_srai_epi32(a.hi,imm8);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256i _mm256_srai_epi16 (__m256i a, int imm8)
+{
+    __m256i res;
+    res.lo = _mm_srai_epi16(a.lo,imm8);
+    res.hi = _mm_srai_epi16(a.hi,imm8);
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
+{
+    __m256i res;
+    res.lo = vshlq_s32(a.lo,count.lo);
+    res.hi = vshlq_s32(a.hi,count.hi);
+    return res;
+
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_srav_epi32 (__m256i a, __m256i count)
+{
+    __m256i res;
+    res.lo = vshlq_s32(a.lo,vnegq_s32(count.lo));
+    res.hi = vshlq_s32(a.hi,vnegq_s32(count.hi));
+    return res;
+
+}
+
+AVX2NEON_ABI
+__m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
+{
+    __m256i res;
+    res.lo = __m128i(vshlq_u32(uint32x4_t(a.lo),vnegq_s32(count.lo)));
+    res.hi = __m128i(vshlq_u32(uint32x4_t(a.hi),vnegq_s32(count.hi)));
+    return res;
+
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
+{
+    return __m256i(_mm256_permute2f128_ps(__m256(a),__m256(b),imm8));
+}
+
+
+AVX2NEON_ABI
+__m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
+{
+    if (imm8 & 1) return a.hi;
+    return a.lo;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_set1_ps(float x)
+{
+    __m256 res;
+    res.lo = res.hi = vdupq_n_f32(x);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_set_ps (float e7, float e6, float e5, float e4, float e3, float e2, float e1, float e0)
+{
+    __m256 res;
+    res.lo = _mm_set_ps(e3,e2,e1,e0);
+    res.hi = _mm_set_ps(e7,e6,e5,e4);
+    return res;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_broadcast_ps (__m128 const * mem_addr)
+{
+    __m256 res;
+    res.lo = res.hi = *mem_addr;
+    return res;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_cvtepi32_ps (__m256i a)
+{
+    __m256 res;
+    res.lo = _mm_cvtepi32_ps(a.lo);
+    res.hi = _mm_cvtepi32_ps(a.hi);
+    return res;
+}
+AVX2NEON_ABI
+void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
+{
+    uint32x4_t mask_lo = mask.lo;
+    uint32x4_t mask_hi = mask.hi;
+    float32x4_t a_lo = a.lo;
+    float32x4_t a_hi = a.hi;
+
+    for (int i=0;i<4;i++) {
+        if (mask_lo[i] & 0x80000000) mem_addr[i] = a_lo[i];
+        if (mask_hi[i] & 0x80000000) mem_addr[i+4] = a_hi[i];
+    }
+}
+
+AVX2NEON_ABI
+__m256d _mm256_andnot_pd (__m256d a, __m256d b)
+{
+    __m256d res;
+    res.lo = float64x2_t(_mm_andnot_ps(__m128(a.lo),__m128(b.lo)));
+    res.hi = float64x2_t(_mm_andnot_ps(__m128(a.hi),__m128(b.hi)));
+    return res;
+}
+
+AVX2NEON_ABI
+__m256 _mm256_blend_ps (__m256 a, __m256 b, const int imm8)
+{
+    __m256 res;
+    res.lo = _mm_blend_ps(a.lo,b.lo,imm8 & 0xf);
+    res.hi = _mm_blend_ps(a.hi,b.hi,imm8 >> 4);
+    return res;
+
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8)
+{
+    return __m256i(_mm256_blend_ps(__m256(a),__m256(b),imm8));
+
+}
+
+AVX2NEON_ABI
+__m256i _mm256_blend_epi16 (__m256i a, __m256i b, const int imm8)
+{
+    __m256i res;
+    res.lo = _mm_blend_epi16(a.lo,b.lo,imm8);
+    res.hi = _mm_blend_epi16(a.hi,b.hi,imm8);
+    return res;
+}
+
+
+
+AVX2NEON_ABI
+__m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
+{
+    int32x4_t vindex_lo = vindex.lo;
+    int32x4_t vindex_hi = vindex.hi;
+    int32x4_t lo,hi;
+    for (int i=0;i<4;i++)
+    {
+        lo[i] = *(int32_t *)((char *) base_addr + (vindex_lo[i]*scale));
+        hi[i] = *(int32_t *)((char *) base_addr + (vindex_hi[i]*scale));
+    }
+
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}
+
+
+AVX2NEON_ABI
+__m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale)
+{
+    uint32x4_t mask_lo = mask.lo;
+    uint32x4_t mask_hi = mask.hi;
+    int32x4_t vindex_lo = vindex.lo;
+    int32x4_t vindex_hi = vindex.hi;
+    int32x4_t lo,hi;
+    lo = hi = _mm_setzero_si128();
+    for (int i=0;i<4;i++)
+    {
+        if (mask_lo[i] >> 31) lo[i] = *(int32_t *)((char *) base_addr + (vindex_lo[i]*scale));
+        if (mask_hi[i] >> 31) hi[i] = *(int32_t *)((char *) base_addr + (vindex_hi[i]*scale));
+    }
+
+    __m256i res;
+    res.lo = lo; res.hi = hi;
+    return res;
+}

+ 53 - 18
thirdparty/embree/common/simd/arm/emulation.h

@@ -11,33 +11,28 @@
 
 #include "sse2neon.h"
 
-__forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
-   __m128 neg_c = vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(c)));
-  return _mm_fmadd_ps(a, b, neg_c);
-}
-
-__forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
-#if defined(__aarch64__)
-    return vreinterpretq_m128_f32(vfmsq_f32(vreinterpretq_f32_m128(c),
-                                            vreinterpretq_f32_m128(b),
-                                            vreinterpretq_f32_m128(a)));
-#else
-    return _mm_sub_ps(c, _mm_mul_ps(a, b));
-#endif
-}
+__forceinline __m128 _mm_abs_ps(__m128 a) { return vabsq_f32(a); }
+
+__forceinline __m128 _mm_fmadd_ps (__m128 a, __m128 b, __m128 c) { return vfmaq_f32(c, a, b); }
+__forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) { return vfmsq_f32(c, a, b); }
+__forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) { return vnegq_f32(vfmaq_f32(c, a, b)); }
+__forceinline __m128 _mm_fmsub_ps (__m128 a, __m128 b, __m128 c) { return vnegq_f32(vfmsq_f32(c, a, b)); }
 
-__forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
-  return vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(_mm_fmadd_ps(a,b,c))));
+__forceinline __m128 _mm_broadcast_ss (float const * mem_addr)
+{
+    return vdupq_n_f32(*mem_addr);
 }
 
+// AVX2 emulation leverages Intel FMA defs above.  Include after them.
+#include "avx2neon.h"
 
 /* Dummy defines for floating point control */
 #define _MM_MASK_MASK 0x1f80
 #define _MM_MASK_DIV_ZERO 0x200
-#define _MM_FLUSH_ZERO_ON 0x8000
+// #define _MM_FLUSH_ZERO_ON 0x8000
 #define _MM_MASK_DENORM 0x100
 #define _MM_SET_EXCEPTION_MASK(x)
-#define _MM_SET_FLUSH_ZERO_MODE(x)
+// #define _MM_SET_FLUSH_ZERO_MODE(x)
 
 __forceinline int _mm_getcsr()
 {
@@ -48,3 +43,43 @@ __forceinline void _mm_mfence()
 {
   __sync_synchronize();
 }
+
+__forceinline __m128i _mm_load4epu8_epi32(__m128i *ptr)
+{
+    uint8x8_t  t0 = vld1_u8((uint8_t*)ptr);
+    uint16x8_t t1 = vmovl_u8(t0);
+    uint32x4_t t2 = vmovl_u16(vget_low_u16(t1));
+    return vreinterpretq_s32_u32(t2);
+}
+
+__forceinline __m128i _mm_load4epu16_epi32(__m128i *ptr)
+{
+    uint16x8_t t0 = vld1q_u16((uint16_t*)ptr);
+    uint32x4_t t1 = vmovl_u16(vget_low_u16(t0));
+    return vreinterpretq_s32_u32(t1);
+}
+
+__forceinline __m128i _mm_load4epi8_f32(__m128i *ptr)
+{
+    int8x8_t    t0 = vld1_s8((int8_t*)ptr);
+    int16x8_t   t1 = vmovl_s8(t0);
+    int32x4_t   t2 = vmovl_s16(vget_low_s16(t1));
+    float32x4_t t3 = vcvtq_f32_s32(t2);
+    return vreinterpretq_s32_f32(t3);
+}
+
+__forceinline __m128i _mm_load4epu8_f32(__m128i *ptr)
+{
+    uint8x8_t   t0 = vld1_u8((uint8_t*)ptr);
+    uint16x8_t  t1 = vmovl_u8(t0);
+    uint32x4_t  t2 = vmovl_u16(vget_low_u16(t1));
+    return vreinterpretq_s32_u32(t2);
+}
+
+__forceinline __m128i _mm_load4epi16_f32(__m128i *ptr)
+{
+    int16x8_t   t0 = vld1q_s16((int16_t*)ptr);
+    int32x4_t   t1 = vmovl_s16(vget_low_s16(t0));
+    float32x4_t t2 = vcvtq_f32_s32(t1);
+    return vreinterpretq_s32_f32(t2);
+}

文件差异内容过多而无法显示
+ 2215 - 779
thirdparty/embree/common/simd/arm/sse2neon.h


+ 1 - 1
thirdparty/embree/common/simd/simd.h

@@ -6,7 +6,7 @@
 #include "../math/math.h"
 
 /* include SSE wrapper classes */
-#if defined(__SSE__)
+#if defined(__SSE__) || defined(__ARM_NEON)
 #  include "sse.h"
 #endif
 

+ 1 - 1
thirdparty/embree/common/simd/sse.h

@@ -11,7 +11,7 @@
 
 namespace embree 
 {
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
   __forceinline __m128 blendv_ps(__m128 f, __m128 t, __m128 mask) { 
     return _mm_blendv_ps(f,t,mask);
   }

+ 6 - 1
thirdparty/embree/common/simd/vboold4_avx.h

@@ -62,7 +62,11 @@ namespace embree
     ////////////////////////////////////////////////////////////////////////////////
 
     __forceinline vboold(FalseTy) : v(_mm256_setzero_pd()) {}
+#if !defined(__aarch64__)
     __forceinline vboold(TrueTy)  : v(_mm256_cmp_pd(_mm256_setzero_pd(), _mm256_setzero_pd(), _CMP_EQ_OQ)) {}
+#else
+    __forceinline vboold(TrueTy)  : v(_mm256_cmpeq_pd(_mm256_setzero_pd(), _mm256_setzero_pd())) {}
+#endif
 
     ////////////////////////////////////////////////////////////////////////////////
     /// Array Access
@@ -107,9 +111,10 @@ namespace embree
   /// Movement/Shifting/Shuffling Functions
   ////////////////////////////////////////////////////////////////////////////////
 
+#if !defined(__aarch64__)
   __forceinline vboold4 unpacklo(const vboold4& a, const vboold4& b) { return _mm256_unpacklo_pd(a, b); }
   __forceinline vboold4 unpackhi(const vboold4& a, const vboold4& b) { return _mm256_unpackhi_pd(a, b); }
-
+#endif
 
 #if defined(__AVX2__)
   template<int i0, int i1, int i2, int i3>

+ 1 - 1
thirdparty/embree/common/simd/vboolf16_avx512.h

@@ -116,7 +116,7 @@ namespace embree
   __forceinline size_t popcnt  (const vboolf16& a) { return popcnt(a.v); }
   
   ////////////////////////////////////////////////////////////////////////////////
-  /// Convertion Operations
+  /// Conversion Operations
   ////////////////////////////////////////////////////////////////////////////////
 
   __forceinline unsigned int toInt (const vboolf16& a) { return mm512_mask2int(a); }

+ 20 - 4
thirdparty/embree/common/simd/vboolf4_sse2.h

@@ -36,9 +36,11 @@ namespace embree
 
     __forceinline vboolf(__m128 input) : v(input) {}
     __forceinline operator const __m128&() const { return v; }
+    #if !defined(__EMSCRIPTEN__)
     __forceinline operator const __m128i() const { return _mm_castps_si128(v); }
     __forceinline operator const __m128d() const { return _mm_castps_pd(v); }
-    
+    #endif
+
     __forceinline vboolf(bool a)
       : v(mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}
     __forceinline vboolf(bool a, bool b)
@@ -100,7 +102,7 @@ namespace embree
   __forceinline vboolf4 operator ==(const vboolf4& a, const vboolf4& b) { return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); }
   
   __forceinline vboolf4 select(const vboolf4& m, const vboolf4& t, const vboolf4& f) {
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
     return _mm_blendv_ps(f, t, m); 
 #else
     return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); 
@@ -114,6 +116,17 @@ namespace embree
   __forceinline vboolf4 unpacklo(const vboolf4& a, const vboolf4& b) { return _mm_unpacklo_ps(a, b); }
   __forceinline vboolf4 unpackhi(const vboolf4& a, const vboolf4& b) { return _mm_unpackhi_ps(a, b); }
 
+#if defined(__aarch64__)
+  template<int i0, int i1, int i2, int i3>
+  __forceinline vboolf4 shuffle(const vboolf4& v) {
+    return vreinterpretq_f32_u8(vqtbl1q_u8( vreinterpretq_u8_s32(v), _MN_SHUFFLE(i0, i1, i2, i3)));
+  }
+
+  template<int i0, int i1, int i2, int i3>
+  __forceinline vboolf4 shuffle(const vboolf4& a, const vboolf4& b) {
+    return vreinterpretq_f32_u8(vqtbl2q_u8( (uint8x16x2_t){(uint8x16_t)a.v, (uint8x16_t)b.v}, _MF_SHUFFLE(i0, i1, i2, i3)));
+  }
+#else
   template<int i0, int i1, int i2, int i3>
   __forceinline vboolf4 shuffle(const vboolf4& v) {
     return _mm_castsi128_ps(_mm_shuffle_epi32(v, _MM_SHUFFLE(i3, i2, i1, i0)));
@@ -123,6 +136,7 @@ namespace embree
   __forceinline vboolf4 shuffle(const vboolf4& a, const vboolf4& b) {
     return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
   }
+#endif
 
   template<int i0>
   __forceinline vboolf4 shuffle(const vboolf4& v) {
@@ -135,7 +149,7 @@ namespace embree
   template<> __forceinline vboolf4 shuffle<0, 1, 0, 1>(const vboolf4& v) { return _mm_castpd_ps(_mm_movedup_pd(v)); }
 #endif
 
-#if defined(__SSE4_1__)
+#if defined(__SSE4_1__) && !defined(__aarch64__)
   template<int dst, int src, int clr> __forceinline vboolf4 insert(const vboolf4& a, const vboolf4& b) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); }
   template<int dst, int src> __forceinline vboolf4 insert(const vboolf4& a, const vboolf4& b) { return insert<dst, src, 0>(a, b); }
   template<int dst> __forceinline vboolf4 insert(const vboolf4& a, const bool b) { return insert<dst, 0>(a, vboolf4(b)); }
@@ -157,7 +171,9 @@ namespace embree
   __forceinline bool none(const vboolf4& valid, const vboolf4& b) { return none(valid & b); }
   
   __forceinline size_t movemask(const vboolf4& a) { return _mm_movemask_ps(a); }
-#if defined(__SSE4_2__)
+#if defined(__aarch64__)
+  __forceinline size_t popcnt(const vboolf4& a) { return vaddvq_s32(vandq_u32(vreinterpretq_u32_f32(a.v),_mm_set1_epi32(1))); }
+#elif defined(__SSE4_2__)
   __forceinline size_t popcnt(const vboolf4& a) { return popcnt((size_t)_mm_movemask_ps(a)); }
 #else
   __forceinline size_t popcnt(const vboolf4& a) { return bool(a[0])+bool(a[1])+bool(a[2])+bool(a[3]); }

+ 1 - 1
thirdparty/embree/common/simd/vboolf8_avx.h

@@ -76,7 +76,7 @@ namespace embree
     ////////////////////////////////////////////////////////////////////////////////
 
     __forceinline vboolf(FalseTy) : v(_mm256_setzero_ps()) {}
-    __forceinline vboolf(TrueTy)  : v(_mm256_cmp_ps(_mm256_setzero_ps(), _mm256_setzero_ps(), _CMP_EQ_OQ)) {}
+    __forceinline vboolf(TrueTy)  : v(_mm256_castsi256_ps(_mm256_set1_epi32(0xFFFFFFFF))) {}
 
     ////////////////////////////////////////////////////////////////////////////////
     /// Array Access

+ 8 - 1
thirdparty/embree/common/simd/vdouble4_avx.h

@@ -189,13 +189,20 @@ namespace embree
   __forceinline vboold4 operator >=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd_mask(a, b, _MM_CMPINT_GE); }
   __forceinline vboold4 operator > (const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd_mask(a, b, _MM_CMPINT_GT); }
   __forceinline vboold4 operator <=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd_mask(a, b, _MM_CMPINT_LE); }
-#else
+#elif !defined(__aarch64__)
   __forceinline vboold4 operator ==(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_EQ_OQ);  }
   __forceinline vboold4 operator !=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_NEQ_UQ); }
   __forceinline vboold4 operator < (const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_LT_OS);  }
   __forceinline vboold4 operator >=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_NLT_US); }
   __forceinline vboold4 operator > (const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_NLE_US); }
   __forceinline vboold4 operator <=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_LE_OS);  }
+#else
+  __forceinline vboold4 operator ==(const vdouble4& a, const vdouble4& b) { return _mm256_cmpeq_pd(a, b);  }
+  __forceinline vboold4 operator !=(const vdouble4& a, const vdouble4& b) { return _mm256_cmpneq_pd(a, b); }
+  __forceinline vboold4 operator < (const vdouble4& a, const vdouble4& b) { return _mm256_cmplt_pd(a, b);  }
+  __forceinline vboold4 operator >=(const vdouble4& a, const vdouble4& b) { return _mm256_cmpnlt_pd(a, b); }
+  __forceinline vboold4 operator > (const vdouble4& a, const vdouble4& b) { return _mm256_cmpnle_pd(a, b); }
+  __forceinline vboold4 operator <=(const vdouble4& a, const vdouble4& b) { return _mm256_cmple_pd(a, b);  }
 #endif
 
   __forceinline vboold4 operator ==(const vdouble4& a, double          b) { return a == vdouble4(b); }

+ 3 - 2
thirdparty/embree/common/simd/vfloat16_avx512.h

@@ -177,9 +177,10 @@ namespace embree
   __forceinline vfloat16 abs    (const vfloat16& a) { return _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(a),_mm512_set1_epi32(0x7FFFFFFF))); }
   __forceinline vfloat16 signmsk(const vfloat16& a) { return _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(a),_mm512_set1_epi32(0x80000000))); }
 
-  __forceinline vfloat16 rcp(const vfloat16& a) {
+  __forceinline vfloat16 rcp(const vfloat16& a)
+  {
     const vfloat16 r = _mm512_rcp14_ps(a);
-    return _mm512_mul_ps(r, _mm512_fnmadd_ps(r, a, vfloat16(2.0f)));
+    return _mm512_fmadd_ps(r, _mm512_fnmadd_ps(a, r, vfloat16(1.0)), r);  // computes r + r * (1 - a*r)
   }
 
   __forceinline vfloat16 sqr (const vfloat16& a) { return _mm512_mul_ps(a,a); }

+ 108 - 27
thirdparty/embree/common/simd/vfloat4_sse2.h

@@ -42,6 +42,11 @@ namespace embree
     __forceinline vfloat(float a, float b, float c, float d) : v(_mm_set_ps(d, c, b, a)) {}
 
     __forceinline explicit vfloat(const vint4& a) : v(_mm_cvtepi32_ps(a)) {}
+#if defined(__aarch64__)
+    __forceinline explicit vfloat(const vuint4& x) {
+        v = vcvtq_f32_u32(vreinterpretq_u32_s32(x.v));
+    }
+#else
     __forceinline explicit vfloat(const vuint4& x) {
       const __m128i a   = _mm_and_si128(x,_mm_set1_epi32(0x7FFFFFFF));
       const __m128i b   = _mm_and_si128(_mm_srai_epi32(x,31),_mm_set1_epi32(0x4F000000)); //0x4F000000 = 2^31 
@@ -49,7 +54,7 @@ namespace embree
       const __m128  bf  = _mm_castsi128_ps(b);  
       v  = _mm_add_ps(af,bf);
     }
-
+#endif
     ////////////////////////////////////////////////////////////////////////////////
     /// Constants
     ////////////////////////////////////////////////////////////////////////////////
@@ -107,7 +112,11 @@ namespace embree
 #endif
   }
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__)
+    static __forceinline vfloat4 load(const char* ptr) {
+        return __m128(_mm_load4epi8_f32(((__m128i*)ptr)));
+    }
+#elif defined(__SSE4_1__)
     static __forceinline vfloat4 load(const char* ptr) {
       return _mm_cvtepi32_ps(_mm_cvtepi8_epi32(_mm_loadu_si128((__m128i*)ptr)));
     }
@@ -117,7 +126,11 @@ namespace embree
     }
 #endif
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__)
+    static __forceinline vfloat4 load(const unsigned char* ptr) {
+        return __m128(_mm_load4epu8_f32(((__m128i*)ptr)));
+    }
+#elif defined(__SSE4_1__)
     static __forceinline vfloat4 load(const unsigned char* ptr) {
       return _mm_cvtepi32_ps(_mm_cvtepu8_epi32(_mm_loadu_si128((__m128i*)ptr)));
     }
@@ -128,7 +141,11 @@ namespace embree
     }
 #endif
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__)
+    static __forceinline vfloat4 load(const short* ptr) {
+        return __m128(_mm_load4epi16_f32(((__m128i*)ptr)));
+    }
+#elif defined(__SSE4_1__)
     static __forceinline vfloat4 load(const short* ptr) {
       return _mm_cvtepi32_ps(_mm_cvtepi16_epi32(_mm_loadu_si128((__m128i*)ptr)));
     }
@@ -145,7 +162,11 @@ namespace embree
     static __forceinline void store_nt(void* ptr, const vfloat4& v)
     {
 #if defined (__SSE4_1__)
+#if defined(__aarch64__)
       _mm_stream_ps((float*)ptr,v);
+#else
+      _mm_stream_ps((float*)ptr,v);
+#endif
 #else
       _mm_store_ps((float*)ptr,v);
 #endif
@@ -153,7 +174,7 @@ namespace embree
 
     template<int scale = 4>
     static __forceinline vfloat4 gather(const float* ptr, const vint4& index) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
       return _mm_i32gather_ps(ptr, index, scale);
 #else
       return vfloat4(
@@ -169,7 +190,7 @@ namespace embree
       vfloat4 r = zero;
 #if defined(__AVX512VL__)
       return _mm_mmask_i32gather_ps(r, mask, index, ptr, scale);
-#elif defined(__AVX2__)
+#elif defined(__AVX2__)  && !defined(__aarch64__)
       return _mm_mask_i32gather_ps(r, ptr, index, mask, scale);
 #else
       if (likely(mask[0])) r[0] = *(float*)(((char*)ptr)+scale*index[0]);
@@ -223,8 +244,8 @@ namespace embree
     friend __forceinline vfloat4 select(const vboolf4& m, const vfloat4& t, const vfloat4& f) {
 #if defined(__AVX512VL__)
       return _mm_mask_blend_ps(m, f, t);
-#elif defined(__SSE4_1__)
-      return _mm_blendv_ps(f, t, m); 
+#elif defined(__SSE4_1__) || (defined(__aarch64__))
+      return _mm_blendv_ps(f, t, m);
 #else
       return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); 
 #endif
@@ -256,18 +277,34 @@ namespace embree
   __forceinline vfloat4 toFloat(const vint4&   a) { return vfloat4(a); }
 
   __forceinline vfloat4 operator +(const vfloat4& a) { return a; }
+#if defined(__aarch64__)
+  __forceinline vfloat4 operator -(const vfloat4& a) {
+    return vnegq_f32(a);
+  }
+#else
   __forceinline vfloat4 operator -(const vfloat4& a) { return _mm_xor_ps(a, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); }
+#endif
 
+#if defined(__aarch64__)
+  __forceinline vfloat4 abs(const vfloat4& a) { return _mm_abs_ps(a); }
+#else
   __forceinline vfloat4 abs(const vfloat4& a) { return _mm_and_ps(a, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); }
+#endif
+
 #if defined(__AVX512VL__)
   __forceinline vfloat4 sign(const vfloat4& a) { return _mm_mask_blend_ps(_mm_cmp_ps_mask(a, vfloat4(zero), _CMP_LT_OQ), vfloat4(one), -vfloat4(one)); }
 #else
   __forceinline vfloat4 sign(const vfloat4& a) { return blendv_ps(vfloat4(one), -vfloat4(one), _mm_cmplt_ps(a, vfloat4(zero))); }
 #endif
+
   __forceinline vfloat4 signmsk(const vfloat4& a) { return _mm_and_ps(a,_mm_castsi128_ps(_mm_set1_epi32(0x80000000))); }
-  
+
   __forceinline vfloat4 rcp(const vfloat4& a)
   {
+#if defined(__aarch64__)
+    return vfloat4(vdivq_f32(vdupq_n_f32(1.0f),a.v));
+#else
+
 #if defined(__AVX512VL__)
     const vfloat4 r = _mm_rcp14_ps(a);
 #else
@@ -275,29 +312,38 @@ namespace embree
 #endif
 
 #if defined(__AVX2__)
-    return _mm_mul_ps(r,_mm_fnmadd_ps(r, a, vfloat4(2.0f)));
+    return _mm_fmadd_ps(r, _mm_fnmadd_ps(a, r, vfloat4(1.0f)), r);                    // computes r + r * (1 - a * r)
 #else
-    return _mm_mul_ps(r,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r, a)));
+    return _mm_add_ps(r,_mm_mul_ps(r, _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a, r))));  // computes r + r * (1 - a * r)
 #endif
+
+#endif  //defined(__aarch64__)
   }
   __forceinline vfloat4 sqr (const vfloat4& a) { return _mm_mul_ps(a,a); }
   __forceinline vfloat4 sqrt(const vfloat4& a) { return _mm_sqrt_ps(a); }
 
   __forceinline vfloat4 rsqrt(const vfloat4& a)
   {
+#if defined(__aarch64__)
+    vfloat4 r = _mm_rsqrt_ps(a);
+    r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a, r), r));
+    r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a, r), r));
+    r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a, r), r));
+    return r;
+#else
+
 #if defined(__AVX512VL__)
     vfloat4 r = _mm_rsqrt14_ps(a);
 #else
     vfloat4 r = _mm_rsqrt_ps(a);
 #endif
 
-#if defined(__ARM_NEON)
-    r = _mm_fmadd_ps(_mm_set1_ps(1.5f), r, _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
-    r = _mm_fmadd_ps(_mm_set1_ps(1.5f), r, _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
-#elif defined(__AVX2__)
+#if defined(__AVX2__)
     r = _mm_fmadd_ps(_mm_set1_ps(1.5f), r, _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
 #else
     r = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f), r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r)));
+#endif
+
 #endif
     return r;
   }
@@ -344,7 +390,8 @@ namespace embree
   __forceinline vfloat4 max(const vfloat4& a, float          b) { return _mm_max_ps(a,vfloat4(b)); }
   __forceinline vfloat4 max(float          a, const vfloat4& b) { return _mm_max_ps(vfloat4(a),b); }
 
-#if defined(__SSE4_1__)
+#if defined(__SSE4_1__) || defined(__aarch64__)
+
     __forceinline vfloat4 mini(const vfloat4& a, const vfloat4& b) {
       const vint4 ai = _mm_castps_si128(a);
       const vint4 bi = _mm_castps_si128(b);
@@ -393,9 +440,10 @@ namespace embree
   __forceinline vfloat4 nmsub(const vfloat4& a, const vfloat4& b, const vfloat4& c) { return _mm_fnmsub_ps(a,b,c); }
 #else
   __forceinline vfloat4 madd (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return a*b+c; }
-  __forceinline vfloat4 msub (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return a*b-c; }
   __forceinline vfloat4 nmadd(const vfloat4& a, const vfloat4& b, const vfloat4& c) { return -a*b+c;}
   __forceinline vfloat4 nmsub(const vfloat4& a, const vfloat4& b, const vfloat4& c) { return -a*b-c; }
+  __forceinline vfloat4 msub (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return a*b-c; }
+
 #endif
 
   ////////////////////////////////////////////////////////////////////////////////
@@ -429,8 +477,13 @@ namespace embree
   __forceinline vboolf4 operator ==(const vfloat4& a, const vfloat4& b) { return _mm_cmpeq_ps (a, b); }
   __forceinline vboolf4 operator !=(const vfloat4& a, const vfloat4& b) { return _mm_cmpneq_ps(a, b); }
   __forceinline vboolf4 operator < (const vfloat4& a, const vfloat4& b) { return _mm_cmplt_ps (a, b); }
+#if defined(__aarch64__)
+  __forceinline vboolf4 operator >=(const vfloat4& a, const vfloat4& b) { return _mm_cmpge_ps (a, b); }
+  __forceinline vboolf4 operator > (const vfloat4& a, const vfloat4& b) { return _mm_cmpgt_ps (a, b); }
+#else
   __forceinline vboolf4 operator >=(const vfloat4& a, const vfloat4& b) { return _mm_cmpnlt_ps(a, b); }
   __forceinline vboolf4 operator > (const vfloat4& a, const vfloat4& b) { return _mm_cmpnle_ps(a, b); }
+#endif
   __forceinline vboolf4 operator <=(const vfloat4& a, const vfloat4& b) { return _mm_cmple_ps (a, b); }
 #endif
 
@@ -484,7 +537,7 @@ namespace embree
     return select(vboolf4(mask), t, f);
 #endif
   }
-  
+
   __forceinline vfloat4 lerp(const vfloat4& a, const vfloat4& b, const vfloat4& t) {
     return madd(t,b-a,a);
   }
@@ -506,10 +559,10 @@ namespace embree
   ////////////////////////////////////////////////////////////////////////////////
 
 #if defined(__aarch64__)
-  __forceinline vfloat4 floor(const vfloat4& a) { return vrndmq_f32(a.v); }
-  __forceinline vfloat4 ceil (const vfloat4& a) { return vrndpq_f32(a.v); }
-  __forceinline vfloat4 trunc(const vfloat4& a) { return vrndq_f32(a.v); }
-  __forceinline vfloat4 round(const vfloat4& a) { return vrndnq_f32(a.v); }
+  __forceinline vfloat4 floor(const vfloat4& a) { return vrndmq_f32(a.v); } // towards -inf
+  __forceinline vfloat4 ceil (const vfloat4& a) { return vrndpq_f32(a.v); } // toward +inf
+  __forceinline vfloat4 trunc(const vfloat4& a) { return vrndq_f32(a.v); } // towards 0
+  __forceinline vfloat4 round(const vfloat4& a) { return vrndnq_f32(a.v); } // to nearest, ties to even. NOTE(LTE): arm clang uses vrndnq, old gcc uses vrndqn?
 #elif defined (__SSE4_1__)
   __forceinline vfloat4 floor(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF    ); }
   __forceinline vfloat4 ceil (const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF    ); }
@@ -524,7 +577,9 @@ namespace embree
   __forceinline vfloat4 frac(const vfloat4& a) { return a-floor(a); }
 
   __forceinline vint4 floori(const vfloat4& a) {
-#if defined(__SSE4_1__)
+#if defined(__aarch64__)
+    return vcvtq_s32_f32(floor(a));
+#elif defined(__SSE4_1__)
     return vint4(floor(a));
 #else
     return vint4(a-vfloat4(0.5f));
@@ -538,6 +593,16 @@ namespace embree
   __forceinline vfloat4 unpacklo(const vfloat4& a, const vfloat4& b) { return _mm_unpacklo_ps(a, b); }
   __forceinline vfloat4 unpackhi(const vfloat4& a, const vfloat4& b) { return _mm_unpackhi_ps(a, b); }
 
+#if defined(__aarch64__)
+      template<int i0, int i1, int i2, int i3>
+      __forceinline vfloat4 shuffle(const vfloat4& v) {
+          return vreinterpretq_f32_u8(vqtbl1q_u8( (uint8x16_t)v.v, _MN_SHUFFLE(i0, i1, i2, i3)));
+      }
+      template<int i0, int i1, int i2, int i3>
+      __forceinline vfloat4 shuffle(const vfloat4& a, const vfloat4& b) {
+          return vreinterpretq_f32_u8(vqtbl2q_u8( (uint8x16x2_t){(uint8x16_t)a.v, (uint8x16_t)b.v}, _MF_SHUFFLE(i0, i1, i2, i3)));
+      }
+#else
   template<int i0, int i1, int i2, int i3>
   __forceinline vfloat4 shuffle(const vfloat4& v) {
     return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v), _MM_SHUFFLE(i3, i2, i1, i0)));
@@ -547,8 +612,9 @@ namespace embree
   __forceinline vfloat4 shuffle(const vfloat4& a, const vfloat4& b) {
     return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
   }
+#endif
 
-#if defined(__SSE3__)
+#if defined(__SSE3__) && !defined(__aarch64__)
   template<> __forceinline vfloat4 shuffle<0, 0, 2, 2>(const vfloat4& v) { return _mm_moveldup_ps(v); }
   template<> __forceinline vfloat4 shuffle<1, 1, 3, 3>(const vfloat4& v) { return _mm_movehdup_ps(v); }
   template<> __forceinline vfloat4 shuffle<0, 1, 0, 1>(const vfloat4& v) { return _mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(v))); }
@@ -559,10 +625,14 @@ namespace embree
     return shuffle<i,i,i,i>(v);
   }
 
+#if defined(__aarch64__)
+  template<int i> __forceinline float extract(const vfloat4& a) { return a[i]; }
+#else
   template<int i> __forceinline float extract   (const vfloat4& a) { return _mm_cvtss_f32(shuffle<i>(a)); }
   template<>      __forceinline float extract<0>(const vfloat4& a) { return _mm_cvtss_f32(a); }
+#endif
 
-#if defined (__SSE4_1__)
+#if defined (__SSE4_1__) && !defined(__aarch64__)
   template<int dst, int src, int clr> __forceinline vfloat4 insert(const vfloat4& a, const vfloat4& b) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); }
   template<int dst, int src> __forceinline vfloat4 insert(const vfloat4& a, const vfloat4& b) { return insert<dst, src, 0>(a, b); }
   template<int dst> __forceinline vfloat4 insert(const vfloat4& a, const float b) { return insert<dst, 0>(a, _mm_set_ss(b)); }
@@ -664,14 +734,25 @@ namespace embree
   ////////////////////////////////////////////////////////////////////////////////
   /// Reductions
   ////////////////////////////////////////////////////////////////////////////////
-
+#if defined(__aarch64__)
+  __forceinline vfloat4 vreduce_min(const vfloat4& v) { float h = vminvq_f32(v); return vdupq_n_f32(h); }
+  __forceinline vfloat4 vreduce_max(const vfloat4& v) { float h = vmaxvq_f32(v); return vdupq_n_f32(h); }
+  __forceinline vfloat4 vreduce_add(const vfloat4& v) { float h = vaddvq_f32(v); return vdupq_n_f32(h); }
+#else
   __forceinline vfloat4 vreduce_min(const vfloat4& v) { vfloat4 h = min(shuffle<1,0,3,2>(v),v); return min(shuffle<2,3,0,1>(h),h); }
   __forceinline vfloat4 vreduce_max(const vfloat4& v) { vfloat4 h = max(shuffle<1,0,3,2>(v),v); return max(shuffle<2,3,0,1>(h),h); }
   __forceinline vfloat4 vreduce_add(const vfloat4& v) { vfloat4 h = shuffle<1,0,3,2>(v)   + v ; return shuffle<2,3,0,1>(h)   + h ; }
+#endif
 
+#if defined(__aarch64__)
+  __forceinline float reduce_min(const vfloat4& v) { return vminvq_f32(v); }
+  __forceinline float reduce_max(const vfloat4& v) { return vmaxvq_f32(v); }
+  __forceinline float reduce_add(const vfloat4& v) { return vaddvq_f32(v); }
+#else
   __forceinline float reduce_min(const vfloat4& v) { return _mm_cvtss_f32(vreduce_min(v)); }
   __forceinline float reduce_max(const vfloat4& v) { return _mm_cvtss_f32(vreduce_max(v)); }
   __forceinline float reduce_add(const vfloat4& v) { return _mm_cvtss_f32(vreduce_add(v)); }
+#endif
 
   __forceinline size_t select_min(const vboolf4& valid, const vfloat4& v) 
   { 
@@ -687,7 +768,7 @@ namespace embree
   }
   
   ////////////////////////////////////////////////////////////////////////////////
-  /// Euclidian Space Operators
+  /// Euclidean Space Operators
   ////////////////////////////////////////////////////////////////////////////////
 
   __forceinline float dot(const vfloat4& a, const vfloat4& b) {

+ 75 - 12
thirdparty/embree/common/simd/vfloat8_avx.h

@@ -107,11 +107,11 @@ namespace embree
     static __forceinline void store (const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_mask_store_ps ((float*)ptr,mask,v); }
     static __forceinline void storeu(const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_mask_storeu_ps((float*)ptr,mask,v); }
 #else
-    static __forceinline vfloat8 load (const vboolf8& mask, const void* ptr) { return _mm256_maskload_ps((float*)ptr,(__m256i)mask); }
-    static __forceinline vfloat8 loadu(const vboolf8& mask, const void* ptr) { return _mm256_maskload_ps((float*)ptr,(__m256i)mask); }
+    static __forceinline vfloat8 load (const vboolf8& mask, const void* ptr) { return _mm256_maskload_ps((float*)ptr,_mm256_castps_si256(mask.v)); }
+    static __forceinline vfloat8 loadu(const vboolf8& mask, const void* ptr) { return _mm256_maskload_ps((float*)ptr,_mm256_castps_si256(mask.v)); }
 
-    static __forceinline void store (const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,v); }
-    static __forceinline void storeu(const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,v); }
+    static __forceinline void store (const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_maskstore_ps((float*)ptr,_mm256_castps_si256(mask.v),v); }
+    static __forceinline void storeu(const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_maskstore_ps((float*)ptr,_mm256_castps_si256(mask.v),v); }
 #endif
     
 #if defined(__AVX2__)
@@ -126,7 +126,7 @@ namespace embree
 
     template<int scale = 4>
     static __forceinline vfloat8 gather(const float* ptr, const vint8& index) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
       return _mm256_i32gather_ps(ptr, index ,scale);
 #else
       return vfloat8(
@@ -146,7 +146,7 @@ namespace embree
       vfloat8 r = zero;
 #if defined(__AVX512VL__)
       return _mm256_mmask_i32gather_ps(r, mask, index, ptr, scale);
-#elif defined(__AVX2__)
+#elif defined(__AVX2__) && !defined(__aarch64__)
       return _mm256_mask_i32gather_ps(r, ptr, index, mask, scale);
 #else
       if (likely(mask[0])) r[0] = *(float*)(((char*)ptr)+scale*index[0]);
@@ -215,20 +215,52 @@ namespace embree
   __forceinline vfloat8 toFloat(const vint8&   a) { return vfloat8(a); }
 
   __forceinline vfloat8 operator +(const vfloat8& a) { return a; }
+#if !defined(__aarch64__)
   __forceinline vfloat8 operator -(const vfloat8& a) {
     const __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)); 
     return _mm256_xor_ps(a, mask);
   }
+#else
+  __forceinline vfloat8 operator -(const vfloat8& a) {
+      __m256 res;
+      res.lo = vnegq_f32(a.v.lo);
+      res.hi = vnegq_f32(a.v.hi);
+      return res;
+}
+#endif
+
+#if !defined(__aarch64__)
   __forceinline vfloat8 abs(const vfloat8& a) {
     const __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff));
     return _mm256_and_ps(a, mask);
   }
+#else
+__forceinline vfloat8 abs(const vfloat8& a) {
+    __m256 res;
+    res.lo = vabsq_f32(a.v.lo);
+    res.hi = vabsq_f32(a.v.hi);
+    return res;
+}
+#endif
+
+#if !defined(__aarch64__)
   __forceinline vfloat8 sign   (const vfloat8& a) { return _mm256_blendv_ps(vfloat8(one), -vfloat8(one), _mm256_cmp_ps(a, vfloat8(zero), _CMP_NGE_UQ)); }
+#else
+  __forceinline vfloat8 sign   (const vfloat8& a) { return _mm256_blendv_ps(vfloat8(one), -vfloat8(one), _mm256_cmplt_ps(a, vfloat8(zero))); }
+#endif
   __forceinline vfloat8 signmsk(const vfloat8& a) { return _mm256_and_ps(a,_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000))); }
 
 
   static __forceinline vfloat8 rcp(const vfloat8& a)
   {
+#if defined(__aarch64__)
+    vfloat8 ret;
+    const float32x4_t one = vdupq_n_f32(1.0f);
+    ret.v.lo = vdivq_f32(one, a.v.lo);
+    ret.v.hi = vdivq_f32(one, a.v.hi);
+    return ret;
+#endif
+
 #if defined(__AVX512VL__)
     const vfloat8 r = _mm256_rcp14_ps(a);
 #else
@@ -236,9 +268,12 @@ namespace embree
 #endif
 
 #if defined(__AVX2__)
-    return _mm256_mul_ps(r, _mm256_fnmadd_ps(r, a, vfloat8(2.0f)));
+    // First, compute 1 - a * r (which will be very close to 0)
+    const vfloat8 h_n = _mm256_fnmadd_ps(a, r, vfloat8(1.0f));
+    // Then compute r + r * h_n
+    return _mm256_fmadd_ps(r, h_n, r);
 #else
-    return _mm256_mul_ps(r, _mm256_sub_ps(vfloat8(2.0f), _mm256_mul_ps(r, a)));
+    return _mm256_add_ps(r,_mm256_mul_ps(r, _mm256_sub_ps(vfloat8(1.0f), _mm256_mul_ps(a, r))));  // computes r + r * (1 - a * r)
 #endif
   }
   __forceinline vfloat8 sqr (const vfloat8& a) { return _mm256_mul_ps(a,a); }
@@ -384,7 +419,7 @@ namespace embree
   static __forceinline vfloat8 select(const vboolf8& m, const vfloat8& t, const vfloat8& f) {
     return _mm256_mask_blend_ps(m, f, t);
   }
-#else
+#elif !defined(__aarch64__)
   static __forceinline vboolf8 operator ==(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_EQ_OQ);  }
   static __forceinline vboolf8 operator !=(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_NEQ_UQ); }
   static __forceinline vboolf8 operator < (const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_LT_OS);  }
@@ -395,6 +430,18 @@ namespace embree
   static __forceinline vfloat8 select(const vboolf8& m, const vfloat8& t, const vfloat8& f) {
     return _mm256_blendv_ps(f, t, m); 
   }
+#else
+  static __forceinline vboolf8 operator ==(const vfloat8& a, const vfloat8& b) { return _mm256_cmpeq_ps(a, b);  }
+  static __forceinline vboolf8 operator !=(const vfloat8& a, const vfloat8& b) { return _mm256_cmpneq_ps(a, b); }
+  static __forceinline vboolf8 operator < (const vfloat8& a, const vfloat8& b) { return _mm256_cmplt_ps(a, b);  }
+  static __forceinline vboolf8 operator >=(const vfloat8& a, const vfloat8& b) { return _mm256_cmpge_ps(a, b);  }
+  static __forceinline vboolf8 operator > (const vfloat8& a, const vfloat8& b) { return _mm256_cmpgt_ps(a, b);  }
+  static __forceinline vboolf8 operator <=(const vfloat8& a, const vfloat8& b) { return _mm256_cmple_ps(a, b);  }
+
+  static __forceinline vfloat8 select(const vboolf8& m, const vfloat8& t, const vfloat8& f) {
+    return _mm256_blendv_ps(f, t, m);
+  }
+
 #endif
 
   template<int mask>
@@ -463,10 +510,17 @@ namespace embree
   /// Rounding Functions
   ////////////////////////////////////////////////////////////////////////////////
 
+#if !defined(__aarch64__)
   __forceinline vfloat8 floor(const vfloat8& a) { return _mm256_round_ps(a, _MM_FROUND_TO_NEG_INF    ); }
   __forceinline vfloat8 ceil (const vfloat8& a) { return _mm256_round_ps(a, _MM_FROUND_TO_POS_INF    ); }
   __forceinline vfloat8 trunc(const vfloat8& a) { return _mm256_round_ps(a, _MM_FROUND_TO_ZERO       ); }
   __forceinline vfloat8 round(const vfloat8& a) { return _mm256_round_ps(a, _MM_FROUND_TO_NEAREST_INT); }
+#else
+  __forceinline vfloat8 floor(const vfloat8& a) { return _mm256_floor_ps(a); }
+  __forceinline vfloat8 ceil (const vfloat8& a) { return _mm256_ceil_ps(a); }
+#endif
+
+
   __forceinline vfloat8 frac (const vfloat8& a) { return a-floor(a); }
 
   ////////////////////////////////////////////////////////////////////////////////
@@ -501,9 +555,11 @@ namespace embree
     return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
   }
 
+#if !defined(__aarch64__)
   template<> __forceinline vfloat8 shuffle<0, 0, 2, 2>(const vfloat8& v) { return _mm256_moveldup_ps(v); }
   template<> __forceinline vfloat8 shuffle<1, 1, 3, 3>(const vfloat8& v) { return _mm256_movehdup_ps(v); }
   template<> __forceinline vfloat8 shuffle<0, 1, 0, 1>(const vfloat8& v) { return _mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(v))); }
+#endif
 
   __forceinline vfloat8 broadcast(const float* ptr) { return _mm256_broadcast_ss(ptr); }
   template<size_t i> __forceinline vfloat8 insert4(const vfloat8& a, const vfloat4& b) { return _mm256_insertf128_ps(a, b, i); }
@@ -512,7 +568,7 @@ namespace embree
 
   __forceinline float toScalar(const vfloat8& v) { return _mm_cvtss_f32(_mm256_castps256_ps128(v)); }
 
-#if defined (__AVX2__)
+#if defined (__AVX2__) && !defined(__aarch64__)
   static __forceinline vfloat8 permute(const vfloat8& a, const __m256i& index) {
     return _mm256_permutevar8x32_ps(a, index);
   }
@@ -609,7 +665,7 @@ namespace embree
   ////////////////////////////////////////////////////////////////////////////////
   /// Reductions
   ////////////////////////////////////////////////////////////////////////////////
-
+#if !defined(__aarch64__)
   __forceinline vfloat8 vreduce_min2(const vfloat8& v) { return min(v,shuffle<1,0,3,2>(v)); }
   __forceinline vfloat8 vreduce_min4(const vfloat8& v) { vfloat8 v1 = vreduce_min2(v); return min(v1,shuffle<2,3,0,1>(v1)); }
   __forceinline vfloat8 vreduce_min (const vfloat8& v) { vfloat8 v1 = vreduce_min4(v); return min(v1,shuffle4<1,0>(v1)); }
@@ -625,7 +681,14 @@ namespace embree
   __forceinline float reduce_min(const vfloat8& v) { return toScalar(vreduce_min(v)); }
   __forceinline float reduce_max(const vfloat8& v) { return toScalar(vreduce_max(v)); }
   __forceinline float reduce_add(const vfloat8& v) { return toScalar(vreduce_add(v)); }
+#else
+  __forceinline float reduce_min(const vfloat8& v) { return vminvq_f32(_mm_min_ps(v.v.lo,v.v.hi)); }
+  __forceinline float reduce_max(const vfloat8& v) { return vmaxvq_f32(_mm_max_ps(v.v.lo,v.v.hi)); }
+  __forceinline vfloat8 vreduce_min(const vfloat8& v) { return vfloat8(reduce_min(v)); }
+  __forceinline vfloat8 vreduce_max(const vfloat8& v) { return vfloat8(reduce_max(v)); }
+  __forceinline float reduce_add(const vfloat8& v) { return vaddvq_f32(_mm_add_ps(v.v.lo,v.v.hi)); }
 
+#endif
   __forceinline size_t select_min(const vboolf8& valid, const vfloat8& v) 
   { 
     const vfloat8 a = select(valid,v,vfloat8(pos_inf)); 
@@ -642,7 +705,7 @@ namespace embree
 
 
   ////////////////////////////////////////////////////////////////////////////////
-  /// Euclidian Space Operators (pairs of Vec3fa's)
+  /// Euclidean Space Operators (pairs of Vec3fa's)
   ////////////////////////////////////////////////////////////////////////////////
 
   //__forceinline vfloat8 dot(const vfloat8& a, const vfloat8& b) {

+ 77 - 23
thirdparty/embree/common/simd/vint4_sse2.h

@@ -106,7 +106,14 @@ namespace embree
 #endif
 
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__)
+    static __forceinline vint4 load(const unsigned char* ptr) {
+        return _mm_load4epu8_epi32(((__m128i*)ptr));
+    }
+    static __forceinline vint4 loadu(const unsigned char* ptr) {
+        return  _mm_load4epu8_epi32(((__m128i*)ptr));
+    }
+#elif defined(__SSE4_1__)
     static __forceinline vint4 load(const unsigned char* ptr) {
       return _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr));
     }
@@ -127,7 +134,9 @@ namespace embree
 #endif
 
     static __forceinline vint4 load(const unsigned short* ptr) {
-#if defined (__SSE4_1__)
+#if defined(__aarch64__)
+      return __m128i(vmovl_u16(vld1_u16(ptr)));
+#elif defined (__SSE4_1__)
       return _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i*)ptr));
 #else
       return vint4(ptr[0],ptr[1],ptr[2],ptr[3]);
@@ -135,7 +144,12 @@ namespace embree
     } 
 
     static __forceinline void store(unsigned char* ptr, const vint4& v) {
-#if defined(__SSE4_1__)
+#if defined(__aarch64__)
+        int32x4_t x = v;
+        uint16x4_t y = vqmovn_u32(uint32x4_t(x));
+        uint8x8_t z = vqmovn_u16(vcombine_u16(y, y));
+        vst1_lane_u32((uint32_t *)ptr,uint32x2_t(z), 0);
+#elif defined(__SSE4_1__)
       __m128i x = v;
       x = _mm_packus_epi32(x, x);
       x = _mm_packus_epi16(x, x);
@@ -147,20 +161,26 @@ namespace embree
     }
 
     static __forceinline void store(unsigned short* ptr, const vint4& v) {
+#if defined(__aarch64__)
+      uint32x4_t x = uint32x4_t(v.v);
+      uint16x4_t y = vqmovn_u32(x);
+      vst1_u16(ptr, y);
+#else
       for (size_t i=0;i<4;i++)
         ptr[i] = (unsigned short)v[i];
+#endif
     }
 
     static __forceinline vint4 load_nt(void* ptr) {
-#if defined(__SSE4_1__)
-      return _mm_stream_load_si128((__m128i*)ptr); 
+#if defined(__aarch64__) || defined(__SSE4_1__)
+      return _mm_stream_load_si128((__m128i*)ptr);
 #else
       return _mm_load_si128((__m128i*)ptr); 
 #endif
     }
     
     static __forceinline void store_nt(void* ptr, const vint4& v) {
-#if defined(__SSE4_1__)
+#if !defined(__aarch64__) && defined(__SSE4_1__)
       _mm_stream_ps((float*)ptr, _mm_castsi128_ps(v));
 #else
       _mm_store_si128((__m128i*)ptr,v);
@@ -169,7 +189,7 @@ namespace embree
 
     template<int scale = 4>
     static __forceinline vint4 gather(const int* ptr, const vint4& index) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
       return _mm_i32gather_epi32(ptr, index, scale);
 #else
       return vint4(
@@ -185,7 +205,7 @@ namespace embree
       vint4 r = zero;
 #if defined(__AVX512VL__)
       return _mm_mmask_i32gather_epi32(r, mask, index, ptr, scale);
-#elif defined(__AVX2__)
+#elif defined(__AVX2__) && !defined(__aarch64__)
       return _mm_mask_i32gather_epi32(r, ptr, index, mask, scale);
 #else
       if (likely(mask[0])) r[0] = *(int*)(((char*)ptr)+scale*index[0]);
@@ -222,7 +242,7 @@ namespace embree
 #endif
     }
 
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(__aarch64__)
     static __forceinline vint4 broadcast64(long long a) { return _mm_set1_epi64x(a); }
 #endif
 
@@ -236,6 +256,8 @@ namespace embree
     friend __forceinline vint4 select(const vboolf4& m, const vint4& t, const vint4& f) {
 #if defined(__AVX512VL__)
       return _mm_mask_blend_epi32(m, (__m128i)f, (__m128i)t);
+#elif defined(__aarch64__)
+      return _mm_castps_si128(_mm_blendv_ps((__m128)f.v,(__m128) t.v, (__m128)m.v));
 #elif defined(__SSE4_1__)
       return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); 
 #else
@@ -256,7 +278,9 @@ namespace embree
 
   __forceinline vint4 operator +(const vint4& a) { return a; }
   __forceinline vint4 operator -(const vint4& a) { return _mm_sub_epi32(_mm_setzero_si128(), a); }
-#if defined(__SSSE3__)
+#if defined(__aarch64__)
+  __forceinline vint4 abs(const vint4& a) { return vabsq_s32(a.v); }
+#elif defined(__SSSE3__)
   __forceinline vint4 abs(const vint4& a) { return _mm_abs_epi32(a); }
 #endif
 
@@ -272,7 +296,7 @@ namespace embree
   __forceinline vint4 operator -(const vint4& a, int          b) { return a - vint4(b); }
   __forceinline vint4 operator -(int          a, const vint4& b) { return vint4(a) - b; }
 
-#if defined(__SSE4_1__)
+#if (defined(__aarch64__)) || defined(__SSE4_1__)
   __forceinline vint4 operator *(const vint4& a, const vint4& b) { return _mm_mullo_epi32(a, b); }
 #else
   __forceinline vint4 operator *(const vint4& a, const vint4& b) { return vint4(a[0]*b[0],a[1]*b[1],a[2]*b[2],a[3]*b[3]); }
@@ -292,8 +316,8 @@ namespace embree
   __forceinline vint4 operator ^(const vint4& a, int          b) { return a ^ vint4(b); }
   __forceinline vint4 operator ^(int          a, const vint4& b) { return vint4(a) ^ b; }
 
-  __forceinline vint4 operator <<(const vint4& a, int n) { return _mm_slli_epi32(a, n); }
-  __forceinline vint4 operator >>(const vint4& a, int n) { return _mm_srai_epi32(a, n); }
+  __forceinline vint4 operator <<(const vint4& a, const int n) { return _mm_slli_epi32(a, n); }
+  __forceinline vint4 operator >>(const vint4& a, const int n) { return _mm_srai_epi32(a, n); }
 
   __forceinline vint4 sll (const vint4& a, int b) { return _mm_slli_epi32(a, b); }
   __forceinline vint4 sra (const vint4& a, int b) { return _mm_srai_epi32(a, b); }
@@ -309,7 +333,7 @@ namespace embree
   __forceinline vint4& operator -=(vint4& a, const vint4& b) { return a = a - b; }
   __forceinline vint4& operator -=(vint4& a, int          b) { return a = a - b; }
 
-#if defined(__SSE4_1__)
+#if (defined(__aarch64__)) || defined(__SSE4_1__)
   __forceinline vint4& operator *=(vint4& a, const vint4& b) { return a = a * b; }
   __forceinline vint4& operator *=(vint4& a, int          b) { return a = a * b; }
 #endif
@@ -393,7 +417,7 @@ namespace embree
 #endif    
   }
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
   __forceinline vint4 min(const vint4& a, const vint4& b) { return _mm_min_epi32(a, b); }
   __forceinline vint4 max(const vint4& a, const vint4& b) { return _mm_max_epi32(a, b); }
 
@@ -417,6 +441,16 @@ namespace embree
   __forceinline vint4 unpacklo(const vint4& a, const vint4& b) { return _mm_castps_si128(_mm_unpacklo_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); }
   __forceinline vint4 unpackhi(const vint4& a, const vint4& b) { return _mm_castps_si128(_mm_unpackhi_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); }
 
+#if defined(__aarch64__)
+    template<int i0, int i1, int i2, int i3>
+    __forceinline vint4 shuffle(const vint4& v) {
+        return vreinterpretq_s32_u8(vqtbl1q_u8( (uint8x16_t)v.v, _MN_SHUFFLE(i0, i1, i2, i3)));
+    }
+    template<int i0, int i1, int i2, int i3>
+    __forceinline vint4 shuffle(const vint4& a, const vint4& b) {
+        return vreinterpretq_s32_u8(vqtbl2q_u8( (uint8x16x2_t){(uint8x16_t)a.v, (uint8x16_t)b.v}, _MF_SHUFFLE(i0, i1, i2, i3)));
+    }
+#else
   template<int i0, int i1, int i2, int i3>
   __forceinline vint4 shuffle(const vint4& v) {
     return _mm_shuffle_epi32(v, _MM_SHUFFLE(i3, i2, i1, i0));
@@ -426,7 +460,7 @@ namespace embree
   __forceinline vint4 shuffle(const vint4& a, const vint4& b) {
     return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
   }
-
+#endif
 #if defined(__SSE3__)
   template<> __forceinline vint4 shuffle<0, 0, 2, 2>(const vint4& v) { return _mm_castps_si128(_mm_moveldup_ps(_mm_castsi128_ps(v))); }
   template<> __forceinline vint4 shuffle<1, 1, 3, 3>(const vint4& v) { return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(v))); }
@@ -438,7 +472,7 @@ namespace embree
     return shuffle<i,i,i,i>(v);
   }
 
-#if defined(__SSE4_1__)
+#if defined(__SSE4_1__) && !defined(__aarch64__)
   template<int src> __forceinline int extract(const vint4& b) { return _mm_extract_epi32(b, src); }
   template<int dst> __forceinline vint4 insert(const vint4& a, const int b) { return _mm_insert_epi32(a, b, dst); }
 #else
@@ -446,18 +480,27 @@ namespace embree
   template<int dst> __forceinline vint4 insert(const vint4& a, int b) { vint4 c = a; c[dst&3] = b; return c; }
 #endif
 
-
   template<> __forceinline int extract<0>(const vint4& b) { return _mm_cvtsi128_si32(b); }
-
+  
   __forceinline int toScalar(const vint4& v) { return _mm_cvtsi128_si32(v); }
-
-  __forceinline size_t toSizeT(const vint4& v) { 
+  
+#if defined(__aarch64__)
+  __forceinline size_t toSizeT(const vint4& v) {
+    uint64x2_t x = uint64x2_t(v.v);
+    return x[0];
+  }
+#else
+__forceinline size_t toSizeT(const vint4& v) { 
 #if defined(__WIN32__) && !defined(__X86_64__) // win32 workaround
     return toScalar(v);
+#elif defined(__ARM_NEON)
+    // FIXME(LTE): Do we need a swap(i.e. use lane 1)?
+    return vgetq_lane_u64(*(reinterpret_cast<const uint64x2_t *>(&v)), 0);
 #else
     return _mm_cvtsi128_si64(v); 
 #endif
   }
+#endif
 
 #if defined(__AVX512VL__)
 
@@ -475,7 +518,17 @@ namespace embree
   /// Reductions
   ////////////////////////////////////////////////////////////////////////////////
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__) || defined(__SSE4_1__)
+
+#if defined(__aarch64__)
+    __forceinline vint4 vreduce_min(const vint4& v) { int h = vminvq_s32(v); return vdupq_n_s32(h); }
+    __forceinline vint4 vreduce_max(const vint4& v) { int h = vmaxvq_s32(v); return vdupq_n_s32(h); }
+    __forceinline vint4 vreduce_add(const vint4& v) { int h = vaddvq_s32(v); return vdupq_n_s32(h); }
+
+    __forceinline int reduce_min(const vint4& v) { return vminvq_s32(v); }
+    __forceinline int reduce_max(const vint4& v) { return vmaxvq_s32(v); }
+    __forceinline int reduce_add(const vint4& v) { return vaddvq_s32(v); }
+#else
   __forceinline vint4 vreduce_min(const vint4& v) { vint4 h = min(shuffle<1,0,3,2>(v),v); return min(shuffle<2,3,0,1>(h),h); }
   __forceinline vint4 vreduce_max(const vint4& v) { vint4 h = max(shuffle<1,0,3,2>(v),v); return max(shuffle<2,3,0,1>(h),h); }
   __forceinline vint4 vreduce_add(const vint4& v) { vint4 h = shuffle<1,0,3,2>(v)   + v ; return shuffle<2,3,0,1>(h)   + h ; }
@@ -483,6 +536,7 @@ namespace embree
   __forceinline int reduce_min(const vint4& v) { return toScalar(vreduce_min(v)); }
   __forceinline int reduce_max(const vint4& v) { return toScalar(vreduce_max(v)); }
   __forceinline int reduce_add(const vint4& v) { return toScalar(vreduce_add(v)); }
+#endif
 
   __forceinline size_t select_min(const vint4& v) { return bsf(movemask(v == vreduce_min(v))); }
   __forceinline size_t select_max(const vint4& v) { return bsf(movemask(v == vreduce_max(v))); }
@@ -502,7 +556,7 @@ namespace embree
   /// Sorting networks
   ////////////////////////////////////////////////////////////////////////////////
 
-#if defined(__SSE4_1__)
+#if (defined(__aarch64__)) || defined(__SSE4_1__)
 
   __forceinline vint4 usort_ascending(const vint4& v)
   {

+ 2 - 2
thirdparty/embree/common/simd/vint8_avx.h

@@ -79,8 +79,8 @@ namespace embree
     static __forceinline void store (void* ptr, const vint8& f) { _mm256_store_ps((float*)ptr,_mm256_castsi256_ps(f)); }
     static __forceinline void storeu(void* ptr, const vint8& f) { _mm256_storeu_ps((float*)ptr,_mm256_castsi256_ps(f)); }
     
-    static __forceinline void store (const vboolf8& mask, void* ptr, const vint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,_mm256_castsi256_ps(f)); }
-    static __forceinline void storeu(const vboolf8& mask, void* ptr, const vint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,_mm256_castsi256_ps(f)); }
+    static __forceinline void store (const vboolf8& mask, void* ptr, const vint8& f) { _mm256_maskstore_ps((float*)ptr,_mm256_castps_si256(mask.v),_mm256_castsi256_ps(f)); }
+    static __forceinline void storeu(const vboolf8& mask, void* ptr, const vint8& f) { _mm256_maskstore_ps((float*)ptr,_mm256_castps_si256(mask.v),_mm256_castsi256_ps(f)); }
 
     static __forceinline void store_nt(void* ptr, const vint8& v) {
       _mm256_stream_ps((float*)ptr,_mm256_castsi256_ps(v));

+ 4 - 0
thirdparty/embree/common/simd/vint8_avx2.h

@@ -393,6 +393,7 @@ namespace embree
 
   __forceinline int toScalar(const vint8& v) { return _mm_cvtsi128_si32(_mm256_castsi256_si128(v)); }
 
+#if !defined(__aarch64__)
   __forceinline vint8 permute(const vint8& v, const __m256i& index) {
     return _mm256_permutevar8x32_epi32(v, index);
   }
@@ -410,6 +411,9 @@ namespace embree
 #endif
   }  
 
+#endif
+
+
   ////////////////////////////////////////////////////////////////////////////////
   /// Reductions
   ////////////////////////////////////////////////////////////////////////////////

+ 28 - 10
thirdparty/embree/common/simd/vuint4_sse2.h

@@ -95,7 +95,14 @@ namespace embree
     static __forceinline void storeu(const vboolf4& mask, void* ptr, const vuint4& i) { storeu(ptr,select(mask,i,loadu(ptr))); }
 #endif
 
-#if defined(__SSE4_1__)
+#if defined(__aarch64__)
+    static __forceinline vuint4 load(const unsigned char* ptr) {
+        return _mm_load4epu8_epi32(((__m128i*)ptr));
+    }
+    static __forceinline vuint4 loadu(const unsigned char* ptr) {
+        return _mm_load4epu8_epi32(((__m128i*)ptr));
+    }
+#elif defined(__SSE4_1__)
     static __forceinline vuint4 load(const unsigned char* ptr) {
       return _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr));
     }
@@ -107,7 +114,9 @@ namespace embree
 #endif
 
     static __forceinline vuint4 load(const unsigned short* ptr) {
-#if defined (__SSE4_1__)
+#if defined(__aarch64__)
+      return _mm_load4epu16_epi32(((__m128i*)ptr));
+#elif defined (__SSE4_1__)
       return _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i*)ptr));
 #else
       return vuint4(ptr[0],ptr[1],ptr[2],ptr[3]);
@@ -115,7 +124,7 @@ namespace embree
     } 
 
     static __forceinline vuint4 load_nt(void* ptr) {
-#if defined(__SSE4_1__)
+#if (defined(__aarch64__)) || defined(__SSE4_1__)
       return _mm_stream_load_si128((__m128i*)ptr); 
 #else
       return _mm_load_si128((__m128i*)ptr); 
@@ -123,8 +132,8 @@ namespace embree
     }
     
     static __forceinline void store_nt(void* ptr, const vuint4& v) {
-#if defined(__SSE4_1__)
-      _mm_stream_ps((float*)ptr,_mm_castsi128_ps(v)); 
+#if !defined(__aarch64__) && defined(__SSE4_1__)
+      _mm_stream_ps((float*)ptr, _mm_castsi128_ps(v));
 #else
       _mm_store_si128((__m128i*)ptr,v);
 #endif
@@ -132,7 +141,7 @@ namespace embree
 
     template<int scale = 4>
     static __forceinline vuint4 gather(const unsigned int* ptr, const vint4& index) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
       return _mm_i32gather_epi32((const int*)ptr, index, scale);
 #else
       return vuint4(
@@ -148,7 +157,7 @@ namespace embree
       vuint4 r = zero;
 #if defined(__AVX512VL__)
       return _mm_mmask_i32gather_epi32(r, mask, index, ptr, scale);
-#elif defined(__AVX2__)
+#elif defined(__AVX2__) && !defined(__aarch64__)
       return _mm_mask_i32gather_epi32(r, (const int*)ptr, index, mask, scale);
 #else
       if (likely(mask[0])) r[0] = *(unsigned int*)(((char*)ptr)+scale*index[0]);
@@ -344,6 +353,16 @@ namespace embree
   __forceinline vuint4 unpacklo(const vuint4& a, const vuint4& b) { return _mm_castps_si128(_mm_unpacklo_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); }
   __forceinline vuint4 unpackhi(const vuint4& a, const vuint4& b) { return _mm_castps_si128(_mm_unpackhi_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); }
 
+#if defined(__aarch64__)
+  template<int i0, int i1, int i2, int i3>
+  __forceinline vuint4 shuffle(const vuint4& v) {
+    return vreinterpretq_s32_u8(vqtbl1q_u8( (uint8x16_t)v.v, _MN_SHUFFLE(i0, i1, i2, i3)));
+  }
+  template<int i0, int i1, int i2, int i3>
+  __forceinline vuint4 shuffle(const vuint4& a, const vuint4& b) {
+    return vreinterpretq_s32_u8(vqtbl2q_u8( (uint8x16x2_t){(uint8x16_t)a.v, (uint8x16_t)b.v}, _MF_SHUFFLE(i0, i1, i2, i3)));
+  }
+#else
   template<int i0, int i1, int i2, int i3>
   __forceinline vuint4 shuffle(const vuint4& v) {
     return _mm_shuffle_epi32(v, _MM_SHUFFLE(i3, i2, i1, i0));
@@ -353,7 +372,7 @@ namespace embree
   __forceinline vuint4 shuffle(const vuint4& a, const vuint4& b) {
     return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
   }
-
+#endif
 #if defined(__SSE3__)
   template<> __forceinline vuint4 shuffle<0, 0, 2, 2>(const vuint4& v) { return _mm_castps_si128(_mm_moveldup_ps(_mm_castsi128_ps(v))); }
   template<> __forceinline vuint4 shuffle<1, 1, 3, 3>(const vuint4& v) { return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(v))); }
@@ -365,7 +384,7 @@ namespace embree
     return shuffle<i,i,i,i>(v);
   }
 
-#if defined(__SSE4_1__)
+#if defined(__SSE4_1__) && !defined(__aarch64__)
   template<int src> __forceinline unsigned int extract(const vuint4& b) { return _mm_extract_epi32(b, src); }
   template<int dst> __forceinline vuint4 insert(const vuint4& a, const unsigned b) { return _mm_insert_epi32(a, b, dst); }
 #else
@@ -373,7 +392,6 @@ namespace embree
   template<int dst> __forceinline vuint4 insert(const vuint4& a, const unsigned b) { vuint4 c = a; c[dst&3] = b; return c; }
 #endif
 
-
   template<> __forceinline unsigned int extract<0>(const vuint4& b) { return _mm_cvtsi128_si32(b); }
 
   __forceinline unsigned int toScalar(const vuint4& v) { return _mm_cvtsi128_si32(v); }

+ 2 - 2
thirdparty/embree/common/simd/vuint8_avx.h

@@ -77,8 +77,8 @@ namespace embree
     static __forceinline void store (void* ptr, const vuint8& f) { _mm256_store_ps((float*)ptr,_mm256_castsi256_ps(f)); }
     static __forceinline void storeu(void* ptr, const vuint8& f) { _mm256_storeu_ps((float*)ptr,_mm256_castsi256_ps(f)); }
     
-    static __forceinline void store (const vboolf8& mask, void* ptr, const vuint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,_mm256_castsi256_ps(f)); }
-    static __forceinline void storeu(const vboolf8& mask, void* ptr, const vuint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,_mm256_castsi256_ps(f)); }
+    static __forceinline void store (const vboolf8& mask, void* ptr, const vuint8& f) { _mm256_maskstore_ps((float*)ptr,_mm256_castps_si256(mask.v),_mm256_castsi256_ps(f)); }
+    static __forceinline void storeu(const vboolf8& mask, void* ptr, const vuint8& f) { _mm256_maskstore_ps((float*)ptr,_mm256_castps_si256(mask.v),_mm256_castsi256_ps(f)); }
 
     static __forceinline void store_nt(void* ptr, const vuint8& v) {
       _mm256_stream_ps((float*)ptr,_mm256_castsi256_ps(v));

+ 2 - 0
thirdparty/embree/common/simd/vuint8_avx2.h

@@ -385,6 +385,7 @@ namespace embree
 
   __forceinline int toScalar(const vuint8& v) { return _mm_cvtsi128_si32(_mm256_castsi256_si128(v)); }
 
+#if !defined(__aarch64__)
   __forceinline vuint8 permute(const vuint8& v, const __m256i& index) {
     return _mm256_permutevar8x32_epi32(v, index);
   }
@@ -401,6 +402,7 @@ namespace embree
     return _mm256_alignr_epi8(a, b, 4*i);
 #endif
   }  
+#endif // !defined(__aarch64__)
 
   ////////////////////////////////////////////////////////////////////////////////
   /// Reductions

+ 13 - 0
thirdparty/embree/common/simd/wasm/emulation.h

@@ -0,0 +1,13 @@
+// Copyright 2009-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+// According to https://emscripten.org/docs/porting/simd.html, _MM_SET_EXCEPTION_MASK and
+// _mm_setcsr are unavailable in WebAssembly.
+
+#define _MM_SET_EXCEPTION_MASK(x)
+
+__forceinline void _mm_setcsr(unsigned int)
+{
+}

+ 6 - 6
thirdparty/embree/common/sys/array.h

@@ -59,8 +59,8 @@ namespace embree
 
       /********************** Iterators  ****************************/
 
-      __forceinline T* begin() const { return items; };
-      __forceinline T* end  () const { return items+M; };
+      __forceinline T* begin() const { return (T*)items; };
+      __forceinline T* end  () const { return (T*)items+M; };
 
 
       /********************** Capacity ****************************/
@@ -101,8 +101,8 @@ namespace embree
       __forceinline       T& at(size_t i)       { assert(i < M); return items[i]; }
       __forceinline const T& at(size_t i) const { assert(i < M); return items[i]; }
 
-      __forceinline T& front() const { assert(M > 0); return items[0]; };
-      __forceinline T& back () const { assert(M > 0); return items[M-1]; };
+      __forceinline T& front() { assert(M > 0); return items[0]; };
+      __forceinline T& back () { assert(M > 0); return items[M-1]; };
 
       __forceinline       T* data()       { return items; };
       __forceinline const T* data() const { return items; };
@@ -139,7 +139,7 @@ namespace embree
     __forceinline       Ty& operator[](const unsigned i)       { assert(i<N); return data[i]; }
     __forceinline const Ty& operator[](const unsigned i) const { assert(i<N); return data[i]; }
 
-#if defined(__64BIT__)
+#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
     __forceinline       Ty& operator[](const size_t i)       { assert(i<N); return data[i]; }
     __forceinline const Ty& operator[](const size_t i) const { assert(i<N); return data[i]; }
 #endif
@@ -196,7 +196,7 @@ namespace embree
     __forceinline       Ty& operator[](const int i)      { assert(i>=0 && i<max_total_elements); resize(i+1); return data[i]; }
     __forceinline       Ty& operator[](const unsigned i) { assert(i<max_total_elements); resize(i+1); return data[i]; }
 
-#if defined(__64BIT__)
+#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
     __forceinline       Ty& operator[](const size_t i) { assert(i<max_total_elements); resize(i+1); return data[i]; }
 #endif
 

+ 2 - 2
thirdparty/embree/common/sys/barrier.h

@@ -24,7 +24,7 @@ namespace embree
     BarrierSys& operator= (const BarrierSys& other) DELETED; // do not implement
 
   public:
-    /*! intializes the barrier with some number of threads */
+    /*! initializes the barrier with some number of threads */
     void init(size_t count);
 
     /*! lets calling thread wait in barrier */
@@ -94,7 +94,7 @@ namespace embree
     LinearBarrierActive& operator= (const LinearBarrierActive& other) DELETED; // do not implement
 
   public:
-    /*! intializes the barrier with some number of threads */
+    /*! initializes the barrier with some number of threads */
     void init(size_t threadCount);
     
     /*! thread with threadIndex waits in the barrier */

+ 74 - 50
thirdparty/embree/common/sys/intrinsics.h

@@ -13,6 +13,9 @@
 #include "../simd/arm/emulation.h"
 #else
 #include <immintrin.h>
+#if defined(__EMSCRIPTEN__)
+#include "../simd/wasm/emulation.h"
+#endif
 #endif
 
 #if defined(__BMI__) && defined(__GNUC__) && !defined(__INTEL_COMPILER)
@@ -24,24 +27,26 @@
   #endif
 #endif
 
-#if defined(__LZCNT__)
+#if defined(__aarch64__)
   #if !defined(_lzcnt_u32)
-    #define _lzcnt_u32 __lzcnt32
+    #define _lzcnt_u32 __builtin_clz
   #endif
-  #if !defined(_lzcnt_u64)
-    #define _lzcnt_u64 __lzcnt64
+#else
+  #if defined(__LZCNT__)
+    #if !defined(_lzcnt_u32)
+      #define _lzcnt_u32 __lzcnt32
+    #endif
+    #if !defined(_lzcnt_u64)
+      #define _lzcnt_u64 __lzcnt64
+    #endif
   #endif
 #endif
 
 #if defined(__WIN32__)
-// -- GODOT start --
-#if !defined(NOMINMAX)
-// -- GODOT end --
-#define NOMINMAX
-// -- GODOT start --
-#endif
-#include "windows.h"
-// -- GODOT end --
+#  if !defined(NOMINMAX)
+#    define NOMINMAX
+#  endif
+#  include <windows.h>
 #endif
 
 /* normally defined in pmmintrin.h, but we always need this */
@@ -69,7 +74,7 @@ namespace embree
   }
   
   __forceinline int bsf(int v) {
-#if defined(__AVX2__) 
+#if defined(__AVX2__) && !defined(__aarch64__)
     return _tzcnt_u32(v);
 #else
     unsigned long r = 0; _BitScanForward(&r,v); return r;
@@ -77,7 +82,7 @@ namespace embree
   }
   
   __forceinline unsigned bsf(unsigned v) {
-#if defined(__AVX2__) 
+#if defined(__AVX2__) && !defined(__aarch64__)
     return _tzcnt_u32(v);
 #else
     unsigned long r = 0; _BitScanForward(&r,v); return r;
@@ -118,7 +123,7 @@ namespace embree
 #endif
   
   __forceinline int bsr(int v) {
-#if defined(__AVX2__) 
+#if defined(__AVX2__)  && !defined(__aarch64__)
     return 31 - _lzcnt_u32(v);
 #else
     unsigned long r = 0; _BitScanReverse(&r,v); return r;
@@ -126,7 +131,7 @@ namespace embree
   }
   
   __forceinline unsigned bsr(unsigned v) {
-#if defined(__AVX2__) 
+#if defined(__AVX2__) && !defined(__aarch64__)
     return 31 - _lzcnt_u32(v);
 #else
     unsigned long r = 0; _BitScanReverse(&r,v); return r;
@@ -145,7 +150,7 @@ namespace embree
   
   __forceinline int lzcnt(const int x)
   {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
     return _lzcnt_u32(x);
 #else
     if (unlikely(x == 0)) return 32;
@@ -214,15 +219,26 @@ namespace embree
 #elif defined(__X86_ASM__)
 
   __forceinline void __cpuid(int out[4], int op) {
-    asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op)); 
+#if defined(__ARM_NEON)
+    if (op == 0) { // Get CPU name
+      out[0] = 0x41524d20;
+      out[1] = 0x41524d20;
+      out[2] = 0x41524d20;
+      out[3] = 0x41524d20;
+    }
+#else
+    asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
+#endif
   }
-  
+
+#if !defined(__ARM_NEON)
   __forceinline void __cpuid_count(int out[4], int op1, int op2) {
     asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2)); 
   }
-  
 #endif
-  
+
+#endif
+
   __forceinline uint64_t read_tsc()  {
 #if defined(__X86_ASM__)
     uint32_t high,low;
@@ -235,30 +251,38 @@ namespace embree
   }
   
   __forceinline int bsf(int v) {
-#if defined(__AVX2__) 
+#if defined(__ARM_NEON)
+    return __builtin_ctz(v);
+#else
+#if defined(__AVX2__)
     return _tzcnt_u32(v);
 #elif defined(__X86_ASM__)
     int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
 #else
     return __builtin_ctz(v);
+#endif
 #endif
   }
   
 #if defined(__64BIT__)
   __forceinline unsigned bsf(unsigned v) 
   {
-#if defined(__AVX2__) 
+#if defined(__ARM_NEON)
+    return __builtin_ctz(v);
+#else
+#if defined(__AVX2__)
     return _tzcnt_u32(v);
 #elif defined(__X86_ASM__)
     unsigned r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
 #else
     return __builtin_ctz(v);
+#endif
 #endif
   }
 #endif
   
   __forceinline size_t bsf(size_t v) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
 #if defined(__X86_64__)
     return _tzcnt_u64(v);
 #else
@@ -295,7 +319,7 @@ namespace embree
   }
   
   __forceinline int bsr(int v) {
-#if defined(__AVX2__) 
+#if defined(__AVX2__) && !defined(__aarch64__)
     return 31 - _lzcnt_u32(v);
 #elif defined(__X86_ASM__)
     int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
@@ -304,7 +328,7 @@ namespace embree
 #endif
   }
   
-#if defined(__64BIT__)
+#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
   __forceinline unsigned bsr(unsigned v) {
 #if defined(__AVX2__) 
     return 31 - _lzcnt_u32(v);
@@ -317,7 +341,7 @@ namespace embree
 #endif
   
   __forceinline size_t bsr(size_t v) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
 #if defined(__X86_64__)
     return 63 - _lzcnt_u64(v);
 #else
@@ -332,7 +356,7 @@ namespace embree
   
   __forceinline int lzcnt(const int x)
   {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
     return _lzcnt_u32(x);
 #else
     if (unlikely(x == 0)) return 32;
@@ -341,18 +365,18 @@ namespace embree
   }
 
   __forceinline size_t blsr(size_t v) {
-#if defined(__AVX2__) 
-#if defined(__INTEL_COMPILER)
+#if defined(__AVX2__) && !defined(__aarch64__)
+  #if defined(__INTEL_COMPILER)
     return _blsr_u64(v);
+  #else
+    #if defined(__X86_64__)
+       return __blsr_u64(v);
+    #else
+       return __blsr_u32(v);
+    #endif
+  #endif
 #else
-#if defined(__X86_64__)
-    return __blsr_u64(v);
-#else
-    return __blsr_u32(v);
-#endif
-#endif
-#else
-    return v & (v-1);
+       return v & (v-1);
 #endif
   }
   
@@ -368,7 +392,7 @@ namespace embree
 #if defined(__X86_ASM__)
     int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
 #else
-    return (v | (v << i));
+    return (v | (1 << i));
 #endif
   }
   
@@ -376,7 +400,7 @@ namespace embree
 #if defined(__X86_ASM__)
     int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
 #else
-    return (v & ~(v << i));
+    return (v & ~(1 << i));
 #endif
   }
   
@@ -392,7 +416,7 @@ namespace embree
 #if defined(__X86_ASM__)
     size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
 #else
-    return (v | (v << i));
+    return (v | (1 << i));
 #endif
   }
   
@@ -400,7 +424,7 @@ namespace embree
 #if defined(__X86_ASM__)
     size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
 #else
-    return (v & ~(v << i));
+    return (v & ~(1 << i));
 #endif
   }
 
@@ -435,8 +459,8 @@ namespace embree
 #endif
 #endif
 
-#if defined(__SSE4_2__)
-  
+#if defined(__SSE4_2__) || defined(__ARM_NEON)
+
   __forceinline int popcnt(int in) {
     return _mm_popcnt_u32(in);
   }
@@ -483,14 +507,14 @@ namespace embree
 #endif
   }
 
-  __forceinline void prefetchL1EX(const void* ptr) { 
-    prefetchEX(ptr); 
+  __forceinline void prefetchL1EX(const void* ptr) {
+    prefetchEX(ptr);
   }
-  
-  __forceinline void prefetchL2EX(const void* ptr) { 
-    prefetchEX(ptr); 
+
+  __forceinline void prefetchL2EX(const void* ptr) {
+    prefetchEX(ptr);
   }
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
    __forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); }
    __forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); }
 #if defined(__X86_64__)

+ 1 - 0
thirdparty/embree/common/sys/mutex.cpp

@@ -36,6 +36,7 @@ namespace embree
     MAYBE_UNUSED bool ok = pthread_mutex_destroy((pthread_mutex_t*)mutex) == 0;
     assert(ok);
     delete (pthread_mutex_t*)mutex; 
+    mutex = nullptr;
   }
   
   void MutexSys::lock() 

+ 6 - 0
thirdparty/embree/common/sys/mutex.h

@@ -7,6 +7,7 @@
 #include "intrinsics.h"
 #include "atomic.h"
 
+#define CPU_CACHELINE_SIZE 64
 namespace embree
 {
   /*! system mutex */
@@ -83,6 +84,11 @@ namespace embree
     atomic<bool> flag;
   };
 
+  class PaddedSpinLock : public SpinLock
+  {
+    private:
+      char padding[CPU_CACHELINE_SIZE - sizeof(SpinLock)];
+  };
   /*! safe mutex lock and unlock helper */
   template<typename Mutex> class Lock {
   public:

+ 10 - 10
thirdparty/embree/common/sys/platform.h

@@ -92,16 +92,19 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 #ifdef __WIN32__
-#define dll_export __declspec(dllexport)
-#define dll_import __declspec(dllimport)
+#  if defined(EMBREE_STATIC_LIB)
+#    define dll_export
+#    define dll_import
+#  else
+#    define dll_export __declspec(dllexport)
+#    define dll_import __declspec(dllimport)
+#  endif
 #else
-#define dll_export __attribute__ ((visibility ("default")))
-#define dll_import 
+#  define dll_export __attribute__ ((visibility ("default")))
+#  define dll_import
 #endif
 
-// -- GODOT start --
 #if defined(__WIN32__) && !defined(__MINGW32__)
-// -- GODOT end --
 #if !defined(__noinline)
 #define __noinline             __declspec(noinline)
 #endif
@@ -151,9 +154,7 @@
   #define DELETED  = delete
 #endif
 
-// -- GODOT start --
 #if !defined(likely)
-// -- GODOT end --
 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
 #define   likely(expr) (expr)
 #define unlikely(expr) (expr)
@@ -161,9 +162,7 @@
 #define   likely(expr) __builtin_expect((bool)(expr),true )
 #define unlikely(expr) __builtin_expect((bool)(expr),false)
 #endif
-// -- GODOT start --
 #endif
-// -- GODOT end --
 
 ////////////////////////////////////////////////////////////////////////////////
 /// Error handling and debugging
@@ -252,6 +251,7 @@ __forceinline std::string toString(long long value) {
 #pragma warning(disable:4800) // forcing value to bool 'true' or 'false' (performance warning)
 //#pragma warning(disable:4267) // '=' : conversion from 'size_t' to 'unsigned long', possible loss of data
 #pragma warning(disable:4244) // 'argument' : conversion from 'ssize_t' to 'unsigned int', possible loss of data
+#pragma warning(disable:4267) // conversion from 'size_t' to 'const int', possible loss of data
 //#pragma warning(disable:4355) // 'this' : used in base member initializer list
 //#pragma warning(disable:391 ) // '<=' : signed / unsigned mismatch
 //#pragma warning(disable:4018) // '<' : signed / unsigned mismatch

+ 52 - 10
thirdparty/embree/common/sys/sysinfo.cpp

@@ -21,7 +21,11 @@ namespace embree
   
   std::string getPlatformName() 
   {
-#if defined(__LINUX__) && !defined(__64BIT__)
+#if defined(__ANDROID__) && !defined(__64BIT__)
+    return "Android (32bit)";
+#elif defined(__ANDROID__) && defined(__64BIT__)
+    return "Android (64bit)";
+#elif defined(__LINUX__) && !defined(__64BIT__)
     return "Linux (32bit)";
 #elif defined(__LINUX__) && defined(__64BIT__)
     return "Linux (64bit)";
@@ -248,9 +252,7 @@ namespace embree
 #if defined(__X86_ASM__)
   __noinline int64_t get_xcr0() 
   {
-// -- GODOT start --
-#if defined (__WIN32__) && !defined (__MINGW32__)
-// -- GODOT end --
+#if defined (__WIN32__) && !defined (__MINGW32__) && defined(_XCR_XFEATURE_ENABLED_MASK)
     int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32
     xcr0 = _xgetbv(0);
     return xcr0;
@@ -337,9 +339,24 @@ namespace embree
     if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI;
 
     return cpu_features;
-#elif defined(__ARM_NEON)
-    /* emulated features with sse2neon */
-    return CPU_FEATURE_SSE|CPU_FEATURE_SSE2|CPU_FEATURE_XMM_ENABLED;
+
+#elif defined(__ARM_NEON) || defined(__EMSCRIPTEN__)
+
+    int cpu_features = CPU_FEATURE_NEON|CPU_FEATURE_SSE|CPU_FEATURE_SSE2;
+    cpu_features |= CPU_FEATURE_SSE3|CPU_FEATURE_SSSE3|CPU_FEATURE_SSE42;
+    cpu_features |= CPU_FEATURE_XMM_ENABLED;
+    cpu_features |= CPU_FEATURE_YMM_ENABLED;
+    cpu_features |= CPU_FEATURE_SSE41 | CPU_FEATURE_RDRAND | CPU_FEATURE_F16C;
+    cpu_features |= CPU_FEATURE_POPCNT;
+    cpu_features |= CPU_FEATURE_AVX;
+    cpu_features |= CPU_FEATURE_AVX2;
+    cpu_features |= CPU_FEATURE_FMA3;
+    cpu_features |= CPU_FEATURE_LZCNT;
+    cpu_features |= CPU_FEATURE_BMI1;
+    cpu_features |= CPU_FEATURE_BMI2;
+    cpu_features |= CPU_FEATURE_NEON_2X;
+    return cpu_features;
+
 #else
     /* Unknown CPU. */
     return 0;
@@ -376,6 +393,8 @@ namespace embree
     if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL ";
     if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA ";
     if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI ";
+    if (features & CPU_FEATURE_NEON) str += "NEON ";
+    if (features & CPU_FEATURE_NEON_2X) str += "2xNEON ";
     return str;
   }
   
@@ -390,6 +409,9 @@ namespace embree
     if (isa == AVX) return "AVX";
     if (isa == AVX2) return "AVX2";
     if (isa == AVX512) return "AVX512";
+
+    if (isa == NEON) return "NEON";
+    if (isa == NEON_2X) return "2xNEON";
     return "UNKNOWN";
   }
 
@@ -410,6 +432,9 @@ namespace embree
     if (hasISA(features,AVXI)) v += "AVXI ";
     if (hasISA(features,AVX2)) v += "AVX2 ";
     if (hasISA(features,AVX512)) v += "AVX512 ";
+
+    if (hasISA(features,NEON)) v += "NEON ";
+    if (hasISA(features,NEON_2X)) v += "2xNEON ";
     return v;
   }
 }
@@ -613,6 +638,10 @@ namespace embree
 #include <sys/time.h>
 #include <pthread.h>
 
+#if defined(__EMSCRIPTEN__)
+#include <emscripten.h>
+#endif
+
 namespace embree
 {
   unsigned int getNumberOfLogicalThreads() 
@@ -620,12 +649,25 @@ namespace embree
     static int nThreads = -1;
     if (nThreads != -1) return nThreads;
 
-// -- GODOT start --
-// #if defined(__MACOSX__)
 #if defined(__MACOSX__) || defined(__ANDROID__)
-// -- GODOT end --
     nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
     assert(nThreads);
+#elif defined(__EMSCRIPTEN__)
+    // WebAssembly supports pthreads, but not pthread_getaffinity_np. Get the number of logical
+    // threads from the browser or Node.js using JavaScript.
+    nThreads = MAIN_THREAD_EM_ASM_INT({
+        const isBrowser = typeof window !== 'undefined';
+        const isNode = typeof process !== 'undefined' && process.versions != null &&
+            process.versions.node != null;
+        if (isBrowser) {
+            // Return 1 if the browser does not expose hardwareConcurrency.
+            return window.navigator.hardwareConcurrency || 1;
+        } else if (isNode) {
+            return require('os').cpus().length;
+        } else {
+            return 1;
+        }
+    });
 #else
     cpu_set_t set;
     if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)

+ 11 - 2
thirdparty/embree/common/sys/sysinfo.h

@@ -55,7 +55,12 @@
 #  define isa sse
 #  define ISA SSE
 #  define ISA_STR "SSE"
-#else 
+#elif defined(__ARM_NEON)
+// NOTE(LTE): Use sse2 for `isa` for the compatibility at the moment.
+#define isa sse2
+#define ISA NEON
+#define ISA_STR "NEON"
+#else
 #error Unknown ISA
 #endif
 
@@ -133,7 +138,9 @@ namespace embree
   static const int CPU_FEATURE_XMM_ENABLED = 1 << 25;
   static const int CPU_FEATURE_YMM_ENABLED = 1 << 26;
   static const int CPU_FEATURE_ZMM_ENABLED = 1 << 27;
- 
+  static const int CPU_FEATURE_NEON = 1 << 28;
+  static const int CPU_FEATURE_NEON_2X = 1 << 29;
+
   /*! get CPU features */
   int getCPUFeatures();
 
@@ -154,6 +161,8 @@ namespace embree
   static const int AVXI   = AVX | CPU_FEATURE_F16C | CPU_FEATURE_RDRAND;
   static const int AVX2   = AVXI | CPU_FEATURE_AVX2 | CPU_FEATURE_FMA3 | CPU_FEATURE_BMI1 | CPU_FEATURE_BMI2 | CPU_FEATURE_LZCNT;
   static const int AVX512 = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED;
+  static const int NEON = CPU_FEATURE_NEON | CPU_FEATURE_SSE | CPU_FEATURE_SSE2;
+  static const int NEON_2X = CPU_FEATURE_NEON_2X | AVX2;
 
   /*! converts ISA bitvector into a string */
   std::string stringOfISA(int features);

+ 23 - 12
thirdparty/embree/common/sys/thread.cpp

@@ -10,6 +10,9 @@
 #include "../simd/arm/emulation.h"
 #else
 #include <xmmintrin.h>
+#if defined(__EMSCRIPTEN__)
+#include "../simd/wasm/emulation.h"
+#endif
 #endif
 
 #if defined(PTHREADS_WIN32)
@@ -158,9 +161,7 @@ namespace embree
 /// Linux Platform
 ////////////////////////////////////////////////////////////////////////////////
 
-// -- GODOT start --
 #if defined(__LINUX__) && !defined(__ANDROID__)
-// -- GODOT end --
 
 #include <fstream>
 #include <sstream>
@@ -219,6 +220,8 @@ namespace embree
 
     /* find correct thread to affinitize to */
     cpu_set_t set;
+    CPU_ZERO(&set);
+    
     if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
     {
       for (int i=0, j=0; i<CPU_SETSIZE; i++)
@@ -241,7 +244,8 @@ namespace embree
   {
     cpu_set_t cset;
     CPU_ZERO(&cset);
-    size_t threadID = mapThreadID(affinity);
+    //size_t threadID = mapThreadID(affinity); // this is not working properly in LXC containers when some processors are disabled
+    size_t threadID = affinity;
     CPU_SET(threadID, &cset);
 
     pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
@@ -249,7 +253,6 @@ namespace embree
 }
 #endif
 
-// -- GODOT start --
 ////////////////////////////////////////////////////////////////////////////////
 /// Android Platform
 ////////////////////////////////////////////////////////////////////////////////
@@ -269,7 +272,6 @@ namespace embree
   }
 }
 #endif
-// -- GODOT end --
 
 ////////////////////////////////////////////////////////////////////////////////
 /// FreeBSD Platform
@@ -293,6 +295,21 @@ namespace embree
 }
 #endif
 
+////////////////////////////////////////////////////////////////////////////////
+/// WebAssembly Platform
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__EMSCRIPTEN__)
+namespace embree
+{
+  /*! set affinity of the calling thread */
+  void setAffinity(ssize_t affinity)
+  {
+      // Setting thread affinity is not supported in WASM.
+  }
+}
+#endif
+
 ////////////////////////////////////////////////////////////////////////////////
 /// MacOSX Platform
 ////////////////////////////////////////////////////////////////////////////////
@@ -379,9 +396,7 @@ namespace embree
     pthread_attr_destroy(&attr);
 
     /* set affinity */
-// -- GODOT start --
 #if defined(__LINUX__) && !defined(__ANDROID__)
-// -- GODOT end --
     if (threadID >= 0) {
       cpu_set_t cset;
       CPU_ZERO(&cset);
@@ -396,7 +411,6 @@ namespace embree
       CPU_SET(threadID, &cset);
       pthread_setaffinity_np(*tid, sizeof(cset), &cset);
     }
-// -- GODOT start --
 #elif defined(__ANDROID__)
     if (threadID >= 0) {
       cpu_set_t cset;
@@ -405,7 +419,6 @@ namespace embree
       sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset);
     }
 #endif
-// -- GODOT end --
 
     return thread_t(tid);
   }
@@ -424,14 +437,12 @@ namespace embree
 
   /*! destroy a hardware thread by its handle */
   void destroyThread(thread_t tid) {
-// -- GODOT start --
 #if defined(__ANDROID__)
-    FATAL("Can't destroy threads on Android.");
+    FATAL("Can't destroy threads on Android."); // pthread_cancel not implemented.
 #else
     pthread_cancel(*(pthread_t*)tid);
     delete (pthread_t*)tid;
 #endif
-// -- GODOT end --
   }
 
   /*! creates thread local storage */

+ 8 - 6
thirdparty/embree/common/sys/vector.h

@@ -127,14 +127,15 @@ namespace embree
       {
         assert(!empty());
         size_active--;
-        alloc.destroy(&items[size_active]);
+        items[size_active].~T();
       }
 
       __forceinline void clear() 
       {
         /* destroy elements */
-        for (size_t i=0; i<size_active; i++)
-          alloc.destroy(&items[i]);
+        for (size_t i=0; i<size_active; i++){
+          items[i].~T();
+        }
         
         /* free memory */
         alloc.deallocate(items,size_alloced); 
@@ -178,8 +179,9 @@ namespace embree
         /* destroy elements */
         if (new_active < size_active) 
         {
-          for (size_t i=new_active; i<size_active; i++)
-            alloc.destroy(&items[i]);
+          for (size_t i=new_active; i<size_active; i++){
+            items[i].~T();
+          }
           size_active = new_active;
         }
 
@@ -195,7 +197,7 @@ namespace embree
         items = alloc.allocate(new_alloced);
         for (size_t i=0; i<size_active; i++) {
           ::new (&items[i]) T(std::move(old_items[i]));
-          alloc.destroy(&old_items[i]);
+          old_items[i].~T();
         }
 
         for (size_t i=size_active; i<new_active; i++) {

+ 1 - 1
thirdparty/embree/common/tasking/taskschedulerinternal.h

@@ -143,7 +143,7 @@ namespace embree
 	/* allocate new task on right side of stack */
         size_t oldStackPtr = stackPtr;
         TaskFunction* func = new (alloc(sizeof(ClosureTaskFunction<Closure>))) ClosureTaskFunction<Closure>(closure);
-        new (&tasks[right]) Task(func,thread.task,oldStackPtr,size);
+        new (&(tasks[right.load()])) Task(func,thread.task,oldStackPtr,size);
         right++;
 
 	/* also move left pointer */

+ 1 - 7
thirdparty/embree/common/tasking/taskschedulertbb.h

@@ -11,14 +11,8 @@
 #include "../sys/condition.h"
 #include "../sys/ref.h"
 
-#if defined(__WIN32__)
-// -- GODOT start --
-#if !defined(NOMINMAX)
-// -- GODOT end --
+#if defined(__WIN32__) && !defined(NOMINMAX)
 #  define NOMINMAX
-// -- GODOT start --
-#endif
-// -- GODOT end --
 #endif
 
 // We need to define these to avoid implicit linkage against

+ 1 - 3
thirdparty/embree/include/embree3/rtcore_common.h

@@ -19,9 +19,7 @@ typedef int ssize_t;
 #endif
 #endif
 
-// -- GODOT start --
-#if defined(_WIN32) && defined(_MSC_VER)
-// -- GODOT end --
+#if defined(_WIN32) && !defined(__MINGW32__)
 #  define RTC_ALIGN(...) __declspec(align(__VA_ARGS__))
 #else
 #  define RTC_ALIGN(...) __attribute__((aligned(__VA_ARGS__)))

+ 9 - 8
thirdparty/embree/include/embree3/rtcore_config.h

@@ -1,4 +1,3 @@
-
 // Copyright 2009-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
@@ -6,23 +5,25 @@
 
 #define RTC_VERSION_MAJOR 3
 #define RTC_VERSION_MINOR 13
-#define RTC_VERSION_PATCH 0
-#define RTC_VERSION 31300
-#define RTC_VERSION_STRING "3.13.0"
+#define RTC_VERSION_PATCH 5
+#define RTC_VERSION 31305
+#define RTC_VERSION_STRING "3.13.5"
 
 #define RTC_MAX_INSTANCE_LEVEL_COUNT 1
 
 #define EMBREE_MIN_WIDTH 0
 #define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
 
-#define EMBREE_STATIC_LIB
-/* #undef EMBREE_API_NAMESPACE */
+#if !defined(EMBREE_STATIC_LIB)
+#   define EMBREE_STATIC_LIB
+#endif
+/* #undef EMBREE_API_NAMESPACE*/
 
 #if defined(EMBREE_API_NAMESPACE)
 #  define RTC_NAMESPACE
-#  define RTC_NAMESPACE_BEGIN namespace  {
+#  define RTC_NAMESPACE_BEGIN namespace {
 #  define RTC_NAMESPACE_END }
-#  define RTC_NAMESPACE_USE using namespace ;
+#  define RTC_NAMESPACE_USE using namespace;
 #  define RTC_API_EXTERN_C
 #  undef EMBREE_API_NAMESPACE
 #else

+ 1 - 1
thirdparty/embree/include/embree3/rtcore_quaternion.h

@@ -8,7 +8,7 @@
 RTC_NAMESPACE_BEGIN
 
 /*
- * Structure for transformation respresentation as a matrix decomposition using
+ * Structure for transformation representation as a matrix decomposition using
  * a quaternion
  */
 struct RTC_ALIGN(16) RTCQuaternionDecomposition

+ 4 - 1
thirdparty/embree/include/embree3/rtcore_scene.h

@@ -47,9 +47,12 @@ RTC_API void rtcAttachGeometryByID(RTCScene scene, RTCGeometry geometry, unsigne
 /* Detaches the geometry from the scene. */
 RTC_API void rtcDetachGeometry(RTCScene scene, unsigned int geomID);
 
-/* Gets a geometry handle from the scene. */
+/* Gets a geometry handle from the scene. This function is not thread safe and should get used during rendering. */
 RTC_API RTCGeometry rtcGetGeometry(RTCScene scene, unsigned int geomID);
 
+/* Gets a geometry handle from the scene. This function is thread safe and should NOT get used during rendering. */
+RTC_API RTCGeometry rtcGetGeometryThreadSafe(RTCScene scene, unsigned int geomID);
+
 
 /* Commits the scene. */
 RTC_API void rtcCommitScene(RTCScene scene);

+ 1 - 1
thirdparty/embree/kernels/builders/bvh_builder_morton.h

@@ -411,7 +411,7 @@ namespace embree
           ReductionTy bounds[MAX_BRANCHING_FACTOR];
           if (current.size() > singleThreadThreshold)
           {
-            /*! parallel_for is faster than spawing sub-tasks */
+            /*! parallel_for is faster than spawning sub-tasks */
             parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
                 for (size_t i=r.begin(); i<r.end(); i++) {
                   bounds[i] = recurse(depth+1,children[i],nullptr,true);

+ 2 - 2
thirdparty/embree/kernels/builders/bvh_builder_msmblur.h

@@ -374,7 +374,7 @@ namespace embree
 
             const size_t begin = set.begin();
             const size_t end   = set.end();
-            const size_t center = (begin + end)/2;
+            const size_t center = (begin + end + 1) / 2;
 
             PrimInfoMB linfo = empty;
             for (size_t i=begin; i<center; i++)
@@ -594,7 +594,7 @@ namespace embree
             /* spawn tasks */
             if (unlikely(current.size() > cfg.singleThreadThreshold))
             {
-              /*! parallel_for is faster than spawing sub-tasks */
+              /*! parallel_for is faster than spawning sub-tasks */
               parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
                   for (size_t i=r.begin(); i<r.end(); i++) {
                     values[i] = recurse(children[i],nullptr,true);

+ 1 - 1
thirdparty/embree/kernels/builders/bvh_builder_sah.h

@@ -298,7 +298,7 @@ namespace embree
             /* spawn tasks */
             if (current.size() > cfg.singleThreadThreshold)
             {
-              /*! parallel_for is faster than spawing sub-tasks */
+              /*! parallel_for is faster than spawning sub-tasks */
               parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here
                   for (size_t i=r.begin(); i<r.end(); i++) {
                     values[i] = recurse(children[i],nullptr,true);

+ 2 - 4
thirdparty/embree/kernels/builders/heuristic_binning.h

@@ -57,14 +57,12 @@ namespace embree
         __forceinline Vec3ia bin(const Vec3fa& p) const 
         {
           const vint4 i = floori((vfloat4(p)-ofs)*scale);
-#if 1
           assert(i[0] >= 0 && (size_t)i[0] < num); 
           assert(i[1] >= 0 && (size_t)i[1] < num);
           assert(i[2] >= 0 && (size_t)i[2] < num);
-          return Vec3ia(i);
-#else
+          
+          // we clamp to handle corner cases that could calculate out of bounds bin
           return Vec3ia(clamp(i,vint4(0),vint4(num-1)));
-#endif
         }
 
         /*! faster but unsafe binning */

+ 1 - 1
thirdparty/embree/kernels/builders/heuristic_openmerge_array.h

@@ -275,7 +275,7 @@ namespace embree
               openNodesBasedOnExtend(set);
 #endif
 
-            /* disable opening when unsufficient space for opening a node available */
+            /* disable opening when insufficient space for opening a node available */
             if (set.ext_range_size() < max_open_size-1) 
               set.set_ext_range(set.end()); /* disable opening */
           }

+ 32 - 79
thirdparty/embree/kernels/builders/heuristic_spatial.h

@@ -159,27 +159,25 @@ namespace embree
         assert(binID < BINS);
         bounds  [binID][dim].extend(b);        
       }
-      
-      /*! bins an array of triangles */
-      template<typename SplitPrimitive>
-        __forceinline void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t N, const SpatialBinMapping<BINS>& mapping)
+
+      /*! bins an array of primitives */
+      template<typename PrimitiveSplitterFactory>
+        __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
       {
-        for (size_t i=0; i<N; i++)
+        for (size_t i=begin; i<end; i++)
         {
-          const PrimRef prim = prims[i];
+          const PrimRef& prim = source[i];
           unsigned splits = prim.geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);
-
-          if (unlikely(splits == 1))
+          
+          if (unlikely(splits <= 1))
           {
             const vint4 bin = mapping.bin(center(prim.bounds()));
             for (size_t dim=0; dim<3; dim++) 
             {
               assert(bin[dim] >= (int)0 && bin[dim] < (int)BINS);
-              numBegin[bin[dim]][dim]++;
-              numEnd  [bin[dim]][dim]++;
-              bounds  [bin[dim]][dim].extend(prim.bounds());
+              add(dim,bin[dim],bin[dim],bin[dim],prim.bounds());
             }
-          } 
+          }
           else
           {
             const vint4 bin0 = mapping.bin(prim.bounds().lower);
@@ -187,89 +185,44 @@ namespace embree
             
             for (size_t dim=0; dim<3; dim++) 
             {
+              if (unlikely(mapping.invalid(dim))) 
+                continue;
+              
               size_t bin;
-              PrimRef rest = prim;
               size_t l = bin0[dim];
               size_t r = bin1[dim];
-
+              
               // same bin optimization
               if (likely(l == r)) 
               {
-                numBegin[l][dim]++;
-                numEnd  [l][dim]++;
-                bounds  [l][dim].extend(prim.bounds());
+                add(dim,l,l,l,prim.bounds());
                 continue;
               }
-
-              for (bin=(size_t)bin0[dim]; bin<(size_t)bin1[dim]; bin++) 
+              size_t bin_start = bin0[dim];
+              size_t bin_end   = bin1[dim];
+              BBox3fa rest = prim.bounds();
+              
+              /* assure that split position always overlaps the primitive bounds */
+              while (bin_start < bin_end && mapping.pos(bin_start+1,dim) <= rest.lower[dim]) bin_start++;
+              while (bin_start < bin_end && mapping.pos(bin_end    ,dim) >= rest.upper[dim]) bin_end--;
+              
+              const auto splitter = splitterFactory(prim);
+              for (bin=bin_start; bin<bin_end; bin++) 
               {
                 const float pos = mapping.pos(bin+1,dim);
+                BBox3fa left,right;
+                splitter(rest,dim,pos,left,right);
                 
-                PrimRef left,right;
-                splitPrimitive(rest,(int)dim,pos,left,right);
-                if (unlikely(left.bounds().empty())) l++;                
-                bounds[bin][dim].extend(left.bounds());
+                if (unlikely(left.empty())) l++;                
+                extend(dim,bin,left);
                 rest = right;
               }
-              if (unlikely(rest.bounds().empty())) r--;
-              numBegin[l][dim]++;
-              numEnd  [r][dim]++;
-              bounds  [bin][dim].extend(rest.bounds());
+              if (unlikely(rest.empty())) r--;
+              add(dim,l,r,bin,rest);
             }
-          }
+          }              
         }
       }
-      
-      /*! bins a range of primitives inside an array */
-      template<typename SplitPrimitive>
-        void bin(const SplitPrimitive& splitPrimitive, const PrimRef* prims, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping) {
-	bin(splitPrimitive,prims+begin,end-begin,mapping);
-      }
-
-      /*! bins an array of primitives */
-      template<typename PrimitiveSplitterFactory>
-        __forceinline void bin2(const PrimitiveSplitterFactory& splitterFactory, const PrimRef* source, size_t begin, size_t end, const SpatialBinMapping<BINS>& mapping)
-      {
-        for (size_t i=begin; i<end; i++)
-        {
-          const PrimRef &prim = source[i];
-          const vint4 bin0 = mapping.bin(prim.bounds().lower);
-          const vint4 bin1 = mapping.bin(prim.bounds().upper);
-          
-          for (size_t dim=0; dim<3; dim++) 
-          {
-            if (unlikely(mapping.invalid(dim))) 
-              continue;
-            
-            size_t bin;
-            size_t l = bin0[dim];
-            size_t r = bin1[dim];
-            
-            // same bin optimization
-            if (likely(l == r)) 
-            {
-              add(dim,l,l,l,prim.bounds());
-              continue;
-            }
-            const size_t bin_start = bin0[dim];
-            const size_t bin_end   = bin1[dim];
-            BBox3fa rest = prim.bounds();
-            const auto splitter = splitterFactory(prim);
-            for (bin=bin_start; bin<bin_end; bin++) 
-            {
-              const float pos = mapping.pos(bin+1,dim);
-              BBox3fa left,right;
-              splitter(rest,dim,pos,left,right);
-              if (unlikely(left.empty())) l++;                
-              extend(dim,bin,left);
-              rest = right;
-            }
-            if (unlikely(rest.empty())) r--;
-            add(dim,l,r,bin,rest);
-          }
-        }              
-      }
-
 
 
       /*! bins an array of primitives */

+ 8 - 7
thirdparty/embree/kernels/builders/heuristic_spatial_array.h

@@ -241,7 +241,7 @@ namespace embree
           SpatialBinner binner(empty); 
           const SpatialBinMapping<SPATIAL_BINS> mapping(set);
           binner.bin2(splitterFactory,prims0,set.begin(),set.end(),mapping);
-          /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/
+          /* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/
           return binner.best(mapping,logBlockSize); //,set.ext_size());
         }
 
@@ -256,7 +256,7 @@ namespace embree
                                      binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping);
                                      return binner; },
                                    [&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); });
-          /* todo: best spatial split not exeeding the extended range does not provide any benefit ?*/
+          /* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/
           return binner.best(mapping,logBlockSize); //,set.ext_size());
         }
 
@@ -286,6 +286,7 @@ namespace embree
                 //int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
                 //int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
                 //if (unlikely(bin0 < split.pos && bin1 >= split.pos))
+
                 if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos))
                 {
                   assert(splits > 1);
@@ -384,8 +385,8 @@ namespace embree
           new (&lset) PrimInfoExtRange(begin,center,center,local_left);
           new (&rset) PrimInfoExtRange(center,end,end,local_right);
 
-          assert(area(lset.geomBounds) >= 0.0f);
-          assert(area(rset.geomBounds) >= 0.0f);
+          assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);
+          assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);
           return std::pair<size_t,size_t>(left_weight,right_weight);
         }
 
@@ -410,7 +411,7 @@ namespace embree
                                               begin,end,local_left,local_right,
                                               [&] (const PrimRef& ref) {
                                                 const Vec3fa c = ref.bounds().center();
-                                                return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); 
+                                                return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask);
                                               },
                                               [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });
 
@@ -419,8 +420,8 @@ namespace embree
           
           new (&lset) PrimInfoExtRange(begin,center,center,local_left);
           new (&rset) PrimInfoExtRange(center,end,end,local_right);
-          assert(area(lset.geomBounds) >= 0.0f);
-          assert(area(rset.geomBounds) >= 0.0f);
+          assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);
+          assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);
           return std::pair<size_t,size_t>(left_weight,right_weight);
         }
 

+ 0 - 4
thirdparty/embree/kernels/builders/primrefgen.cpp

@@ -184,9 +184,7 @@ namespace embree
 
     // special variants for grid meshes
 
-// -- GODOT start --
 #if defined(EMBREE_GEOMETRY_GRID)
-// -- GODOT end --
     PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids)
     {
       PrimInfo pinfo(empty);
@@ -296,9 +294,7 @@ namespace embree
 
       return pinfo;
     }
-// -- GODOT start --
 #endif
-// -- GODOT end --
     
     // ====================================================================================================
     // ====================================================================================================

+ 9 - 4
thirdparty/embree/kernels/builders/primrefgen_presplit.h

@@ -266,7 +266,7 @@ namespace embree
       /* anything to split ? */
       if (center < numPrimitives)
       {
-        const size_t numPrimitivesToSplit = numPrimitives - center;
+        size_t numPrimitivesToSplit = numPrimitives - center;
         assert(presplitItem[center].priority >= 1.0f);
 
         /* sort presplit items in ascending order */
@@ -279,8 +279,8 @@ namespace embree
             });
           );
 
-        unsigned int *const primOffset0 = (unsigned int*)tmp_presplitItem;
-        unsigned int *const primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit;
+        unsigned int* primOffset0 = (unsigned int*)tmp_presplitItem;
+        unsigned int* primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit;
 
         /* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
         const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
@@ -317,11 +317,16 @@ namespace embree
             sum += numSubPrims;
           }
           new_center++;
+
+          primOffset0 += new_center - center;
+          numPrimitivesToSplit -= new_center - center;
           center = new_center;
+          assert(numPrimitivesToSplit == (numPrimitives - center));
         }
 
         /* parallel prefix sum to compute offsets for storing sub-primitives */
         const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
+        assert(numPrimitives+offset <= alloc_numPrimitives);
 
         /* iterate over range, and split primitives into sub primitives and append them to prims array */		    
         parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
@@ -338,7 +343,7 @@ namespace embree
               unsigned int numSubPrims = 0;
               splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
               const size_t newID = numPrimitives + primOffset1[j-center];              
-              assert(newID+numSubPrims <= alloc_numPrimitives);
+              assert(newID+numSubPrims-1 <= alloc_numPrimitives);
               prims[primrefID] = subPrims[0];
               for (size_t i=1;i<numSubPrims;i++)
                 prims[newID+i-1] = subPrims[i];

+ 15 - 13
thirdparty/embree/kernels/builders/splitter.h

@@ -128,28 +128,30 @@ namespace embree
         const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;
         const QuadMesh* mesh = (const QuadMesh*) scene->get(prim.geomID() & mask );  
         QuadMesh::Quad quad = mesh->quad(prim.primID());
-        v[0] = mesh->vertex(quad.v[0]);
-        v[1] = mesh->vertex(quad.v[1]);
-        v[2] = mesh->vertex(quad.v[2]);
-        v[3] = mesh->vertex(quad.v[3]);
-        v[4] = mesh->vertex(quad.v[0]);
-        inv_length[0] = Vec3fa(1.0f) / (v[1]-v[0]);
-        inv_length[1] = Vec3fa(1.0f) / (v[2]-v[1]);
-        inv_length[2] = Vec3fa(1.0f) / (v[3]-v[2]);
-        inv_length[3] = Vec3fa(1.0f) / (v[0]-v[3]);
+        v[0] = mesh->vertex(quad.v[1]);
+        v[1] = mesh->vertex(quad.v[2]);
+        v[2] = mesh->vertex(quad.v[3]);
+        v[3] = mesh->vertex(quad.v[0]);
+        v[4] = mesh->vertex(quad.v[1]);
+        v[5] = mesh->vertex(quad.v[3]);
+        inv_length[0] = Vec3fa(1.0f) / (v[1] - v[0]);
+        inv_length[1] = Vec3fa(1.0f) / (v[2] - v[1]);
+        inv_length[2] = Vec3fa(1.0f) / (v[3] - v[2]);
+        inv_length[3] = Vec3fa(1.0f) / (v[4] - v[3]);
+        inv_length[4] = Vec3fa(1.0f) / (v[5] - v[4]);
       }
       
       __forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
-        splitPolygon<4>(prim,dim,pos,v,left_o,right_o);
+        splitPolygon<5>(prim,dim,pos,v,left_o,right_o);
       }
       
       __forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
-        splitPolygon<4>(prim,dim,pos,v,inv_length,left_o,right_o);
+        splitPolygon<5>(prim,dim,pos,v,inv_length,left_o,right_o);
       }
       
     private:
-      Vec3fa v[5];
-      Vec3fa inv_length[4];
+      Vec3fa v[6];
+      Vec3fa inv_length[5];
     };
     
     struct QuadSplitterFactory

+ 1 - 1
thirdparty/embree/kernels/bvh/bvh.cpp

@@ -183,7 +183,7 @@ namespace embree
   template class BVHN<8>;
 #endif
 
-#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)
+#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42) || defined(__aarch64__)
   template class BVHN<4>;
 #endif
 }

+ 917 - 0
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp

@@ -0,0 +1,917 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "bvh_intersector_hybrid.h"
+#include "bvh_traverser1.h"
+#include "node_intersector1.h"
+#include "node_intersector_packet.h"
+
+#include "../geometry/intersector_iterators.h"
+#include "../geometry/triangle_intersector.h"
+#include "../geometry/trianglev_intersector.h"
+#include "../geometry/trianglev_mb_intersector.h"
+#include "../geometry/trianglei_intersector.h"
+#include "../geometry/quadv_intersector.h"
+#include "../geometry/quadi_intersector.h"
+#include "../geometry/curveNv_intersector.h"
+#include "../geometry/curveNi_intersector.h"
+#include "../geometry/curveNi_mb_intersector.h"
+#include "../geometry/linei_intersector.h"
+#include "../geometry/subdivpatch1_intersector.h"
+#include "../geometry/object_intersector.h"
+#include "../geometry/instance_intersector.h"
+#include "../geometry/subgrid_intersector.h"
+#include "../geometry/subgrid_mb_intersector.h"
+#include "../geometry/curve_intersector_virtual.h"
+
+#define SWITCH_DURING_DOWN_TRAVERSAL 1
+#define FORCE_SINGLE_MODE 0
+
+#define ENABLE_FAST_COHERENT_CODEPATHS 1
+
+namespace embree
+{
+  namespace isa
+  {
+    template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+    void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
+                                                                                                const BVH* bvh,
+                                                                                                NodeRef root,
+                                                                                                size_t k,
+                                                                                                Precalculations& pre,
+                                                                                                RayHitK<K>& ray,
+                                                                                                const TravRayK<K, robust>& tray,
+                                                                                                IntersectContext* context)
+    {
+      /* stack state */
+      StackItemT<NodeRef> stack[stackSizeSingle];  // stack of nodes
+      StackItemT<NodeRef>* stackPtr = stack + 1;   // current stack pointer
+      StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
+      stack[0].ptr = root;
+      stack[0].dist = neg_inf;
+
+      /* load the ray into SIMD registers */
+      TravRay<N,robust> tray1;
+      tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
+
+      /* pop loop */
+      while (true) pop:
+      {
+        /* pop next node */
+        if (unlikely(stackPtr == stack)) break;
+        stackPtr--;
+        NodeRef cur = NodeRef(stackPtr->ptr);
+
+        /* if popped node is too far, pop next one */
+        if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
+          continue;
+
+        /* downtraversal loop */
+        while (true)
+        {
+          /* intersect node */
+          size_t mask; vfloat<N> tNear;
+          STAT3(normal.trav_nodes, 1, 1, 1);
+          bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
+          if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
+
+          /* if no child is hit, pop next node */
+          if (unlikely(mask == 0))
+            goto pop;
+
+          /* select next child and push other children */
+          BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
+        }
+
+        /* this is a leaf node */
+        assert(cur != BVH::emptyNode);
+        STAT3(normal.trav_leaves, 1, 1, 1);
+        size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
+
+        size_t lazy_node = 0;
+        PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
+
+        tray1.tfar = ray.tfar[k];
+
+        if (unlikely(lazy_node)) {
+          stackPtr->ptr = lazy_node;
+          stackPtr->dist = neg_inf;
+          stackPtr++;
+        }
+      }
+    }
+
+    template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+    void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
+                                                                                               Accel::Intersectors* __restrict__ This,
+                                                                                               RayHitK<K>& __restrict__ ray,
+                                                                                               IntersectContext* __restrict__ context)
+    {
+      BVH* __restrict__ bvh = (BVH*)This->ptr;
+      
+      /* we may traverse an empty BVH in case all geometry was invalid */
+      if (bvh->root == BVH::emptyNode)
+        return;
+            
+#if ENABLE_FAST_COHERENT_CODEPATHS == 1
+      assert(context);
+      if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
+      {
+        intersectCoherent(valid_i, This, ray, context);
+        return;
+      }
+#endif
+
+      /* filter out invalid rays */
+      vbool<K> valid = *valid_i == -1;
+#if defined(EMBREE_IGNORE_INVALID_RAYS)
+      valid &= ray.valid();
+#endif
+
+      /* return if there are no valid rays */
+      size_t valid_bits = movemask(valid);
+
+#if defined(__AVX__)
+      STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
+#endif
+
+      if (unlikely(valid_bits == 0)) return;
+
+      /* verify correct input */
+      assert(all(valid, ray.valid()));
+      assert(all(valid, ray.tnear() >= 0.0f));
+      assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
+      Precalculations pre(valid, ray);
+
+      /* load ray */
+      TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
+      const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
+      const vfloat<K> org_ray_tfar  = max(ray.tfar , 0.0f);
+
+      if (single)
+      {
+        tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
+        tray.tfar  = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
+        
+        for (; valid_bits!=0; ) {
+          const size_t i = bscf(valid_bits);
+          intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
+        }
+        return;
+      }
+
+       /* determine switch threshold based on flags */
+      const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
+
+      vint<K> octant = ray.octant();
+      octant = select(valid, octant, vint<K>(0xffffffff));
+
+      /* test whether we have ray with opposing direction signs in the packet */
+      bool split = false;
+      {
+        size_t bits = valid_bits;
+        vbool<K> vsplit( false );
+        do
+        {
+          const size_t valid_index = bsf(bits);
+          vbool<K> octant_valid = octant[valid_index] == octant;
+          bits &= ~(size_t)movemask(octant_valid);
+          vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
+        } while (bits);
+        if (any(vsplit)) split = true;
+      }
+
+      do
+      {
+        const size_t valid_index = bsf(valid_bits);
+        const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
+        const vint<K> count_diff_octant = \
+          ((diff_octant >> 2) & 1) +
+          ((diff_octant >> 1) & 1) +
+          ((diff_octant >> 0) & 1);
+
+        vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
+        if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down 
+
+
+        octant = select(octant_valid,vint<K>(0xffffffff),octant);
+        valid_bits &= ~(size_t)movemask(octant_valid);
+
+        tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
+        tray.tfar  = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
+
+        /* allocate stack and push root node */
+        vfloat<K> stack_near[stackSizeChunk];
+        NodeRef stack_node[stackSizeChunk];
+        stack_node[0] = BVH::invalidNode;
+        stack_near[0] = inf;
+        stack_node[1] = bvh->root;
+        stack_near[1] = tray.tnear;
+        NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
+        NodeRef* __restrict__ sptr_node = stack_node + 2;
+        vfloat<K>* __restrict__ sptr_near = stack_near + 2;
+
+        while (1) pop:
+        {
+          /* pop next node from stack */
+          assert(sptr_node > stack_node);
+          sptr_node--;
+          sptr_near--;
+          NodeRef cur = *sptr_node;
+          if (unlikely(cur == BVH::invalidNode)) {
+            assert(sptr_node == stack_node);
+            break;
+          }
+
+          /* cull node if behind closest hit point */
+          vfloat<K> curDist = *sptr_near;
+          const vbool<K> active = curDist < tray.tfar;
+          if (unlikely(none(active)))
+            continue;
+
+          /* switch to single ray traversal */
+#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
+#if FORCE_SINGLE_MODE == 0
+          if (single)
+#endif
+          {
+            size_t bits = movemask(active);
+#if FORCE_SINGLE_MODE == 0
+            if (unlikely(popcnt(bits) <= switchThreshold))
+#endif
+            {
+              for (; bits!=0; ) {
+                const size_t i = bscf(bits);
+                intersect1(This, bvh, cur, i, pre, ray, tray, context);
+              }
+              tray.tfar = min(tray.tfar, ray.tfar);
+              continue;
+            }
+          }
+#endif
+          while (likely(!cur.isLeaf()))
+          {
+            /* process nodes */
+            const vbool<K> valid_node = tray.tfar > curDist;
+            STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
+            const NodeRef nodeRef = cur;
+            const BaseNode* __restrict__ const node = nodeRef.baseNode();
+
+            /* set cur to invalid */
+            cur = BVH::emptyNode;
+            curDist = pos_inf;
+
+            size_t num_child_hits = 0;
+
+            for (unsigned i = 0; i < N; i++)
+            {
+              const NodeRef child = node->children[i];
+              if (unlikely(child == BVH::emptyNode)) break;
+              vfloat<K> lnearP;
+              vbool<K> lhit = valid_node;
+              BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
+
+              /* if we hit the child we choose to continue with that child if it
+                 is closer than the current next child, or we push it onto the stack */
+              if (likely(any(lhit)))
+              {                                
+                assert(sptr_node < stackEnd);
+                assert(child != BVH::emptyNode);
+                const vfloat<K> childDist = select(lhit, lnearP, inf);
+                /* push cur node onto stack and continue with hit child */
+                if (any(childDist < curDist))
+                {
+                  if (likely(cur != BVH::emptyNode)) {
+                    num_child_hits++;
+                    *sptr_node = cur; sptr_node++;
+                    *sptr_near = curDist; sptr_near++;
+                  }
+                  curDist = childDist;
+                  cur = child;
+                }
+
+                /* push hit child onto stack */
+                else {
+                  num_child_hits++;                  
+                  *sptr_node = child; sptr_node++;
+                  *sptr_near = childDist; sptr_near++;
+                }
+              }
+            }
+
+#if defined(__AVX__)
+            //STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
+#endif
+
+            if (unlikely(cur == BVH::emptyNode))
+              goto pop;
+            
+            /* improved distance sorting for 3 or more hits */
+            if (unlikely(num_child_hits >= 2))
+            {
+              if (any(sptr_near[-2] < sptr_near[-1]))
+              {
+                std::swap(sptr_near[-2],sptr_near[-1]);
+                std::swap(sptr_node[-2],sptr_node[-1]);
+              }
+              if (unlikely(num_child_hits >= 3))
+              {
+                if (any(sptr_near[-3] < sptr_near[-1]))
+                {
+                  std::swap(sptr_near[-3],sptr_near[-1]);
+                  std::swap(sptr_node[-3],sptr_node[-1]);
+                }
+                if (any(sptr_near[-3] < sptr_near[-2]))
+                {
+                  std::swap(sptr_near[-3],sptr_near[-2]);
+                  std::swap(sptr_node[-3],sptr_node[-2]);
+                }
+              }
+            }
+
+#if SWITCH_DURING_DOWN_TRAVERSAL == 1
+            if (single)
+            {
+              // seems to be the best place for testing utilization
+              if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
+              {
+                *sptr_node++ = cur;
+                *sptr_near++ = curDist;
+                goto pop;
+              }
+            }
+#endif
+          }
+
+          /* return if stack is empty */
+          if (unlikely(cur == BVH::invalidNode)) {
+            assert(sptr_node == stack_node);
+            break;
+          }
+
+          /* intersect leaf */
+          assert(cur != BVH::emptyNode);
+          const vbool<K> valid_leaf = tray.tfar > curDist;
+          STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
+          if (unlikely(none(valid_leaf))) continue;
+          size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
+
+          size_t lazy_node = 0;
+          PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
+          tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
+
+          if (unlikely(lazy_node)) {
+            *sptr_node = lazy_node; sptr_node++;
+            *sptr_near = neg_inf;   sptr_near++;
+          }
+        }
+      } while(valid_bits);
+    }
+
+
+    template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+    void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
+                                                                                                       Accel::Intersectors* __restrict__ This,
+                                                                                                       RayHitK<K>& __restrict__ ray,
+                                                                                                       IntersectContext* context)
+    {
+      BVH* __restrict__ bvh = (BVH*)This->ptr;
+      
+      /* filter out invalid rays */
+      vbool<K> valid = *valid_i == -1;
+#if defined(EMBREE_IGNORE_INVALID_RAYS)
+      valid &= ray.valid();
+#endif
+
+      /* return if there are no valid rays */
+      size_t valid_bits = movemask(valid);
+      if (unlikely(valid_bits == 0)) return;
+
+      /* verify correct input */
+      assert(all(valid, ray.valid()));
+      assert(all(valid, ray.tnear() >= 0.0f));
+      assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
+      Precalculations pre(valid, ray);
+
+      /* load ray */
+      TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
+      const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
+      const vfloat<K> org_ray_tfar  = max(ray.tfar , 0.0f);
+
+      vint<K> octant = ray.octant();
+      octant = select(valid, octant, vint<K>(0xffffffff));
+
+      do
+      {
+        const size_t valid_index = bsf(valid_bits);
+        const vbool<K> octant_valid = octant[valid_index] == octant;
+        valid_bits &= ~(size_t)movemask(octant_valid);
+
+        tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
+        tray.tfar  = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
+
+        Frustum<robust> frustum;
+        frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
+
+        StackItemT<NodeRef> stack[stackSizeSingle];  // stack of nodes
+        StackItemT<NodeRef>* stackPtr = stack + 1;   // current stack pointer
+        stack[0].ptr  = bvh->root;
+        stack[0].dist = neg_inf;
+
+        while (1) pop:
+        {
+          /* pop next node from stack */
+          if (unlikely(stackPtr == stack)) break;
+
+          stackPtr--;
+          NodeRef cur = NodeRef(stackPtr->ptr);
+
+          /* cull node if behind closest hit point */
+          vfloat<K> curDist = *(float*)&stackPtr->dist;
+          const vbool<K> active = curDist < tray.tfar;
+          if (unlikely(none(active))) continue;
+
+          while (likely(!cur.isLeaf()))
+          {
+            /* process nodes */
+            //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
+            const NodeRef nodeRef = cur;
+            const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
+
+            vfloat<N> fmin;
+            size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
+
+            if (unlikely(!m_frustum_node)) goto pop;
+            cur = BVH::emptyNode;
+            curDist = pos_inf;
+            
+#if defined(__AVX__)
+            //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
+#endif
+            size_t num_child_hits = 0;
+            do {
+              const size_t i = bscf(m_frustum_node);
+              vfloat<K> lnearP;
+              vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
+              STAT3(normal.trav_nodes, 1, 1, 1);
+              BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
+
+              if (likely(any(lhit)))
+              {                                
+                const vfloat<K> childDist = fmin[i];
+                const NodeRef child = node->child(i);
+                BVHN<N>::prefetch(child);
+                if (any(childDist < curDist))
+                {
+                  if (likely(cur != BVH::emptyNode)) {
+                    num_child_hits++;
+                    stackPtr->ptr = cur;
+                    *(float*)&stackPtr->dist = toScalar(curDist);
+                    stackPtr++;
+                  }
+                  curDist = childDist;
+                  cur = child;
+                }
+                /* push hit child onto stack */
+                else {
+                  num_child_hits++;
+                  stackPtr->ptr = child;
+                  *(float*)&stackPtr->dist = toScalar(childDist);
+                  stackPtr++;
+                }                
+              }
+            } while(m_frustum_node);
+
+            if (unlikely(cur == BVH::emptyNode)) goto pop;
+
+            /* improved distance sorting for 3 or more hits */
+            if (unlikely(num_child_hits >= 2))
+            {
+              if (stackPtr[-2].dist < stackPtr[-1].dist)
+                std::swap(stackPtr[-2],stackPtr[-1]);
+              if (unlikely(num_child_hits >= 3))
+              {
+                if (stackPtr[-3].dist < stackPtr[-1].dist)
+                  std::swap(stackPtr[-3],stackPtr[-1]);
+                if (stackPtr[-3].dist < stackPtr[-2].dist)
+                  std::swap(stackPtr[-3],stackPtr[-2]);
+              }
+            }
+          }
+
+          /* intersect leaf */
+          assert(cur != BVH::invalidNode);
+          assert(cur != BVH::emptyNode);
+          const vbool<K> valid_leaf = tray.tfar > curDist;
+          STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
+          if (unlikely(none(valid_leaf))) continue;
+          size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
+
+          size_t lazy_node = 0;
+          PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
+
+          /* reduce max distance interval on successful intersection */
+          if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
+          {
+            tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
+            frustum.template updateMaxDist<K>(tray.tfar);
+          }
+
+          if (unlikely(lazy_node)) {
+            stackPtr->ptr = lazy_node;
+            stackPtr->dist = neg_inf;
+            stackPtr++;
+          }
+        }
+        
+      } while(valid_bits);
+    }
+
+    // ===================================================================================================================================================================
+    // ===================================================================================================================================================================
+    // ===================================================================================================================================================================
+
+    template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+    bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
+                                                                                               const BVH* bvh,
+                                                                                               NodeRef root,
+                                                                                               size_t k,
+                                                                                               Precalculations& pre,
+                                                                                               RayK<K>& ray,
+                                                                                               const TravRayK<K, robust>& tray,
+                                                                                               IntersectContext* context)
+      {
+        /* stack state */
+        NodeRef stack[stackSizeSingle];  // stack of nodes that still need to get traversed
+        NodeRef* stackPtr = stack+1;     // current stack pointer
+	NodeRef* stackEnd = stack+stackSizeSingle;
+        stack[0] = root;
+
+        /* load the ray into SIMD registers */
+        TravRay<N,robust> tray1;
+        tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
+
+	/* pop loop */
+	while (true) pop:
+	{
+          /* pop next node */
+	  if (unlikely(stackPtr == stack)) break;
+	  stackPtr--;
+          NodeRef cur = (NodeRef)*stackPtr;
+
+          /* downtraversal loop */
+          while (true)
+          {
+            /* intersect node */
+            size_t mask; vfloat<N> tNear;
+            STAT3(shadow.trav_nodes, 1, 1, 1);
+            bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
+            if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
+
+            /* if no child is hit, pop next node */
+            if (unlikely(mask == 0))
+              goto pop;
+
+            /* select next child and push other children */
+            BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
+          }
+
+          /* this is a leaf node */
+          assert(cur != BVH::emptyNode);
+          STAT3(shadow.trav_leaves, 1, 1, 1);
+          size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
+
+          size_t lazy_node = 0;
+          if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
+	    ray.tfar[k] = neg_inf;
+	    return true;
+	  }
+
+          if (unlikely(lazy_node)) {
+            *stackPtr = lazy_node;
+            stackPtr++;
+          }
+	}
+	return false;
+      }
+
+    template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+    void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
+                                                                                              Accel::Intersectors* __restrict__ This,
+                                                                                              RayK<K>& __restrict__ ray,
+                                                                                              IntersectContext* context)
+    {
+      BVH* __restrict__ bvh = (BVH*)This->ptr;
+      
+      /* we may traverse an empty BVH in case all geometry was invalid */
+      if (bvh->root == BVH::emptyNode)
+        return;
+      
+#if ENABLE_FAST_COHERENT_CODEPATHS == 1
+      assert(context);
+      if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
+      {
+        occludedCoherent(valid_i, This, ray, context);
+        return;
+      }
+#endif
+
+      /* filter out already occluded and invalid rays */
+      vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
+#if defined(EMBREE_IGNORE_INVALID_RAYS)
+      valid &= ray.valid();
+#endif
+
+      /* return if there are no valid rays */
+      const size_t valid_bits = movemask(valid);
+      if (unlikely(valid_bits == 0)) return;
+
+      /* verify correct input */
+      assert(all(valid, ray.valid()));
+      assert(all(valid, ray.tnear() >= 0.0f));
+      assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
+      Precalculations pre(valid, ray);
+
+      /* load ray */
+      TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
+      const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
+      const vfloat<K> org_ray_tfar  = max(ray.tfar , 0.0f);
+
+      tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
+      tray.tfar  = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
+
+      vbool<K> terminated = !valid;
+      const vfloat<K> inf = vfloat<K>(pos_inf);
+
+      /* determine switch threshold based on flags */
+      const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
+
+      /* allocate stack and push root node */
+      vfloat<K> stack_near[stackSizeChunk];
+      NodeRef stack_node[stackSizeChunk];
+      stack_node[0] = BVH::invalidNode;
+      stack_near[0] = inf;
+      stack_node[1] = bvh->root;
+      stack_near[1] = tray.tnear;
+      NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
+      NodeRef* __restrict__ sptr_node = stack_node + 2;
+      vfloat<K>* __restrict__ sptr_near = stack_near + 2;
+
+      while (1) pop:
+      {
+        /* pop next node from stack */
+        assert(sptr_node > stack_node);
+        sptr_node--;
+        sptr_near--;
+        NodeRef cur = *sptr_node;
+        if (unlikely(cur == BVH::invalidNode)) {
+          assert(sptr_node == stack_node);
+          break;
+        }
+
+        /* cull node if behind closest hit point */
+        vfloat<K> curDist = *sptr_near;
+        const vbool<K> active = curDist < tray.tfar;
+        if (unlikely(none(active)))
+          continue;
+
+        /* switch to single ray traversal */
+#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
+#if FORCE_SINGLE_MODE == 0
+        if (single)
+#endif
+        {
+          size_t bits = movemask(active);
+#if FORCE_SINGLE_MODE == 0
+          if (unlikely(popcnt(bits) <= switchThreshold)) 
+#endif
+          {
+            for (; bits!=0; ) {
+              const size_t i = bscf(bits);
+              if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
+                set(terminated, i);
+            }
+            if (all(terminated)) break;
+            tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
+            continue;
+          }
+        }
+#endif
+
+        while (likely(!cur.isLeaf()))
+        {
+          /* process nodes */
+          const vbool<K> valid_node = tray.tfar > curDist;
+          STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
+          const NodeRef nodeRef = cur;
+          const BaseNode* __restrict__ const node = nodeRef.baseNode();
+
+          /* set cur to invalid */
+          cur = BVH::emptyNode;
+          curDist = pos_inf;
+
+          for (unsigned i = 0; i < N; i++)
+          {
+            const NodeRef child = node->children[i];
+            if (unlikely(child == BVH::emptyNode)) break;
+            vfloat<K> lnearP;
+            vbool<K> lhit = valid_node;
+            BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
+
+            /* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
+            if (likely(any(lhit)))
+            {
+              assert(sptr_node < stackEnd);
+              assert(child != BVH::emptyNode);
+              const vfloat<K> childDist = select(lhit, lnearP, inf);
+
+              /* push 'cur' node onto stack and continue with hit child */
+              if (likely(cur != BVH::emptyNode)) {
+                *sptr_node = cur; sptr_node++;
+                *sptr_near = curDist; sptr_near++;
+              }
+              curDist = childDist;
+              cur = child;
+            }
+          }
+          if (unlikely(cur == BVH::emptyNode))
+            goto pop;
+
+#if SWITCH_DURING_DOWN_TRAVERSAL == 1
+          if (single)
+          {
+            // seems to be the best place for testing utilization
+            if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
+            {
+              *sptr_node++ = cur;
+              *sptr_near++ = curDist;
+              goto pop;
+            }
+          }
+#endif
+	}
+
+        /* return if stack is empty */
+        if (unlikely(cur == BVH::invalidNode)) {
+          assert(sptr_node == stack_node);
+          break;
+        }
+
+
+        /* intersect leaf */
+        assert(cur != BVH::emptyNode);
+        const vbool<K> valid_leaf = tray.tfar > curDist;
+        STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
+        if (unlikely(none(valid_leaf))) continue;
+        size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
+
+        size_t lazy_node = 0;
+        terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
+        if (all(terminated)) break;
+        tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
+
+        if (unlikely(lazy_node)) {
+          *sptr_node = lazy_node; sptr_node++;
+          *sptr_near = neg_inf;   sptr_near++;
+        }
+      }
+
+      vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
+    }
+
+
+    template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
+    void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
+                                                                                                      Accel::Intersectors* __restrict__ This,
+                                                                                                      RayK<K>& __restrict__ ray,
+                                                                                                      IntersectContext* context)
+    {
+      BVH* __restrict__ bvh = (BVH*)This->ptr;
+      
+      /* filter out invalid rays */
+      vbool<K> valid = *valid_i == -1;
+#if defined(EMBREE_IGNORE_INVALID_RAYS)
+      valid &= ray.valid();
+#endif
+
+      /* return if there are no valid rays */
+      size_t valid_bits = movemask(valid);
+      if (unlikely(valid_bits == 0)) return;
+
+      /* verify correct input */
+      assert(all(valid, ray.valid()));
+      assert(all(valid, ray.tnear() >= 0.0f));
+      assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
+      Precalculations pre(valid,ray);
+
+      /* load ray */
+      TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
+      const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
+      const vfloat<K> org_ray_tfar  = max(ray.tfar , 0.0f);
+
+      vbool<K> terminated = !valid;
+
+      vint<K> octant = ray.octant();
+      octant = select(valid, octant, vint<K>(0xffffffff));
+
+      do
+      {
+        const size_t valid_index = bsf(valid_bits);
+        vbool<K> octant_valid = octant[valid_index] == octant;
+        valid_bits &= ~(size_t)movemask(octant_valid);
+
+        tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
+        tray.tfar  = select(octant_valid, org_ray_tfar,  vfloat<K>(neg_inf));
+
+        Frustum<robust> frustum;
+        frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
+
+        StackItemMaskT<NodeRef> stack[stackSizeSingle];  // stack of nodes
+        StackItemMaskT<NodeRef>* stackPtr = stack + 1;   // current stack pointer
+        stack[0].ptr  = bvh->root;
+        stack[0].mask = movemask(octant_valid);
+
+        while (1) pop:
+        {
+          /* pop next node from stack */
+          if (unlikely(stackPtr == stack)) break;
+
+          stackPtr--;
+          NodeRef cur = NodeRef(stackPtr->ptr);
+
+          /* cull node of active rays have already been terminated */
+          size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
+
+          if (unlikely(m_active == 0)) continue;
+
+          while (likely(!cur.isLeaf()))
+          {
+            /* process nodes */
+            //STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
+            const NodeRef nodeRef = cur;
+            const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
+
+            vfloat<N> fmin;
+            size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
+
+            if (unlikely(!m_frustum_node)) goto pop;
+            cur = BVH::emptyNode;
+            m_active = 0;
+
+#if defined(__AVX__)
+            //STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
+#endif
+            size_t num_child_hits = 0;
+            do {
+              const size_t i = bscf(m_frustum_node);
+              vfloat<K> lnearP;
+              vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
+              STAT3(normal.trav_nodes, 1, 1, 1);
+              BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
+
+              if (likely(any(lhit)))
+              {                                
+                const NodeRef child = node->child(i);
+                assert(child != BVH::emptyNode);
+                BVHN<N>::prefetch(child);
+                if (likely(cur != BVH::emptyNode)) {
+                  num_child_hits++;
+                  stackPtr->ptr  = cur;
+                  stackPtr->mask = m_active;
+                  stackPtr++;
+                }
+                cur = child;
+                m_active = movemask(lhit);
+              }
+            } while(m_frustum_node);
+
+            if (unlikely(cur == BVH::emptyNode)) goto pop;
+          }
+
+          /* intersect leaf */
+          assert(cur != BVH::invalidNode);
+          assert(cur != BVH::emptyNode);
+#if defined(__AVX__)
+          STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
+#endif
+          if (unlikely(!m_active)) continue;
+          size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
+
+          size_t lazy_node = 0;
+          terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
+          octant_valid &= !terminated;
+          if (unlikely(none(octant_valid))) break;
+          tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
+
+          if (unlikely(lazy_node)) {
+            stackPtr->ptr  = lazy_node;
+            stackPtr->mask = movemask(octant_valid);
+            stackPtr++;
+          }
+        }
+      } while(valid_bits);
+
+      vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
+    }
+  }
+}

+ 59 - 0
thirdparty/embree/kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp

@@ -0,0 +1,59 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "bvh_intersector_hybrid.cpp"
+
+namespace embree
+{
+  namespace isa
+  {
+    ////////////////////////////////////////////////////////////////////////////////
+    /// BVH4Intersector4 Definitions
+    ////////////////////////////////////////////////////////////////////////////////
+
+    IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoeller,         BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller  <4 COMMA 4 COMMA true> > >));
+    IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4Intersector4HybridMoellerNoFilter, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMIntersectorKMoeller  <4 COMMA 4 COMMA false> > >));
+    IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridMoeller,        BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
+    IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vIntersector4HybridPluecker,       BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true  COMMA ArrayIntersectorK_1<4 COMMA TriangleMvIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
+    IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iIntersector4HybridPluecker,       BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true  COMMA ArrayIntersectorK_1<4 COMMA TriangleMiIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
+
+    IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridMoeller,  BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
+    IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridMoeller,  BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKMoeller <4 COMMA 4 COMMA true> > >));
+    IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4vMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true  COMMA ArrayIntersectorK_1<4 COMMA TriangleMvMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
+    IF_ENABLED_TRIS(DEFINE_INTERSECTOR4(BVH4Triangle4iMBIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true  COMMA ArrayIntersectorK_1<4 COMMA TriangleMiMBIntersectorKPluecker<4 COMMA 4 COMMA true> > >));
+
+    IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoeller,        BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
+    IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridMoellerNoFilter,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKMoeller <4 COMMA 4 COMMA false> > >));
+    IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridMoeller,        BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
+    IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4vIntersector4HybridPluecker,       BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true  COMMA ArrayIntersectorK_1<4 COMMA QuadMvIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
+    IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iIntersector4HybridPluecker,       BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true  COMMA ArrayIntersectorK_1<4 COMMA QuadMiIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
+
+    IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKMoeller <4 COMMA 4 COMMA true > > >));
+    IF_ENABLED_QUADS(DEFINE_INTERSECTOR4(BVH4Quad4iMBIntersector4HybridPluecker,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true  COMMA ArrayIntersectorK_1<4 COMMA QuadMiMBIntersectorKPluecker<4 COMMA 4 COMMA true > > >));
+
+    IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA false COMMA VirtualCurveIntersectorK<4> >));
+    IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersector4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA false COMMA VirtualCurveIntersectorK<4> >));
+
+    IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4Hybrid, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersectorK<4> >));
+    IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR4(BVH4OBBVirtualCurveIntersectorRobust4HybridMB,BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersectorK<4> >));
+  
+    //IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
+    IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1Intersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubdivPatch1Intersector4>));
+    IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
+    //IF_ENABLED_SUBDIV(DEFINE_INTERSECTOR4(BVH4SubdivPatch1MBIntersector4, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA SubdivPatch1MBIntersector4>));
+
+    IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4> >));
+    IF_ENABLED_USER(DEFINE_INTERSECTOR4(BVH4VirtualMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA ObjectIntersector4MB> >));
+
+    IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorK<4>> >));
+    IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR4(BVH4InstanceMBIntersector4Chunk, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersectorK_1<4 COMMA InstanceIntersectorKMB<4>> >));
+
+    IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
+    //IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridMoeller, BVHNIntersectorKChunk<4 COMMA 4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersectorKMoeller <4 COMMA 4 COMMA true> >));
+    
+    IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridMBIntersector4HybridMoeller, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN2_AN4D COMMA true COMMA SubGridMBIntersectorKPluecker <4 COMMA 4 COMMA true> >));
+    IF_ENABLED_GRIDS(DEFINE_INTERSECTOR4(BVH4GridIntersector4HybridPluecker, BVHNIntersectorKHybrid<4 COMMA 4 COMMA BVH_AN1 COMMA true COMMA SubGridIntersectorKPluecker <4 COMMA 4 COMMA true> >));
+
+  }
+}
+

部分文件因为文件数量过多而无法显示