Browse Source

Merge pull request #86 from godlikepanos/android

Add Android support
Panagiotis Christopoulos Charitos 4 years ago
parent
commit
537f40e23c
100 changed files with 1973 additions and 738 deletions
  1. 1 1
      AnKi/Collision/CMakeLists.txt
  2. 39 3
      AnKi/Config.h.cmake
  3. 10 37
      AnKi/Core/App.cpp
  4. 0 7
      AnKi/Core/App.h
  5. 2 2
      AnKi/Core/CMakeLists.txt
  6. 5 1
      AnKi/Core/ConfigSet.cpp
  7. 62 0
      AnKi/Core/NativeWindowAndroid.cpp
  8. 2 0
      AnKi/Core/NativeWindowAndroid.h
  9. 5 1
      AnKi/Core/StagingGpuMemoryManager.h
  10. 10 0
      AnKi/Gr/Buffer.h
  11. 63 9
      AnKi/Gr/CMakeLists.txt
  12. 5 1
      AnKi/Gr/Common.h
  13. 3 1
      AnKi/Gr/ConfigDefs.h
  14. 2 0
      AnKi/Gr/Enums.h
  15. 5 2
      AnKi/Gr/GrObject.h
  16. 5 1
      AnKi/Gr/Utils/ClassGpuAllocator.h
  17. 5 1
      AnKi/Gr/Utils/FrameGpuAllocator.h
  18. 2 2
      AnKi/Gr/Utils/StackGpuAllocator.cpp
  19. 5 1
      AnKi/Gr/Utils/StackGpuAllocator.h
  20. 1 0
      AnKi/Gr/Vulkan/AccelerationStructureImpl.cpp
  21. 12 0
      AnKi/Gr/Vulkan/Buffer.cpp
  22. 44 28
      AnKi/Gr/Vulkan/BufferImpl.cpp
  23. 54 1
      AnKi/Gr/Vulkan/BufferImpl.h
  24. 32 10
      AnKi/Gr/Vulkan/CommandBufferFactory.cpp
  25. 16 11
      AnKi/Gr/Vulkan/CommandBufferFactory.h
  26. 4 3
      AnKi/Gr/Vulkan/CommandBufferImpl.cpp
  27. 2 2
      AnKi/Gr/Vulkan/CommandBufferImpl.inl.h
  28. 3 0
      AnKi/Gr/Vulkan/Common.cpp
  29. 25 12
      AnKi/Gr/Vulkan/Common.h
  30. 7 1
      AnKi/Gr/Vulkan/DeferredBarrierFactory.h
  31. 5 1
      AnKi/Gr/Vulkan/DescriptorSet.cpp
  32. 5 1
      AnKi/Gr/Vulkan/FenceFactory.h
  33. 5 1
      AnKi/Gr/Vulkan/GpuMemoryManager.h
  34. 232 91
      AnKi/Gr/Vulkan/GrManagerImpl.cpp
  35. 12 7
      AnKi/Gr/Vulkan/GrManagerImpl.h
  36. 25 0
      AnKi/Gr/Vulkan/GrManagerImplAndroid.cpp
  37. 62 41
      AnKi/Gr/Vulkan/Pipeline.cpp
  38. 105 129
      AnKi/Gr/Vulkan/Pipeline.h
  39. 3 3
      AnKi/Gr/Vulkan/PipelineCache.cpp
  40. 5 1
      AnKi/Gr/Vulkan/QueryFactory.h
  41. 5 1
      AnKi/Gr/Vulkan/SemaphoreFactory.h
  42. 3 2
      AnKi/Gr/Vulkan/ShaderImpl.cpp
  43. 39 17
      AnKi/Gr/Vulkan/SwapchainFactory.cpp
  44. 1 1
      AnKi/Gr/Vulkan/SwapchainFactory.h
  45. 34 2
      AnKi/Gr/Vulkan/TextureImpl.cpp
  46. 40 17
      AnKi/Gr/Vulkan/TextureImpl.h
  47. 2 2
      AnKi/Gr/Vulkan/TextureView.cpp
  48. 8 38
      AnKi/Gr/Vulkan/TextureViewImpl.cpp
  49. 4 4
      AnKi/Gr/Vulkan/TextureViewImpl.h
  50. 1 1
      AnKi/Importer/CMakeLists.txt
  51. 1 0
      AnKi/Importer/Common.h
  52. 230 32
      AnKi/Importer/ImageImporter.cpp
  53. 2 0
      AnKi/Importer/ImageImporter.h
  54. 3 1
      AnKi/Input/CMakeLists.txt
  55. 26 19
      AnKi/Input/InputAndroid.cpp
  56. 9 7
      AnKi/Input/InputDummy.cpp
  57. 1 1
      AnKi/Math/CMakeLists.txt
  58. 133 38
      AnKi/Math/Mat.h
  59. 178 6
      AnKi/Math/Vec.h
  60. 1 1
      AnKi/Physics/CMakeLists.txt
  61. 1 1
      AnKi/Physics/PhysicsCollisionShape.cpp
  62. 1 1
      AnKi/Renderer/CMakeLists.txt
  63. 2 2
      AnKi/Renderer/ConfigDefs.h
  64. 1 0
      AnKi/Renderer/DepthDownscale.h
  65. 1 1
      AnKi/Renderer/FinalComposite.cpp
  66. 1 1
      AnKi/Renderer/ProbeReflections.cpp
  67. 1 1
      AnKi/Renderer/RtShadows.cpp
  68. 4 1
      AnKi/Renderer/Scale.cpp
  69. 1 1
      AnKi/Renderer/Ssao.cpp
  70. 8 0
      AnKi/Renderer/Ssao.h
  71. 1 1
      AnKi/Renderer/Ssgi.cpp
  72. 1 1
      AnKi/Renderer/Ssr.cpp
  73. 7 1
      AnKi/Renderer/TileAllocator.h
  74. 1 1
      AnKi/Renderer/Tonemapping.cpp
  75. 1 1
      AnKi/Renderer/VolumetricLightingAccumulation.cpp
  76. 1 1
      AnKi/Resource/CMakeLists.txt
  77. 2 0
      AnKi/Resource/ConfigDefs.h
  78. 7 2
      AnKi/Resource/ImageBinary.h
  79. 5 2
      AnKi/Resource/ImageBinary.xml
  80. 60 16
      AnKi/Resource/ImageLoader.cpp
  81. 9 1
      AnKi/Resource/ImageLoader.h
  82. 27 1
      AnKi/Resource/ImageResource.cpp
  83. 12 2
      AnKi/Resource/MaterialResource.h
  84. 89 44
      AnKi/Resource/ResourceFilesystem.cpp
  85. 21 10
      AnKi/Resource/ResourceFilesystem.h
  86. 7 1
      AnKi/Resource/ShaderProgramResource.h
  87. 1 1
      AnKi/Resource/TransferGpuAllocator.cpp
  88. 4 0
      AnKi/Resource/TransferGpuAllocator.h
  89. 1 1
      AnKi/Scene/CMakeLists.txt
  90. 3 0
      AnKi/Scene/Components/GpuParticleEmitterComponent.cpp
  91. 2 0
      AnKi/Scene/DebugDrawer.cpp
  92. 12 2
      AnKi/Scene/Octree.h
  93. 1 1
      AnKi/Script/CMakeLists.txt
  94. 6 1
      AnKi/Script/LuaBinder.h
  95. 1 1
      AnKi/ShaderCompiler/CMakeLists.txt
  96. 7 4
      AnKi/ShaderCompiler/Glslang.cpp
  97. 5 1
      AnKi/ShaderCompiler/ShaderProgramCompiler.h
  98. 39 22
      AnKi/ShaderCompiler/ShaderProgramParser.cpp
  99. 5 1
      AnKi/ShaderCompiler/ShaderProgramParser.h
  100. 3 6
      AnKi/Shaders/ApplyIrradianceToReflection.ankiprog

+ 1 - 1
AnKi/Collision/CMakeLists.txt

@@ -1,3 +1,3 @@
 file(GLOB SOURCES *.cpp)
 file(GLOB HEADERS *.h)
-addAnkiSourceFiles(${SOURCES} ${HEADERS})
+anki_add_source_files(${SOURCES} ${HEADERS})

+ 39 - 3
AnKi/Config.h.cmake

@@ -89,6 +89,9 @@
 #	error Unknown OS
 #endif
 
+// Mobile or not
+#define ANKI_PLATFORM_MOBILE (ANKI_OS_ANDROID || ANKI_OS_IOS)
+
 // POSIX system or not
 #if ANKI_OS_LINUX || ANKI_OS_ANDROID || ANKI_OS_MACOS || ANKI_OS_IOS
 #	define ANKI_POSIX 1
@@ -124,7 +127,7 @@
 #endif
 
 // SIMD
-#define ANKI_ENABLE_SIMD (${_ANKI_ENABLE_SIMD} && ANKI_CPU_ARCH_X86)
+#define ANKI_ENABLE_SIMD ${_ANKI_ENABLE_SIMD}
 
 #if !ANKI_ENABLE_SIMD
 #	define ANKI_SIMD_NONE 1
@@ -186,8 +189,8 @@
 #if ANKI_COMPILER_MSVC
 #	include <intrin.h>
 #	define __builtin_popcount __popcnt
-#	define __builtin_popcountl __popcnt64
-#	define __builtin_clzll(x) ((int)__lzcnt64(x))
+#	define __builtin_popcountl(x) int(__popcnt64(x))
+#	define __builtin_clzll(x) int(__lzcnt64(x))
 #endif
 
 // Constants
@@ -204,4 +207,37 @@
 #else
 #	define ANKI_INTERNAL [[deprecated("This is an AnKi internal interface. Don't use it")]]
 #endif
+
+// Define the main() function.
+// ANKI_MAIN_FUNCTION(myMain) forward-declares "int myMain(int argc, char* argv[])" and emits the native entry point of
+// the platform, which forwards to myMain. The application is expected to define myMain itself.
+#if ANKI_OS_ANDROID
+extern "C" {
+struct android_app;
+}
+
+namespace anki {
+// The android_app handed to android_main(). Set by ANKI_MAIN_FUNCTION before myMain runs.
+extern android_app* g_androidApp;
+
+// Fills argc and argv and returns a token that must later be given to cleanupGetAndroidCommandLineArguments().
+// NOTE(review): where the arguments come from is not visible here, check the definition.
+void* getAndroidCommandLineArguments(int& argc, char**& argv);
+
+// Releases whatever getAndroidCommandLineArguments() returned.
+void cleanupGetAndroidCommandLineArguments(void* ptr);
+}
+
+// Android flavor: the entry point is android_main(). It stores the android_app in anki::g_androidApp, builds argc/argv,
+// calls myMain and then cleans up. android_main() returns void so the return value of myMain is discarded.
+#	define ANKI_MAIN_FUNCTION(myMain) \
+	int myMain(int argc, char* argv[]); \
+	extern "C" void android_main(android_app* app) \
+	{ \
+		anki::g_androidApp = app; \
+		char** argv; \
+		int argc; \
+		void* cleanupToken = anki::getAndroidCommandLineArguments(argc, argv); \
+		myMain(argc, argv); \
+		anki::cleanupGetAndroidCommandLineArguments(cleanupToken); \
+	}
+#else
+// Every other OS: a regular main() that returns whatever myMain returns.
+#	define ANKI_MAIN_FUNCTION(myMain) \
+	int myMain(int argc, char* argv[]); \
+	int main(int argc, char* argv[]) \
+	{ \
+		return myMain(argc, argv); \
+	}
+#endif
 /// @}

+ 10 - 37
AnKi/Core/App.cpp

@@ -38,7 +38,7 @@ namespace anki
 
 #if ANKI_OS_ANDROID
 /// The one and only android hack
-android_app* gAndroidApp = nullptr;
+android_app* g_androidApp = nullptr;
 #endif
 
 class App::StatsUi
@@ -323,6 +323,8 @@ Error App::initInternal(const ConfigSet& config_, AllocAlignedCallback allocCb,
 {
 	setSignalHandlers();
 
+	Thread::setNameOfCurrentThread("anki_main");
+
 	ConfigSet config = config_;
 	m_displayStats = config.getNumberU32("core_displayStats");
 
@@ -504,12 +506,15 @@ Error App::initInternal(const ConfigSet& config_, AllocAlignedCallback allocCb,
 
 Error App::initDirs(const ConfigSet& cfg)
 {
-#if !ANKI_OS_ANDROID
 	// Settings path
+#if !ANKI_OS_ANDROID
 	StringAuto home(m_heapAlloc);
 	ANKI_CHECK(getHomeDirectory(home));
 
 	m_settingsDir.sprintf(m_heapAlloc, "%s/.anki", &home[0]);
+#else
+	m_settingsDir.sprintf(m_heapAlloc, "%s/.anki", g_androidApp->activity->internalDataPath);
+#endif
 
 	if(!directoryExists(m_settingsDir.toCString()))
 	{
@@ -537,28 +542,6 @@ Error App::initDirs(const ConfigSet& cfg)
 		ANKI_CHECK(createDirectory(m_cacheDir.toCString()));
 	}
 
-#else
-	// ANKI_ASSERT(gAndroidApp);
-	// ANativeActivity* activity = gAndroidApp->activity;
-
-	// Settings path
-	// settingsDir = String(activity->internalDataDir, alloc);
-	settingsDir = String("/sdcard/.anki/");
-	if(!directoryExists(settingsDir.c_str()))
-	{
-		createDirectory(settingsDir.c_str());
-	}
-
-	// Cache
-	cacheDir = settingsDir + "/cache";
-	if(directoryExists(cacheDir.c_str()))
-	{
-		removeDirectory(cacheDir.c_str());
-	}
-
-	createDirectory(cacheDir.c_str());
-#endif
-
 	return Error::NONE;
 }
 
@@ -750,20 +733,10 @@ void App::setSignalHandlers()
 		else
 			printf("Caught signal %d\n", signum);
 
-		class BW : public BackTraceWalker
-		{
-		public:
-			U32 m_c = 0;
-
-			void operator()(const char* symbol)
-			{
-				printf("%.2u: %s\n", m_c++, symbol);
-			}
-		};
-
-		BW bw;
+		U32 count = 0;
 		printf("Backtrace:\n");
-		getBacktrace(bw);
+		backtrace(HeapAllocator<U8>(allocAligned, nullptr),
+				  [&count](CString symbol) { printf("%.2u: %s\n", count++, symbol.cstr()); });
 
 		ANKI_DEBUG_BREAK();
 	};

+ 0 - 7
AnKi/Core/App.h

@@ -10,17 +10,10 @@
 #include <AnKi/Util/String.h>
 #include <AnKi/Util/Ptr.h>
 #include <AnKi/Ui/UiImmediateModeBuilder.h>
-#if ANKI_OS_ANDROID
-#	include <android_native_app_glue.h>
-#endif
 
 namespace anki
 {
 
-#if ANKI_OS_ANDROID
-extern android_app* gAndroidApp;
-#endif
-
 // Forward
 class CoreTracer;
 class ConfigSet;

+ 2 - 2
AnKi/Core/CMakeLists.txt

@@ -4,11 +4,11 @@ file(GLOB HEADERS *.h)
 if(SDL)
 	set(SOURCES ${SOURCES} NativeWindowSdl.cpp)
 elseif(ANDROID)
-set(SOURCES ${SOURCES} NativeWindowAndroid.cpp)
+	set(SOURCES ${SOURCES} NativeWindowAndroid.cpp)
 else()
 	message(FATAL_ERROR "Not implemented")
 endif()
 
 foreach(S ${SOURCES})
-	addAnkiSourceFiles("${CMAKE_CURRENT_SOURCE_DIR}/${S}" ${HEADERS})
+	anki_add_source_files("${CMAKE_CURRENT_SOURCE_DIR}/${S}" ${HEADERS})
 endforeach()

+ 5 - 1
AnKi/Core/ConfigSet.cpp

@@ -16,7 +16,7 @@
 namespace anki
 {
 
-class ConfigSet::Option : public NonCopyable
+class ConfigSet::Option
 {
 public:
 	enum Type
@@ -42,6 +42,8 @@ public:
 
 	Option() = default;
 
+	Option(const Option&) = delete; // Non-copyable
+
 	Option(Option&& b)
 		: m_name(std::move(b.m_name))
 		, m_helpMsg(std::move(b.m_helpMsg))
@@ -58,6 +60,8 @@ public:
 
 	~Option() = default;
 
+	Option& operator=(const Option&) = delete; // Non-copyable
+
 	Option& operator=(Option&& b) = delete;
 };
 

+ 62 - 0
AnKi/Core/NativeWindowAndroid.cpp

@@ -8,4 +8,66 @@
 namespace anki
 {
 
+/// Create the Android native window wrapper. Blocks, pumping the looper, until g_androidApp has a window.
+/// @param init Not read by this implementation.
+/// @param alloc The allocator that will own the NativeWindowImpl.
+/// @return Always Error::NONE.
+Error NativeWindow::init(NativeWindowInitInfo& init, HeapAllocator<U8>& alloc)
+{
+	ANKI_CORE_LOGI("Initializing Android window");
+
+	m_alloc = alloc;
+	m_impl = m_alloc.newInstance<NativeWindowImpl>();
+
+	// Keep processing looper events until the window shows up
+	constexpr int pollTimeoutMs = 5;
+	while(g_androidApp->window == nullptr)
+	{
+		for(;;)
+		{
+			int eventCount;
+			android_poll_source* pollSource;
+			const int pollResult =
+				ALooper_pollAll(pollTimeoutMs, nullptr, &eventCount, reinterpret_cast<void**>(&pollSource));
+
+			// A negative result means there is nothing more to process in this round
+			if(pollResult < 0)
+			{
+				break;
+			}
+
+			if(pollSource != nullptr)
+			{
+				pollSource->process(g_androidApp, pollSource);
+			}
+		}
+	}
+
+	// Remember the window and cache its dimensions
+	ANativeWindow* const window = g_androidApp->window;
+	m_impl->m_nativeWindow = window;
+	m_width = ANativeWindow_getWidth(window);
+	m_height = ANativeWindow_getHeight(window);
+
+	return Error::NONE;
+}
+
+/// Ask the activity to finish, pump the looper until g_androidApp->destroyRequested is set and then free the
+/// NativeWindowImpl.
+void NativeWindow::destroy()
+{
+	ANKI_CORE_LOGI("Destroying Android window");
+	ANativeActivity_finish(g_androidApp->activity);
+
+	// Keep draining events until the destroy request arrives. The poll uses a zero timeout so it never blocks
+	while(!g_androidApp->destroyRequested)
+	{
+		for(;;)
+		{
+			int eventCount;
+			android_poll_source* pollSource;
+			const int pollResult = ALooper_pollAll(0, nullptr, &eventCount, reinterpret_cast<void**>(&pollSource));
+
+			// A negative result means there is nothing more to process in this round
+			if(pollResult < 0)
+			{
+				break;
+			}
+
+			if(pollSource != nullptr)
+			{
+				pollSource->process(g_androidApp, pollSource);
+			}
+		}
+	}
+
+	m_alloc.deleteInstance(m_impl);
+}
+
+/// Set the window title. This Android implementation does nothing and ignores @a title.
+void NativeWindow::setWindowTitle(CString title)
+{
+	// Nothing
+}
+
 } // end namespace anki

+ 2 - 0
AnKi/Core/NativeWindowAndroid.h

@@ -16,6 +16,8 @@ namespace anki
 /// Native window implementation for Android
 class NativeWindowImpl
 {
+public:
+	ANativeWindow* m_nativeWindow = nullptr;
 };
 
 } // end namespace anki

+ 5 - 1
AnKi/Core/StagingGpuMemoryManager.h

@@ -58,13 +58,17 @@ public:
 };
 
 /// Manages staging GPU memory.
-class StagingGpuMemoryManager : public NonCopyable
+class StagingGpuMemoryManager
 {
 public:
 	StagingGpuMemoryManager() = default;
 
+	StagingGpuMemoryManager(const StagingGpuMemoryManager&) = delete; // Non-copyable
+
 	~StagingGpuMemoryManager();
 
+	StagingGpuMemoryManager& operator=(const StagingGpuMemoryManager&) = delete; // Non-copyable
+
 	ANKI_USE_RESULT Error init(GrManager* gr, const ConfigSet& cfg);
 
 	void endFrame();

+ 10 - 0
AnKi/Gr/Buffer.h

@@ -76,6 +76,16 @@ public:
 	/// @param access The access to the buffer.
 	void* map(PtrSize offset, PtrSize range, BufferMapAccessBit access);
 
+	/// Flush the buffer from the CPU caches. Call it after CPU writes to make the buffer memory available to the GPU.
+	/// @param offset The starting offset.
+	/// @param range The range to flush or MAX_PTR_SIZE to flush until the end.
+	void flush(PtrSize offset, PtrSize range) const;
+
+	/// Invalidate the buffer from the CPU caches. Call it before CPU reads so that GPU updates become visible.
+	/// @param offset The starting offset.
+	/// @param range The range to invalidate or MAX_PTR_SIZE to invalidate until the end.
+	void invalidate(PtrSize offset, PtrSize range) const;
+
 	/// Convenience map method.
 	/// @param offset The starting offset.
 	/// @param elementCount The number of T elements to map.

+ 63 - 9
AnKi/Gr/CMakeLists.txt

@@ -1,12 +1,66 @@
-file(GLOB SOURCES *.cpp Utils/*.cpp)
+set(COMMON
+	"Common.cpp"
+	"GrObject.cpp"
+	"RenderGraph.cpp"
+	"ShaderProgram.cpp"
+	"Utils/ClassGpuAllocator.cpp"
+	"Utils/FrameGpuAllocator.cpp"
+	"Utils/Functions.cpp"
+	"Utils/StackGpuAllocator.cpp")
 
-if(GL)
-	set(GR_BACKEND "Gl")
-else()
-	set(GR_BACKEND "Vulkan")
-endif()
+foreach(S ${COMMON})
+	anki_add_source_files("${CMAKE_CURRENT_SOURCE_DIR}/${S}")
+endforeach()
+
+if(VULKAN)
+	set(VKCPP
+		"Vulkan/AccelerationStructure.cpp"
+		"Vulkan/AccelerationStructureImpl.cpp"
+		"Vulkan/Buffer.cpp"
+		"Vulkan/BufferImpl.cpp"
+		"Vulkan/CommandBuffer.cpp"
+		"Vulkan/CommandBufferFactory.cpp"
+		"Vulkan/CommandBufferImpl.cpp"
+		"Vulkan/Common.cpp"
+		"Vulkan/DescriptorSet.cpp"
+		"Vulkan/Fence.cpp"
+		"Vulkan/FenceFactory.cpp"
+		"Vulkan/Framebuffer.cpp"
+		"Vulkan/FramebufferImpl.cpp"
+		"Vulkan/GpuMemoryManager.cpp"
+		"Vulkan/GrManager.cpp"
+		"Vulkan/GrManagerImpl.cpp"
+		"Vulkan/OcclusionQuery.cpp"
+		"Vulkan/OcclusionQueryImpl.cpp"
+		"Vulkan/PipelineCache.cpp"
+		"Vulkan/Pipeline.cpp"
+		"Vulkan/PipelineLayout.cpp"
+		"Vulkan/QueryFactory.cpp"
+		"Vulkan/Sampler.cpp"
+		"Vulkan/SamplerFactory.cpp"
+		"Vulkan/SamplerImpl.cpp"
+		"Vulkan/Shader.cpp"
+		"Vulkan/ShaderImpl.cpp"
+		"Vulkan/ShaderProgram.cpp"
+		"Vulkan/ShaderProgramImpl.cpp"
+		"Vulkan/SwapchainFactory.cpp"
+		"Vulkan/Texture.cpp"
+		"Vulkan/TextureImpl.cpp"
+		"Vulkan/TextureView.cpp"
+		"Vulkan/TextureViewImpl.cpp"
+		"Vulkan/TimestampQuery.cpp"
+		"Vulkan/TimestampQueryImpl.cpp"
+		"Vulkan/VulkanObject.cpp")
 
-file(GLOB GR_BACKEND_SOURCES ${GR_BACKEND}/*.cpp)
+	if(SDL)
+		set(VKCPP ${VKCPP} "Vulkan/GrManagerImplSdl.cpp")
+	endif()
 
-addAnkiSourceFiles(${SOURCES})
-addAnkiSourceFiles(${GR_BACKEND_SOURCES})
+	# Android builds add the Android-specific part of the Vulkan GrManager implementation
+	if(ANDROID)
+		set(VKCPP ${VKCPP} "Vulkan/GrManagerImplAndroid.cpp")
+	endif()
+
+	foreach(S ${VKCPP})
+		anki_add_source_files("${CMAKE_CURRENT_SOURCE_DIR}/${S}")
+	endforeach()
+endif()

+ 5 - 1
AnKi/Gr/Common.h

@@ -153,9 +153,13 @@ public:
 
 	/// RT.
 	Bool m_rayTracingEnabled = false;
+
+	/// 64 bit atomics.
+	Bool m_64bitAtomics = false;
 };
 ANKI_END_PACKED_STRUCT
-static_assert(sizeof(GpuDeviceCapabilities) == sizeof(PtrSize) * 4 + sizeof(U32) * 5 + sizeof(U8) * 3 + sizeof(Bool),
+static_assert(sizeof(GpuDeviceCapabilities)
+				  == sizeof(PtrSize) * 4 + sizeof(U32) * 5 + sizeof(U8) * 3 + sizeof(Bool) * 2,
 			  "Should be packed");
 
 /// Bindless related info.

+ 3 - 1
AnKi/Gr/ConfigDefs.h

@@ -10,8 +10,10 @@ ANKI_CONFIG_OPTION(gr_vsync, 0, 0, 1)
 ANKI_CONFIG_OPTION(gr_maxBindlessTextures, 256, 8, 1024)
 ANKI_CONFIG_OPTION(gr_maxBindlessImages, 32, 8, 1024)
 ANKI_CONFIG_OPTION(gr_rayTracing, 0, 0, 1, "Try enabling ray tracing")
+ANKI_CONFIG_OPTION(gr_64bitAtomics, 1, 0, 1)
 
 // Vulkan
 ANKI_CONFIG_OPTION(gr_diskShaderCacheMaxSize, 128_MB, 1_MB, 1_GB)
-ANKI_CONFIG_OPTION(gr_vkminor, 2, 2, 2)
+ANKI_CONFIG_OPTION(gr_vkminor, 1, 1, 1)
 ANKI_CONFIG_OPTION(gr_vkmajor, 1, 1, 1)
+ANKI_CONFIG_OPTION(gr_asyncCompute, 1, 0, 1, "Enable or not async compute")

+ 2 - 0
AnKi/Gr/Enums.h

@@ -155,6 +155,7 @@ public:
 	U8 m_blockSize; ///< The size of the block of a compressed format. Zero otherwise.
 	U8 m_shaderType; ///< It's 0 if the shader sees it as float, 1 if uint and 2 if signed int.
 	DepthStencilAspectBit m_depthStencil; ///< Depth/stencil mask.
+	const char* m_name;
 };
 
 /// Get info for a specific Format.
@@ -174,6 +175,7 @@ inline FormatInfo getFormatInfo(Format fmt)
 		out.m_blockSize = blockSize; \
 		out.m_shaderType = shaderType; \
 		out.m_depthStencil = DepthStencilAspectBit::depthStencil; \
+		out.m_name = #type; \
 		break;
 #include <AnKi/Gr/FormatDefs.h>
 #undef ANKI_FORMAT_DEF

+ 5 - 2
AnKi/Gr/GrObject.h

@@ -7,7 +7,6 @@
 
 #include <AnKi/Gr/Common.h>
 #include <AnKi/Util/Atomic.h>
-#include <AnKi/Util/NonCopyable.h>
 
 namespace anki
 {
@@ -37,13 +36,17 @@ enum class GrObjectType : U8
 };
 
 /// Base of all graphics objects.
-class GrObject : public NonCopyable
+class GrObject
 {
 public:
 	GrObject(GrManager* manager, GrObjectType type, CString name);
 
+	GrObject(const GrObject&) = delete; // Non-copyable
+
 	virtual ~GrObject();
 
+	GrObject& operator=(const GrObject&) = delete; // Non-copyable
+
 	GrObjectType getType() const
 	{
 		return m_type;

+ 5 - 1
AnKi/Gr/Utils/ClassGpuAllocator.h

@@ -68,15 +68,19 @@ private:
 };
 
 /// Class based allocator.
-class ClassGpuAllocator : public NonCopyable
+class ClassGpuAllocator
 {
 public:
 	ClassGpuAllocator()
 	{
 	}
 
+	ClassGpuAllocator(const ClassGpuAllocator&) = delete; // Non-copyable
+
 	~ClassGpuAllocator();
 
+	ClassGpuAllocator& operator=(const ClassGpuAllocator&) = delete; // Non-copyable
+
 	void init(GenericMemoryPoolAllocator<U8> alloc, ClassGpuAllocatorInterface* iface);
 
 	/// Allocate memory.

+ 5 - 1
AnKi/Gr/Utils/FrameGpuAllocator.h

@@ -14,7 +14,7 @@ namespace anki
 /// @{
 
 /// Manages pre-allocated GPU memory for per frame usage.
-class FrameGpuAllocator : public NonCopyable
+class FrameGpuAllocator
 {
 	friend class DynamicMemorySerializeCommand;
 
@@ -23,10 +23,14 @@ public:
 	{
 	}
 
+	FrameGpuAllocator(const FrameGpuAllocator&) = delete; // Non-copyable
+
 	~FrameGpuAllocator()
 	{
 	}
 
+	FrameGpuAllocator& operator=(const FrameGpuAllocator&) = delete; // Non-copyable
+
 	/// Initialize with pre-allocated always mapped memory.
 	/// @param size The size of the GPU buffer.
 	/// @param alignment The working alignment.

+ 2 - 2
AnKi/Gr/Utils/StackGpuAllocator.cpp

@@ -44,8 +44,8 @@ void StackGpuAllocator::init(GenericMemoryPoolAllocator<U8> alloc, StackGpuAlloc
 
 	m_alignment = iface->getMaxAlignment();
 	ANKI_ASSERT(m_alignment > 0);
-	ANKI_ASSERT(m_initialSize >= m_alignment);
-	ANKI_ASSERT((m_initialSize % m_alignment) == 0);
+
+	alignRoundUp(m_alignment, m_initialSize);
 }
 
 Error StackGpuAllocator::allocate(PtrSize size, StackGpuAllocatorHandle& handle)

+ 5 - 1
AnKi/Gr/Utils/StackGpuAllocator.h

@@ -57,13 +57,17 @@ public:
 };
 
 /// Linear based allocator.
-class StackGpuAllocator : public NonCopyable
+class StackGpuAllocator
 {
 public:
 	StackGpuAllocator() = default;
 
+	StackGpuAllocator(const StackGpuAllocator&) = delete; // Non-copyable
+
 	~StackGpuAllocator();
 
+	StackGpuAllocator& operator=(const StackGpuAllocator&) = delete; // Non-copyable
+
 	void init(GenericMemoryPoolAllocator<U8> alloc, StackGpuAllocatorInterface* iface);
 
 	/// Allocate memory.

+ 1 - 0
AnKi/Gr/Vulkan/AccelerationStructureImpl.cpp

@@ -120,6 +120,7 @@ Error AccelerationStructureImpl::init(const AccelerationStructureInitInfo& inf)
 			m_topLevelInfo.m_blas.emplaceBack(getAllocator(), inf.m_topLevel.m_instances[i].m_bottomLevel);
 		}
 
+		m_topLevelInfo.m_instancesBuffer->flush(0, MAX_PTR_SIZE);
 		m_topLevelInfo.m_instancesBuffer->unmap();
 
 		// Geom

+ 12 - 0
AnKi/Gr/Vulkan/Buffer.cpp

@@ -34,4 +34,16 @@ void Buffer::unmap()
 	self.unmap();
 }
 
+/// Vulkan backend of Buffer::flush(). Forwards the call to BufferImpl::flush().
+void Buffer::flush(PtrSize offset, PtrSize range) const
+{
+	// Presumably declares "self" as a const BufferImpl reference to this object. Confirm against the macro
+	ANKI_VK_SELF_CONST(BufferImpl);
+	self.flush(offset, range);
+}
+
+/// Vulkan backend of Buffer::invalidate(). Forwards the call to BufferImpl::invalidate().
+void Buffer::invalidate(PtrSize offset, PtrSize range) const
+{
+	// Presumably declares "self" as a const BufferImpl reference to this object. Confirm against the macro
+	ANKI_VK_SELF_CONST(BufferImpl);
+	self.invalidate(offset, range);
+}
+
 } // end namespace anki

+ 44 - 28
AnKi/Gr/Vulkan/BufferImpl.cpp

@@ -22,12 +22,24 @@ BufferImpl::~BufferImpl()
 	{
 		getGrManagerImpl().getGpuMemoryManager().freeMemory(m_memHandle);
 	}
+
+#if ANKI_EXTRA_CHECKS
+	if(m_needsFlush && m_flushCount.load() == 0)
+	{
+		ANKI_VK_LOGW("Buffer needed flushing but you never flushed: %s", getName().cstr());
+	}
+
+	if(m_needsInvalidate && m_invalidateCount.load() == 0)
+	{
+		ANKI_VK_LOGW("Buffer needed invalidation but you never invalidated: %s", getName().cstr());
+	}
+#endif
 }
 
 Error BufferImpl::init(const BufferInitInfo& inf)
 {
 	ANKI_ASSERT(!isCreated());
-	const Bool exposeGpuAddress = !!(getGrManagerImpl().getExtensions() & VulkanExtensions::KHR_RAY_TRACING)
+	const Bool exposeGpuAddress = !!(getGrManagerImpl().getExtensions() & VulkanExtensions::KHR_BUFFER_DEVICE_ADDRESS)
 								  && !!(inf.m_usage & ~BufferUsageBit::ALL_TRANSFER);
 
 	PtrSize size = inf.m_size;
@@ -37,9 +49,14 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	ANKI_ASSERT(size > 0);
 	ANKI_ASSERT(usage != BufferUsageBit::NONE);
 
+	m_mappedMemoryRangeAlignment = getGrManagerImpl().getPhysicalDeviceProperties().limits.nonCoherentAtomSize;
+
 	// Align the size to satisfy fill buffer
 	alignRoundUp(4, size);
 
+	// Align to satisfy the flush and invalidate
+	alignRoundUp(m_mappedMemoryRangeAlignment, size);
+
 	// Create the buffer
 	VkBufferCreateInfo ci = {};
 	ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
@@ -47,11 +64,11 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	ci.usage = convertBufferUsageBit(usage);
 	if(exposeGpuAddress)
 	{
-		ci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+		ci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR;
 	}
-	ci.sharingMode = VK_SHARING_MODE_CONCURRENT;
 	ci.queueFamilyIndexCount = getGrManagerImpl().getQueueFamilies().getSize();
 	ci.pQueueFamilyIndices = &getGrManagerImpl().getQueueFamilies()[0];
+	ci.sharingMode = (ci.queueFamilyIndexCount > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
 	ANKI_VK_CHECK(vkCreateBuffer(getDevice(), &ci, nullptr, &m_handle));
 	getGrManagerImpl().trySetVulkanHandleName(inf.getName(), VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, m_handle);
 
@@ -64,8 +81,9 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	{
 		// Only write, probably for uploads
 
-		VkMemoryPropertyFlags preferDeviceLocal;
-		VkMemoryPropertyFlags avoidDeviceLocal;
+		VkMemoryPropertyFlags preferDeviceLocal = 0;
+		VkMemoryPropertyFlags avoidDeviceLocal = 0;
+#if !ANKI_PLATFORM_MOBILE
 		if((usage & (~BufferUsageBit::ALL_TRANSFER)) != BufferUsageBit::NONE)
 		{
 			// Will be used for something other than transfer, try to put it in the device
@@ -78,6 +96,7 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 			preferDeviceLocal = 0;
 			avoidDeviceLocal = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
 		}
+#endif
 
 		// Device & host & coherent but not cached
 		memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(
@@ -88,19 +107,13 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 		// Fallback: host & coherent and not cached
 		if(memIdx == MAX_U32)
 		{
+#if !ANKI_PLATFORM_MOBILE
 			ANKI_VK_LOGW("Using a fallback mode for write-only buffer");
+#endif
 			memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(
 				req.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
 				VK_MEMORY_PROPERTY_HOST_CACHED_BIT | avoidDeviceLocal);
 		}
-
-		// Fallback: just host
-		if(memIdx == MAX_U32)
-		{
-			ANKI_VK_LOGW("Using a fallback mode for write-only buffer");
-			memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(req.memoryTypeBits,
-																			 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
-		}
 	}
 	else if(!!(access & BufferMapAccessBit::READ))
 	{
@@ -116,18 +129,12 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 		// Fallback: Just cached
 		if(memIdx == MAX_U32)
 		{
+#if !ANKI_PLATFORM_MOBILE
 			ANKI_VK_LOGW("Using a fallback mode for read/write buffer");
+#endif
 			memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(
 				req.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, 0);
 		}
-
-		// Fallback: Just host
-		if(memIdx == MAX_U32)
-		{
-			ANKI_VK_LOGW("Using a fallback mode for read/write buffer");
-			memIdx = getGrManagerImpl().getGpuMemoryManager().findMemoryType(req.memoryTypeBits,
-																			 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
-		}
 	}
 	else
 	{
@@ -152,8 +159,19 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	const VkPhysicalDeviceMemoryProperties& props = getGrManagerImpl().getMemoryProperties();
 	m_memoryFlags = props.memoryTypes[memIdx].propertyFlags;
 
+	if(!!(access & BufferMapAccessBit::READ) && !(m_memoryFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+	{
+		m_needsInvalidate = true;
+	}
+
+	if(!!(access & BufferMapAccessBit::WRITE) && !(m_memoryFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+	{
+		m_needsFlush = true;
+	}
+
 	// Allocate
-	getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, U32(req.alignment), true, m_memHandle);
+	const U32 alignment = U32(max(m_mappedMemoryRangeAlignment, req.alignment));
+	getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, alignment, true, m_memHandle);
 
 	// Bind mem to buffer
 	{
@@ -164,14 +182,14 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	// Get GPU buffer address
 	if(exposeGpuAddress)
 	{
-		VkBufferDeviceAddressInfo info = {};
-		info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
+		VkBufferDeviceAddressInfoKHR info = {};
+		info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR;
 		info.buffer = m_handle;
-		m_gpuAddress = vkGetBufferDeviceAddress(getDevice(), &info);
+		m_gpuAddress = vkGetBufferDeviceAddressKHR(getDevice(), &info);
 
 		if(m_gpuAddress == 0)
 		{
-			ANKI_VK_LOGE("vkGetBufferDeviceAddress() failed");
+			ANKI_VK_LOGE("vkGetBufferDeviceAddressKHR() failed");
 			return Error::FUNCTION_FAILED;
 		}
 	}
@@ -203,8 +221,6 @@ void* BufferImpl::map(PtrSize offset, PtrSize range, BufferMapAccessBit access)
 	m_mapped = true;
 #endif
 
-	// TODO Flush or invalidate caches
-
 	return static_cast<void*>(static_cast<U8*>(ptr) + offset);
 }
 

+ 54 - 1
AnKi/Gr/Vulkan/BufferImpl.h

@@ -21,6 +21,8 @@ class BufferImpl final : public Buffer, public VulkanObject<Buffer, BufferImpl>
 public:
 	BufferImpl(GrManager* manager, CString name)
 		: Buffer(manager, name)
+		, m_needsFlush(false)
+		, m_needsInvalidate(false)
 	{
 	}
 
@@ -38,7 +40,6 @@ public:
 #if ANKI_EXTRA_CHECKS
 		m_mapped = false;
 #endif
-		// TODO Flush or invalidate caches
 	}
 
 	VkBuffer getHandle() const
@@ -62,14 +63,45 @@ public:
 							VkAccessFlags& srcAccesses, VkPipelineStageFlags& dstStages,
 							VkAccessFlags& dstAccesses) const;
 
+	/// Flush a range of the mapped memory. It's a no-op when m_needsFlush is false.
+	/// @param offset The starting offset inside the buffer.
+	/// @param range The range to flush or MAX_PTR_SIZE.
+	ANKI_FORCE_INLINE void flush(PtrSize offset, PtrSize range) const
+	{
+		ANKI_ASSERT(!!(m_access & BufferMapAccessBit::WRITE) && "No need to flush when the CPU doesn't write");
+
+		// Nothing to do for buffers that were not flagged as needing a flush
+		if(!m_needsFlush)
+		{
+			return;
+		}
+
+		const VkMappedMemoryRange vkrange = setVkMappedMemoryRange(offset, range);
+		ANKI_VK_CHECKF(vkFlushMappedMemoryRanges(getDevice(), 1, &vkrange));
+
+#if ANKI_EXTRA_CHECKS
+		// Count the flushes that actually reached Vulkan
+		m_flushCount.fetchAdd(1);
+#endif
+	}
+
+	/// Invalidate a range of the mapped memory. It's a no-op when m_needsInvalidate is false.
+	/// @param offset The starting offset inside the buffer.
+	/// @param range The range to invalidate or MAX_PTR_SIZE.
+	ANKI_FORCE_INLINE void invalidate(PtrSize offset, PtrSize range) const
+	{
+		ANKI_ASSERT(!!(m_access & BufferMapAccessBit::READ) && "No need to invalidate when the CPU doesn't read");
+
+		// Nothing to do for buffers that were not flagged as needing an invalidation
+		if(!m_needsInvalidate)
+		{
+			return;
+		}
+
+		const VkMappedMemoryRange vkrange = setVkMappedMemoryRange(offset, range);
+		ANKI_VK_CHECKF(vkInvalidateMappedMemoryRanges(getDevice(), 1, &vkrange));
+
+#if ANKI_EXTRA_CHECKS
+		// Count the invalidations that actually reached Vulkan
+		m_invalidateCount.fetchAdd(1);
+#endif
+	}
+
 private:
 	VkBuffer m_handle = VK_NULL_HANDLE;
 	GpuMemoryHandle m_memHandle;
 	VkMemoryPropertyFlags m_memoryFlags = 0;
 	PtrSize m_actualSize = 0;
+	PtrSize m_mappedMemoryRangeAlignment = 0; ///< Cache this value.
+	Bool m_needsFlush : 1;
+	Bool m_needsInvalidate : 1;
 
 #if ANKI_EXTRA_CHECKS
 	Bool m_mapped = false;
+	mutable Atomic<U32> m_flushCount = {0};
+	mutable Atomic<U32> m_invalidateCount = {0};
 #endif
 
 	Bool isCreated() const
@@ -79,6 +111,27 @@ private:
 
 	static VkPipelineStageFlags computePplineStage(BufferUsageBit usage);
 	static VkAccessFlags computeAccessMask(BufferUsageBit usage);
+
+	/// Build the VkMappedMemoryRange that covers a sub-range of this buffer. Used by flush() and invalidate().
+	/// @param offset The starting offset inside the buffer (not inside the memory object). Must be less than m_size.
+	/// @param range The number of bytes to cover or MAX_PTR_SIZE to cover from @a offset until the end of the buffer.
+	/// @return The range in memory object space, widened so that both ends are multiples of
+	///         m_mappedMemoryRangeAlignment.
+	ANKI_FORCE_INLINE VkMappedMemoryRange setVkMappedMemoryRange(PtrSize offset, PtrSize range) const
+	{
+		ANKI_ASSERT(offset < m_size);
+
+		// Work with a [begin, end) pair in memory object space. Aligning the two ends independently guarantees that
+		// rounding the beginning down never drops bytes from the tail of the requested region
+		const PtrSize bufferEnd = m_memHandle.m_offset + m_actualSize;
+		PtrSize begin = m_memHandle.m_offset + offset;
+		PtrSize end = (range == MAX_PTR_SIZE) ? bufferEnd : (begin + range);
+
+		// Widen outwards to the alignment that flush and invalidate require
+		alignRoundDown(m_mappedMemoryRangeAlignment, begin);
+		alignRoundUp(m_mappedMemoryRangeAlignment, end);
+
+		// NOTE(review): assumes m_memHandle.m_offset and m_actualSize are multiples of m_mappedMemoryRangeAlignment,
+		// otherwise the widened range can leave the buffer. Confirm against BufferImpl::init()
+		ANKI_ASSERT(end <= bufferEnd);
+
+		VkMappedMemoryRange vkrange = {};
+		vkrange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+		vkrange.memory = m_memHandle.m_memory;
+		vkrange.offset = begin;
+		vkrange.size = end - begin;
+
+		return vkrange;
+	}
 };
 /// @}
 

+ 32 - 10
AnKi/Gr/Vulkan/CommandBufferFactory.cpp

@@ -9,14 +9,28 @@
 namespace anki
 {
 
+/// Choose the queue type that a command buffer with the given flags will use.
+/// @param flags Must contain exactly one of GENERAL_WORK and COMPUTE_WORK.
+/// @param queueFamilies The family index of every queue type. MAX_U32 is treated as "no such family".
+/// @return COMPUTE for compute work when a compute family exists. GENERAL in every other case.
+static VulkanQueueType getQueueTypeFromCommandBufferFlags(CommandBufferFlag flags,
+														  const VulkanQueueFamilies& queueFamilies)
+{
+	ANKI_ASSERT(!!(flags & CommandBufferFlag::GENERAL_WORK) ^ !!(flags & CommandBufferFlag::COMPUTE_WORK));
+
+	const Bool wantsGeneral = !!(flags & CommandBufferFlag::GENERAL_WORK);
+	if(wantsGeneral || queueFamilies[VulkanQueueType::COMPUTE] == MAX_U32)
+	{
+		// Either general work was requested or there is no compute family to take the compute work
+		ANKI_ASSERT(queueFamilies[VulkanQueueType::GENERAL] != MAX_U32);
+		return VulkanQueueType::GENERAL;
+	}
+
+	return VulkanQueueType::COMPUTE;
+}
+
 void MicroCommandBuffer::destroy()
 {
 	reset();
 
 	if(m_handle)
 	{
-		vkFreeCommandBuffers(m_threadAlloc->m_factory->m_dev,
-							 m_threadAlloc->m_pools[getQueueTypeFromCommandBufferFlags(m_flags)], 1, &m_handle);
+		vkFreeCommandBuffers(m_threadAlloc->m_factory->m_dev, m_threadAlloc->m_pools[m_queue], 1, &m_handle);
 		m_handle = {};
 	}
 }
@@ -40,8 +54,13 @@ void MicroCommandBuffer::reset()
 
 Error CommandBufferThreadAllocator::init()
 {
-	for(QueueType qtype : EnumIterable<QueueType>())
+	for(VulkanQueueType qtype : EnumIterable<VulkanQueueType>())
 	{
+		if(m_factory->m_queueFamilies[qtype] == MAX_U32)
+		{
+			continue;
+		}
+
 		VkCommandPoolCreateInfo ci = {};
 		ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
 		ci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
@@ -72,7 +91,7 @@ void CommandBufferThreadAllocator::destroyLists()
 	{
 		for(U j = 0; j < 2; ++j)
 		{
-			for(QueueType qtype : EnumIterable<QueueType>())
+			for(VulkanQueueType qtype : EnumIterable<VulkanQueueType>())
 			{
 				CmdbType& type = m_types[i][j][qtype];
 
@@ -86,12 +105,12 @@ void CommandBufferThreadAllocator::destroyLists()
 
 void CommandBufferThreadAllocator::destroy()
 {
-	for(VkCommandPool pool : m_pools)
+	for(VkCommandPool& pool : m_pools)
 	{
 		if(pool)
 		{
 			vkDestroyCommandPool(m_factory->m_dev, pool, nullptr);
-			pool = {};
+			pool = VK_NULL_HANDLE;
 		}
 	}
 
@@ -106,7 +125,9 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 
 	const Bool secondLevel = !!(cmdbFlags & CommandBufferFlag::SECOND_LEVEL);
 	const Bool smallBatch = !!(cmdbFlags & CommandBufferFlag::SMALL_BATCH);
-	CmdbType& type = m_types[secondLevel][smallBatch][getQueueTypeFromCommandBufferFlags(cmdbFlags)];
+	const VulkanQueueType queue = getQueueTypeFromCommandBufferFlags(cmdbFlags, m_factory->m_queueFamilies);
+
+	CmdbType& type = m_types[secondLevel][smallBatch][queue];
 
 	// Move the deleted to (possibly) in-use or ready
 	{
@@ -187,7 +208,7 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 
 		VkCommandBufferAllocateInfo ci = {};
 		ci.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
-		ci.commandPool = m_pools[getQueueTypeFromCommandBufferFlags(cmdbFlags)];
+		ci.commandPool = m_pools[queue];
 		ci.level = (secondLevel) ? VK_COMMAND_BUFFER_LEVEL_SECONDARY : VK_COMMAND_BUFFER_LEVEL_PRIMARY;
 		ci.commandBufferCount = 1;
 
@@ -207,6 +228,7 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 
 		newCmdb->m_handle = cmdb;
 		newCmdb->m_flags = cmdbFlags;
+		newCmdb->m_queue = queue;
 
 		out = newCmdb;
 
@@ -230,13 +252,13 @@ void CommandBufferThreadAllocator::deleteCommandBuffer(MicroCommandBuffer* ptr)
 	const Bool secondLevel = !!(ptr->m_flags & CommandBufferFlag::SECOND_LEVEL);
 	const Bool smallBatch = !!(ptr->m_flags & CommandBufferFlag::SMALL_BATCH);
 
-	CmdbType& type = m_types[secondLevel][smallBatch][getQueueTypeFromCommandBufferFlags(ptr->m_flags)];
+	CmdbType& type = m_types[secondLevel][smallBatch][ptr->m_queue];
 
 	LockGuard<Mutex> lock(type.m_deletedMtx);
 	type.m_deletedCmdbs.pushBack(ptr);
 }
 
-Error CommandBufferFactory::init(GrAllocator<U8> alloc, VkDevice dev, Array<U32, U(QueueType::COUNT)> queueFamilies)
+Error CommandBufferFactory::init(GrAllocator<U8> alloc, VkDevice dev, const VulkanQueueFamilies& queueFamilies)
 {
 	ANKI_ASSERT(dev);
 

+ 16 - 11
AnKi/Gr/Vulkan/CommandBufferFactory.h

@@ -19,12 +19,6 @@ class CommandBufferThreadAllocator;
 /// @addtogroup vulkan
 /// @{
 
-inline QueueType getQueueTypeFromCommandBufferFlags(CommandBufferFlag flags)
-{
-	ANKI_ASSERT(!!(flags & CommandBufferFlag::GENERAL_WORK) ^ !!(flags & CommandBufferFlag::COMPUTE_WORK));
-	return !!(flags & CommandBufferFlag::GENERAL_WORK) ? QueueType::GENERAL : QueueType::COMPUTE;
-}
-
 class MicroCommandBuffer : public IntrusiveListEnabled<MicroCommandBuffer>
 {
 	friend class CommandBufferThreadAllocator;
@@ -73,6 +67,12 @@ public:
 		return m_flags;
 	}
 
+	VulkanQueueType getVulkanQueueType() const
+	{ // Returns the queue type stored in m_queue for this command buffer.
+		ANKI_ASSERT(m_queue != VulkanQueueType::COUNT); // COUNT is the default value of m_queue, i.e. no queue has been assigned yet
+		return m_queue;
+	}
+
 private:
 	static constexpr U32 MAX_REF_OBJECT_SEARCH = 16;
 
@@ -87,6 +87,7 @@ private:
 	CommandBufferThreadAllocator* m_threadAlloc;
 	Atomic<I32> m_refcount = {0};
 	CommandBufferFlag m_flags = CommandBufferFlag::NONE;
+	VulkanQueueType m_queue = VulkanQueueType::COUNT;
 
 	void destroy();
 	void reset();
@@ -161,7 +162,7 @@ public:
 private:
 	CommandBufferFactory* m_factory;
 	ThreadId m_tid;
-	Array<VkCommandPool, U(QueueType::COUNT)> m_pools = {};
+	Array<VkCommandPool, U(VulkanQueueType::COUNT)> m_pools = {};
 
 	class CmdbType
 	{
@@ -177,14 +178,14 @@ private:
 	Atomic<U32> m_createdCmdbs = {0};
 #endif
 
-	Array3d<CmdbType, 2, 2, U(QueueType::COUNT)> m_types;
+	Array3d<CmdbType, 2, 2, U(VulkanQueueType::COUNT)> m_types;
 
 	void destroyList(IntrusiveList<MicroCommandBuffer>& list);
 	void destroyLists();
 };
 
 /// Command buffer object recycler.
-class CommandBufferFactory : public NonCopyable
+class CommandBufferFactory
 {
 	friend class CommandBufferThreadAllocator;
 	friend class MicroCommandBuffer;
@@ -192,9 +193,13 @@ class CommandBufferFactory : public NonCopyable
 public:
 	CommandBufferFactory() = default;
 
+	CommandBufferFactory(const CommandBufferFactory&) = delete; // Non-copyable
+
 	~CommandBufferFactory() = default;
 
-	ANKI_USE_RESULT Error init(GrAllocator<U8> alloc, VkDevice dev, Array<U32, U(QueueType::COUNT)> queueFamilies);
+	CommandBufferFactory& operator=(const CommandBufferFactory&) = delete; // Non-copyable
+
+	ANKI_USE_RESULT Error init(GrAllocator<U8> alloc, VkDevice dev, const VulkanQueueFamilies& queueFamilies);
 
 	void destroy();
 
@@ -210,7 +215,7 @@ public:
 private:
 	GrAllocator<U8> m_alloc;
 	VkDevice m_dev = VK_NULL_HANDLE;
-	Array<U32, U(QueueType::COUNT)> m_queueFamilies;
+	VulkanQueueFamilies m_queueFamilies;
 
 	DynamicArray<CommandBufferThreadAllocator*> m_threadAllocs;
 	RWMutex m_threadAllocMtx;

+ 4 - 3
AnKi/Gr/Vulkan/CommandBufferImpl.cpp

@@ -52,7 +52,8 @@ Error CommandBufferImpl::init(const CommandBufferInitInfo& init)
 		m_activeFb = init.m_framebuffer;
 		m_colorAttachmentUsages = init.m_colorAttachmentUsages;
 		m_depthStencilAttachmentUsage = init.m_depthStencilAttachmentUsage;
-		m_state.beginRenderPass(m_activeFb);
+		m_state.beginRenderPass(static_cast<FramebufferImpl*>(m_activeFb.get()));
+		m_microCmdb->pushObjectRef(m_activeFb);
 	}
 
 	for(DescriptorSetState& state : m_dsetState)
@@ -148,10 +149,10 @@ void CommandBufferImpl::beginRenderPass(FramebufferPtr fb,
 
 void CommandBufferImpl::beginRenderPassInternal()
 {
-	m_state.beginRenderPass(m_activeFb);
-
 	FramebufferImpl& impl = static_cast<FramebufferImpl&>(*m_activeFb);
 
+	m_state.beginRenderPass(&impl);
+
 	VkRenderPassBeginInfo bi = {};
 	bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
 	bi.clearValueCount = impl.getAttachmentCount();

+ 2 - 2
AnKi/Gr/Vulkan/CommandBufferImpl.inl.h

@@ -631,7 +631,7 @@ inline void CommandBufferImpl::drawcallCommon()
 	// Get or create ppline
 	Pipeline ppline;
 	Bool stateDirty;
-	m_graphicsProg->getPipelineFactory().newPipeline(m_state, ppline, stateDirty);
+	m_graphicsProg->getPipelineFactory().getOrCreatePipeline(m_state, ppline, stateDirty);
 
 	if(stateDirty)
 	{
@@ -840,7 +840,7 @@ inline void CommandBufferImpl::bindShaderProgram(ShaderProgramPtr& prog)
 		m_graphicsProg = &impl;
 		m_computeProg = nullptr; // Unbind the compute prog. Doesn't work like vulkan
 		m_rtProg = nullptr; // See above
-		m_state.bindShaderProgram(prog);
+		m_state.bindShaderProgram(&impl);
 	}
 	else if(!!(impl.getStages() & ShaderTypeBit::COMPUTE))
 	{

+ 3 - 0
AnKi/Gr/Vulkan/Common.cpp

@@ -5,6 +5,9 @@
 
 #include <AnKi/Gr/Vulkan/Common.h>
 
+#define VOLK_IMPLEMENTATION
+#include <Volk/volk.h>
+
 namespace anki
 {
 

+ 25 - 12
AnKi/Gr/Vulkan/Common.h

@@ -12,8 +12,10 @@
 #	define VK_USE_PLATFORM_XLIB_KHR 1
 #elif ANKI_OS_WINDOWS
 #	define VK_USE_PLATFORM_WIN32_KHR 1
+#elif ANKI_OS_ANDROID
+#	define VK_USE_PLATFORM_ANDROID_KHR 1
 #else
-#	error TODO
+#	error Not implemented
 #endif
 #include <Volk/volk.h>
 
@@ -55,24 +57,33 @@ enum class DescriptorType : U8
 	COUNT
 };
 
-enum class VulkanExtensions : U16
+enum class VulkanExtensions : U32 // Bit mask with one flag per extension; the flags now reach bit 20, which does not fit in the previous U16
 {
 	NONE = 0,
 	KHR_XCB_SURFACE = 1 << 1,
 	KHR_XLIB_SURFACE = 1 << 2,
 	KHR_WIN32_SURFACE = 1 << 3,
-	KHR_SWAPCHAIN = 1 << 4,
-	KHR_SURFACE = 1 << 5,
-	EXT_DEBUG_MARKER = 1 << 6,
-	EXT_DEBUG_REPORT = 1 << 9,
-	AMD_SHADER_INFO = 1 << 10,
-	AMD_RASTERIZATION_ORDER = 1 << 11,
-	KHR_RAY_TRACING = 1 << 12,
-	PIPELINE_EXECUTABLE_PROPERTIES = 1 << 13,
+	KHR_ANDROID_SURFACE = 1 << 4, // New flag; every flag below it is renumbered into a contiguous bit range
+	KHR_SWAPCHAIN = 1 << 5,
+	KHR_SURFACE = 1 << 6,
+	EXT_DEBUG_MARKER = 1 << 7,
+	EXT_DEBUG_REPORT = 1 << 8,
+	AMD_SHADER_INFO = 1 << 9,
+	AMD_RASTERIZATION_ORDER = 1 << 10,
+	KHR_RAY_TRACING = 1 << 11,
+	KHR_PIPELINE_EXECUTABLE_PROPERTIES = 1 << 12, // Renamed from PIPELINE_EXECUTABLE_PROPERTIES
+	EXT_DESCRIPTOR_INDEXING = 1 << 13,
+	KHR_BUFFER_DEVICE_ADDRESS = 1 << 14,
+	EXT_SCALAR_BLOCK_LAYOUT = 1 << 15,
+	KHR_TIMELINE_SEMAPHORE = 1 << 16,
+	KHR_SHADER_FLOAT16_INT8 = 1 << 17,
+	KHR_SHADER_ATOMIC_INT64 = 1 << 18,
+	KHR_SPIRV_1_4 = 1 << 19,
+	KHR_SHADER_FLOAT_CONTROLS = 1 << 20,
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VulkanExtensions)
 
-enum class QueueType : U8
+enum class VulkanQueueType : U8
 {
 	GENERAL,
 	COMPUTE,
@@ -80,7 +91,9 @@ enum class QueueType : U8
 	COUNT,
 	FIRST = 0
 };
-ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(QueueType)
+ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VulkanQueueType)
+
+using VulkanQueueFamilies = Array<U32, U32(VulkanQueueType::COUNT)>;
 
 /// @name Constants
 /// @{

+ 7 - 1
AnKi/Gr/Vulkan/DeferredBarrierFactory.h

@@ -71,12 +71,18 @@ public:
 using MicroDeferredBarrierPtr = IntrusivePtr<MicroDeferredBarrier, MicroDeferredBarrierPtrDeleter>;
 
 /// MicroDeferredBarrier factory.
-class DeferredBarrierFactory : public NonCopyable
+class DeferredBarrierFactory
 {
 	friend class MicroDeferredBarrierPtrDeleter;
 	friend class MicroDeferredBarrier;
 
 public:
+	DeferredBarrierFactory() = default;
+
+	DeferredBarrierFactory(const DeferredBarrierFactory&) = delete; // Non-copyable
+
+	DeferredBarrierFactory& operator=(const DeferredBarrierFactory&) = delete; // Non-copyable
+
 	void init(GrAllocator<U8> alloc, VkDevice dev)
 	{
 		ANKI_ASSERT(dev);

+ 5 - 1
AnKi/Gr/Vulkan/DescriptorSet.cpp

@@ -283,9 +283,13 @@ public:
 };
 
 /// Per thread allocator.
-class alignas(ANKI_CACHE_LINE_SIZE) DSThreadAllocator : public NonCopyable
+class alignas(ANKI_CACHE_LINE_SIZE) DSThreadAllocator
 {
 public:
+	DSThreadAllocator(const DSThreadAllocator&) = delete; // Non-copyable
+
+	DSThreadAllocator& operator=(const DSThreadAllocator&) = delete; // Non-copyable
+
 	const DSLayoutCacheEntry* m_layoutEntry; ///< Know your father.
 
 	ThreadId m_tid;

+ 5 - 1
AnKi/Gr/Vulkan/FenceFactory.h

@@ -17,7 +17,7 @@ class FenceFactory;
 /// @{
 
 /// Fence wrapper over VkFence.
-class MicroFence : public NonCopyable
+class MicroFence
 {
 	friend class FenceFactory;
 	friend class MicroFencePtrDeleter;
@@ -25,8 +25,12 @@ class MicroFence : public NonCopyable
 public:
 	MicroFence(FenceFactory* f);
 
+	MicroFence(const MicroFence&) = delete; // Non-copyable
+
 	~MicroFence();
 
+	MicroFence& operator=(const MicroFence&) = delete; // Non-copyable
+
 	const VkFence& getHandle() const
 	{
 		ANKI_ASSERT(m_handle);

+ 5 - 1
AnKi/Gr/Vulkan/GpuMemoryManager.h

@@ -35,13 +35,17 @@ private:
 };
 
 /// Dynamic GPU memory allocator for all types.
-class GpuMemoryManager : public NonCopyable
+class GpuMemoryManager
 {
 public:
 	GpuMemoryManager() = default;
 
+	GpuMemoryManager(const GpuMemoryManager&) = delete; // Non-copyable
+
 	~GpuMemoryManager();
 
+	GpuMemoryManager& operator=(const GpuMemoryManager&) = delete; // Non-copyable
+
 	void init(VkPhysicalDevice pdev, VkDevice dev, GrAllocator<U8> alloc, Bool exposeBufferGpuAddress);
 
 	void destroy();

+ 232 - 91
AnKi/Gr/Vulkan/GrManagerImpl.cpp

@@ -98,7 +98,7 @@ GrManagerImpl::~GrManagerImpl()
 
 Error GrManagerImpl::init(const GrManagerInitInfo& init)
 {
-	Error err = initInternal(init);
+	const Error err = initInternal(init);
 	if(err)
 	{
 		ANKI_VK_LOGE("Vulkan initialization failed");
@@ -115,9 +115,16 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	ANKI_CHECK(initSurface(init));
 	ANKI_CHECK(initDevice(init));
 
-	for(QueueType qtype : EnumIterable<QueueType>())
+	for(VulkanQueueType qtype : EnumIterable<VulkanQueueType>())
 	{
-		vkGetDeviceQueue(m_device, m_queueFamilyIndices[qtype], 0, &m_queues[qtype]);
+		if(m_queueFamilyIndices[qtype] != MAX_U32)
+		{
+			vkGetDeviceQueue(m_device, m_queueFamilyIndices[qtype], 0, &m_queues[qtype]);
+		}
+		else
+		{
+			m_queues[qtype] = VK_NULL_HANDLE;
+		}
 	}
 
 	m_swapchainFactory.init(this, init.m_config->getBool("gr_vsync"));
@@ -146,7 +153,7 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	// Set m_r8g8b8ImagesSupported
 	{
 		VkImageFormatProperties props = {};
-		VkResult res = vkGetPhysicalDeviceImageFormatProperties(
+		const VkResult res = vkGetPhysicalDeviceImageFormatProperties(
 			m_physicalDevice, VK_FORMAT_R8G8B8_UNORM, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
 			VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, &props);
 
@@ -166,7 +173,7 @@ Error GrManagerImpl::initInternal(const GrManagerInitInfo& init)
 	// Set m_s8ImagesSupported
 	{
 		VkImageFormatProperties props = {};
-		VkResult res = vkGetPhysicalDeviceImageFormatProperties(
+		const VkResult res = vkGetPhysicalDeviceImageFormatProperties(
 			m_physicalDevice, VK_FORMAT_S8_UINT, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
 			VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 0, &props);
 
@@ -341,8 +348,14 @@ Error GrManagerImpl::initInstance(const GrManagerInitInfo& init)
 				m_extensions |= VulkanExtensions::KHR_WIN32_SURFACE;
 				instExtensions[instExtensionCount++] = VK_KHR_WIN32_SURFACE_EXTENSION_NAME;
 			}
+#elif ANKI_OS_ANDROID
+			if(CString(instExtensionInf[i].extensionName) == VK_KHR_ANDROID_SURFACE_EXTENSION_NAME)
+			{
+				m_extensions |= VulkanExtensions::KHR_ANDROID_SURFACE;
+				instExtensions[instExtensionCount++] = VK_KHR_ANDROID_SURFACE_EXTENSION_NAME;
+			}
 #else
-#	error TODO
+#	error Not implemented
 #endif
 			else if(CString(instExtensionInf[i].extensionName) == VK_KHR_SURFACE_EXTENSION_NAME)
 			{
@@ -493,45 +506,58 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 		{
 			if((queueInfos[i].queueFlags & GENERAL_QUEUE_FLAGS) == GENERAL_QUEUE_FLAGS)
 			{
-				m_queueFamilyIndices[QueueType::GENERAL] = i;
+				m_queueFamilyIndices[VulkanQueueType::GENERAL] = i;
 			}
 			else if((queueInfos[i].queueFlags & VK_QUEUE_COMPUTE_BIT)
 					&& !(queueInfos[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
 			{
 				// This must be the async compute
-				m_queueFamilyIndices[QueueType::COMPUTE] = i;
+				m_queueFamilyIndices[VulkanQueueType::COMPUTE] = i;
 			}
 		}
 	}
 
-	if(m_queueFamilyIndices[QueueType::GENERAL] == MAX_U32)
+	if(m_queueFamilyIndices[VulkanQueueType::GENERAL] == MAX_U32)
 	{
 		ANKI_VK_LOGE("Couldn't find a queue family with graphics+compute+transfer+present. "
 					 "Something is wrong");
 		return Error::FUNCTION_FAILED;
 	}
 
-	if(m_queueFamilyIndices[QueueType::COMPUTE] == MAX_U32)
+	if(!init.m_config->getBool("gr_asyncCompute"))
 	{
-		ANKI_VK_LOGE("Couldn't find an async compute queue");
-		return Error::FUNCTION_FAILED;
+		m_queueFamilyIndices[VulkanQueueType::COMPUTE] = MAX_U32;
 	}
 
-	const F32 priority = 1.0;
-	Array<VkDeviceQueueCreateInfo, U32(QueueType::COUNT)> q = {};
-	for(QueueType qtype : EnumIterable<QueueType>())
+	if(m_queueFamilyIndices[VulkanQueueType::COMPUTE] == MAX_U32)
 	{
-		q[qtype].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
-		q[qtype].queueFamilyIndex = m_queueFamilyIndices[qtype];
-		q[qtype].queueCount = 1;
-		q[qtype].pQueuePriorities = &priority;
+		ANKI_VK_LOGW("Couldn't find an async compute queue. Will try to use the general queue instead");
 	}
+	else
+	{
+		ANKI_VK_LOGI("Async compute is enabled");
+	}
+
+	const F32 priority = 1.0f;
+	Array<VkDeviceQueueCreateInfo, U32(VulkanQueueType::COUNT)> q = {};
 
 	VkDeviceCreateInfo ci = {};
 	ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-	ci.queueCreateInfoCount = q.getSize();
 	ci.pQueueCreateInfos = &q[0];
 
+	for(VulkanQueueType qtype : EnumIterable<VulkanQueueType>())
+	{
+		if(m_queueFamilyIndices[qtype] != MAX_U32)
+		{
+			q[qtype].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+			q[qtype].queueFamilyIndex = m_queueFamilyIndices[qtype];
+			q[qtype].queueCount = 1;
+			q[qtype].pQueuePriorities = &priority;
+
+			++ci.queueCreateInfoCount;
+		}
+	}
+
 	// Extensions
 	U32 extCount = 0;
 	vkEnumerateDeviceExtensionProperties(m_physicalDevice, nullptr, &extCount, nullptr);
@@ -603,7 +629,7 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 			else if(extensionName == VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME
 					&& init.m_config->getBool("core_displayStats"))
 			{
-				m_extensions |= VulkanExtensions::PIPELINE_EXECUTABLE_PROPERTIES;
+				m_extensions |= VulkanExtensions::KHR_PIPELINE_EXECUTABLE_PROPERTIES;
 				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
 			}
 			else if(extensionName == VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME
@@ -611,6 +637,47 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 			{
 				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
 			}
+			else if(extensionName == VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME)
+			{
+				m_extensions |= VulkanExtensions::EXT_DESCRIPTOR_INDEXING;
+				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
+			}
+			else if(extensionName == VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)
+			{
+				m_extensions |= VulkanExtensions::KHR_BUFFER_DEVICE_ADDRESS;
+				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
+			}
+			else if(extensionName == VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME)
+			{
+				m_extensions |= VulkanExtensions::EXT_SCALAR_BLOCK_LAYOUT;
+				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
+			}
+			else if(extensionName == VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME)
+			{
+				m_extensions |= VulkanExtensions::KHR_TIMELINE_SEMAPHORE;
+				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
+			}
+			else if(extensionName == VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)
+			{
+				m_extensions |= VulkanExtensions::KHR_SHADER_FLOAT16_INT8;
+				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
+			}
+			else if(extensionName == VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME
+					&& init.m_config->getBool("gr_64bitAtomics"))
+			{
+				m_extensions |= VulkanExtensions::KHR_SHADER_ATOMIC_INT64;
+				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
+			}
+			else if(extensionName == VK_KHR_SPIRV_1_4_EXTENSION_NAME)
+			{
+				m_extensions |= VulkanExtensions::KHR_SPIRV_1_4;
+				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
+			}
+			else if(extensionName == VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME)
+			{
+				m_extensions |= VulkanExtensions::KHR_SHADER_FLOAT_CONTROLS;
+				extensionsToEnable[extensionsToEnableCount++] = extensionName.cstr();
+			}
 		}
 
 		ANKI_VK_LOGI("Will enable the following device extensions:");
@@ -636,100 +703,115 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 		ci.pEnabledFeatures = &m_devFeatures;
 	}
 
-	// Enable 1.1 features
+	// Descriptor indexing
+	if(!(m_extensions & VulkanExtensions::EXT_DESCRIPTOR_INDEXING))
 	{
-		m_11Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
-
-		VkPhysicalDeviceFeatures2 features = {};
-		features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
-		features.pNext = &m_11Features;
-		vkGetPhysicalDeviceFeatures2(m_physicalDevice, &features);
-
-		if(!m_11Features.storageBuffer16BitAccess || !m_11Features.uniformAndStorageBuffer16BitAccess)
-		{
-			ANKI_VK_LOGE("16bit buffer access is not supported");
-			return Error::FUNCTION_FAILED;
-		}
-
-		// Disable a few things
-		m_11Features.storagePushConstant16 = false; // Because AMD doesn't support it
-		m_11Features.protectedMemory = false;
-		m_11Features.multiview = false;
-		m_11Features.multiviewGeometryShader = false;
-		m_11Features.multiviewTessellationShader = false;
-		m_11Features.samplerYcbcrConversion = false;
-
-		m_11Features.pNext = const_cast<void*>(ci.pNext);
-		ci.pNext = &m_11Features;
+		ANKI_VK_LOGE(VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME " is not supported");
+		return Error::FUNCTION_FAILED;
 	}
-
-	// Enable a few 1.2 features
+	else
 	{
-		m_12Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
+		m_descriptorIndexingFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
 
 		VkPhysicalDeviceFeatures2 features = {};
 		features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
-		features.pNext = &m_12Features;
-
+		features.pNext = &m_descriptorIndexingFeatures;
 		vkGetPhysicalDeviceFeatures2(m_physicalDevice, &features);
 
-		// Descriptor indexing
-		if(!m_12Features.shaderSampledImageArrayNonUniformIndexing
-		   || !m_12Features.shaderStorageImageArrayNonUniformIndexing)
+		if(!m_descriptorIndexingFeatures.shaderSampledImageArrayNonUniformIndexing
+		   || !m_descriptorIndexingFeatures.shaderStorageImageArrayNonUniformIndexing)
 		{
 			ANKI_VK_LOGE("Non uniform indexing is not supported by the device");
 			return Error::FUNCTION_FAILED;
 		}
 
-		if(!m_12Features.descriptorBindingSampledImageUpdateAfterBind
-		   || !m_12Features.descriptorBindingStorageImageUpdateAfterBind)
+		if(!m_descriptorIndexingFeatures.descriptorBindingSampledImageUpdateAfterBind
+		   || !m_descriptorIndexingFeatures.descriptorBindingStorageImageUpdateAfterBind)
 		{
 			ANKI_VK_LOGE("Update descriptors after bind is not supported by the device");
 			return Error::FUNCTION_FAILED;
 		}
 
-		if(!m_12Features.descriptorBindingUpdateUnusedWhilePending)
+		if(!m_descriptorIndexingFeatures.descriptorBindingUpdateUnusedWhilePending)
 		{
 			ANKI_VK_LOGE("Update descriptors while cmd buffer is pending is not supported by the device");
 			return Error::FUNCTION_FAILED;
 		}
 
-		// Buffer address
-		if(!!(m_extensions & VulkanExtensions::KHR_RAY_TRACING))
-		{
-			if(!m_12Features.bufferDeviceAddress)
-			{
-				ANKI_VK_LOGE("Buffer device address is not supported by the device");
-				return Error::FUNCTION_FAILED;
-			}
+		m_descriptorIndexingFeatures.pNext = const_cast<void*>(ci.pNext);
+		ci.pNext = &m_descriptorIndexingFeatures;
+	}
 
-			m_12Features.bufferDeviceAddressCaptureReplay =
-				m_12Features.bufferDeviceAddressCaptureReplay && init.m_config->getBool("gr_debugMarkers");
-			m_12Features.bufferDeviceAddressMultiDevice = false;
-		}
-		else
-		{
-			m_12Features.bufferDeviceAddress = false;
-			m_12Features.bufferDeviceAddressCaptureReplay = false;
-			m_12Features.bufferDeviceAddressMultiDevice = false;
-		}
+	// Buffer address
+	if(!(m_extensions & VulkanExtensions::KHR_BUFFER_DEVICE_ADDRESS))
+	{
+		ANKI_VK_LOGW(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME " is not supported");
+	}
+	else
+	{
+		m_deviceBufferFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR;
 
-		// Scalar block layout
-		if(!m_12Features.scalarBlockLayout)
+		VkPhysicalDeviceFeatures2 features = {};
+		features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+		features.pNext = &m_deviceBufferFeatures;
+		vkGetPhysicalDeviceFeatures2(m_physicalDevice, &features);
+
+		m_deviceBufferFeatures.bufferDeviceAddressCaptureReplay =
+			m_deviceBufferFeatures.bufferDeviceAddressCaptureReplay && init.m_config->getBool("gr_debugMarkers");
+		m_deviceBufferFeatures.bufferDeviceAddressMultiDevice = false;
+
+		m_deviceBufferFeatures.pNext = const_cast<void*>(ci.pNext);
+		ci.pNext = &m_deviceBufferFeatures;
+	}
+
+	// Scalar block layout
+	if(!(m_extensions & VulkanExtensions::EXT_SCALAR_BLOCK_LAYOUT))
+	{
+		ANKI_VK_LOGE(VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME " is not supported");
+		return Error::FUNCTION_FAILED;
+	}
+	else
+	{
+		m_scalarBlockLayout.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT;
+
+		VkPhysicalDeviceFeatures2 features = {};
+		features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+		features.pNext = &m_scalarBlockLayout;
+		vkGetPhysicalDeviceFeatures2(m_physicalDevice, &features);
+
+		if(!m_scalarBlockLayout.scalarBlockLayout)
 		{
 			ANKI_VK_LOGE("Scalar block layout is not supported by the device");
 			return Error::FUNCTION_FAILED;
 		}
 
-		// Timeline semaphores
-		if(!m_12Features.timelineSemaphore)
+		m_scalarBlockLayout.pNext = const_cast<void*>(ci.pNext);
+		ci.pNext = &m_scalarBlockLayout;
+	}
+
+	// Timeline semaphore
+	if(!(m_extensions & VulkanExtensions::KHR_TIMELINE_SEMAPHORE))
+	{
+		ANKI_VK_LOGE(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME " is not supported");
+		return Error::FUNCTION_FAILED;
+	}
+	else
+	{
+		m_timelineSemaphoreFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR;
+
+		VkPhysicalDeviceFeatures2 features = {};
+		features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+		features.pNext = &m_timelineSemaphoreFeatures;
+		vkGetPhysicalDeviceFeatures2(m_physicalDevice, &features);
+
+		if(!m_timelineSemaphoreFeatures.timelineSemaphore)
 		{
 			ANKI_VK_LOGE("Timeline semaphores are not supported by the device");
 			return Error::FUNCTION_FAILED;
 		}
 
-		m_12Features.pNext = const_cast<void*>(ci.pNext);
-		ci.pNext = &m_12Features;
+		m_timelineSemaphoreFeatures.pNext = const_cast<void*>(ci.pNext);
+		ci.pNext = &m_timelineSemaphoreFeatures;
 	}
 
 	// Set RT features
@@ -767,7 +849,7 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 	}
 
 	// Pipeline features
-	if(!!(m_extensions & VulkanExtensions::PIPELINE_EXECUTABLE_PROPERTIES))
+	if(!!(m_extensions & VulkanExtensions::KHR_PIPELINE_EXECUTABLE_PROPERTIES))
 	{
 		m_pplineExecutablePropertiesFeatures.sType =
 			VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR;
@@ -777,6 +859,46 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 		ci.pNext = &m_pplineExecutablePropertiesFeatures;
 	}
 
+	// F16 I8
+	if(!(m_extensions & VulkanExtensions::KHR_SHADER_FLOAT16_INT8))
+	{
+		ANKI_VK_LOGE(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME " is not supported");
+		return Error::FUNCTION_FAILED;
+	}
+	else
+	{
+		m_float16Int8Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
+
+		VkPhysicalDeviceFeatures2 features = {};
+		features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+		features.pNext = &m_float16Int8Features;
+		vkGetPhysicalDeviceFeatures2(m_physicalDevice, &features);
+
+		m_float16Int8Features.pNext = const_cast<void*>(ci.pNext);
+		ci.pNext = &m_float16Int8Features;
+	}
+
+	// 64bit atomics
+	if(!(m_extensions & VulkanExtensions::KHR_SHADER_ATOMIC_INT64))
+	{
+		ANKI_VK_LOGW(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME " is not supported or disabled");
+		m_capabilities.m_64bitAtomics = false;
+	}
+	else
+	{
+		m_capabilities.m_64bitAtomics = true;
+
+		m_atomicInt64Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR;
+
+		VkPhysicalDeviceFeatures2 features = {};
+		features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+		features.pNext = &m_atomicInt64Features;
+		vkGetPhysicalDeviceFeatures2(m_physicalDevice, &features);
+
+		m_atomicInt64Features.pNext = const_cast<void*>(ci.pNext);
+		ci.pNext = &m_atomicInt64Features;
+	}
+
 	ANKI_VK_CHECK(vkCreateDevice(m_physicalDevice, &ci, nullptr, &m_device));
 
 	// Get debug marker
@@ -815,6 +937,18 @@ Error GrManagerImpl::initDevice(const GrManagerInitInfo& init)
 		}
 	}
 
+	if(!(m_extensions & VulkanExtensions::KHR_SPIRV_1_4))
+	{
+		ANKI_VK_LOGE(VK_KHR_SPIRV_1_4_EXTENSION_NAME " is not supported");
+		return Error::FUNCTION_FAILED;
+	}
+
+	if(!(m_extensions & VulkanExtensions::KHR_SHADER_FLOAT_CONTROLS))
+	{
+		ANKI_VK_LOGE(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME " is not supported");
+		return Error::FUNCTION_FAILED;
+	}
+
 	return Error::NONE;
 }
 
@@ -836,7 +970,7 @@ Error GrManagerImpl::initMemory(const ConfigSet& cfg)
 	}
 
 	m_gpuMemManager.init(m_physicalDevice, m_device, getAllocator(),
-						 !!(m_extensions & VulkanExtensions::KHR_RAY_TRACING));
+						 !!(m_extensions & VulkanExtensions::KHR_BUFFER_DEVICE_ADDRESS));
 
 	return Error::NONE;
 }
@@ -927,7 +1061,10 @@ TexturePtr GrManagerImpl::acquireNextPresentableTexture()
 		ANKI_VK_LOGW("Swapchain is out of date. Will wait for the queue and create a new one");
 		for(VkQueue queue : m_queues)
 		{
-			vkQueueWaitIdle(queue);
+			if(queue)
+			{
+				vkQueueWaitIdle(queue);
+			}
 		}
 		m_crntSwapchain.reset(nullptr);
 		m_crntSwapchain = m_swapchainFactory.newInstance();
@@ -941,7 +1078,6 @@ TexturePtr GrManagerImpl::acquireNextPresentableTexture()
 		ANKI_VK_CHECKF(res);
 	}
 
-	ANKI_ASSERT(imageIdx < MAX_FRAMES_IN_FLIGHT);
 	m_acquiredImageIdx = U8(imageIdx);
 	return m_crntSwapchain->m_textures[imageIdx];
 }
@@ -987,7 +1123,10 @@ void GrManagerImpl::endFrame()
 		ANKI_VK_LOGW("Swapchain is out of date. Will wait for the queues and create a new one");
 		for(VkQueue queue : m_queues)
 		{
-			vkQueueWaitIdle(queue);
+			if(queue)
+			{
+				vkQueueWaitIdle(queue);
+			}
 		}
 		vkDeviceWaitIdle(m_device);
 		m_crntSwapchain.reset(nullptr);
@@ -1106,19 +1245,18 @@ void GrManagerImpl::flushCommandBuffer(MicroCommandBufferPtr cmdb, Bool cmdbRend
 		// Update the swapchain's fence
 		m_crntSwapchain->setFence(fence);
 
-		frame.m_queueWroteToSwapchainImage = getQueueTypeFromCommandBufferFlags(cmdb->getFlags());
+		frame.m_queueWroteToSwapchainImage = cmdb->getVulkanQueueType();
 	}
 
 	// Submit
 	{
 		ANKI_TRACE_SCOPED_EVENT(VK_QUEUE_SUBMIT);
-		ANKI_VK_CHECKF(vkQueueSubmit(m_queues[getQueueTypeFromCommandBufferFlags(cmdb->getFlags())], 1, &submit,
-									 fence->getHandle()));
+		ANKI_VK_CHECKF(vkQueueSubmit(m_queues[cmdb->getVulkanQueueType()], 1, &submit, fence->getHandle()));
 	}
 
 	if(wait)
 	{
-		vkQueueWaitIdle(m_queues[getQueueTypeFromCommandBufferFlags(cmdb->getFlags())]);
+		vkQueueWaitIdle(m_queues[cmdb->getVulkanQueueType()]);
 	}
 }
 
@@ -1127,7 +1265,10 @@ void GrManagerImpl::finish()
 	LockGuard<Mutex> lock(m_globalMtx);
 	for(VkQueue queue : m_queues)
 	{
-		vkQueueWaitIdle(queue);
+		if(queue)
+		{
+			vkQueueWaitIdle(queue);
+		}
 	}
 }
 
@@ -1263,7 +1404,7 @@ Error GrManagerImpl::printPipelineShaderInfoInternal(VkPipeline ppline, CString
 		ANKI_CHECK(m_shaderStatsFile.flush());
 	}
 
-	if(!!(m_extensions & VulkanExtensions::PIPELINE_EXECUTABLE_PROPERTIES))
+	if(!!(m_extensions & VulkanExtensions::KHR_PIPELINE_EXECUTABLE_PROPERTIES))
 	{
 		StringListAuto log(m_alloc);
 

+ 12 - 7
AnKi/Gr/Vulkan/GrManagerImpl.h

@@ -47,9 +47,10 @@ public:
 
 	ANKI_USE_RESULT Error init(const GrManagerInitInfo& cfg);
 
-	const Array<U32, U(QueueType::COUNT)>& getQueueFamilies() const
+	ConstWeakArray<U32> getQueueFamilies() const
 	{ // Returns a view of the usable queue family indices: both entries with async compute, otherwise only the first.
-		return m_queueFamilyIndices;
+		const Bool hasAsyncCompute = m_queueFamilyIndices[VulkanQueueType::COMPUTE] != MAX_U32; // MAX_U32 means no compute family was selected
+		return (hasAsyncCompute) ? m_queueFamilyIndices : ConstWeakArray<U32>(&m_queueFamilyIndices[0], 1); // Element 0 is the GENERAL family (first enumerator of VulkanQueueType)
 	}
 
 	const VkPhysicalDeviceProperties& getPhysicalDeviceProperties() const
@@ -239,8 +240,8 @@ private:
 	VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE;
 	VulkanExtensions m_extensions = VulkanExtensions::NONE;
 	VkDevice m_device = VK_NULL_HANDLE;
-	Array<U32, U32(QueueType::COUNT)> m_queueFamilyIndices = {MAX_U32, MAX_U32};
-	Array<VkQueue, U32(QueueType::COUNT)> m_queues = {};
+	VulkanQueueFamilies m_queueFamilyIndices = {MAX_U32, MAX_U32};
+	Array<VkQueue, U32(VulkanQueueType::COUNT)> m_queues = {};
 	Mutex m_globalMtx;
 
 	VkPhysicalDeviceProperties2 m_devProps = {};
@@ -250,9 +251,13 @@ private:
 	VkPhysicalDeviceAccelerationStructureFeaturesKHR m_accelerationStructureFeatures = {};
 	VkPhysicalDeviceRayTracingPipelineFeaturesKHR m_rtPipelineFeatures = {};
 	VkPhysicalDeviceRayQueryFeaturesKHR m_rayQueryFeatures = {};
-	VkPhysicalDeviceVulkan11Features m_11Features = {};
-	VkPhysicalDeviceVulkan12Features m_12Features = {};
 	VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR m_pplineExecutablePropertiesFeatures = {};
+	VkPhysicalDeviceDescriptorIndexingFeatures m_descriptorIndexingFeatures = {};
+	VkPhysicalDeviceBufferDeviceAddressFeaturesKHR m_deviceBufferFeatures = {};
+	VkPhysicalDeviceScalarBlockLayoutFeaturesEXT m_scalarBlockLayout = {};
+	VkPhysicalDeviceTimelineSemaphoreFeaturesKHR m_timelineSemaphoreFeatures = {};
+	VkPhysicalDeviceShaderFloat16Int8FeaturesKHR m_float16Int8Features = {};
+	VkPhysicalDeviceShaderAtomicInt64FeaturesKHR m_atomicInt64Features = {};
 
 	PFN_vkDebugMarkerSetObjectNameEXT m_pfnDebugMarkerSetObjectNameEXT = nullptr;
 	PFN_vkCmdDebugMarkerBeginEXT m_pfnCmdDebugMarkerBeginEXT = nullptr;
@@ -272,7 +277,7 @@ private:
 		/// Signaled by the submit that renders to the default FB. Present waits for it.
 		MicroSemaphorePtr m_renderSemaphore;
 
-		QueueType m_queueWroteToSwapchainImage = QueueType::COUNT;
+		VulkanQueueType m_queueWroteToSwapchainImage = VulkanQueueType::COUNT;
 	};
 
 	VkSurfaceKHR m_surface = VK_NULL_HANDLE;

+ 25 - 0
AnKi/Gr/Vulkan/GrManagerImplAndroid.cpp

@@ -0,0 +1,25 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Gr/Vulkan/GrManagerImpl.h>
+#include <AnKi/Gr/GrManager.h>
+#include <AnKi/Core/NativeWindow.h>
+#include <AnKi/Core/NativeWindowAndroid.h>
+
+namespace anki
+{
+
+Error GrManagerImpl::initSurface(const GrManagerInitInfo& init) // Android: creates m_surface for init.m_window
+{
+	VkAndroidSurfaceCreateInfoKHR createInfo = {}; // Value-initialized: every field not set below stays zero
+	createInfo.sType = VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR;
+	createInfo.window = init.m_window->getNative().m_nativeWindow; // Handle read from the window's native data
+
+	ANKI_VK_CHECK(vkCreateAndroidSurfaceKHR(m_instance, &createInfo, nullptr, &m_surface)); // nullptr: no allocator
+
+	return Error::NONE; // NOTE(review): presumably ANKI_VK_CHECK returns early on a failed VkResult - confirm
+}
+
+} // end namespace anki

+ 62 - 41
AnKi/Gr/Vulkan/Pipeline.cpp

@@ -23,8 +23,6 @@ void PipelineStateTracker::reset()
 	m_fbStencil = false;
 	m_defaultFb = false;
 	m_fbColorAttachmentMask.unsetAll();
-	m_rpass = VK_NULL_HANDLE;
-	m_fb.reset(nullptr);
 }
 
 Bool PipelineStateTracker::updateHashes()
@@ -32,13 +30,21 @@ Bool PipelineStateTracker::updateHashes()
 	Bool stateDirty = false;
 
 	// Prog
-	if(!!(m_dirty.m_other & DirtyBit::PROG))
+	if(m_dirty.m_prog)
 	{
-		m_dirty.m_other &= ~DirtyBit::PROG;
+		m_dirty.m_prog = false;
 		stateDirty = true;
 		m_hashes.m_prog = m_state.m_prog->getUuid();
 	}
 
+	// Rpass
+	if(m_dirty.m_rpass)
+	{
+		m_dirty.m_rpass = false;
+		stateDirty = true;
+		m_hashes.m_rpass = ptrToNumber(m_state.m_rpass);
+	}
+
 	// Vertex
 	if(m_dirty.m_attribs.getAny() || m_dirty.m_vertBindings.getAny())
 	{
@@ -56,6 +62,8 @@ Bool PipelineStateTracker::updateHashes()
 				}
 
 				const U binding = m_state.m_vertex.m_attributes[i].m_binding;
+				ANKI_ASSERT(m_set.m_vertBindings.get(binding) && "Forgot to set a vertex binding");
+
 				if(m_dirty.m_vertBindings.get(binding))
 				{
 					m_dirty.m_vertBindings.unset(binding);
@@ -77,33 +85,33 @@ Bool PipelineStateTracker::updateHashes()
 	}
 
 	// IA
-	if(!!(m_dirty.m_other & DirtyBit::IA))
+	if(m_dirty.m_inputAssembler)
 	{
-		m_dirty.m_other &= ~DirtyBit::IA;
-		m_hashes.m_ia = computeHash(&m_state.m_inputAssembler, sizeof(m_state.m_inputAssembler));
+		m_dirty.m_inputAssembler = false;
 		stateDirty = true;
+		m_hashes.m_ia = computeHash(&m_state.m_inputAssembler, sizeof(m_state.m_inputAssembler));
 	}
 
 	// Rasterizer
-	if(!!(m_dirty.m_other & DirtyBit::RASTER))
+	if(m_dirty.m_rasterizer)
 	{
-		m_dirty.m_other &= ~DirtyBit::RASTER;
+		m_dirty.m_rasterizer = false;
 		stateDirty = true;
 		m_hashes.m_raster = computeHash(&m_state.m_rasterizer, sizeof(m_state.m_rasterizer));
 	}
 
 	// Depth
-	if(m_fbDepth && !!(m_dirty.m_other & DirtyBit::DEPTH))
+	if(m_fbDepth && m_dirty.m_depth)
 	{
-		m_dirty.m_other &= ~DirtyBit::DEPTH;
+		m_dirty.m_depth = false;
 		stateDirty = true;
 		m_hashes.m_depth = computeHash(&m_state.m_depth, sizeof(m_state.m_depth));
 	}
 
 	// Stencil
-	if(m_fbStencil && !!(m_dirty.m_other & DirtyBit::STENCIL))
+	if(m_fbStencil && m_dirty.m_stencil)
 	{
-		m_dirty.m_other &= ~DirtyBit::STENCIL;
+		m_dirty.m_stencil = false;
 		stateDirty = true;
 		m_hashes.m_stencil = computeHash(&m_state.m_stencil, sizeof(m_state.m_stencil));
 	}
@@ -114,9 +122,9 @@ Bool PipelineStateTracker::updateHashes()
 		ANKI_ASSERT(m_fbColorAttachmentMask == m_shaderColorAttachmentWritemask
 					&& "Shader and fb should have same attachment mask");
 
-		if(!!(m_dirty.m_other & DirtyBit::COLOR))
+		if(m_dirty.m_color)
 		{
-			m_dirty.m_other &= ~DirtyBit::COLOR;
+			m_dirty.m_color = false;
 			m_hashes.m_color = m_state.m_color.m_alphaToCoverageEnabled ? 1 : 2;
 			stateDirty = true;
 		}
@@ -147,6 +155,9 @@ void PipelineStateTracker::updateSuperHash()
 	// Prog
 	buff[count++] = m_hashes.m_prog;
 
+	// Rpass
+	buff[count++] = m_hashes.m_rpass;
+
 	// Vertex
 	if(!!m_shaderAttributeMask)
 	{
@@ -343,7 +354,7 @@ const VkGraphicsPipelineCreateInfo& PipelineStateTracker::updatePipelineCreateIn
 		{
 			ANKI_ASSERT(m_shaderColorAttachmentWritemask.get(i) && "No gaps are allowed");
 			VkPipelineColorBlendAttachmentState& out = m_ci.m_colAttachments[i];
-			const PPColorAttachmentStateInfo& in = m_state.m_color.m_attachments[i];
+			const ColorAttachmentState& in = m_state.m_color.m_attachments[i];
 
 			out.blendEnable = !blendingDisabled(in.m_srcBlendFactorRgb, in.m_dstBlendFactorRgb, in.m_srcBlendFactorA,
 												in.m_dstBlendFactorA, in.m_blendFunctionRgb, in.m_blendFunctionA);
@@ -377,7 +388,7 @@ const VkGraphicsPipelineCreateInfo& PipelineStateTracker::updatePipelineCreateIn
 
 	// The rest
 	ci.layout = static_cast<const ShaderProgramImpl&>(*m_state.m_prog).getPipelineLayout().getHandle();
-	ci.renderPass = m_rpass;
+	ci.renderPass = m_state.m_rpass;
 	ci.subpass = 0;
 
 	return ci;
@@ -387,10 +398,6 @@ class PipelineFactory::PipelineInternal
 {
 public:
 	VkPipeline m_handle = VK_NULL_HANDLE;
-
-	/// The pipeline needs a render pass and the framebuffers are the owners of that. So the internal pipeline will
-	/// hold a ref to the FB in order to hold a ref to the render pass.
-	FramebufferPtr m_fb;
 };
 
 class PipelineFactory::Hasher
@@ -409,15 +416,16 @@ void PipelineFactory::destroy()
 		if(it.m_handle)
 		{
 			vkDestroyPipeline(m_dev, it.m_handle, nullptr);
-			it.m_fb.reset(nullptr);
 		}
 	}
 
 	m_pplines.destroy(m_alloc);
 }
 
-void PipelineFactory::newPipeline(PipelineStateTracker& state, Pipeline& ppline, Bool& stateDirty)
+void PipelineFactory::getOrCreatePipeline(PipelineStateTracker& state, Pipeline& ppline, Bool& stateDirty)
 {
+	ANKI_TRACE_SCOPED_EVENT(VK_PIPELINE_GET_OR_CREATE);
+
 	U64 hash;
 	state.flush(hash, stateDirty);
 
@@ -427,34 +435,47 @@ void PipelineFactory::newPipeline(PipelineStateTracker& state, Pipeline& ppline,
 		return;
 	}
 
-	LockGuard<SpinLock> lock(m_pplinesMtx);
+	// Check if ppline exists
+	{
+		RLockGuard<RWMutex> lock(m_pplinesMtx);
+		auto it = m_pplines.find(hash);
+		if(it != m_pplines.getEnd())
+		{
+			ppline.m_handle = (*it).m_handle;
+			ANKI_TRACE_INC_COUNTER(VK_PIPELINES_CACHE_HIT, 1);
+			return;
+		}
+	}
+
+	// Doesn't exist. Need to create it
 
+	WLockGuard<RWMutex> lock(m_pplinesMtx);
+
+	// Check again
 	auto it = m_pplines.find(hash);
 	if(it != m_pplines.getEnd())
 	{
 		ppline.m_handle = (*it).m_handle;
+		return;
 	}
-	else
-	{
-		PipelineInternal pp;
-		const VkGraphicsPipelineCreateInfo& ci = state.updatePipelineCreateInfo();
-		pp.m_fb = state.getFb();
 
-		{
-			ANKI_TRACE_SCOPED_EVENT(VK_PIPELINE_CREATE);
-			ANKI_VK_CHECKF(vkCreateGraphicsPipelines(m_dev, m_pplineCache, 1, &ci, nullptr, &pp.m_handle));
-		}
+	// Create it for real
+	PipelineInternal pp;
+	const VkGraphicsPipelineCreateInfo& ci = state.updatePipelineCreateInfo();
+
+	{
+		ANKI_TRACE_SCOPED_EVENT(VK_PIPELINE_CREATE);
+		ANKI_VK_CHECKF(vkCreateGraphicsPipelines(m_dev, m_pplineCache, 1, &ci, nullptr, &pp.m_handle));
+	}
 
-		ANKI_TRACE_INC_COUNTER(VK_PIPELINE_CREATE, 1);
+	ANKI_TRACE_INC_COUNTER(VK_PIPELINES_CACHE_MISS, 1);
 
-		m_pplines.emplace(m_alloc, hash, pp);
-		ppline.m_handle = pp.m_handle;
+	m_pplines.emplace(m_alloc, hash, pp);
+	ppline.m_handle = pp.m_handle;
 
-		// Print shader info
-		const ShaderProgramImpl& shaderImpl = static_cast<const ShaderProgramImpl&>(*state.m_state.m_prog);
-		shaderImpl.getGrManagerImpl().printPipelineShaderInfo(pp.m_handle, shaderImpl.getName(), shaderImpl.getStages(),
-															  hash);
-	}
+	// Print shader info
+	state.m_state.m_prog->getGrManagerImpl().printPipelineShaderInfo(pp.m_handle, state.m_state.m_prog->getName(),
+																	 state.m_state.m_prog->getStages(), hash);
 }
 
 } // end namespace anki

+ 105 - 129
AnKi/Gr/Vulkan/Pipeline.h

@@ -18,89 +18,88 @@ namespace anki
 /// @addtogroup vulkan
 /// @{
 
-/// @note Non copyable because that complicates the hashing.
-class PPVertexBufferBinding : public NonCopyable
+class VertexBufferBindingPipelineState // Stride and step rate of one vertex buffer binding
 {
 public:
 	U32 m_stride = MAX_U32; ///< Vertex stride.
 	VertexStepRate m_stepRate = VertexStepRate::VERTEX;
+	Array<U8, 3> m_padding = {}; ///< Explicit, zeroed padding. Not looked at by operator==
 
-	Bool operator==(const PPVertexBufferBinding& b) const
+	Bool operator==(const VertexBufferBindingPipelineState& b) const // Compares stride and step rate only
 	{
 		return m_stride == b.m_stride && m_stepRate == b.m_stepRate;
 	}
 
-	Bool operator!=(const PPVertexBufferBinding& b) const
+	Bool operator!=(const VertexBufferBindingPipelineState& b) const // Negation of operator==
 	{
 		return !(*this == b);
 	}
 };
+static_assert(sizeof(VertexBufferBindingPipelineState) == 2 * sizeof(U32), "Packed because it will be hashed");
 
-class PPVertexAttributeBinding : public NonCopyable
+class VertexAttributeBindingPipelineState // Offset, format and source binding of one vertex attribute
 {
 public:
 	PtrSize m_offset = 0;
 	Format m_format = Format::NONE;
 	U8 m_binding = 0;
+	Array<U8, 3> m_padding = {}; ///< Explicit, zeroed padding. Not looked at by operator==
 
-	Bool operator==(const PPVertexAttributeBinding& b) const
+	Bool operator==(const VertexAttributeBindingPipelineState& b) const // Compares format, offset and binding
 	{
 		return m_format == b.m_format && m_offset == b.m_offset && m_binding == b.m_binding;
 	}
 
-	Bool operator!=(const PPVertexAttributeBinding& b) const
+	Bool operator!=(const VertexAttributeBindingPipelineState& b) const // Negation of operator==
 	{
 		return !(*this == b);
 	}
 };
+static_assert(sizeof(VertexAttributeBindingPipelineState) == 2 * sizeof(PtrSize), "Packed because it will be hashed");
 
-class PPVertexStateInfo : public NonCopyable
+class VertexPipelineState // All vertex input state: buffer bindings plus attributes
 {
 public:
-	Array<PPVertexBufferBinding, MAX_VERTEX_ATTRIBUTES> m_bindings;
-	Array<PPVertexAttributeBinding, MAX_VERTEX_ATTRIBUTES> m_attributes;
+	Array<VertexBufferBindingPipelineState, MAX_VERTEX_ATTRIBUTES> m_bindings; ///< Indexed by buffer binding
+	Array<VertexAttributeBindingPipelineState, MAX_VERTEX_ATTRIBUTES> m_attributes; ///< Indexed by location
 };
+static_assert(sizeof(VertexPipelineState)
+				  == sizeof(VertexBufferBindingPipelineState) * MAX_VERTEX_ATTRIBUTES
+						 + sizeof(VertexAttributeBindingPipelineState) * MAX_VERTEX_ATTRIBUTES,
+			  "Packed because it will be hashed");
 
-class PPInputAssemblerStateInfo : public NonCopyable
+class InputAssemblerPipelineState // Topology and primitive restart. Hashed as raw bytes by updateHashes()
 {
 public:
 	PrimitiveTopology m_topology = PrimitiveTopology::TRIANGLES;
 	Bool m_primitiveRestartEnabled = false;
 };
+static_assert(sizeof(InputAssemblerPipelineState) == sizeof(U8) * 2, "Packed because it will be hashed");
 
-class PPTessellationStateInfo : public NonCopyable
-{
-public:
-	U32 m_patchControlPointCount = 3;
-};
-
-class PPViewportStateInfo : public NonCopyable
-{
-public:
-	Bool m_scissorEnabled = false;
-};
-
-class PPRasterizerStateInfo : public NonCopyable
+class RasterizerPipelineState // Fill/cull mode, rasterization order and depth bias. Hashed as raw bytes
 {
 public:
 	FillMode m_fillMode = FillMode::SOLID;
 	FaceSelectionBit m_cullMode = FaceSelectionBit::BACK;
 	RasterizationOrder m_rasterizationOrder = RasterizationOrder::ORDERED;
+	U8 m_padding = 0; ///< Explicit, zeroed padding byte so the hashed bytes hold no unset gap
 	F32 m_depthBiasConstantFactor = 0.0f;
 	F32 m_depthBiasSlopeFactor = 0.0f;
 };
+static_assert(sizeof(RasterizerPipelineState) == sizeof(U32) * 3, "Packed because it will be hashed");
 
-class PPDepthStateInfo : public NonCopyable
+class DepthPipelineState // Depth write + compare op. updateHashes() hashes it only when the FB has depth
 {
 public:
 	Bool m_depthWriteEnabled = true;
 	CompareOperation m_depthCompareFunction = CompareOperation::LESS;
 };
+static_assert(sizeof(DepthPipelineState) == sizeof(U8) * 2, "Packed because it will be hashed");
 
-class PPStencilStateInfo : public NonCopyable
+class StencilPipelineState
 {
 public:
-	class S : public NonCopyable
+	class S
 	{
 	public:
 		StencilOperation m_stencilFailOperation = StencilOperation::KEEP;
@@ -111,8 +110,9 @@ public:
 
 	Array<S, 2> m_face;
 };
+static_assert(sizeof(StencilPipelineState) == sizeof(U32) * 2, "Packed because it will be hashed");
 
-class PPColorAttachmentStateInfo : public NonCopyable
+class ColorAttachmentState
 {
 public:
 	BlendFactor m_srcBlendFactorRgb = BlendFactor::ONE;
@@ -123,58 +123,38 @@ public:
 	BlendOperation m_blendFunctionA = BlendOperation::ADD;
 	ColorBit m_channelWriteMask = ColorBit::ALL;
 };
+static_assert(sizeof(ColorAttachmentState) == sizeof(U8) * 7, "Packed because it will be hashed");
 
-class PPColorStateInfo : public NonCopyable
+class ColorPipelineState // Alpha-to-coverage flag plus the per-attachment blend state
 {
 public:
 	Bool m_alphaToCoverageEnabled = false;
-	Array<PPColorAttachmentStateInfo, MAX_COLOR_ATTACHMENTS> m_attachments;
+	Array<ColorAttachmentState, MAX_COLOR_ATTACHMENTS> m_attachments; ///< Indexed by color attachment
 };
+static_assert(sizeof(ColorPipelineState) == sizeof(ColorAttachmentState) * MAX_COLOR_ATTACHMENTS + sizeof(U8),
+			  "Packed because it will be hashed");
 
-class PipelineInfoState : public NonCopyable
+class AllPipelineState // Aggregate of every piece of state PipelineStateTracker tracks and hashes
 {
 public:
-	PipelineInfoState()
-	{
-		reset();
-	}
+	const ShaderProgramImpl* m_prog = nullptr; ///< Plain pointer, no reference held. Hashed through getUuid()
+	VkRenderPass m_rpass = VK_NULL_HANDLE; ///< Non-null only between beginRenderPass() and endRenderPass()
 
-	ShaderProgramPtr m_prog;
-	PPVertexStateInfo m_vertex;
-	PPInputAssemblerStateInfo m_inputAssembler;
-	PPTessellationStateInfo m_tessellation;
-	PPViewportStateInfo m_viewport;
-	PPRasterizerStateInfo m_rasterizer;
-	PPDepthStateInfo m_depth;
-	PPStencilStateInfo m_stencil;
-	PPColorStateInfo m_color;
+	VertexPipelineState m_vertex;
+	InputAssemblerPipelineState m_inputAssembler;
+	RasterizerPipelineState m_rasterizer;
+	DepthPipelineState m_depth;
+	StencilPipelineState m_stencil;
+	ColorPipelineState m_color;
 
 	void reset()
 	{
-		m_prog.reset(nullptr);
-
-		// Do a special construction. The state will be hashed and the padding may contain garbage. With this trick
-		// zero the padding
-		zeroMemory(*this);
-
-#define ANKI_CONSTRUCT_AND_ZERO_PADDING(memb_) new(&memb_) decltype(memb_)()
-
-		ANKI_CONSTRUCT_AND_ZERO_PADDING(m_prog);
-		ANKI_CONSTRUCT_AND_ZERO_PADDING(m_vertex);
-		ANKI_CONSTRUCT_AND_ZERO_PADDING(m_inputAssembler);
-		ANKI_CONSTRUCT_AND_ZERO_PADDING(m_tessellation);
-		ANKI_CONSTRUCT_AND_ZERO_PADDING(m_viewport);
-		ANKI_CONSTRUCT_AND_ZERO_PADDING(m_rasterizer);
-		ANKI_CONSTRUCT_AND_ZERO_PADDING(m_depth);
-		ANKI_CONSTRUCT_AND_ZERO_PADDING(m_stencil);
-		ANKI_CONSTRUCT_AND_ZERO_PADDING(m_color);
-
-#undef ANKI_CONSTRUCT_AND_ZERO_PADDING
+		::new(this) AllPipelineState(); // Placement-new over *this: every member goes back to its default initializer
 	}
 };
 
 /// Track changes in the static state.
-class PipelineStateTracker : public NonCopyable
+class PipelineStateTracker
 {
 	friend class PipelineFactory;
 
@@ -183,15 +163,18 @@ public:
 	{
 	}
 
+	PipelineStateTracker(const PipelineStateTracker&) = delete; // Non-copyable
+
+	PipelineStateTracker& operator=(const PipelineStateTracker&) = delete; // Non-copyable
+
 	void bindVertexBuffer(U32 binding, PtrSize stride, VertexStepRate stepRate)
 	{
-		PPVertexBufferBinding b;
+		VertexBufferBindingPipelineState b;
 		b.m_stride = U32(stride);
 		b.m_stepRate = stepRate;
 		if(m_state.m_vertex.m_bindings[binding] != b)
 		{
-			m_state.m_vertex.m_bindings[binding].m_stride = b.m_stride;
-			m_state.m_vertex.m_bindings[binding].m_stepRate = b.m_stepRate;
+			m_state.m_vertex.m_bindings[binding] = b;
 			m_dirty.m_vertBindings.set(binding);
 		}
 		m_set.m_vertBindings.set(binding);
@@ -199,15 +182,13 @@ public:
 
 	void setVertexAttribute(U32 location, U32 buffBinding, const Format fmt, PtrSize relativeOffset)
 	{
-		PPVertexAttributeBinding b;
+		VertexAttributeBindingPipelineState b;
 		b.m_binding = U8(buffBinding);
 		b.m_format = fmt;
 		b.m_offset = relativeOffset;
 		if(m_state.m_vertex.m_attributes[location] != b)
 		{
-			m_state.m_vertex.m_attributes[location].m_binding = U8(buffBinding);
-			m_state.m_vertex.m_attributes[location].m_format = fmt;
-			m_state.m_vertex.m_attributes[location].m_offset = relativeOffset;
+			m_state.m_vertex.m_attributes[location] = b;
 			m_dirty.m_attribs.set(location);
 		}
 		m_set.m_attribs.set(location);
@@ -218,7 +199,7 @@ public:
 		if(m_state.m_inputAssembler.m_primitiveRestartEnabled != enable)
 		{
 			m_state.m_inputAssembler.m_primitiveRestartEnabled = enable;
-			m_dirty.m_other |= DirtyBit::IA;
+			m_dirty.m_inputAssembler = true;
 		}
 	}
 
@@ -227,7 +208,7 @@ public:
 		if(m_state.m_rasterizer.m_fillMode != mode)
 		{
 			m_state.m_rasterizer.m_fillMode = mode;
-			m_dirty.m_other |= DirtyBit::RASTER;
+			m_dirty.m_rasterizer = true;
 		}
 	}
 
@@ -236,7 +217,7 @@ public:
 		if(m_state.m_rasterizer.m_cullMode != mode)
 		{
 			m_state.m_rasterizer.m_cullMode = mode;
-			m_dirty.m_other |= DirtyBit::RASTER;
+			m_dirty.m_rasterizer = true;
 		}
 	}
 
@@ -247,7 +228,7 @@ public:
 		{
 			m_state.m_rasterizer.m_depthBiasConstantFactor = factor;
 			m_state.m_rasterizer.m_depthBiasSlopeFactor = units;
-			m_dirty.m_other |= DirtyBit::RASTER;
+			m_dirty.m_rasterizer = true;
 		}
 	}
 
@@ -256,7 +237,7 @@ public:
 		if(m_state.m_rasterizer.m_rasterizationOrder != order)
 		{
 			m_state.m_rasterizer.m_rasterizationOrder = order;
-			m_dirty.m_other |= DirtyBit::RASTER;
+			m_dirty.m_rasterizer = true;
 		}
 	}
 
@@ -271,7 +252,7 @@ public:
 			m_state.m_stencil.m_face[0].m_stencilFailOperation = stencilFail;
 			m_state.m_stencil.m_face[0].m_stencilPassDepthFailOperation = stencilPassDepthFail;
 			m_state.m_stencil.m_face[0].m_stencilPassDepthPassOperation = stencilPassDepthPass;
-			m_dirty.m_other |= DirtyBit::STENCIL;
+			m_dirty.m_stencil = true;
 		}
 
 		if(!!(face & FaceSelectionBit::BACK)
@@ -282,7 +263,7 @@ public:
 			m_state.m_stencil.m_face[1].m_stencilFailOperation = stencilFail;
 			m_state.m_stencil.m_face[1].m_stencilPassDepthFailOperation = stencilPassDepthFail;
 			m_state.m_stencil.m_face[1].m_stencilPassDepthPassOperation = stencilPassDepthPass;
-			m_dirty.m_other |= DirtyBit::STENCIL;
+			m_dirty.m_stencil = true;
 		}
 	}
 
@@ -291,13 +272,13 @@ public:
 		if(!!(face & FaceSelectionBit::FRONT) && m_state.m_stencil.m_face[0].m_compareFunction != comp)
 		{
 			m_state.m_stencil.m_face[0].m_compareFunction = comp;
-			m_dirty.m_other |= DirtyBit::STENCIL;
+			m_dirty.m_stencil = true;
 		}
 
 		if(!!(face & FaceSelectionBit::BACK) && m_state.m_stencil.m_face[1].m_compareFunction != comp)
 		{
 			m_state.m_stencil.m_face[1].m_compareFunction = comp;
-			m_dirty.m_other |= DirtyBit::STENCIL;
+			m_dirty.m_stencil = true;
 		}
 	}
 
@@ -306,7 +287,7 @@ public:
 		if(m_state.m_depth.m_depthWriteEnabled != enable)
 		{
 			m_state.m_depth.m_depthWriteEnabled = enable;
-			m_dirty.m_other |= DirtyBit::DEPTH;
+			m_dirty.m_depth = true;
 		}
 	}
 
@@ -315,7 +296,7 @@ public:
 		if(m_state.m_depth.m_depthCompareFunction != op)
 		{
 			m_state.m_depth.m_depthCompareFunction = op;
-			m_dirty.m_other |= DirtyBit::DEPTH;
+			m_dirty.m_depth = true;
 		}
 	}
 
@@ -324,7 +305,7 @@ public:
 		if(m_state.m_color.m_alphaToCoverageEnabled != enable)
 		{
 			m_state.m_color.m_alphaToCoverageEnabled = enable;
-			m_dirty.m_other |= DirtyBit::COLOR;
+			m_dirty.m_color = true;
 		}
 	}
 
@@ -339,7 +320,7 @@ public:
 
 	void setBlendFactors(U32 attachment, BlendFactor srcRgb, BlendFactor dstRgb, BlendFactor srcA, BlendFactor dstA)
 	{
-		PPColorAttachmentStateInfo& c = m_state.m_color.m_attachments[attachment];
+		ColorAttachmentState& c = m_state.m_color.m_attachments[attachment];
 		if(c.m_srcBlendFactorRgb != srcRgb || c.m_dstBlendFactorRgb != dstRgb || c.m_srcBlendFactorA != srcA
 		   || c.m_dstBlendFactorA != dstA)
 		{
@@ -353,7 +334,7 @@ public:
 
 	void setBlendOperation(U32 attachment, BlendOperation funcRgb, BlendOperation funcA)
 	{
-		PPColorAttachmentStateInfo& c = m_state.m_color.m_attachments[attachment];
+		ColorAttachmentState& c = m_state.m_color.m_attachments[attachment];
 		if(c.m_blendFunctionRgb != funcRgb || c.m_blendFunctionA != funcA)
 		{
 			c.m_blendFunctionRgb = funcRgb;
@@ -362,36 +343,34 @@ public:
 		}
 	}
 
-	void bindShaderProgram(const ShaderProgramPtr& prog)
+	void bindShaderProgram(const ShaderProgramImpl* prog) // Stores the raw pointer; no reference is taken here
 	{
 		if(prog != m_state.m_prog)
 		{
-			const ShaderProgramImpl& impl = static_cast<const ShaderProgramImpl&>(*prog);
-			m_shaderColorAttachmentWritemask = impl.getReflectionInfo().m_colorAttachmentWritemask;
-			m_shaderAttributeMask = impl.getReflectionInfo().m_attributeMask;
+			m_shaderColorAttachmentWritemask = prog->getReflectionInfo().m_colorAttachmentWritemask;
+			m_shaderAttributeMask = prog->getReflectionInfo().m_attributeMask; // Both masks come from shader reflection
 			m_state.m_prog = prog;
-			m_dirty.m_other |= DirtyBit::PROG;
+			m_dirty.m_prog = true; // Makes the next updateHashes() refresh m_hashes.m_prog
 		}
 	}
 
-	void beginRenderPass(const FramebufferPtr& fb)
+	void beginRenderPass(const FramebufferImpl* fb) // Caches the FB attachment info and its compatible render pass
 	{
-		ANKI_ASSERT(m_rpass == VK_NULL_HANDLE);
+		ANKI_ASSERT(m_state.m_rpass == VK_NULL_HANDLE); // Any previous pass must have been closed by endRenderPass()
 		Bool d, s;
-		const FramebufferImpl& fbimpl = static_cast<const FramebufferImpl&>(*fb);
-		fbimpl.getAttachmentInfo(m_fbColorAttachmentMask, d, s);
+		fb->getAttachmentInfo(m_fbColorAttachmentMask, d, s); // Fills the color mask and the depth/stencil flags
 		m_fbDepth = d;
 		m_fbStencil = s;
-		m_rpass = fbimpl.getCompatibleRenderPass();
-		m_defaultFb = fbimpl.hasPresentableTexture();
-		m_fb = fb;
+		m_defaultFb = fb->hasPresentableTexture();
+
+		m_state.m_rpass = fb->getCompatibleRenderPass(); // The render pass handle is part of the hashed state
+		m_dirty.m_rpass = true; // Set on every call, so updateHashes() re-reads the handle
 	}
 
 	void endRenderPass()
 	{
-		ANKI_ASSERT(m_rpass);
-		m_rpass = VK_NULL_HANDLE;
-		m_fb.reset(nullptr);
+		ANKI_ASSERT(m_state.m_rpass); // Must be inside a pass opened by beginRenderPass()
+		m_state.m_rpass = VK_NULL_HANDLE; // Only the handle is cleared; m_dirty.m_rpass is left untouched
 	}
 
 	void setPrimitiveTopology(PrimitiveTopology topology)
@@ -399,7 +378,7 @@ public:
 		if(m_state.m_inputAssembler.m_topology != topology)
 		{
 			m_state.m_inputAssembler.m_topology = topology;
-			m_dirty.m_other |= DirtyBit::IA;
+			m_dirty.m_inputAssembler = true;
 		}
 	}
 
@@ -411,7 +390,7 @@ public:
 	/// Flush state
 	void flush(U64& pipelineHash, Bool& stateDirty)
 	{
-		Bool dirtyHashes = updateHashes();
+		const Bool dirtyHashes = updateHashes();
 		if(dirtyHashes)
 		{
 			updateSuperHash();
@@ -434,40 +413,38 @@ public:
 	/// Populate the internal pipeline create info structure.
 	const VkGraphicsPipelineCreateInfo& updatePipelineCreateInfo();
 
-	FramebufferPtr getFb() const
-	{
-		ANKI_ASSERT(m_fb.isCreated());
-		return m_fb;
-	}
-
 	void reset();
 
 private:
-	PipelineInfoState m_state;
-
-	enum class DirtyBit : U8
-	{
-		PROG = 1 << 0,
-		IA = 1 << 1,
-		RASTER = 1 << 2,
-		STENCIL = 1 << 3,
-		DEPTH = 1 << 4,
-		COLOR = 1 << 5,
-
-		NONE = 0,
-		ALL = PROG | IA | RASTER | STENCIL | DEPTH | COLOR
-	};
-	ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS_FRIEND(DirtyBit)
+	AllPipelineState m_state;
 
 	class DirtyBits
 	{
 	public:
-		DirtyBit m_other = DirtyBit::ALL;
-
+		Bool m_prog : 1; ///< Set by bindShaderProgram()
+		Bool m_rpass : 1; ///< Set by beginRenderPass()
+		Bool m_inputAssembler : 1; ///< Set when m_state.m_inputAssembler changes
+		Bool m_rasterizer : 1; ///< Set when m_state.m_rasterizer changes
+		Bool m_depth : 1; ///< Set when m_state.m_depth changes
+		Bool m_stencil : 1; ///< Set when m_state.m_stencil changes
+		Bool m_color : 1; ///< Set when alpha-to-coverage changes
+
+		// Vertex
 		BitSet<MAX_VERTEX_ATTRIBUTES, U8> m_attribs = {true};
 		BitSet<MAX_VERTEX_ATTRIBUTES, U8> m_vertBindings = {true};
 
 		BitSet<MAX_COLOR_ATTACHMENTS, U8> m_colAttachments = {true};
+
+		DirtyBits() // The bit-fields are initialized here instead of with in-class initializers
+			: m_prog(true)
+			, m_rpass(true)
+			, m_inputAssembler(true)
+			, m_rasterizer(true)
+			, m_depth(true)
+			, m_stencil(true)
+			, m_color(true) // Every flag starts as dirty
+		{
+		}
 	} m_dirty;
 
 	class SetBits
@@ -482,8 +459,6 @@ private:
 	BitSet<MAX_COLOR_ATTACHMENTS, U8> m_shaderColorAttachmentWritemask = {false};
 
 	// Renderpass info
-	VkRenderPass m_rpass = VK_NULL_HANDLE;
-	FramebufferPtr m_fb; ///< Hold the reference.
 	Bool m_fbDepth = false;
 	Bool m_fbStencil = false;
 	Bool m_defaultFb = false;
@@ -493,6 +468,7 @@ private:
 	{
 	public:
 		U64 m_prog;
+		U64 m_rpass;
 		Array<U64, MAX_VERTEX_ATTRIBUTES> m_vertexAttribs;
 		U64 m_ia;
 		U64 m_raster;
@@ -572,7 +548,7 @@ public:
 	void destroy();
 
 	/// @note Thread-safe.
-	void newPipeline(PipelineStateTracker& state, Pipeline& ppline, Bool& stateDirty);
+	void getOrCreatePipeline(PipelineStateTracker& state, Pipeline& ppline, Bool& stateDirty);
 
 private:
 	class PipelineInternal;
@@ -583,7 +559,7 @@ private:
 	VkPipelineCache m_pplineCache = VK_NULL_HANDLE;
 
 	HashMap<U64, PipelineInternal, Hasher> m_pplines;
-	SpinLock m_pplinesMtx;
+	RWMutex m_pplinesMtx;
 };
 /// @}
 

+ 3 - 3
AnKi/Gr/Vulkan/PipelineCache.cpp

@@ -72,11 +72,13 @@ Error PipelineCache::init(VkDevice dev, VkPhysicalDevice pdev, CString cacheDir,
 
 void PipelineCache::destroy(VkDevice dev, VkPhysicalDevice pdev, GrAllocator<U8> alloc)
 {
-	Error err = destroyInternal(dev, pdev, alloc);
+	const Error err = destroyInternal(dev, pdev, alloc); // A failure is only logged below, never propagated
 	if(err)
 	{
 		ANKI_VK_LOGE("An error occurred while storing the pipeline cache to disk. Will ignore");
 	}
+
+	m_dumpFilename.destroy(alloc); // Moved out of destroyInternal(): runs whether or not err is set
 }
 
 Error PipelineCache::destroyInternal(VkDevice dev, VkPhysicalDevice pdev, GrAllocator<U8> alloc)
@@ -115,8 +117,6 @@ Error PipelineCache::destroyInternal(VkDevice dev, VkPhysicalDevice pdev, GrAllo
 		m_cacheHandle = VK_NULL_HANDLE;
 	}
 
-	m_dumpFilename.destroy(alloc);
-
 	return Error::NONE;
 }
 

+ 5 - 1
AnKi/Gr/Vulkan/QueryFactory.h

@@ -62,15 +62,19 @@ private:
 };
 
 /// Batch allocator of queries.
-class QueryFactory : public NonCopyable
+class QueryFactory
 {
 public:
 	QueryFactory()
 	{
 	}
 
+	QueryFactory(const QueryFactory&) = delete; // Non-copyable
+
 	~QueryFactory();
 
+	QueryFactory& operator=(const QueryFactory&) = delete; // Non-copyable
+
 	void init(GrAllocator<U8> alloc, VkDevice dev, VkQueryType poolType)
 	{
 		m_alloc = alloc;

+ 5 - 1
AnKi/Gr/Vulkan/SemaphoreFactory.h

@@ -18,7 +18,7 @@ class SemaphoreFactory;
 /// @{
 
 /// Simple semaphore wrapper.
-class MicroSemaphore : public NonCopyable
+class MicroSemaphore
 {
 	friend class SemaphoreFactory;
 	friend class MicroSemaphorePtrDeleter;
@@ -26,6 +26,10 @@ class MicroSemaphore : public NonCopyable
 	friend class GenericPoolAllocator;
 
 public:
+	MicroSemaphore(const MicroSemaphore&) = delete; // Non-copyable
+
+	MicroSemaphore& operator=(const MicroSemaphore&) = delete; // Non-copyable
+
 	const VkSemaphore& getHandle() const
 	{
 		ANKI_ASSERT(m_handle);

+ 3 - 2
AnKi/Gr/Vulkan/ShaderImpl.cpp

@@ -8,7 +8,7 @@
 #include <AnKi/Gr/Utils/Functions.h>
 #include <SprivCross/spirv_cross.hpp>
 
-#define ANKI_DUMP_SHADERS ANKI_EXTRA_CHECKS
+#define ANKI_DUMP_SHADERS 0
 
 #if ANKI_DUMP_SHADERS
 #	include <AnKi/Util/File.h>
@@ -61,7 +61,8 @@ Error ShaderImpl::init(const ShaderInitInfo& inf)
 		fnameSpirv.sprintf("%s/%05u.spv", getManager().getCacheDirectory().cstr(), getUuid());
 
 		File fileSpirv;
-		ANKI_CHECK(fileSpirv.open(fnameSpirv.toCString(), FileOpenFlag::BINARY | FileOpenFlag::WRITE));
+		ANKI_CHECK(
+			fileSpirv.open(fnameSpirv.toCString(), FileOpenFlag::BINARY | FileOpenFlag::WRITE | FileOpenFlag::SPECIAL));
 		ANKI_CHECK(fileSpirv.write(&inf.m_binary[0], inf.m_binary.getSize()));
 	}
 #endif

+ 39 - 17
AnKi/Gr/Vulkan/SwapchainFactory.cpp

@@ -24,10 +24,7 @@ MicroSwapchain::~MicroSwapchain()
 {
 	const VkDevice dev = m_factory->m_gr->getDevice();
 
-	for(TexturePtr& tex : m_textures)
-	{
-		tex.reset(nullptr);
-	}
+	m_textures.destroy(getAllocator());
 
 	if(m_swapchain)
 	{
@@ -69,12 +66,13 @@ Error MicroSwapchain::initInternal()
 		ANKI_VK_CHECK(vkGetPhysicalDeviceSurfaceFormatsKHR(m_factory->m_gr->getPhysicalDevice(),
 														   m_factory->m_gr->getSurface(), &formatCount, &formats[0]));
 
-		while(formatCount--)
+		for(U32 i = 0; i < formatCount; ++i)
 		{
-			if(formats[formatCount].format == VK_FORMAT_B8G8R8A8_UNORM)
+			if(formats[i].format == VK_FORMAT_R8G8B8A8_UNORM || formats[i].format == VK_FORMAT_B8G8R8A8_UNORM
+			   || formats[i].format == VK_FORMAT_A8B8G8R8_UNORM_PACK32)
 			{
-				surfaceFormat = formats[formatCount].format;
-				colorspace = formats[formatCount].colorSpace;
+				surfaceFormat = formats[i].format;
+				colorspace = formats[i].colorSpace;
 				break;
 			}
 		}
@@ -137,6 +135,29 @@ Error MicroSwapchain::initInternal()
 
 	// Create swapchain
 	{
+		VkCompositeAlphaFlagBitsKHR compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
+		if(surfaceProperties.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR)
+		{
+			compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
+		}
+		else if(surfaceProperties.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR)
+		{
+			compositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR;
+		}
+		else if(surfaceProperties.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR)
+		{
+			compositeAlpha = VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR;
+		}
+		else if(surfaceProperties.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR)
+		{
+			compositeAlpha = VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR;
+		}
+		else
+		{
+			ANKI_VK_LOGE("Failed to set compositeAlpha");
+			return Error::FUNCTION_FAILED;
+		}
+
 		VkSwapchainCreateInfoKHR ci = {};
 		ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
 		ci.surface = m_factory->m_gr->getSurface();
@@ -146,11 +167,11 @@ Error MicroSwapchain::initInternal()
 		ci.imageExtent = surfaceProperties.currentExtent;
 		ci.imageArrayLayers = 1;
 		ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
-		ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
 		ci.queueFamilyIndexCount = m_factory->m_gr->getQueueFamilies().getSize();
 		ci.pQueueFamilyIndices = &m_factory->m_gr->getQueueFamilies()[0];
+		ci.imageSharingMode = (ci.queueFamilyIndexCount > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
 		ci.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
-		ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
+		ci.compositeAlpha = compositeAlpha;
 		ci.presentMode = presentMode;
 		ci.clipped = false;
 		ci.oldSwapchain = VK_NULL_HANDLE;
@@ -160,26 +181,27 @@ Error MicroSwapchain::initInternal()
 
 	// Get images
 	{
-		uint32_t count = 0;
+		U32 count = 0;
 		ANKI_VK_CHECK(vkGetSwapchainImagesKHR(dev, m_swapchain, &count, nullptr));
 		if(count != MAX_FRAMES_IN_FLIGHT)
 		{
-			ANKI_VK_LOGE("Requested a swapchain with %u images but got one with %u", MAX_FRAMES_IN_FLIGHT, count);
-			return Error::FUNCTION_FAILED;
+			ANKI_VK_LOGI("Requested a swapchain with %u images but got one with %u", MAX_FRAMES_IN_FLIGHT, count);
 		}
 
+		m_textures.create(getAllocator(), count);
+
 		ANKI_VK_LOGI("Created a swapchain. Image count: %u, present mode: %u, size: %ux%u, vsync: %u", count,
 					 presentMode, surfaceWidth, surfaceHeight, U32(m_factory->m_vsync));
 
-		Array<VkImage, MAX_FRAMES_IN_FLIGHT> images;
+		Array<VkImage, 64> images;
+		ANKI_ASSERT(count <= 64);
 		ANKI_VK_CHECK(vkGetSwapchainImagesKHR(dev, m_swapchain, &count, &images[0]));
-		for(U i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i)
+		for(U32 i = 0; i < count; ++i)
 		{
 			TextureInitInfo init("SwapchainImg");
 			init.m_width = surfaceWidth;
 			init.m_height = surfaceHeight;
-			init.m_format = Format::B8G8R8A8_UNORM;
-			ANKI_ASSERT(surfaceFormat == VK_FORMAT_B8G8R8A8_UNORM);
+			init.m_format = Format(surfaceFormat); // anki::Format is compatible with VkFormat
 			init.m_usage = TextureUsageBit::IMAGE_COMPUTE_WRITE | TextureUsageBit::IMAGE_TRACE_RAYS_WRITE
 						   | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_READ
 						   | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE | TextureUsageBit::PRESENT;

+ 1 - 1
AnKi/Gr/Vulkan/SwapchainFactory.h

@@ -27,7 +27,7 @@ class MicroSwapchain
 public:
 	VkSwapchainKHR m_swapchain = {};
 
-	Array<TexturePtr, MAX_FRAMES_IN_FLIGHT> m_textures;
+	DynamicArray<TexturePtr> m_textures;
 
 	MicroSwapchain(SwapchainFactory* factory);
 

+ 34 - 2
AnKi/Gr/Vulkan/TextureImpl.cpp

@@ -14,6 +14,38 @@
 namespace anki
 {
 
+U32 MicroImageView::getOrCreateBindlessIndex(VkImageLayout layout, GrManagerImpl& gr) const
+{ // Returns the bindless index cached for this layout; binds the view the first time it is requested
+	ANKI_ASSERT(layout == VK_IMAGE_LAYOUT_GENERAL || layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+
+	const U32 arrayIdx = (layout == VK_IMAGE_LAYOUT_GENERAL) ? 1 : 0; // Slot 1: GENERAL, slot 0: SHADER_READ_ONLY
+
+	LockGuard<SpinLock> lock(m_lock); // Held for the rest of the function, covers the read and the write below
+
+	U32 outIdx;
+	if(m_bindlessIndices[arrayIdx] != MAX_U32) // MAX_U32 means no index has been stored for this slot
+	{
+		outIdx = m_bindlessIndices[arrayIdx];
+	}
+	else
+	{
+		// Needs binding to the bindless descriptor set
+
+		if(layout == VK_IMAGE_LAYOUT_GENERAL)
+		{
+			outIdx = gr.getDescriptorSetFactory().bindBindlessImage(m_handle); // GENERAL goes down the image path
+		}
+		else
+		{
+			outIdx = gr.getDescriptorSetFactory().bindBindlessTexture(m_handle, layout);
+		}
+
+		m_bindlessIndices[arrayIdx] = outIdx; // Remember it so later calls take the fast path above
+	}
+
+	return outIdx;
+}
+
 TextureImpl::~TextureImpl()
 {
 #if ANKI_ENABLE_ASSERTIONS
@@ -23,7 +55,7 @@ TextureImpl::~TextureImpl()
 	}
 #endif
 
-	for(auto it : m_viewsMap)
+	for(MicroImageView& it : m_viewsMap)
 	{
 		destroyMicroImageView(it);
 	}
@@ -299,9 +331,9 @@ Error TextureImpl::initImage(const TextureInitInfo& init_)
 	ci.samples = VK_SAMPLE_COUNT_1_BIT;
 	ci.tiling = VK_IMAGE_TILING_OPTIMAL;
 	ci.usage = convertTextureUsage(init.m_usage, init.m_format);
-	ci.sharingMode = VK_SHARING_MODE_CONCURRENT;
 	ci.queueFamilyIndexCount = getGrManagerImpl().getQueueFamilies().getSize();
 	ci.pQueueFamilyIndices = &getGrManagerImpl().getQueueFamilies()[0];
+	ci.sharingMode = (ci.queueFamilyIndexCount > 1) ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
 	ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
 
 	ANKI_VK_CHECK(vkCreateImage(getDevice(), &ci, nullptr, &m_imageHandle));

+ 40 - 17
AnKi/Gr/Vulkan/TextureImpl.h

@@ -33,20 +33,17 @@ ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(TextureImplWorkaround)
 /// A Vulkan image view with some extra data.
 class MicroImageView
 {
-public:
-	VkImageView m_handle = VK_NULL_HANDLE;
-
-	/// Index 0: Sampled image with SHADER_READ_ONLY layout.
-	/// Index 1: Storage image with ofcource GENERAL layout.
-	mutable Array<U32, 2> m_bindlessIndices = {MAX_U32, MAX_U32};
+	friend class TextureImpl;
 
-	/// Protect the m_bindlessIndices.
-	mutable SpinLock m_lock;
+public:
+	MicroImageView() = default;
 
-	/// Because for example a single surface view of a cube texture will be a 2D view.
-	TextureType m_derivedTextureType = TextureType::COUNT;
+	MicroImageView(MicroImageView&& b)
+	{
+		*this = std::move(b);
+	}
 
-	MicroImageView()
+	~MicroImageView()
 	{
 		for(U32 idx : m_bindlessIndices)
 		{
@@ -56,18 +53,44 @@ public:
 		ANKI_ASSERT(m_handle == VK_NULL_HANDLE);
 	}
 
-	MicroImageView(const MicroImageView& b)
-	{
-		*this = std::move(b);
-	}
-
-	MicroImageView& operator=(const MicroImageView& b)
+	MicroImageView& operator=(MicroImageView&& b)
 	{
 		m_handle = b.m_handle;
+		b.m_handle = VK_NULL_HANDLE;
 		m_bindlessIndices = b.m_bindlessIndices;
+		b.m_bindlessIndices = {MAX_U32, MAX_U32};
 		m_derivedTextureType = b.m_derivedTextureType;
+		b.m_derivedTextureType = TextureType::COUNT;
 		return *this;
 	}
+
+	VkImageView getHandle() const
+	{
+		ANKI_ASSERT(m_handle);
+		return m_handle;
+	}
+
+	/// @note It's thread-safe.
+	U32 getOrCreateBindlessIndex(VkImageLayout layout, GrManagerImpl& gr) const;
+
+	TextureType getDerivedTextureType() const
+	{
+		ANKI_ASSERT(m_derivedTextureType != TextureType::COUNT);
+		return m_derivedTextureType;
+	}
+
+private:
+	VkImageView m_handle = VK_NULL_HANDLE;
+
+	/// Index 0: Sampled image with SHADER_READ_ONLY layout.
+	/// Index 1: Storage image with, of course, GENERAL layout.
+	mutable Array<U32, 2> m_bindlessIndices = {MAX_U32, MAX_U32};
+
+	/// Protect the m_bindlessIndices.
+	mutable SpinLock m_lock;
+
+	/// Because for example a single surface view of a cube texture will be a 2D view.
+	TextureType m_derivedTextureType = TextureType::COUNT;
 };
 
 /// Texture container.

+ 2 - 2
AnKi/Gr/Vulkan/TextureView.cpp

@@ -27,14 +27,14 @@ U32 TextureView::getOrCreateBindlessTextureIndex()
 	ANKI_VK_SELF(TextureViewImpl);
 	ANKI_ASSERT(self.getTextureImpl().computeLayout(TextureUsageBit::ALL_SAMPLED, 0)
 				== VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-	return self.getOrCreateBindlessIndex(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, DescriptorType::TEXTURE);
+	return self.getOrCreateBindlessIndex(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
 }
 
 U32 TextureView::getOrCreateBindlessImageIndex()
 {
 	ANKI_VK_SELF(TextureViewImpl);
 	ANKI_ASSERT(self.getTextureImpl().computeLayout(TextureUsageBit::ALL_IMAGE, 0) == VK_IMAGE_LAYOUT_GENERAL);
-	return self.getOrCreateBindlessIndex(VK_IMAGE_LAYOUT_GENERAL, DescriptorType::IMAGE);
+	return self.getOrCreateBindlessIndex(VK_IMAGE_LAYOUT_GENERAL);
 }
 
 } // end namespace anki

+ 8 - 38
AnKi/Gr/Vulkan/TextureViewImpl.cpp

@@ -27,8 +27,8 @@ Error TextureViewImpl::init(const TextureViewInitInfo& inf)
 
 	// Ask the texture for a view
 	m_microImageView = &tex.getOrCreateView(inf);
-	m_handle = m_microImageView->m_handle;
-	m_texType = m_microImageView->m_derivedTextureType;
+	m_handle = m_microImageView->getHandle();
+	m_texType = m_microImageView->getDerivedTextureType();
 
 	// Create the hash
 	Array<U64, 2> toHash = {tex.getUuid(), ptrToNumber(m_handle)};
@@ -37,47 +37,17 @@ Error TextureViewImpl::init(const TextureViewInitInfo& inf)
 	return Error::NONE;
 }
 
-U32 TextureViewImpl::getOrCreateBindlessIndex(VkImageLayout layout, DescriptorType resourceType)
+U32 TextureViewImpl::getOrCreateBindlessIndex(VkImageLayout layout)
 {
-	ANKI_ASSERT(layout == VK_IMAGE_LAYOUT_GENERAL || layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-	ANKI_ASSERT(resourceType == DescriptorType::TEXTURE || resourceType == DescriptorType::IMAGE);
-	if(resourceType == DescriptorType::IMAGE)
-	{
-		ANKI_ASSERT(layout == VK_IMAGE_LAYOUT_GENERAL);
-	}
-	else
-	{
-		ANKI_ASSERT(layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-	}
-
-	ANKI_ASSERT(m_microImageView);
-
-	const U32 arrayIdx = (resourceType == DescriptorType::IMAGE) ? 1 : 0;
+	const U32 arrayIdx = (layout == VK_IMAGE_LAYOUT_GENERAL) ? 1 : 0;
+	U32& bindlessIdx = m_bindlessIndices[arrayIdx];
 
-	LockGuard<SpinLock> lock(m_microImageView->m_lock);
-
-	U32 outIdx;
-	if(m_microImageView->m_bindlessIndices[arrayIdx] != MAX_U32)
-	{
-		outIdx = m_microImageView->m_bindlessIndices[arrayIdx];
-	}
-	else
+	if(bindlessIdx == MAX_U32)
 	{
-		// Needs binding to the bindless descriptor set
-
-		if(resourceType == DescriptorType::TEXTURE)
-		{
-			outIdx = getGrManagerImpl().getDescriptorSetFactory().bindBindlessTexture(m_handle, layout);
-		}
-		else
-		{
-			outIdx = getGrManagerImpl().getDescriptorSetFactory().bindBindlessImage(m_handle);
-		}
-
-		m_microImageView->m_bindlessIndices[arrayIdx] = outIdx;
+		bindlessIdx = m_microImageView->getOrCreateBindlessIndex(layout, getGrManagerImpl());
 	}
 
-	return outIdx;
+	return bindlessIdx;
 }
 
 } // end namespace anki

+ 4 - 4
AnKi/Gr/Vulkan/TextureViewImpl.h

@@ -52,12 +52,12 @@ public:
 		return static_cast<const TextureImpl&>(*m_tex);
 	}
 
-	/// @param resourceType Texture or image.
-	/// @note It's thread-safe.
-	U32 getOrCreateBindlessIndex(VkImageLayout layout, DescriptorType resourceType);
+	U32 getOrCreateBindlessIndex(VkImageLayout layout);
 
 private:
-	VkImageView m_handle = {}; /// Cache the handle.
+	VkImageView m_handle = {}; ///< Cache the handle.
+
+	Array<U32, 2> m_bindlessIndices = {MAX_U32, MAX_U32}; ///< Cache it.
 
 	/// This is a hash that depends on the Texture and the VkImageView. It's used as a replacement of
 	/// TextureView::m_uuid since it creates less unique IDs.

+ 1 - 1
AnKi/Importer/CMakeLists.txt

@@ -1,2 +1,2 @@
 file(GLOB SOURCES *.cpp)
-addAnkiSourceFiles(${SOURCES})
+anki_add_source_files(${SOURCES})

+ 1 - 0
AnKi/Importer/Common.h

@@ -12,6 +12,7 @@ namespace anki
 /// @{
 
 #define ANKI_IMPORTER_LOGI(...) ANKI_LOG("IMPR", NORMAL, __VA_ARGS__)
+#define ANKI_IMPORTER_LOGV(...) ANKI_LOG("IMPR", VERBOSE, __VA_ARGS__)
 #define ANKI_IMPORTER_LOGE(...) ANKI_LOG("IMPR", ERROR, __VA_ARGS__)
 #define ANKI_IMPORTER_LOGW(...) ANKI_LOG("IMPR", WARNING, __VA_ARGS__)
 #define ANKI_IMPORTER_LOGF(...) ANKI_LOG("IMPR", FATAL, __VA_ARGS__)

+ 230 - 32
AnKi/Importer/ImageImporter.cpp

@@ -20,10 +20,12 @@ class SurfaceOrVolumeData
 public:
 	DynamicArrayAuto<U8, PtrSize> m_pixels;
 	DynamicArrayAuto<U8, PtrSize> m_s3tcPixels;
+	DynamicArrayAuto<U8, PtrSize> m_astcPixels;
 
 	SurfaceOrVolumeData(GenericMemoryPoolAllocator<U8> alloc)
 		: m_pixels(alloc)
 		, m_s3tcPixels(alloc)
+		, m_astcPixels(alloc)
 	{
 	}
 };
@@ -96,6 +98,43 @@ public:
 	U32 m_dwReserved2;
 };
 
+/// The header that compressAstc() reads from the beginning of the .astc file produced by the encoder. All members are
+/// single bytes or byte arrays; the multi-byte values get assembled by the reader, least significant byte first.
+class AstcHeader
+{
+public:
+	Array<U8, 4> m_magic; ///< File magic. The reader expects it to unpack to 0x5CA1AB13.
+	U8 m_blockX; ///< Block size in X. Checked against the requested block size.
+	U8 m_blockY; ///< Block size in Y. Checked against the requested block size.
+	U8 m_blockZ; ///< Block size in Z. The reader expects 1 (a value of 0 is treated as 1).
+	Array<U8, 3> m_dimX; ///< Image width as a 24bit value.
+	Array<U8, 3> m_dimY; ///< Image height as a 24bit value.
+	Array<U8, 3> m_dimZ; ///< Image depth as a 24bit value. The reader expects 1.
+};
+
+/// Simple class to delete a file when it goes out of scope.
+class CleanupFile
+{
+public:
+	StringAuto m_fileToDelete; ///< The file to remove on destruction. Nothing is removed if it's empty.
+
+	/// @param alloc The allocator used to store a copy of the filename.
+	/// @param filename The file to delete when this object gets destroyed.
+	CleanupFile(GenericMemoryPoolAllocator<U8> alloc, CString filename)
+		: m_fileToDelete(alloc, filename)
+	{
+	}
+
+	/// Remove the file. A failure is only logged because a destructor has no way to report an error.
+	~CleanupFile()
+	{
+		if(m_fileToDelete.isEmpty())
+		{
+			return;
+		}
+
+		if(std::remove(m_fileToDelete.cstr()) != 0)
+		{
+			ANKI_IMPORTER_LOGE("Couldn't delete file: %s", m_fileToDelete.cstr());
+		}
+		else
+		{
+			// Report the deletion only if it actually happened
+			ANKI_IMPORTER_LOGV("Deleted %s", m_fileToDelete.cstr());
+		}
+	}
+};
+
 } // namespace
 
 static ANKI_USE_RESULT Error checkConfig(const ImageImporterConfig& config)
@@ -134,6 +173,13 @@ static ANKI_USE_RESULT Error checkConfig(const ImageImporterConfig& config)
 	ANKI_CFG_ASSERT(config.m_compressions == ImageBinaryDataCompression::RAW || config.m_type != ImageBinaryType::_3D,
 					"Can't compress 3D textures");
 
+	// ASTC
+	if(!!(config.m_compressions & ImageBinaryDataCompression::ASTC))
+	{
+		ANKI_CFG_ASSERT(config.m_astcBlockSize == UVec2(4u) || config.m_astcBlockSize == UVec2(8u),
+						"Incorrect ASTC block sizes");
+	}
+
 	// Mip size
 	ANKI_CFG_ASSERT(config.m_minMipmapDimension >= 4, "Mimpap min dimension can be less than 4");
 
@@ -211,6 +257,7 @@ static ANKI_USE_RESULT Error loadFirstMipmap(const ImageImporterConfig& config,
 	for(U32 i = 0; i < config.m_inputFilenames.getSize(); ++i)
 	{
 		I32 width, height, c;
+		stbi_set_flip_vertically_on_load_thread(true);
 		void* data = stbi_load(config.m_inputFilenames[i].cstr(), &width, &height, &c, ctx.m_channelCount);
 		ANKI_ASSERT(U32(c) == ctx.m_channelCount);
 		if(!data)
@@ -297,38 +344,16 @@ static ANKI_USE_RESULT Error compressS3tc(GenericMemoryPoolAllocator<U8> alloc,
 	ANKI_ASSERT(inWidth > 0 && isPowerOfTwo(inWidth) && inHeight > 0 && isPowerOfTwo(inHeight));
 	ANKI_ASSERT(outPixels.getSizeInBytes() == PtrSize((channelCount == 3) ? 8 : 16) * (inWidth / 4) * (inHeight / 4));
 
-	class CleanupFile
+	// Create a PNG image to feed to the compressor
+	StringAuto pngFilename(alloc);
+	pngFilename.sprintf("%s/AnKiImageImporter_%u.png", tempDirectory.cstr(), U32(std::rand()));
+	ANKI_IMPORTER_LOGV("Will store: %s", pngFilename.cstr());
+	if(!stbi_write_png(pngFilename.cstr(), inWidth, inHeight, channelCount, inPixels.getBegin(), 0))
 	{
-	public:
-		StringAuto m_fileToDelete;
-
-		CleanupFile(GenericMemoryPoolAllocator<U8> alloc, CString filename)
-			: m_fileToDelete(alloc, filename)
-		{
-		}
-
-		~CleanupFile()
-		{
-			if(!m_fileToDelete.isEmpty())
-			{
-				const int err = std::remove(m_fileToDelete.cstr());
-				if(err)
-				{
-					ANKI_IMPORTER_LOGE("Couldn't delete file: %s", m_fileToDelete.cstr());
-				}
-			}
-		}
-	};
-
-	// Create a BMP image to feed to the compressor
-	StringAuto bmpFilename(alloc);
-	bmpFilename.sprintf("%s/AnKiImageImporter_%u.bmp", tempDirectory.cstr(), U32(std::rand()));
-	if(!stbi_write_bmp(bmpFilename.cstr(), inWidth, inHeight, channelCount, inPixels.getBegin()))
-	{
-		ANKI_IMPORTER_LOGE("STB failed to create: %s", bmpFilename.cstr());
+		ANKI_IMPORTER_LOGE("STB failed to create: %s", pngFilename.cstr());
 		return Error::FUNCTION_FAILED;
 	}
-	CleanupFile bmpCleanup(alloc, bmpFilename);
+	CleanupFile pngCleanup(alloc, pngFilename);
 
 	// Invoke the compressor process
 	StringAuto ddsFilename(alloc);
@@ -339,9 +364,11 @@ static ANKI_USE_RESULT Error compressS3tc(GenericMemoryPoolAllocator<U8> alloc,
 	args[argCount++] = "-nomipmap";
 	args[argCount++] = "-fd";
 	args[argCount++] = (channelCount == 3) ? "BC1" : "BC3";
-	args[argCount++] = bmpFilename;
+	args[argCount++] = pngFilename;
 	args[argCount++] = ddsFilename;
 
+	ANKI_IMPORTER_LOGV("Will invoke process: CompressonatorCLI %s %s %s %s %s", args[0].cstr(), args[1].cstr(),
+					   args[2].cstr(), args[3].cstr(), args[4].cstr());
 	ANKI_CHECK(proc.start("CompressonatorCLI", args,
 						  (compressonatorPath.isEmpty()) ? ConstWeakArray<CString>()
 														 : Array<CString, 2>{{"PATH", compressonatorPath}}));
@@ -396,8 +423,114 @@ static ANKI_USE_RESULT Error compressS3tc(GenericMemoryPoolAllocator<U8> alloc,
 	return Error::NONE;
 }
 
+/// Compress a single surface to ASTC by invoking the external astcenc-avx2 executable. The input pixels are stored in
+/// a temporary PNG inside tempDirectory, the encoder writes a temporary .astc file next to it and, after validating
+/// its header, the data that follows the header is read into outPixels. The temporary files are handed to CleanupFile
+/// objects for deletion.
+/// @param alloc Allocator for the temporary strings.
+/// @param tempDirectory The directory that will hold the temporary files.
+/// @param astcencPath If it's not empty it's given to Process::start() as a {"PATH", astcencPath} pair. Presumably
+///                    that's an environment variable, confirm against Process.
+/// @param inPixels The uncompressed pixels. Asserted to be inWidth * inHeight * inChannelCount bytes.
+/// @param inWidth The width in pixels. Asserted to be a power of two.
+/// @param inHeight The height in pixels. Asserted to be a power of two.
+/// @param inChannelCount The number of channels in inPixels.
+/// @param blockSize The ASTC block size to request from the encoder.
+/// @param[out] outPixels Receives the compressed data. Asserted to be pre-sized to 16 bytes per block.
+/// @return Error::FUNCTION_FAILED if the PNG can't be written, if the process didn't exit normally with a zero exit
+///         code or if the header of the produced file doesn't match the request. Errors of the process and file
+///         operations are returned through ANKI_CHECK.
+static ANKI_USE_RESULT Error compressAstc(GenericMemoryPoolAllocator<U8> alloc, CString tempDirectory,
+										  CString astcencPath, ConstWeakArray<U8, PtrSize> inPixels, U32 inWidth,
+										  U32 inHeight, U32 inChannelCount, UVec2 blockSize,
+										  WeakArray<U8, PtrSize> outPixels)
+{
+	const PtrSize blockBytes = 16;
+	(void)blockBytes; // It's only used by the assertion below
+	ANKI_ASSERT(inPixels.getSizeInBytes() == PtrSize(inWidth) * inHeight * inChannelCount);
+	ANKI_ASSERT(inWidth > 0 && isPowerOfTwo(inWidth) && inHeight > 0 && isPowerOfTwo(inHeight));
+	ANKI_ASSERT(outPixels.getSizeInBytes() == blockBytes * (inWidth / blockSize.x()) * (inHeight / blockSize.y()));
+
+	// Create a PNG image to feed to astcenc
+	StringAuto pngFilename(alloc);
+	pngFilename.sprintf("%s/AnKiImageImporter_%u.png", tempDirectory.cstr(), U32(std::rand()));
+	ANKI_IMPORTER_LOGV("Will store: %s", pngFilename.cstr());
+	if(!stbi_write_png(pngFilename.cstr(), inWidth, inHeight, inChannelCount, inPixels.getBegin(), 0))
+	{
+		ANKI_IMPORTER_LOGE("STB failed to create: %s", pngFilename.cstr());
+		return Error::FUNCTION_FAILED;
+	}
+	CleanupFile pngCleanup(alloc, pngFilename);
+
+	// Invoke the compressor process
+	StringAuto astcFilename(alloc);
+	astcFilename.sprintf("%s/AnKiImageImporter_%u.astc", tempDirectory.cstr(), U32(std::rand()));
+	StringAuto blockStr(alloc);
+	blockStr.sprintf("%ux%u", blockSize.x(), blockSize.y());
+	Process proc;
+	Array<CString, 5> args;
+	U32 argCount = 0;
+	args[argCount++] = "-cl";
+	args[argCount++] = pngFilename;
+	args[argCount++] = astcFilename;
+	args[argCount++] = blockStr;
+	args[argCount++] = "-fast";
+
+	ANKI_IMPORTER_LOGV("Will invoke process: astcenc-avx2 %s %s %s %s %s", args[0].cstr(), args[1].cstr(),
+					   args[2].cstr(), args[3].cstr(), args[4].cstr());
+	ANKI_CHECK(
+		proc.start("astcenc-avx2", args,
+				   (astcencPath.isEmpty()) ? ConstWeakArray<CString>() : Array<CString, 2>{{"PATH", astcencPath}}));
+
+	// Register the output file for deletion before waiting so that every return past this point removes it
+	CleanupFile astcCleanup(alloc, astcFilename);
+	ProcessStatus status;
+	I32 exitCode;
+	ANKI_CHECK(proc.wait(60.0, &status, &exitCode));
+
+	if(status != ProcessStatus::NORMAL_EXIT || exitCode != 0)
+	{
+		// The stdout of the process is used as the error message, but only if the exit code is non-zero
+		StringAuto errStr(alloc);
+		if(exitCode != 0)
+		{
+			ANKI_CHECK(proc.readFromStdout(errStr));
+		}
+
+		if(errStr.isEmpty())
+		{
+			errStr = "Unknown error";
+		}
+
+		ANKI_IMPORTER_LOGE("Invoking astcenc-avx2 process failed: %s", errStr.cstr());
+		return Error::FUNCTION_FAILED;
+	}
+
+	// Read the astc file
+	File astcFile;
+	ANKI_CHECK(astcFile.open(astcFilename, FileOpenFlag::READ | FileOpenFlag::BINARY));
+	AstcHeader header;
+	ANKI_CHECK(astcFile.read(&header, sizeof(header)));
+
+	// Assemble a U32 out of 4 bytes, the first argument is the least significant byte
+	auto unpackBytes = [](U8 a, U8 b, U8 c, U8 d) -> U32 {
+		return (U32(a)) + (U32(b) << 8) + (U32(c) << 16) + (U32(d) << 24);
+	};
+
+	const U32 magicval = unpackBytes(header.m_magic[0], header.m_magic[1], header.m_magic[2], header.m_magic[3]);
+	if(magicval != 0x5CA1AB13)
+	{
+		ANKI_IMPORTER_LOGE("astcenc-avx2 produced a file with wrong magic");
+		return Error::FUNCTION_FAILED;
+	}
+
+	// A block dimension of zero is treated as 1
+	const U32 blockx = max<U32>(header.m_blockX, 1u);
+	const U32 blocky = max<U32>(header.m_blockY, 1u);
+	const U32 blockz = max<U32>(header.m_blockZ, 1u);
+	if(blockx != blockSize.x() || blocky != blockSize.y() || blockz != 1)
+	{
+		ANKI_IMPORTER_LOGE("astcenc-avx2 with wrong block size");
+		return Error::FUNCTION_FAILED;
+	}
+
+	// The image dimensions are 24bit values, the most significant byte of the U32 is left zero
+	const U32 dimx = unpackBytes(header.m_dimX[0], header.m_dimX[1], header.m_dimX[2], 0);
+	const U32 dimy = unpackBytes(header.m_dimY[0], header.m_dimY[1], header.m_dimY[2], 0);
+	const U32 dimz = unpackBytes(header.m_dimZ[0], header.m_dimZ[1], header.m_dimZ[2], 0);
+	if(dimx != inWidth || dimy != inHeight || dimz != 1)
+	{
+		ANKI_IMPORTER_LOGE("astcenc-avx2 with wrong image size");
+		return Error::FUNCTION_FAILED;
+	}
+
+	// Whatever follows the header goes straight to the output
+	ANKI_CHECK(astcFile.read(outPixels.getBegin(), outPixels.getSizeInBytes()));
+
+	return Error::NONE;
+}
+
 static ANKI_USE_RESULT Error storeAnkiImage(const ImageImporterConfig& config, const ImageImporterContext& ctx)
 {
+	ANKI_IMPORTER_LOGV("Storing to %s", config.m_outFilename.cstr());
+
 	File outFile;
 	ANKI_CHECK(outFile.open(config.m_outFilename, FileOpenFlag::BINARY | FileOpenFlag::WRITE));
 
@@ -412,11 +545,15 @@ static ANKI_USE_RESULT Error storeAnkiImage(const ImageImporterConfig& config, c
 	header.m_compressionMask = config.m_compressions;
 	header.m_isNormal = false;
 	header.m_mipmapCount = ctx.m_mipmaps.getSize();
+	header.m_astcBlockSizeX = config.m_astcBlockSize.x();
+	header.m_astcBlockSizeY = config.m_astcBlockSize.y();
 	ANKI_CHECK(outFile.write(&header, sizeof(header)));
 
 	// Write RAW
 	if(!!(config.m_compressions & ImageBinaryDataCompression::RAW))
 	{
+		ANKI_IMPORTER_LOGV("Storing RAW");
+
 		// for(I32 mip = I32(ctx.m_mipmaps.getSize()) - 1; mip >= 0; --mip)
 		for(U32 mip = 0; mip < ctx.m_mipmaps.getSize(); ++mip)
 		{
@@ -435,6 +572,8 @@ static ANKI_USE_RESULT Error storeAnkiImage(const ImageImporterConfig& config, c
 	// Write S3TC
 	if(!!(config.m_compressions & ImageBinaryDataCompression::S3TC))
 	{
+		ANKI_IMPORTER_LOGV("Storing S3TC");
+
 		// for(I32 mip = I32(ctx.m_mipmaps.getSize()) - 1; mip >= 0; --mip)
 		for(U32 mip = 0; mip < ctx.m_mipmaps.getSize(); ++mip)
 		{
@@ -450,6 +589,26 @@ static ANKI_USE_RESULT Error storeAnkiImage(const ImageImporterConfig& config, c
 		}
 	}
 
+	// Write ASTC
+	if(!!(config.m_compressions & ImageBinaryDataCompression::ASTC))
+	{
+		ANKI_IMPORTER_LOGV("Storing ASTC");
+
+		// for(I32 mip = I32(ctx.m_mipmaps.getSize()) - 1; mip >= 0; --mip)
+		for(U32 mip = 0; mip < ctx.m_mipmaps.getSize(); ++mip)
+		{
+			for(U32 l = 0; l < ctx.m_layerCount; ++l)
+			{
+				for(U32 f = 0; f < ctx.m_faceCount; ++f)
+				{
+					const U32 idx = l * ctx.m_faceCount + f;
+					const ConstWeakArray<U8, PtrSize> pixels = ctx.m_mipmaps[mip].m_surfacesOrVolume[idx].m_astcPixels;
+					ANKI_CHECK(outFile.write(&pixels[0], pixels.getSizeInBytes()));
+				}
+			}
+		}
+	}
+
 	return Error::NONE;
 }
 
@@ -493,10 +652,17 @@ static ANKI_USE_RESULT Error importImageInternal(const ImageImporterConfig& conf
 	ANKI_CHECK(loadFirstMipmap(config, ctx));
 
 	// Generate mipmaps
+	U32 minMipDimension = max(config.m_minMipmapDimension, 4u);
+	if(!!(config.m_compressions & ImageBinaryDataCompression::ASTC))
+	{
+		minMipDimension = max(minMipDimension, config.m_astcBlockSize.x());
+		minMipDimension = max(minMipDimension, config.m_astcBlockSize.y());
+	}
+
 	const U32 mipCount =
 		min(config.m_mipmapCount, (config.m_type == ImageBinaryType::_3D)
-									  ? computeMaxMipmapCount3d(width, height, ctx.m_depth, config.m_minMipmapDimension)
-									  : computeMaxMipmapCount2d(width, height, config.m_minMipmapDimension));
+									  ? computeMaxMipmapCount3d(width, height, ctx.m_depth, minMipDimension)
+									  : computeMaxMipmapCount2d(width, height, minMipDimension));
 	for(U32 mip = 1; mip < mipCount; ++mip)
 	{
 		ctx.m_mipmaps.emplaceBack(alloc);
@@ -538,6 +704,8 @@ static ANKI_USE_RESULT Error importImageInternal(const ImageImporterConfig& conf
 	// Compress
 	if(!!(config.m_compressions & ImageBinaryDataCompression::S3TC))
 	{
+		ANKI_IMPORTER_LOGV("Will compress in S3TC");
+
 		for(U32 mip = 0; mip < mipCount; ++mip)
 		{
 			for(U32 l = 0; l < ctx.m_layerCount; ++l)
@@ -562,6 +730,36 @@ static ANKI_USE_RESULT Error importImageInternal(const ImageImporterConfig& conf
 		}
 	}
 
+	if(!!(config.m_compressions & ImageBinaryDataCompression::ASTC))
+	{
+		ANKI_IMPORTER_LOGV("Will compress in ASTC");
+
+		for(U32 mip = 0; mip < mipCount; ++mip)
+		{
+			for(U32 l = 0; l < ctx.m_layerCount; ++l)
+			{
+				for(U32 f = 0; f < ctx.m_faceCount; ++f)
+				{
+					const U32 idx = l * ctx.m_faceCount + f;
+					SurfaceOrVolumeData& surface = ctx.m_mipmaps[mip].m_surfacesOrVolume[idx];
+
+					const U32 width = ctx.m_width >> mip;
+					const U32 height = ctx.m_height >> mip;
+					const PtrSize blockSize = 16;
+					const PtrSize astcImageSize =
+						blockSize * (width / config.m_astcBlockSize.x()) * (height / config.m_astcBlockSize.y());
+
+					surface.m_astcPixels.create(astcImageSize);
+
+					ANKI_CHECK(compressAstc(alloc, config.m_tempDirectory, config.m_astcencPath,
+											ConstWeakArray<U8, PtrSize>(surface.m_pixels), width, height,
+											ctx.m_channelCount, config.m_astcBlockSize,
+											WeakArray<U8, PtrSize>(surface.m_astcPixels)));
+				}
+			}
+		}
+	}
+
 	if(!!(config.m_compressions & ImageBinaryDataCompression::ETC))
 	{
 		ANKI_ASSERT(!"TODO");

+ 2 - 0
AnKi/Importer/ImageImporter.h

@@ -29,6 +29,8 @@ public:
 	Bool m_noAlpha = true;
 	CString m_tempDirectory;
 	CString m_compressonatorPath; ///< Optional.
+	CString m_astcencPath; ///< Optional.
+	UVec2 m_astcBlockSize = UVec2(8u);
 };
 
 /// Converts images to AnKi's specific format.

+ 3 - 1
AnKi/Input/CMakeLists.txt

@@ -2,10 +2,12 @@ set(SOURCES Input.cpp)
 
 if(SDL)
 	set(SOURCES ${SOURCES} InputSdl.cpp)
+elseif(ANDROID)
+set(SOURCES ${SOURCES} InputAndroid.cpp)
 else()
 	set(SOURCES ${SOURCES} InputDummy.cpp)
 endif()
 
 foreach(F ${SOURCES})
-	addAnkiSourceFiles("${CMAKE_CURRENT_SOURCE_DIR}/${F}")
+	anki_add_source_files("${CMAKE_CURRENT_SOURCE_DIR}/${F}")
 endforeach()

+ 26 - 19
AnKi/Input/InputAndroid.cpp

@@ -7,56 +7,63 @@
 #include <AnKi/Core/NativeWindowAndroid.h>
 #include <AnKi/Util/Logger.h>
 #include <AnKi/Core/App.h>
+#if ANKI_OS_ANDROID
+#	include <android_native_app_glue.h>
+#endif
 
 namespace anki
 {
 
 static void handleAndroidEvents(android_app* app, int32_t cmd)
 {
-	Input* input = (Input*)app->userData;
+	Input* input = static_cast<Input*>(app->userData);
 	ANKI_ASSERT(input != nullptr);
 
 	switch(cmd)
 	{
 	case APP_CMD_TERM_WINDOW:
 	case APP_CMD_LOST_FOCUS:
-		ANKI_LOGI("New event 0x%x", cmd);
-		input->addEvent(Input::WINDOW_CLOSED_EVENT);
+		input->addEvent(InputEvent::WINDOW_CLOSED);
 		break;
 	}
 }
 
-Input::~Input()
-{
-}
-
-void Input::handleEvents()
+Error Input::handleEvents()
 {
 	int ident;
-	int outEvents;
+	int events;
 	android_poll_source* source;
 
-	zeroMemory(events);
-
-	while((ident = ALooper_pollAll(0, NULL, &outEvents, (void**)&source)) >= 0)
+	while((ident = ALooper_pollAll(0, nullptr, &events, reinterpret_cast<void**>(&source))) >= 0)
 	{
-		if(source != NULL)
+		if(source != nullptr)
 		{
-			source->process(gAndroidApp, source);
+			source->process(g_androidApp, source);
 		}
 	}
+
+	return Error::NONE;
 }
 
-void Input::init(NativeWindow* /*nativeWindow*/)
+Error Input::initInternal(NativeWindow* window)
+{
+	ANKI_ASSERT(window);
+	g_androidApp->userData = this;
+	g_androidApp->onAppCmd = handleAndroidEvents;
+	m_nativeWindow = window;
+
+	return Error::NONE;
+}
+
+void Input::destroy()
 {
-	ANKI_ASSERT(gAndroidApp);
-	gAndroidApp->userData = this;
-	gAndroidApp->onAppCmd = handleAndroidEvents;
 }
 
 void Input::moveCursor(const Vec2& posNdc)
 {
-	// do nothing
+	m_mousePosNdc = posNdc;
+	m_mousePosWin =
+		UVec2((posNdc * 0.5f + 0.5f) * Vec2(F32(m_nativeWindow->getWidth()), F32(m_nativeWindow->getHeight())));
 }
 
 void Input::hideCursor(Bool hide)

+ 9 - 7
AnKi/Input/InputDummy.cpp

@@ -8,24 +8,26 @@
 namespace anki
 {
 
-void Input::handleEvents()
+Error Input::initInternal(NativeWindow* nativeWindow)
 {
-	// You are dummy... do nothing
+	return Error::NONE;
 }
 
-void Input::init(NativeWindow* nativeWindow)
+void Input::destroy()
 {
-	// You are dummy... do nothing
 }
 
-void Input::moveCursor(const Vec2& posNdc)
+Error Input::handleEvents()
+{
+	return Error::NONE;
+}
+
+void Input::moveCursor(const Vec2& pos)
 {
-	// You are dummy... do nothing
 }
 
 void Input::hideCursor(Bool hide)
 {
-	// You are dummy... do nothing
 }
 
 } // end namespace anki

+ 1 - 1
AnKi/Math/CMakeLists.txt

@@ -1,2 +1,2 @@
 file(GLOB SOURCES *.cpp)
-addAnkiSourceFiles(${SOURCES})
+anki_add_source_files(${SOURCES})

+ 133 - 38
AnKi/Math/Mat.h

@@ -24,16 +24,25 @@ class alignas(MathSimd<T, I>::ALIGNMENT) TMat
 public:
 	using Scalar = T;
 	using Simd = typename MathSimd<T, I>::Type;
+
+#if ANKI_COMPILER_GCC_COMPATIBLE
+#	pragma GCC diagnostic push
+#	pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
 	using SimdArray = Array<Simd, J>;
+#if ANKI_COMPILER_GCC_COMPATIBLE
+#	pragma GCC diagnostic pop
+#endif
+
 	using RowVec = TVec<T, I>;
 	using ColumnVec = TVec<T, J>;
 
 	static constexpr U ROW_SIZE = J; ///< Number of rows
 	static constexpr U COLUMN_SIZE = I; ///< Number of columns
 	static constexpr U SIZE = J * I; ///< Number of total elements
-	static constexpr Bool HAS_SIMD = I == 4 && std::is_same<T, F32>::value && ANKI_SIMD_SSE;
-	static constexpr Bool HAS_MAT4_SIMD = J == 4 && I == 4 && std::is_same<T, F32>::value && ANKI_SIMD_SSE;
-	static constexpr Bool HAS_MAT3X4_SIMD = J == 3 && I == 4 && std::is_same<T, F32>::value && ANKI_SIMD_SSE;
+	static constexpr Bool HAS_SIMD = I == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
+	static constexpr Bool HAS_MAT4_SIMD = J == 4 && I == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
+	static constexpr Bool HAS_MAT3X4_SIMD = J == 3 && I == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
 
 	/// @name Constructors
 	/// @{
@@ -68,14 +77,20 @@ public:
 		}
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	explicit TMat(const T f)
 	{
 		for(U i = 0; i < J; i++)
 		{
+#	if ANKI_SIMD_SSE
 			m_simd[i] = _mm_set1_ps(f);
+#	else
+			m_simd[i] = {f, f, f, f};
+#	endif
 		}
 	}
+#endif
 
 	explicit TMat(const T arr[])
 	{
@@ -303,16 +318,22 @@ public:
 		return c;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	TMat operator+(const TMat& b) const
 	{
 		TMat c;
 		for(U i = 0; i < J; i++)
 		{
+#	if ANKI_SIMD_SSE
 			c.m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
+#	else
+			c.m_simd[i] = m_simd[i] + b.m_simd[i];
+#	endif
 		}
 		return c;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_SIMD)
 	TMat& operator+=(const TMat& b)
@@ -324,15 +345,21 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	TMat& operator+=(const TMat& b)
 	{
 		for(U i = 0; i < J; i++)
 		{
+#	if ANKI_SIMD_SSE
 			m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
+#	else
+			m_simd[i] += b.m_simd[i];
+#	endif
 		}
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_SIMD)
 	TMat operator-(const TMat& b) const
@@ -345,16 +372,22 @@ public:
 		return c;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	TMat operator-(const TMat& b) const
 	{
 		TMat c;
 		for(U i = 0; i < J; i++)
 		{
+#	if ANKI_SIMD_SSE
 			c.m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
+#	else
+			c.m_simd[i] = m_simd[i] - b.m_simd[i];
+#	endif
 		}
 		return c;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_SIMD)
 	TMat& operator-=(const TMat& b)
@@ -366,15 +399,21 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	TMat& operator-=(const TMat& b)
 	{
 		for(U i = 0; i < J; i++)
 		{
+#	if ANKI_SIMD_SSE
 			m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
+#	else
+			m_simd[i] -= b.m_simd[i];
+#	endif
 		}
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(J == I && !HAS_MAT4_SIMD)
 	TMat operator*(const TMat& b) const
@@ -395,14 +434,15 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_MAT4_SIMD)
 	TMat operator*(const TMat& b) const
 	{
 		TMat out;
 		const auto& m = *this;
-
 		for(U i = 0; i < 4; i++)
 		{
+#	if ANKI_SIMD_SSE
 			__m128 t1, t2;
 
 			t1 = _mm_set1_ps(m(i, 0));
@@ -415,10 +455,25 @@ public:
 			t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[3], t1), t2);
 
 			out.m_simd[i] = t2;
+#	else
+			float32x4_t t1, t2;
+
+			t1 = vmovq_n_f32(m(i, 0));
+			t2 = b.m_simd[0] * t1;
+			t1 = vmovq_n_f32(m(i, 1));
+			t2 = b.m_simd[1] * t1 + t2;
+			t1 = vmovq_n_f32(m(i, 2));
+			t2 = b.m_simd[2] * t1 + t2;
+			t1 = vmovq_n_f32(m(i, 3));
+			t2 = b.m_simd[3] * t1 + t2;
+
+			out.m_simd[i] = t2;
+#	endif
 		}
 
 		return out;
 	}
+#endif
 
 	TMat& operator*=(const TMat& b)
 	{
@@ -541,7 +596,7 @@ public:
 		ColumnVec out;
 		for(U j = 0; j < J; j++)
 		{
-			T sum = 0.0;
+			T sum = T(0);
 			for(U i = 0; i < I; i++)
 			{
 				sum += m(j, i) * v[i];
@@ -551,16 +606,25 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(HAS_SIMD)
 	ColumnVec operator*(const RowVec& v) const
 	{
 		ColumnVec out;
+#	if ANKI_SIMD_SSE
 		for(U i = 0; i < J; i++)
 		{
 			_mm_store_ss(&out[i], _mm_dp_ps(m_simd[i], v.getSimd(), 0xF1));
 		}
+#	else
+		for(U i = 0; i < J; i++)
+		{
+			out[i] = RowVec(m_simd[i]).dot(v);
+		}
+#	endif
 		return out;
 	}
+#endif
 	/// @}
 
 	/// @name Other
@@ -706,13 +770,13 @@ public:
 		T sintheta, costheta;
 		sinCos(rad, sintheta, costheta);
 
-		m(0, 0) = 1.0;
-		m(0, 1) = 0.0;
-		m(0, 2) = 0.0;
-		m(1, 0) = 0.0;
+		m(0, 0) = T(1);
+		m(0, 1) = T(0);
+		m(0, 2) = T(0);
+		m(1, 0) = T(0);
 		m(1, 1) = costheta;
 		m(1, 2) = -sintheta;
-		m(2, 0) = 0.0;
+		m(2, 0) = T(0);
 		m(2, 1) = sintheta;
 		m(2, 2) = costheta;
 	}
@@ -724,13 +788,13 @@ public:
 		sinCos(rad, sintheta, costheta);
 
 		m(0, 0) = costheta;
-		m(0, 1) = 0.0;
+		m(0, 1) = T(0);
 		m(0, 2) = sintheta;
-		m(1, 0) = 0.0;
-		m(1, 1) = 1.0;
-		m(1, 2) = 0.0;
+		m(1, 0) = T(0);
+		m(1, 1) = T(1);
+		m(1, 2) = T(0);
 		m(2, 0) = -sintheta;
-		m(2, 1) = 0.0;
+		m(2, 1) = T(0);
 		m(2, 2) = costheta;
 	}
 
@@ -742,13 +806,13 @@ public:
 
 		m(0, 0) = costheta;
 		m(0, 1) = -sintheta;
-		m(0, 2) = 0.0;
+		m(0, 2) = T(0);
 		m(1, 0) = sintheta;
 		m(1, 1) = costheta;
-		m(1, 2) = 0.0;
-		m(2, 0) = 0.0;
-		m(2, 1) = 0.0;
-		m(2, 2) = 1.0;
+		m(1, 2) = T(0);
+		m(2, 0) = T(0);
+		m(2, 1) = T(0);
+		m(2, 2) = T(1);
 	}
 
 	/// It rotates "this" in the axis defined by the rotation AND not the
@@ -849,7 +913,7 @@ public:
 	{
 		TMat& m = *this;
 		// If length is > 1 + 0.002 or < 1 - 0.002 then not normalized quat
-		ANKI_ASSERT(absolute(1.0 - q.getLength()) <= 0.002);
+		ANKI_ASSERT(absolute(T(1) - q.getLength()) <= 0.002);
 
 		T xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz;
 
@@ -988,11 +1052,22 @@ public:
 		}
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(J == I && HAS_SIMD)
 	void transpose()
 	{
+#	if ANKI_SIMD_SSE
 		_MM_TRANSPOSE4_PS(m_simd[0], m_simd[1], m_simd[2], m_simd[3]);
+#	else
+		const float32x4x2_t row01 = vtrnq_f32(m_simd[0], m_simd[1]);
+		const float32x4x2_t row23 = vtrnq_f32(m_simd[2], m_simd[3]);
+		m_simd[0] = vcombine_f32(vget_low_f32(row01.val[0]), vget_low_f32(row23.val[0]));
+		m_simd[1] = vcombine_f32(vget_low_f32(row01.val[1]), vget_low_f32(row23.val[1]));
+		m_simd[2] = vcombine_f32(vget_high_f32(row01.val[0]), vget_high_f32(row23.val[0]));
+		m_simd[3] = vcombine_f32(vget_high_f32(row01.val[1]), vget_high_f32(row23.val[1]));
+#	endif
 	}
+#endif
 
 	void transposeRotationPart()
 	{
@@ -1000,7 +1075,7 @@ public:
 		{
 			for(U i = j + 1; i < 3; i++)
 			{
-				T tmp = m_arr2[j][i];
+				const T tmp = m_arr2[j][i];
 				m_arr2[j][i] = m_arr2[i][j];
 				m_arr2[i][j] = tmp;
 			}
@@ -1064,7 +1139,7 @@ public:
 		ANKI_ASSERT(!isZero<T>(det)); // Cannot invert det == 0
 
 		// create adjoint matrix and multiply by 1/det to get inverse
-		const T invDet = 1.0 / det;
+		const T invDet = T(1) / det;
 		r(0, 0) = invDet * cofactor0;
 		r(0, 1) = invDet * cofactor3;
 		r(0, 2) = invDet * cofactor6;
@@ -1170,8 +1245,8 @@ public:
 		// See the clean code in < r664
 
 		// one of the 2 mat4 doesnt represent transformation
-		ANKI_ASSERT(isZero<T>(m0(3, 0) + m0(3, 1) + m0(3, 2) + m0(3, 3) - 1.0)
-					&& isZero<T>(m1(3, 0) + m1(3, 1) + m1(3, 2) + m1(3, 3) - 1.0));
+		ANKI_ASSERT(isZero<T>(m0(3, 0) + m0(3, 1) + m0(3, 2) + m0(3, 3) - T(1))
+					&& isZero<T>(m1(3, 0) + m1(3, 1) + m1(3, 2) + m1(3, 3) - T(1)));
 
 		TMat m4;
 
@@ -1191,8 +1266,8 @@ public:
 
 		m4(2, 3) = m0(2, 0) * m1(0, 3) + m0(2, 1) * m1(1, 3) + m0(2, 2) * m1(2, 3) + m0(2, 3);
 
-		m4(3, 0) = m4(3, 1) = m4(3, 2) = 0.0;
-		m4(3, 3) = 1.0;
+		m4(3, 0) = m4(3, 1) = m4(3, 2) = T(0);
+		m4(3, 3) = T(1);
 
 		return m4;
 	}
@@ -1223,12 +1298,13 @@ public:
 		return c;
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(J == 3 && I == 4 && HAS_SIMD)
 	TMat combineTransformations(const TMat& b) const
 	{
 		TMat c;
 		const auto& a = *this;
-
+#	if ANKI_SIMD_SSE
 		for(U i = 0; i < 3; i++)
 		{
 			__m128 t1, t2;
@@ -1240,14 +1316,33 @@ public:
 			t1 = _mm_set1_ps(a(i, 2));
 			t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[2], t1), t2);
 
-			TVec<T, 4> v4(0.0, 0.0, 0.0, a(i, 3));
+			TVec<T, 4> v4(T(0), T(0), T(0), a(i, 3));
 			t2 = _mm_add_ps(v4.getSimd(), t2);
 
 			c.m_simd[i] = t2;
 		}
+#	else
+		for(U i = 0; i < 3; i++)
+		{
+			float32x4_t t1, t2;
+
+			t1 = vdupq_n_f32(a(i, 0));
+			t2 = b.m_simd[0] * t1;
+			t1 = vdupq_n_f32(a(i, 1));
+			t2 = b.m_simd[1] * t1 + t2;
+			t1 = vdupq_n_f32(a(i, 2));
+			t2 = b.m_simd[2] * t1 + t2;
+
+			TVec<T, 4> v4(T(0), T(0), T(0), a(i, 3));
+			t2 += v4.getSimd();
+
+			c.m_simd[i] = t2;
+		}
+#	endif
 
 		return c;
 	}
+#endif
 
 	/// Calculate a perspective projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
 	ANKI_ENABLE_METHOD(I == 4 && J == 4)
@@ -1342,10 +1437,10 @@ public:
 
 		// Using the same logic the Pv.x = x' * w / m00
 		// so Pv.x = x' * Pv.z * (-1 / m00)
-		out.x() = -T(1.0) / m00;
+		out.x() = -T(T(1)) / m00;
 
 		// Same for y
-		out.y() = -T(1.0) / m11;
+		out.y() = -T(T(1)) / m11;
 
 		return out;
 	}
@@ -1359,8 +1454,8 @@ public:
 		const auto& m = *this;
 		out.z() = -m(2, 3);
 		out.w() = m(2, 2);
-		out.x() = -T(1.0) / m(0, 0);
-		out.y() = -T(1.0) / m(1, 1);
+		out.x() = -T(T(1)) / m(0, 0);
+		out.y() = -T(T(1)) / m(1, 1);
 		return out;
 	}
 
@@ -1410,12 +1505,12 @@ public:
 
 	/// Linear interpolation between this matrix and b.
 	/// @param b The matrix reached when t is one.
 	/// @param t The interpolation factor.
 	/// @return (*this) * (1 - t) + b * t.
 	TMat lerp(const TMat& b, T t) const
 	{
-		return ((*this) * (1.0 - t)) + (b * t);
+		return ((*this) * (T(1) - t)) + (b * t);
 	}
 
 	/// Return a matrix built from the scalar zero.
 	/// NOTE(review): presumably the single-scalar constructor fills every element; it is not visible here, confirm.
 	static TMat getZero()
 	{
-		return TMat(0.0);
+		return TMat(T(0));
 	}
 
 	void setZero()
@@ -1426,19 +1521,19 @@ public:
 	/// Return the 3x3 identity matrix: ones on the diagonal, zeros elsewhere. Enabled only when I == 3 and J == 3.
 	ANKI_ENABLE_METHOD(I == 3 && J == 3)
 	static TMat getIdentity()
 	{
-		return TMat(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0);
+		return TMat(T(1), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(1));
 	}
 
 	/// Return the 4x4 identity matrix: ones on the diagonal, zeros elsewhere. Enabled only when I == 4 and J == 4.
 	ANKI_ENABLE_METHOD(I == 4 && J == 4)
 	static TMat getIdentity()
 	{
-		return TMat(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
+		return TMat(T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1));
 	}
 
 	/// Return the identity for the I == 4, J == 3 layout: twelve values with ones at positions 0, 5 and 10.
 	/// NOTE(review): presumably three rows of four, i.e. an identity 3x3 part plus a zero fourth column; confirm
 	/// against the element order of the 12-argument constructor.
 	ANKI_ENABLE_METHOD(I == 4 && J == 3)
 	static TMat getIdentity()
 	{
-		return TMat(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0);
+		return TMat(T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1), T(0));
 	}
 
 	void setIdentity()

+ 178 - 6
AnKi/Math/Vec.h

@@ -25,7 +25,7 @@ public:
 	using Simd = typename MathSimd<T, N>::Type;
 	static constexpr U COMPONENT_COUNT = N;
 	static constexpr Bool IS_INTEGER = std::is_integral<T>::value;
-	static constexpr Bool HAS_VEC4_SIMD = N == 4 && std::is_same<T, F32>::value && ANKI_SIMD_SSE;
+	static constexpr Bool HAS_VEC4_SIMD = N == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
 
 	/// @name Constructors
 	/// @{
@@ -69,11 +69,17 @@ public:
 		}
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Construct by broadcasting f to all four SIMD lanes. Enabled only when HAS_VEC4_SIMD holds.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	explicit TVec(const T f)
 	{
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_set1_ps(f);
+#	else
 		// NEON equivalent of the SSE broadcast above.
+		m_simd = vdupq_n_f32(f);
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	explicit TVec(const T arr[])
@@ -139,11 +145,17 @@ public:
 		w() = w_;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Construct from four components, written straight into the SIMD register. Enabled only when HAS_VEC4_SIMD
 	/// holds.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec(const T x_, const T y_, const T z_, const T w_)
 	{
+#	if ANKI_SIMD_SSE
 		// _mm_set_ps takes its arguments from the highest lane to the lowest, hence the reversed order.
 		m_simd = _mm_set_ps(w_, z_, y_, x_);
+#	else
 		// The NEON vector type is brace-initialised in lane order, so x_ goes first.
+		m_simd = {x_, y_, z_, w_};
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(N == 4)
 	TVec(const TVec<T, 3>& a, const T w_)
@@ -2328,11 +2340,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Lane-wise addition (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return A new vector holding this + b.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator+(const TVec& b) const
 	{
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_add_ps(m_simd, b.m_simd));
+#	else
 		// Non-SSE path: uses the built-in arithmetic operator on the vector type.
+		return TVec(m_simd + b.m_simd);
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec& operator+=(const TVec& b)
@@ -2344,12 +2362,18 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Lane-wise in-place addition (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return A reference to this vector after adding b.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec& operator+=(const TVec& b)
 	{
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_add_ps(m_simd, b.m_simd);
+#	else
 		// Non-SSE path: uses the built-in arithmetic operator on the vector type.
+		m_simd += b.m_simd;
+#	endif
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec operator-(const TVec& b) const
@@ -2362,11 +2386,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Lane-wise subtraction (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return A new vector holding this - b.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator-(const TVec& b) const
 	{
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_sub_ps(m_simd, b.m_simd));
+#	else
 		// Non-SSE path: uses the built-in arithmetic operator on the vector type.
+		return TVec(m_simd - b.m_simd);
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec& operator-=(const TVec& b)
@@ -2378,12 +2408,18 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Lane-wise in-place subtraction (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return A reference to this vector after subtracting b.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec& operator-=(const TVec& b)
 	{
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_sub_ps(m_simd, b.m_simd);
+#	else
 		// Non-SSE path: uses the built-in arithmetic operator on the vector type.
+		m_simd -= b.m_simd;
+#	endif
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec operator*(const TVec& b) const
@@ -2396,11 +2432,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Lane-wise multiplication (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return A new vector holding this * b, component by component.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator*(const TVec& b) const
 	{
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_mul_ps(m_simd, b.m_simd));
+#	else
 		// Non-SSE path: uses the built-in arithmetic operator on the vector type.
+		return TVec(m_simd * b.m_simd);
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec& operator*=(const TVec& b)
@@ -2412,12 +2454,18 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Lane-wise in-place multiplication (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return A reference to this vector after multiplying by b, component by component.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec& operator*=(const TVec& b)
 	{
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_mul_ps(m_simd, b.m_simd);
+#	else
 		// Non-SSE path: uses the built-in arithmetic operator on the vector type.
+		m_simd *= b.m_simd;
+#	endif
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec operator/(const TVec& b) const
@@ -2431,11 +2479,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Lane-wise division (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// No zero check is performed on the lanes of b in this version.
 	/// @return A new vector holding this / b, component by component.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator/(const TVec& b) const
 	{
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_div_ps(m_simd, b.m_simd));
+#	else
 		// Non-SSE path: uses the built-in arithmetic operator on the vector type.
+		return TVec(m_simd / b.m_simd);
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec& operator/=(const TVec& b)
@@ -2448,12 +2502,18 @@ public:
 		return *this;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Lane-wise in-place division (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// No zero check is performed on the lanes of b in this version.
 	/// @return A reference to this vector after dividing by b, component by component.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec& operator/=(const TVec& b)
 	{
+#	if ANKI_SIMD_SSE
 		m_simd = _mm_div_ps(m_simd, b.m_simd);
+#	else
 		// Non-SSE path: uses the built-in arithmetic operator on the vector type.
+		m_simd /= b.m_simd;
+#	endif
 		return *this;
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec operator-() const
@@ -2466,11 +2526,17 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Unary negation (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return A new vector with the sign of every lane flipped.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec operator-() const
 	{
+#	if ANKI_SIMD_SSE
 		// XOR with -0.0 toggles only the sign bit of each lane.
 		return TVec(_mm_xor_ps(m_simd, _mm_set1_ps(-0.0)));
+#	else
 		// Non-SSE path: uses the built-in unary minus on the vector type.
+		return TVec(-m_simd);
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(IS_INTEGER)
 	TVec operator<<(const TVec& b) const
@@ -2577,6 +2643,27 @@ public:
 		return *this;
 	}
 
+	/// Component-wise remainder. Enabled only when IS_INTEGER holds.
+	/// @param b The divisors; no component may be zero.
+	/// @return A new vector whose i-th component is this[i] % b[i].
+	ANKI_ENABLE_METHOD(IS_INTEGER)
+	TVec operator%(const TVec& b) const
+	{
+		TVec out;
+		for(U i = 0; i < N; i++)
+		{
+			// Integer modulo by zero is undefined behaviour, catch it in debug builds
+			ANKI_ASSERT(b.m_carr[i] != T(0));
+			out.m_carr[i] = m_carr[i] % b.m_carr[i];
+		}
+		return out;
+	}
+
+	/// Component-wise in-place remainder. Enabled only when IS_INTEGER holds.
+	/// @param b The divisors; no component may be zero.
+	/// @return A reference to this vector after every component was replaced by this[i] % b[i].
+	ANKI_ENABLE_METHOD(IS_INTEGER)
+	TVec& operator%=(const TVec& b)
+	{
+		for(U i = 0; i < N; i++)
+		{
+			// Integer modulo by zero is undefined behaviour, catch it in debug builds
+			ANKI_ASSERT(b.m_carr[i] != T(0));
+			m_carr[i] %= b.m_carr[i];
+		}
+		return *this;
+	}
+
 	Bool operator==(const TVec& b) const
 	{
 		for(U i = 0; i < N; i++)
@@ -2754,6 +2841,19 @@ public:
 		return *this;
 	}
 
+	ANKI_ENABLE_METHOD(IS_INTEGER)
+	TVec operator%(const T f) const
+	{
 		// Scalar remainder: wraps f in a TVec and defers to the vector overload of operator%. Enabled only when
 		// IS_INTEGER holds. NOTE(review): TVec(f) presumably copies f into every component; confirm.
+		return (*this) % TVec(f);
+	}
+
+	ANKI_ENABLE_METHOD(IS_INTEGER)
+	TVec& operator%=(const T f)
+	{
 		// Scalar in-place remainder: wraps f in a TVec and defers to the vector overload of operator%=. Enabled
 		// only when IS_INTEGER holds. NOTE(review): TVec(f) presumably copies f into every component; confirm.
+		(*this) %= TVec(f);
+		return *this;
+	}
+
 	Bool operator==(const T f) const
 	{
 		return *this == TVec(f);
@@ -2812,13 +2912,22 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Four-component dot product (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return The sum of the lane-wise products of this and b.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	T dot(const TVec& b) const
 	{
 		T o;
+#	if ANKI_SIMD_SSE
 		// Mask 0xF1: multiply all four lanes (high nibble) and write the sum to lane 0 only (low nibble).
 		_mm_store_ss(&o, _mm_dp_ps(m_simd, b.m_simd, 0xF1));
+#	else
 		// NEON path: lane-wise multiply, then two pairwise adds reduce the four products to a single sum.
+		const float32x4_t tmp = m_simd * b.m_simd;
+		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
+		sum = vpadd_f32(sum, sum);
+		o = sum[0];
+#	endif
 		return o;
 	}
+#endif
 
 	/// 6 muls, 3 adds
 	ANKI_ENABLE_METHOD(N == 3)
@@ -2836,11 +2945,13 @@ public:
 		return TVec(xyz().cross(b.xyz()), T(0));
 	}
 
+#if ANKI_ENABLE_SIMD
 	ANKI_ENABLE_METHOD(N == 4 && HAS_VEC4_SIMD)
 	TVec cross(const TVec& b) const
 	{
 		ANKI_ASSERT(w() == T(0));
 		ANKI_ASSERT(b.w() == T(0));
+#	if ANKI_SIMD_SSE
 		const auto& a = *this;
 		constexpr unsigned int mask0 = _MM_SHUFFLE(3, 0, 2, 1);
 		constexpr unsigned int mask1 = _MM_SHUFFLE(3, 1, 0, 2);
@@ -2851,7 +2962,20 @@ public:
 			_mm_mul_ps(_mm_shuffle_ps(a.m_simd, a.m_simd, U8(mask1)), _mm_shuffle_ps(b.m_simd, b.m_simd, U8(mask0)));
 
 		return TVec(_mm_sub_ps(tmp0, tmp1));
+#	else
+		TVec out;
+		float32x4_t& c = out.m_simd;
+		const float32x4_t& v0 = m_simd;
+		const float32x4_t& v1 = b.m_simd;
+
+		c = v0 * __builtin_shufflevector(v1, v1, 1, 2, 0, 3);
+		c = vfmsq_f32(c, __builtin_shufflevector(v0, v0, 1, 2, 0, 3), v1);
+		c = __builtin_shufflevector(c, c, 1, 2, 0, 3);
+
+		return out;
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(N == 3)
 	TVec projectTo(const TVec& toThis) const
@@ -2907,9 +3031,7 @@ public:
 	/// Squared length of the vector (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return The dot product of this vector with itself.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	T getLengthSquared() const
 	{
-		T o;
-		_mm_store_ss(&o, _mm_dp_ps(m_simd, m_simd, 0xF1));
-		return o;
 		// Defers to dot() so that the platform specific code lives in a single place.
+		return dot(*this);
 	}
 
 	T getLength() const
@@ -2933,12 +3055,28 @@ public:
 		(*this) /= getLength();
 	}
 
+#if ANKI_ENABLE_SIMD
+	/// Normalize the vector in place (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
+	/// Multiplies every lane by an approximate reciprocal square root of the squared length, so the result is
+	/// approximately, not exactly, unit length. No zero-length check is performed.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	void normalize()
 	{
-		__m128 inverseNorm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
+#	if ANKI_SIMD_SSE
+		const __m128 inverseNorm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
 		m_simd = _mm_mul_ps(m_simd, inverseNorm);
+#	else
+		// Dot (len squared)
+		const float32x4_t tmp = m_simd * m_simd;
+		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
+		sum = vpadd_f32(sum, sum);
+		const float32x4_t lensq = vdupq_lane_f32(sum, 0);
+
+		// 1/sqrt(lensq). vrsqrteq_f32 is only a coarse estimate, refine it with one Newton-Raphson step:
+		// vrsqrtsq_f32(a, b) computes (3 - a * b) / 2, so this evaluates mul * (3 - lensq * mul * mul) / 2
+		float32x4_t mul = vrsqrteq_f32(lensq);
+		mul = vmulq_f32(mul, vrsqrtsq_f32(vmulq_f32(lensq, mul), mul));
+
+		// Multiply
+		m_simd *= mul;
+#	endif
 	}
+#endif
 
 	ANKI_ENABLE_METHOD(!HAS_VEC4_SIMD)
 	TVec getNormalized() const
@@ -2946,12 +3084,28 @@ public:
 		return (*this) / getLength();
 	}
 
+#if ANKI_ENABLE_SIMD
+	/// Return a normalized copy of this vector (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
+	/// Multiplies every lane by an approximate reciprocal square root of the squared length, so the result is
+	/// approximately, not exactly, unit length. No zero-length check is performed.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec getNormalized() const
 	{
-		__m128 inverse_norm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
+#	if ANKI_SIMD_SSE
+		const __m128 inverse_norm = _mm_rsqrt_ps(_mm_dp_ps(m_simd, m_simd, 0xFF));
 		return TVec(_mm_mul_ps(m_simd, inverse_norm));
+#	else
+		// Dot (len squared)
+		const float32x4_t tmp = m_simd * m_simd;
+		float32x2_t sum = vpadd_f32(vget_low_f32(tmp), vget_high_f32(tmp));
+		sum = vpadd_f32(sum, sum);
+		const float32x4_t lensq = vdupq_lane_f32(sum, 0);
+
+		// 1/sqrt(lensq). vrsqrteq_f32 is only a coarse estimate, refine it with one Newton-Raphson step:
+		// vrsqrtsq_f32(a, b) computes (3 - a * b) / 2, so this evaluates mul * (3 - lensq * mul * mul) / 2
+		float32x4_t mul = vrsqrteq_f32(lensq);
+		mul = vmulq_f32(mul, vrsqrtsq_f32(vmulq_f32(lensq, mul), mul));
+
+		// Multiply
+		return TVec(m_simd * mul);
+#	endif
 	}
+#endif
 
 	/// Return lerp(this, v1, t)
 	TVec lerp(const TVec& v1, T t) const
@@ -2970,12 +3124,18 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Lane-wise absolute value (SIMD version). Enabled only when HAS_VEC4_SIMD holds.
 	/// @return A new vector holding the absolute value of every lane.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec abs() const
 	{
+#	if ANKI_SIMD_SSE
 		// -0.0f has only the sign bit set; and-not with it clears the sign bit of every lane.
 		const __m128 signMask = _mm_set1_ps(-0.0f);
 		return TVec(_mm_andnot_ps(signMask, m_simd));
+#	else
+		return TVec(vabsq_f32(m_simd));
+#	endif
 	}
+#endif
 
 	/// Get clamped between two values.
 	TVec clamp(const T minv, const T maxv) const
@@ -3001,12 +3161,18 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Get the lane-wise min of this vector and b (SIMD version): every lane of the result is the smaller of the
 	/// two corresponding input lanes. Enabled only when HAS_VEC4_SIMD holds.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec min(const TVec& b) const
 	{
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_min_ps(m_simd, b.m_simd));
+#	else
+		return TVec(vminq_f32(m_simd, b.m_simd));
+#	endif
 	}
+#endif
 
 	/// Get the min of all components.
 	TVec min(const T b) const
@@ -3026,12 +3192,18 @@ public:
 		return out;
 	}
 
+#if ANKI_ENABLE_SIMD
 	/// Get the lane-wise max of this vector and b (SIMD version): every lane of the result is the larger of the
 	/// two corresponding input lanes. Enabled only when HAS_VEC4_SIMD holds.
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec max(const TVec& b) const
 	{
+#	if ANKI_SIMD_SSE
 		return TVec(_mm_max_ps(m_simd, b.m_simd));
+#	else
+		return TVec(vmaxq_f32(m_simd, b.m_simd));
+#	endif
 	}
+#endif
 
 	/// Get the max of all components.
 	TVec max(const T b) const

+ 1 - 1
AnKi/Physics/CMakeLists.txt

@@ -1,2 +1,2 @@
 file(GLOB SOURCES *.cpp)
-addAnkiSourceFiles(${SOURCES})
+anki_add_source_files(${SOURCES})

+ 1 - 1
AnKi/Physics/PhysicsCollisionShape.cpp

@@ -66,7 +66,7 @@ PhysicsTriangleSoup::PhysicsTriangleSoup(PhysicsWorld* world, ConstWeakArray<Vec
 	{
 		m_type = ShapeType::CONVEX; // Fake the type
 
-		m_convex.init(&positions[0][0], I32(positions.getSize()), sizeof(Vec3));
+		m_convex.init(&positions[0][0], I32(positions.getSize()), U32(sizeof(Vec3)));
 		m_convex->setMargin(getWorld().getCollisionMargin());
 		m_convex->setUserPointer(static_cast<PhysicsObject*>(this));
 	}

+ 1 - 1
AnKi/Renderer/CMakeLists.txt

@@ -1,2 +1,2 @@
 file(GLOB_RECURSE SOURCES *.cpp)
-addAnkiSourceFiles(${SOURCES})
+anki_add_source_files(${SOURCES})

+ 2 - 2
AnKi/Renderer/ConfigDefs.h

@@ -36,7 +36,7 @@ ANKI_CONFIG_OPTION(r_probeReflectionShadowMapResolution, 64, 4, 2048)
 ANKI_CONFIG_OPTION(r_lensFlareMaxSpritesPerFlare, 8, 4, 256)
 ANKI_CONFIG_OPTION(r_lensFlareMaxFlares, 16, 8, 256)
 
-ANKI_CONFIG_OPTION(r_giTileResolution, 32, 4, 2048)
+ANKI_CONFIG_OPTION(r_giTileResolution, (ANKI_OS_ANDROID) ? 16 : 32, 4, 2048)
 ANKI_CONFIG_OPTION(r_giShadowMapResolution, 128, 4, 2048)
 ANKI_CONFIG_OPTION(r_giMaxCachedProbes, 16, 4, 2048)
 ANKI_CONFIG_OPTION(r_giMaxVisibleProbes, 8, 1, 256)
@@ -56,5 +56,5 @@ ANKI_CONFIG_OPTION(r_rtShadowsSvgf, 0, 0, 1)
 ANKI_CONFIG_OPTION(r_rtShadowsSvgfAtrousPassCount, 3, 1, 20)
 ANKI_CONFIG_OPTION(r_rtShadowsRaysPerPixel, 1, 1, 8)
 
-ANKI_CONFIG_OPTION(r_fsr, 1, 0, 1)
+ANKI_CONFIG_OPTION(r_fsr, 1, 0, 2, "0: Use bilinear, 1: FSR low quality, 2: FSR high quality")
 ANKI_CONFIG_OPTION(r_sharpen, 1, 0, 1)

+ 1 - 0
AnKi/Renderer/DepthDownscale.h

@@ -50,6 +50,7 @@ public:
 		width = m_copyToBuff.m_lastMipWidth;
 		height = m_copyToBuff.m_lastMipHeight;
 		ANKI_ASSERT(m_copyToBuff.m_buffAddr);
+		m_copyToBuff.m_buff->invalidate(0, MAX_PTR_SIZE);
 		depthValues = static_cast<F32*>(m_copyToBuff.m_buffAddr);
 	}
 

+ 1 - 1
AnKi/Renderer/FinalComposite.cpp

@@ -41,7 +41,7 @@ Error FinalComposite::initInternal(const ConfigSet& config)
 	m_fbDescr.m_colorAttachments[0].m_loadOperation = AttachmentLoadOperation::DONT_CARE;
 	m_fbDescr.bake();
 
-	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoiseRgb864x64.png", m_blueNoise));
+	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_blueNoise));
 
 	// Progs
 	ANKI_CHECK(getResourceManager().loadResource("Shaders/FinalComposite.ankiprog", m_prog));

+ 1 - 1
AnKi/Renderer/ProbeReflections.cpp

@@ -53,7 +53,7 @@ Error ProbeReflections::initInternal(const ConfigSet& config)
 	ANKI_CHECK(initShadowMapping(config));
 
 	// Load split sum integration LUT
-	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/SplitSumIntegration.ankitex", m_integrationLut));
+	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/SplitSumIntegration.png", m_integrationLut));
 
 	SamplerInitInfo sinit;
 	sinit.m_minMagFilter = SamplingFilter::LINEAR;

+ 1 - 1
AnKi/Renderer/RtShadows.cpp

@@ -38,7 +38,7 @@ Error RtShadows::initInternal(const ConfigSet& cfg)
 	m_useSvgf = cfg.getNumberU8("r_rtShadowsSvgf") != 0;
 	m_atrousPassCount = cfg.getNumberU8("r_rtShadowsSvgfAtrousPassCount");
 
-	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoiseRgb864x64.png", m_blueNoiseImage));
+	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_blueNoiseImage));
 
 	// Ray gen program
 	{

+ 4 - 1
AnKi/Renderer/Scale.cpp

@@ -38,7 +38,8 @@ Error Scale::init(const ConfigSet& cfg)
 
 	ANKI_R_LOGI("Initializing (up|down)scale pass");
 
-	m_fsr = cfg.getBool("r_fsr");
+	const U32 fsrQuality = cfg.getNumberU8("r_fsr");
+	m_fsr = fsrQuality != 0;
 
 	// Program
 	if(needsScaling)
@@ -50,6 +51,7 @@ Error Scale::init(const ConfigSet& cfg)
 		{
 			ShaderProgramResourceVariantInitInfo variantInitInfo(m_scaleProg);
 			variantInitInfo.addMutation("SHARPEN", 0);
+			variantInitInfo.addMutation("FSR_QUALITY", fsrQuality - 1);
 			m_scaleProg->getOrCreateVariant(variantInitInfo, variant);
 		}
 		else
@@ -64,6 +66,7 @@ Error Scale::init(const ConfigSet& cfg)
 		ANKI_CHECK(getResourceManager().loadResource("Shaders/Fsr.ankiprog", m_sharpenProg));
 		ShaderProgramResourceVariantInitInfo variantInitInfo(m_sharpenProg);
 		variantInitInfo.addMutation("SHARPEN", 1);
+		variantInitInfo.addMutation("FSR_QUALITY", 0);
 		const ShaderProgramResourceVariant* variant;
 		m_sharpenProg->getOrCreateVariant(variantInitInfo, variant);
 		m_sharpenGrProg = variant->getProgram();

+ 1 - 1
AnKi/Renderer/Ssao.cpp

@@ -21,7 +21,7 @@ Ssao::~Ssao()
 Error Ssao::initMain(const ConfigSet& config)
 {
 	// Noise
-	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoiseLdrRgb64x64.ankitex", m_main.m_noiseImage));
+	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_main.m_noiseImage));
 
 	// Shader
 	if(m_useCompute)

+ 8 - 0
AnKi/Renderer/Ssao.h

@@ -24,6 +24,7 @@ public:
 	Ssao(Renderer* r)
 		: RendererObject(r)
 	{
+		registerDebugRenderTarget("SsaoFinal");
 	}
 
 	~Ssao();
@@ -38,6 +39,13 @@ public:
 		return m_runCtx.m_rts[1];
 	}
 
+	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle,
+							  ShaderProgramPtr& optionalShaderProgram) const override
+	{
 		// Hands out the render target returned by getRt() for the "SsaoFinal" debug name, which is the only name
 		// this object registers in its constructor. optionalShaderProgram is left untouched.
+		ANKI_ASSERT(rtName == "SsaoFinal");
+		handle = getRt();
+	}
+
 private:
 	static const Bool m_useNormal = false;
 	static const Bool m_useCompute = true;

+ 1 - 1
AnKi/Renderer/Ssgi.cpp

@@ -42,7 +42,7 @@ Error Ssgi::initInternal(const ConfigSet& cfg)
 	m_main.m_depthLod = min(cfg.getNumberU32("r_ssgiDepthLod"), m_r->getDepthDownscale().getMipmapCount() - 1);
 	m_main.m_firstStepPixels = 32;
 
-	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoiseRgb816x16.png", m_main.m_noiseImage));
+	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoise_Rgba8_16x16.png", m_main.m_noiseImage));
 
 	// Init main
 	{

+ 1 - 1
AnKi/Renderer/Ssr.cpp

@@ -38,7 +38,7 @@ Error Ssr::initInternal(const ConfigSet& cfg)
 	m_depthLod = cfg.getNumberU32("r_ssrDepthLod");
 	m_firstStepPixels = 32;
 
-	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoiseRgb816x16.png", m_noiseImage));
+	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoise_Rgba8_16x16.png", m_noiseImage));
 
 	// Create RTs
 	TextureInitInfo texinit = m_r->create2DRenderTargetInitInfo(

+ 7 - 1
AnKi/Renderer/TileAllocator.h

@@ -22,11 +22,17 @@ enum class TileAllocatorResult : U32
 };
 
 /// Allocates tiles out of a tilemap suitable for shadow mapping.
-class TileAllocator : public NonCopyable
+class TileAllocator
 {
 public:
+	TileAllocator() = default;
+
+	TileAllocator(const TileAllocator&) = delete; // Non-copyable
+
 	~TileAllocator();
 
+	TileAllocator& operator=(const TileAllocator&) = delete; // Non-copyable
+
 	/// Initialize the allocator.
 	void init(HeapAllocator<U8> alloc, U32 tileCountX, U32 tileCountY, U32 lodCount, Bool enableCaching);
 

+ 1 - 1
AnKi/Renderer/Tonemapping.cpp

@@ -12,7 +12,7 @@ namespace anki
 
 Error Tonemapping::init(const ConfigSet& cfg)
 {
-	Error err = initInternal(cfg);
+	const Error err = initInternal(cfg);
 	if(err)
 	{
 		ANKI_R_LOGE("Failed to initialize tonemapping");

+ 1 - 1
AnKi/Renderer/VolumetricLightingAccumulation.cpp

@@ -42,7 +42,7 @@ Error VolumetricLightingAccumulation::init(const ConfigSet& config)
 		return Error::USER_DATA;
 	}
 
-	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoiseRgb864x64.png", m_noiseImage));
+	ANKI_CHECK(getResourceManager().loadResource("EngineAssets/BlueNoise_Rgba8_64x64.png", m_noiseImage));
 
 	// Shaders
 	ANKI_CHECK(getResourceManager().loadResource("Shaders/VolumetricLightingAccumulation.ankiprog", m_prog));

+ 1 - 1
AnKi/Resource/CMakeLists.txt

@@ -1,2 +1,2 @@
 file(GLOB_RECURSE SOURCES *.cpp)
-addAnkiSourceFiles(${SOURCES})
+anki_add_source_files(${SOURCES})

+ 2 - 0
AnKi/Resource/ConfigDefs.h

@@ -9,4 +9,6 @@ ANKI_CONFIG_OPTION(
 	rsrc_dataPaths, ".",
 	"The engine loads assets only in from these paths. Separate them with : (it's smart enough to identify drive "
 	"letters in Windows)")
+ANKI_CONFIG_OPTION(rsrc_dataPathExcludedStrings, "build",
+				   "A list of string separated by : that will be used to exclude paths from rsrc_dataPaths")
 ANKI_CONFIG_OPTION(rsrc_transferScratchMemorySize, 256_MB, 1_MB, 4_GB)

+ 7 - 2
AnKi/Resource/ImageBinary.h

@@ -45,7 +45,8 @@ enum class ImageBinaryDataCompression : U32
 	NONE,
 	RAW = 1 << 0,
 	S3TC = 1 << 1,
-	ETC = 1 << 2
+	ETC = 1 << 2,
+	ASTC = 1 << 3
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(ImageBinaryDataCompression)
 
@@ -62,7 +63,9 @@ public:
 	ImageBinaryDataCompression m_compressionMask;
 	U32 m_isNormal;
 	U32 m_mipmapCount;
-	Array<U8, 88> m_padding;
+	U32 m_astcBlockSizeX;
+	U32 m_astcBlockSizeY;
+	Array<U8, 80> m_padding;
 
 	template<typename TSerializer, typename TClass>
 	static void serializeCommon(TSerializer& s, TClass self)
@@ -76,6 +79,8 @@ public:
 		s.doValue("m_compressionMask", offsetof(ImageBinaryHeader, m_compressionMask), self.m_compressionMask);
 		s.doValue("m_isNormal", offsetof(ImageBinaryHeader, m_isNormal), self.m_isNormal);
 		s.doValue("m_mipmapCount", offsetof(ImageBinaryHeader, m_mipmapCount), self.m_mipmapCount);
+		s.doValue("m_astcBlockSizeX", offsetof(ImageBinaryHeader, m_astcBlockSizeX), self.m_astcBlockSizeX);
+		s.doValue("m_astcBlockSizeY", offsetof(ImageBinaryHeader, m_astcBlockSizeY), self.m_astcBlockSizeY);
 		s.doArray("m_padding", offsetof(ImageBinaryHeader, m_padding), &self.m_padding[0], self.m_padding.getSize());
 	}
 

+ 5 - 2
AnKi/Resource/ImageBinary.xml

@@ -36,7 +36,8 @@ enum class ImageBinaryDataCompression : U32
 	NONE,
 	RAW = 1 << 0,
 	S3TC = 1 << 1,
-	ETC = 1 << 2
+	ETC = 1 << 2,
+	ASTC = 1 << 3
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(ImageBinaryDataCompression)
 ]]></prefix_code>
@@ -53,7 +54,9 @@ ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(ImageBinaryDataCompression)
 				<member name="m_compressionMask" type="ImageBinaryDataCompression"/>
 				<member name="m_isNormal" type="U32"/>
 				<member name="m_mipmapCount" type="U32"/>
-				<member name="m_padding" type="U8" array_size="88"/>
+				<member name="m_astcBlockSizeX" type="U32"/>
+				<member name="m_astcBlockSizeY" type="U32"/>
+				<member name="m_padding" type="U8" array_size="80"/>
 			</members>
 		</class>
 	</classes>

+ 60 - 16
AnKi/Resource/ImageLoader.cpp

@@ -16,7 +16,7 @@ static const U8 tgaHeaderCompressed[12] = {0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
 /// Get the size in bytes of a single surface
 static PtrSize calcSurfaceSize(const U32 width32, const U32 height32, const ImageBinaryDataCompression comp,
-							   const ImageBinaryColorFormat cf)
+							   const ImageBinaryColorFormat cf, UVec2 astcBlockSize)
 {
 	const PtrSize width = width32;
 	const PtrSize height = height32;
@@ -35,6 +35,9 @@ static PtrSize calcSurfaceSize(const U32 width32, const U32 height32, const Imag
 	case ImageBinaryDataCompression::ETC:
 		out = (width / 4) * (height / 4) * 8;
 		break;
+	case ImageBinaryDataCompression::ASTC:
+		out = (width / astcBlockSize.x()) * (height / astcBlockSize.y()) * 16;
+		break;
 	default:
 		ANKI_ASSERT(0);
 	}
@@ -97,7 +100,9 @@ static PtrSize calcSizeOfSegment(const ImageBinaryHeader& header, ImageBinaryDat
 
 		while(mips-- != 0)
 		{
-			out += calcSurfaceSize(width, height, comp, header.m_colorFormat) * surfCountPerMip;
+			out += calcSurfaceSize(width, height, comp, header.m_colorFormat,
+								   UVec2(header.m_astcBlockSizeX, header.m_astcBlockSizeY))
+				   * surfCountPerMip;
 
 			width /= 2;
 			height /= 2;
@@ -336,7 +341,7 @@ Error ImageLoader::loadAnkiImage(FileInterface& file, U32 maxImageSize,
 								 DynamicArray<ImageLoaderSurface>& surfaces, DynamicArray<ImageLoaderVolume>& volumes,
 								 GenericMemoryPoolAllocator<U8>& alloc, U32& width, U32& height, U32& depth,
 								 U32& layerCount, U32& mipCount, ImageBinaryType& imageType,
-								 ImageBinaryColorFormat& colorFormat)
+								 ImageBinaryColorFormat& colorFormat, UVec2& astcBlockSize)
 {
 	//
 	// Read and check the header
@@ -344,7 +349,7 @@ Error ImageLoader::loadAnkiImage(FileInterface& file, U32 maxImageSize,
 	ImageBinaryHeader header;
 	ANKI_CHECK(file.read(&header, sizeof(ImageBinaryHeader)));
 
-	if(std::memcmp(&header.m_magic[0], IMAGE_MAGIC, sizeof(IMAGE_MAGIC - 1)) != 0)
+	if(std::memcmp(&header.m_magic[0], IMAGE_MAGIC, sizeof(IMAGE_MAGIC) - 1) != 0)
 	{
 		ANKI_RESOURCE_LOGE("Wrong magic word");
 		return Error::USER_DATA;
@@ -375,12 +380,22 @@ Error ImageLoader::loadAnkiImage(FileInterface& file, U32 maxImageSize,
 		return Error::USER_DATA;
 	}
 
-	if((header.m_compressionMask & preferredCompression) == ImageBinaryDataCompression::NONE)
+	if(!!(header.m_compressionMask & ImageBinaryDataCompression::ASTC))
+	{
+		if((header.m_astcBlockSizeX != 8 && header.m_astcBlockSizeX != 4)
+		   || (header.m_astcBlockSizeY != 8 && header.m_astcBlockSizeY != 4))
+		{
+			ANKI_RESOURCE_LOGE("Incorrect header: ASTC block size");
+			return Error::USER_DATA;
+		}
+	}
+
+	if(!(header.m_compressionMask & preferredCompression))
 	{
 		// Fallback
 		preferredCompression = ImageBinaryDataCompression::RAW;
 
-		if((header.m_compressionMask & preferredCompression) == ImageBinaryDataCompression::NONE)
+		if(!(header.m_compressionMask & preferredCompression))
 		{
 			ANKI_RESOURCE_LOGE("File does not contain raw compression");
 			return Error::USER_DATA;
@@ -396,6 +411,7 @@ Error ImageLoader::loadAnkiImage(FileInterface& file, U32 maxImageSize,
 	// Set a few things
 	colorFormat = header.m_colorFormat;
 	imageType = header.m_type;
+	astcBlockSize = UVec2(header.m_astcBlockSizeX, header.m_astcBlockSizeY);
 
 	U32 faceCount = 1;
 	switch(header.m_type)
@@ -424,6 +440,7 @@ Error ImageLoader::loadAnkiImage(FileInterface& file, U32 maxImageSize,
 	//
 	// Move file pointer
 	//
+	PtrSize skipSize = 0;
 
 	if(preferredCompression == ImageBinaryDataCompression::RAW)
 	{
@@ -431,25 +448,50 @@ Error ImageLoader::loadAnkiImage(FileInterface& file, U32 maxImageSize,
 	}
 	else if(preferredCompression == ImageBinaryDataCompression::S3TC)
 	{
-		if((header.m_compressionMask & ImageBinaryDataCompression::RAW) != ImageBinaryDataCompression::NONE)
+		if(!!(header.m_compressionMask & ImageBinaryDataCompression::RAW))
 		{
 			// If raw compression is present then skip it
-			ANKI_CHECK(file.seek(calcSizeOfSegment(header, ImageBinaryDataCompression::RAW), FileSeekOrigin::CURRENT));
+			skipSize += calcSizeOfSegment(header, ImageBinaryDataCompression::RAW);
 		}
 	}
 	else if(preferredCompression == ImageBinaryDataCompression::ETC)
 	{
-		if((header.m_compressionMask & ImageBinaryDataCompression::RAW) != ImageBinaryDataCompression::NONE)
+		if(!!(header.m_compressionMask & ImageBinaryDataCompression::RAW))
+		{
+			// If raw compression is present then skip it
+			skipSize += calcSizeOfSegment(header, ImageBinaryDataCompression::RAW);
+		}
+
+		if(!!(header.m_compressionMask & ImageBinaryDataCompression::S3TC))
+		{
+			// If s3tc compression is present then skip it
+			skipSize += calcSizeOfSegment(header, ImageBinaryDataCompression::S3TC);
+		}
+	}
+	else if(preferredCompression == ImageBinaryDataCompression::ASTC)
+	{
+		if(!!(header.m_compressionMask & ImageBinaryDataCompression::RAW))
 		{
 			// If raw compression is present then skip it
-			ANKI_CHECK(file.seek(calcSizeOfSegment(header, ImageBinaryDataCompression::RAW), FileSeekOrigin::CURRENT));
+			skipSize += calcSizeOfSegment(header, ImageBinaryDataCompression::RAW);
 		}
 
-		if((header.m_compressionMask & ImageBinaryDataCompression::S3TC) != ImageBinaryDataCompression::NONE)
+		if(!!(header.m_compressionMask & ImageBinaryDataCompression::S3TC))
 		{
 			// If s3tc compression is present then skip it
-			ANKI_CHECK(file.seek(calcSizeOfSegment(header, ImageBinaryDataCompression::S3TC), FileSeekOrigin::CURRENT));
+			skipSize += calcSizeOfSegment(header, ImageBinaryDataCompression::S3TC);
 		}
+
+		if(!!(header.m_compressionMask & ImageBinaryDataCompression::ETC))
+		{
+			// If ETC compression is present then skip it
+			skipSize += calcSizeOfSegment(header, ImageBinaryDataCompression::ETC);
+		}
+	}
+
+	if(skipSize)
+	{
+		ANKI_CHECK(file.seek(skipSize, FileSeekOrigin::CURRENT));
 	}
 
 	//
@@ -470,7 +512,8 @@ Error ImageLoader::loadAnkiImage(FileInterface& file, U32 maxImageSize,
 				for(U32 f = 0; f < faceCount; ++f)
 				{
 					const U32 dataSize =
-						U32(calcSurfaceSize(mipWidth, mipHeight, preferredCompression, header.m_colorFormat));
+						U32(calcSurfaceSize(mipWidth, mipHeight, preferredCompression, header.m_colorFormat,
+											UVec2(header.m_astcBlockSizeX, header.m_astcBlockSizeY)));
 
 					// Check if this mipmap can be skipped because of size
 					if(max(mipWidth, mipHeight) <= maxImageSize || mip == header.m_mipmapCount - 1)
@@ -551,6 +594,7 @@ Error ImageLoader::loadStb(FileInterface& fs, U32& width, U32& height, DynamicAr
 
 	// Use STB to read the image
 	int stbw, stbh, comp;
+	stbi_set_flip_vertically_on_load_thread(true);
 	U8* stbdata = reinterpret_cast<U8*>(stbi_load_from_memory(&fileData[0], I32(fileSize), &stbw, &stbh, &comp, 4));
 	if(!stbdata)
 	{
@@ -642,14 +686,14 @@ Error ImageLoader::loadInternal(FileInterface& file, const CString& filename, U3
 	}
 	else if(ext == "ankitex")
 	{
-#if 0
-		compression = ImageBinaryDataCompression::RAW;
+#if ANKI_OS_ANDROID
+		m_compression = ImageBinaryDataCompression::ASTC;
 #else
 		m_compression = ImageBinaryDataCompression::S3TC;
 #endif
 
 		ANKI_CHECK(loadAnkiImage(file, maxImageSize, m_compression, m_surfaces, m_volumes, m_alloc, m_width, m_height,
-								 m_depth, m_layerCount, m_mipmapCount, m_imageType, m_colorFormat));
+								 m_depth, m_layerCount, m_mipmapCount, m_imageType, m_colorFormat, m_astcBlockSize));
 	}
 	else if(ext == "png" || ext == "jpg")
 	{

+ 9 - 1
AnKi/Resource/ImageLoader.h

@@ -93,6 +93,13 @@ public:
 		return m_imageType;
 	}
 
+	UVec2 getAstcBlockSize() const
+	{
 		// Returns the stored ASTC block size. Asserts that the loaded compression includes ASTC and that the
 		// block size was actually set (it is initialized to zero).
+		ANKI_ASSERT(!!(m_compression & ImageBinaryDataCompression::ASTC));
+		ANKI_ASSERT(m_astcBlockSize != UVec2(0u));
+		return m_astcBlockSize;
+	}
+
 	const ImageLoaderSurface& getSurface(U32 level, U32 face, U32 layer) const;
 
 	const ImageLoaderVolume& getVolume(U32 level) const;
@@ -121,6 +128,7 @@ private:
 	U32 m_height = 0;
 	U32 m_depth = 0;
 	U32 m_layerCount = 0;
+	UVec2 m_astcBlockSize = UVec2(0u);
 	ImageBinaryDataCompression m_compression = ImageBinaryDataCompression::NONE;
 	ImageBinaryColorFormat m_colorFormat = ImageBinaryColorFormat::NONE;
 	ImageBinaryType m_imageType = ImageBinaryType::NONE;
@@ -145,7 +153,7 @@ private:
 											   DynamicArray<ImageLoaderVolume>& volumes,
 											   GenericMemoryPoolAllocator<U8>& alloc, U32& width, U32& height,
 											   U32& depth, U32& layerCount, U32& mipCount, ImageBinaryType& imageType,
-											   ImageBinaryColorFormat& colorFormat);
+											   ImageBinaryColorFormat& colorFormat, UVec2& astcBlockSize);
 
 	ANKI_USE_RESULT Error loadInternal(FileInterface& file, const CString& filename, U32 maxImageSize);
 };

+ 27 - 1
AnKi/Resource/ImageResource.cpp

@@ -7,6 +7,7 @@
 #include <AnKi/Resource/ImageLoader.h>
 #include <AnKi/Resource/ResourceManager.h>
 #include <AnKi/Resource/AsyncLoader.h>
+#include <AnKi/Util/Filesystem.h>
 
 namespace anki
 {
@@ -67,7 +68,10 @@ Error ImageResource::load(const ResourceFilename& filename, Bool async)
 	}
 	ImageLoader& loader = ctx->m_loader;
 
-	TextureInitInfo init("RsrcTex");
+	StringAuto filenameExt(getTempAllocator());
+	getFilepathFilename(filename, filenameExt);
+
+	TextureInitInfo init(filenameExt);
 	init.m_usage = TextureUsageBit::ALL_SAMPLED | TextureUsageBit::TRANSFER_DESTINATION;
 	init.m_initialUsage = TextureUsageBit::ALL_SAMPLED;
 	U32 faces = 0;
@@ -122,6 +126,17 @@ Error ImageResource::load(const ResourceFilename& filename, Bool async)
 		case ImageBinaryDataCompression::S3TC:
 			init.m_format = Format::BC1_RGB_UNORM_BLOCK;
 			break;
+		case ImageBinaryDataCompression::ASTC:
+			if(loader.getAstcBlockSize() == UVec2(4u))
+			{
+				init.m_format = Format::ASTC_4x4_UNORM_BLOCK;
+			}
+			else
+			{
+				ANKI_ASSERT(loader.getAstcBlockSize() == UVec2(8u));
+				init.m_format = Format::ASTC_8x8_UNORM_BLOCK;
+			}
+			break;
 		default:
 			ANKI_ASSERT(0);
 		}
@@ -136,6 +151,17 @@ Error ImageResource::load(const ResourceFilename& filename, Bool async)
 		case ImageBinaryDataCompression::S3TC:
 			init.m_format = Format::BC3_UNORM_BLOCK;
 			break;
+		case ImageBinaryDataCompression::ASTC:
+			if(loader.getAstcBlockSize() == UVec2(4u))
+			{
+				init.m_format = Format::ASTC_4x4_UNORM_BLOCK;
+			}
+			else
+			{
+				ANKI_ASSERT(loader.getAstcBlockSize() == UVec2(8u));
+				init.m_format = Format::ASTC_8x8_UNORM_BLOCK;
+			}
+			break;
 		default:
 			ANKI_ASSERT(0);
 		}

+ 12 - 2
AnKi/Resource/MaterialResource.h

@@ -60,7 +60,7 @@ enum class BuiltinMutatorId : U8
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(BuiltinMutatorId)
 
 /// Holds the shader variables. It's a container for shader program variables that share the same name.
-class MaterialVariable : public NonCopyable
+class MaterialVariable
 {
 	friend class MaterialVariant;
 	friend class MaterialResource;
@@ -68,6 +68,8 @@ class MaterialVariable : public NonCopyable
 public:
 	MaterialVariable();
 
+	MaterialVariable(const MaterialVariable&) = delete; // Non-copyable
+
 	MaterialVariable(MaterialVariable&& b)
 	{
 		*this = std::move(b);
@@ -75,6 +77,8 @@ public:
 
 	~MaterialVariable();
 
+	MaterialVariable& operator=(const MaterialVariable&) = delete; // Non-copyable
+
 	MaterialVariable& operator=(MaterialVariable&& b)
 	{
 		m_name = std::move(b.m_name);
@@ -206,11 +210,17 @@ inline const ImageResourcePtr& MaterialVariable::getValue() const
 }
 
 /// Material variant.
-class MaterialVariant : public NonCopyable
+class MaterialVariant
 {
 	friend class MaterialResource;
 
 public:
+	MaterialVariant() = default;
+
+	MaterialVariant(const MaterialVariant&) = delete; // Non-copyable
+
+	MaterialVariant& operator=(const MaterialVariant&) = delete; // Non-copyable
+
 	/// Return true if the variable is active.
 	Bool isVariableActive(const MaterialVariable& var) const
 	{

+ 89 - 44
AnKi/Resource/ResourceFilesystem.cpp

@@ -210,6 +210,9 @@ Error ResourceFilesystem::init(const ConfigSet& config, const CString& cacheDir)
 	StringListAuto paths(m_alloc);
 	paths.splitString(config.getString("rsrc_dataPaths"), ':');
 
+	StringListAuto excludedStrings(m_alloc);
+	excludedStrings.splitString(config.getString("rsrc_dataPathExcludedStrings"), ':');
+
 	// Workaround the fact that : is used in drives in Windows
 #if ANKI_OS_WINDOWS
 	StringListAuto paths2(m_alloc);
@@ -241,9 +244,14 @@ Error ResourceFilesystem::init(const ConfigSet& config, const CString& cacheDir)
 		return Error::USER_DATA;
 	}
 
+#if ANKI_OS_ANDROID
+	// Add the files of the .apk
+	ANKI_CHECK(addNewPath("APK package", excludedStrings, true));
+#endif
+
 	for(auto& path : paths)
 	{
-		ANKI_CHECK(addNewPath(path.toCString()));
+		ANKI_CHECK(addNewPath(path.toCString(), excludedStrings));
 	}
 
 	addCachePath(cacheDir);
@@ -260,18 +268,56 @@ void ResourceFilesystem::addCachePath(const CString& path)
 	m_paths.emplaceBack(m_alloc, std::move(p));
 }
 
-Error ResourceFilesystem::addNewPath(const CString& path)
+Error ResourceFilesystem::addNewPath(const CString& filepath, const StringListAuto& excludedStrings, Bool special)
 {
-	U32 fileCount = 0;
-	static const CString extension(".AnKiZLibip");
+	U32 fileCount = 0; // Count files manually because it's slower to get that number from the list
+	static const CString extension(".ankizip");
+
+	auto rejectPath = [&](CString p) -> Bool {
+		for(const String& s : excludedStrings)
+		{
+			if(p.find(s) != CString::NPOS)
+			{
+				return true;
+			}
+		}
 
-	auto pos = path.find(extension);
-	if(pos != CString::NPOS && pos == path.getLength() - extension.getLength())
+		return false;
+	};
+
+	PtrSize pos;
+	Path path;
+	if(special)
+	{
+		// Android apk, read the file that contains the directory structure
+
+		// Read the file
+		File dirStructure;
+		ANKI_CHECK(dirStructure.open("DirStructure.txt", FileOpenFlag::READ | FileOpenFlag::SPECIAL));
+		StringAuto txt(m_alloc);
+		ANKI_CHECK(dirStructure.readAllText(txt));
+
+		StringListAuto filenames(m_alloc);
+		filenames.splitString(txt, '\n');
+
+		// Create the Path
+		for(const String& filename : filenames)
+		{
+			if(!rejectPath(filename))
+			{
+				path.m_files.pushBack(m_alloc, filename);
+				++fileCount;
+			}
+		}
+
+		path.m_isSpecial = true;
+	}
+	else if((pos = filepath.find(extension)) != CString::NPOS && pos == filepath.getLength() - extension.getLength())
 	{
 		// It's an archive
 
 		// Open
-		unzFile zfile = unzOpen(&path[0]);
+		unzFile zfile = unzOpen(&filepath[0]);
 		if(!zfile)
 		{
 			ANKI_RESOURCE_LOGE("Failed to open archive");
@@ -286,10 +332,6 @@ Error ResourceFilesystem::addNewPath(const CString& path)
 			return Error::FILE_ACCESS;
 		}
 
-		Path p;
-		p.m_isArchive = true;
-		p.m_path.sprintf(m_alloc, "%s", &path[0]);
-
 		do
 		{
 			Array<char, 1024> filename;
@@ -302,56 +344,46 @@ Error ResourceFilesystem::addNewPath(const CString& path)
 				return Error::FILE_ACCESS;
 			}
 
-			// If compressed size is zero then it's a dir
-			if(info.uncompressed_size > 0)
+			const Bool itsADir = info.uncompressed_size == 0;
+			if(!itsADir && !rejectPath(&filename[0]))
 			{
-				p.m_files.pushBackSprintf(m_alloc, "%s", &filename[0]);
+				path.m_files.pushBackSprintf(m_alloc, "%s", &filename[0]);
 				++fileCount;
 			}
 		} while(unzGoToNextFile(zfile) == UNZ_OK);
 
-		m_paths.emplaceFront(m_alloc, std::move(p));
 		unzClose(zfile);
+
+		path.m_isArchive = true;
 	}
 	else
 	{
 		// It's a simple directory
 
-		m_paths.emplaceFront(m_alloc, Path());
-		Path& p = m_paths.getFront();
-		p.m_path.sprintf(m_alloc, "%s", &path[0]);
-		p.m_isArchive = false;
-
-		struct UserData
-		{
-			ResourceFilesystem* m_sys;
-			U32* m_fileCount;
-		} ud{this, &fileCount};
-
-		ANKI_CHECK(walkDirectoryTree(path, &ud, [](const CString& fname, void* ud, Bool isDir) -> Error {
-			if(isDir)
+		ANKI_CHECK(walkDirectoryTree(filepath, m_alloc, [&, this](const CString& fname, Bool isDir) -> Error {
+			if(!isDir && !rejectPath(fname))
 			{
-				return Error::NONE;
+				path.m_files.pushBackSprintf(m_alloc, "%s", fname.cstr());
+				++fileCount;
 			}
 
-			UserData* udd = static_cast<UserData*>(ud);
-			ResourceFilesystem* self = udd->m_sys;
-
-			Path& p = self->m_paths.getFront();
-			p.m_files.pushBackSprintf(self->m_alloc, "%s", fname.cstr());
-
-			++(*udd->m_fileCount);
 			return Error::NONE;
 		}));
+	}
 
-		if(p.m_files.getSize() < 1)
-		{
-			ANKI_RESOURCE_LOGE("Directory is empty: %s", &path[0]);
-			return Error::USER_DATA;
-		}
+	ANKI_ASSERT(path.m_files.getSize() == fileCount);
+	if(fileCount == 0)
+	{
+		ANKI_RESOURCE_LOGW("Ignoring empty resource path: %s", &filepath[0]);
+	}
+	else
+	{
+		path.m_path.sprintf(m_alloc, "%s", &filepath[0]);
+		m_paths.emplaceFront(m_alloc, std::move(path));
+
+		ANKI_RESOURCE_LOGI("Added new data path \"%s\" that contains %u files", &filepath[0], fileCount);
 	}
 
-	ANKI_RESOURCE_LOGI("Added new data path \"%s\" that contains %u files", &path[0], fileCount);
 	return Error::NONE;
 }
 
@@ -401,12 +433,25 @@ Error ResourceFilesystem::openFile(const ResourceFilename& filename, ResourceFil
 				else
 				{
 					StringAuto newFname(m_alloc);
-					newFname.sprintf("%s/%s", &p.m_path[0], &filename[0]);
+					if(!p.m_isSpecial)
+					{
+						newFname.sprintf("%s/%s", &p.m_path[0], &filename[0]);
+					}
+					else
+					{
+						newFname.sprintf("%s", &filename[0]);
+					}
 
 					CResourceFile* file = m_alloc.newInstance<CResourceFile>(m_alloc);
 					rfile = file;
 
-					err = file->m_file.open(&newFname[0], FileOpenFlag::READ);
+					FileOpenFlag fopenFlags = FileOpenFlag::READ;
+					if(p.m_isSpecial)
+					{
+						fopenFlags |= FileOpenFlag::SPECIAL;
+					}
+
+					err = file->m_file.open(&newFname[0], fopenFlags);
 
 #if 0
 					printf("Opening asset %s\n", &newFname[0]);

+ 21 - 10
AnKi/Resource/ResourceFilesystem.h

@@ -21,7 +21,7 @@ class ConfigSet;
 /// @{
 
 /// Resource filesystem file. An interface that abstracts the resource file.
-class ResourceFile : public NonCopyable
+class ResourceFile
 {
 public:
 	ResourceFile(GenericMemoryPoolAllocator<U8> alloc)
@@ -29,10 +29,14 @@ public:
 	{
 	}
 
+	ResourceFile(const ResourceFile&) = delete; // Non-copyable
+
 	virtual ~ResourceFile()
 	{
 	}
 
+	ResourceFile& operator=(const ResourceFile&) = delete; // Non-copyable
+
 	/// Read data from the file
 	virtual ANKI_USE_RESULT Error read(void* buff, PtrSize size) = 0;
 
@@ -72,7 +76,7 @@ private:
 using ResourceFilePtr = IntrusivePtr<ResourceFile>;
 
 /// Resource filesystem.
-class ResourceFilesystem : public NonCopyable
+class ResourceFilesystem
 {
 public:
 	ResourceFilesystem(GenericMemoryPoolAllocator<U8> alloc)
@@ -80,8 +84,12 @@ public:
 	{
 	}
 
+	ResourceFilesystem(const ResourceFilesystem&) = delete; // Non-copyable
+
 	~ResourceFilesystem();
 
+	ResourceFilesystem& operator=(const ResourceFilesystem&) = delete; // Non-copyable
+
 	ANKI_USE_RESULT Error init(const ConfigSet& config, const CString& cacheDir);
 
 	/// Search the path list to find the file. Then open the file for reading. It's thread-safe.
@@ -104,30 +112,33 @@ public:
 #if !ANKI_TESTS
 private:
 #endif
-	class Path : public NonCopyable
+	class Path
 	{
 	public:
 		StringList m_files; ///< Files inside the directory.
 		String m_path; ///< A directory or an archive.
 		Bool m_isArchive = false;
 		Bool m_isCache = false;
+		Bool m_isSpecial = false;
 
 		Path() = default;
 
+		Path(const Path&) = delete; // Non-copyable
+
 		Path(Path&& b)
-			: m_files(std::move(b.m_files))
-			, m_path(std::move(b.m_path))
-			, m_isArchive(std::move(b.m_isArchive))
-			, m_isCache(std::move(b.m_isCache))
 		{
+			*this = std::move(b);
 		}
 
+		Path& operator=(const Path&) = delete; // Non-copyable
+
 		Path& operator=(Path&& b)
 		{
 			m_files = std::move(b.m_files);
 			m_path = std::move(b.m_path);
-			m_isArchive = std::move(b.m_isArchive);
-			m_isCache = std::move(b.m_isCache);
+			m_isArchive = b.m_isArchive;
+			m_isCache = b.m_isCache;
+			m_isSpecial = b.m_isSpecial;
 			return *this;
 		}
 	};
@@ -137,7 +148,7 @@ private:
 	String m_cacheDir;
 
 	/// Add a filesystem path or an archive. The path is read-only.
-	ANKI_USE_RESULT Error addNewPath(const CString& path);
+	ANKI_USE_RESULT Error addNewPath(const CString& path, const StringListAuto& excludedStrings, Bool special = false);
 
 	void addCachePath(const CString& path);
 };

+ 7 - 1
AnKi/Resource/ShaderProgramResource.h

@@ -26,12 +26,18 @@ class ShaderProgramResourceVariantInitInfo;
 
 /// The means to mutate a shader program.
 /// @memberof ShaderProgramResource
-class ShaderProgramResourceMutator : public NonCopyable
+class ShaderProgramResourceMutator
 {
 public:
 	CString m_name;
 	ConstWeakArray<MutatorValue> m_values;
 
+	ShaderProgramResourceMutator() = default;
+
+	ShaderProgramResourceMutator(const ShaderProgramResourceMutator&) = delete; // Non-copyable
+
+	ShaderProgramResourceMutator& operator=(const ShaderProgramResourceMutator&) = delete; // Non-copyable
+
 	Bool valueExists(MutatorValue v) const
 	{
 		for(MutatorValue x : m_values)

+ 1 - 1
AnKi/Resource/TransferGpuAllocator.cpp

@@ -63,7 +63,7 @@ public:
 
 	U32 getMaxAlignment() final
 	{
-		return 16;
+		return TransferGpuAllocator::GPU_BUFFER_ALIGNMENT;
 	}
 };
 

+ 4 - 0
AnKi/Resource/TransferGpuAllocator.h

@@ -88,6 +88,10 @@ class TransferGpuAllocator
 	friend class TransferGpuAllocatorHandle;
 
 public:
+	/// Choose an alignment that is a multiple of both 16 bytes and 3 bytes. RGB8 formats require 3-byte alignment for
+	/// the source of buffer-to-image copies.
+	static constexpr U32 GPU_BUFFER_ALIGNMENT = 16 * 3;
+
 	static const U32 FRAME_COUNT = 3;
 	static const PtrSize CHUNK_INITIAL_SIZE = 64_MB;
 	static constexpr Second MAX_FENCE_WAIT_TIME = 500.0_ms;

+ 1 - 1
AnKi/Scene/CMakeLists.txt

@@ -1,2 +1,2 @@
 file(GLOB_RECURSE SOURCES *.cpp)
-addAnkiSourceFiles(${SOURCES})
+anki_add_source_files(${SOURCES})

+ 3 - 0
AnKi/Scene/Components/GpuParticleEmitterComponent.cpp

@@ -71,6 +71,7 @@ Error GpuParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 		props->m_maxStartingPosition = inProps.m_particle.m_maxStartingPosition;
 		props->m_particleCount = inProps.m_maxNumOfParticles;
 
+		m_propsBuff->flush(0, MAX_PTR_SIZE);
 		m_propsBuff->unmap();
 	}
 
@@ -90,6 +91,7 @@ Error GpuParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 			particle->m_life = -1.0f; // Force GPU to init the particle
 		}
 
+		m_particlesBuff->flush(0, MAX_PTR_SIZE);
 		m_particlesBuff->unmap();
 	}
 
@@ -112,6 +114,7 @@ Error GpuParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 			*randFactors = getRandomRange(0.0f, 1.0f);
 		}
 
+		m_randFactorsBuff->flush(0, MAX_PTR_SIZE);
 		m_randFactorsBuff->unmap();
 	}
 

+ 2 - 0
AnKi/Scene/DebugDrawer.cpp

@@ -89,6 +89,7 @@ Error DebugDrawer2::init(ResourceManager* rsrcManager)
 		verts[6] = Vec3(-size, -size, -size); // back bottom left
 		verts[7] = Vec3(size, -size, -size); // back bottom right
 
+		m_cubePositionsBuffer->flush(0, MAX_PTR_SIZE);
 		m_cubePositionsBuffer->unmap();
 	}
 
@@ -131,6 +132,7 @@ Error DebugDrawer2::init(ResourceManager* rsrcManager)
 		indices[indexCount++] = 3;
 		indices[indexCount++] = 7;
 
+		m_cubeIndicesBuffer->flush(0, MAX_PTR_SIZE);
 		m_cubeIndicesBuffer->unmap();
 	}
 

+ 12 - 2
AnKi/Scene/Octree.h

@@ -36,7 +36,7 @@ public:
 };
 
 /// Octree for visibility tests.
-class Octree : public NonCopyable
+class Octree
 {
 	friend class OctreePlaceable;
 
@@ -46,8 +46,12 @@ public:
 	{
 	}
 
+	Octree(const Octree&) = delete; // Non-copyable
+
 	~Octree();
 
+	Octree& operator=(const Octree&) = delete; // Non-copyable
+
 	void init(const Vec3& sceneAabbMin, const Vec3& sceneAabbMax, U32 maxDepth);
 
 	/// Place or re-place an element in the tree.
@@ -281,13 +285,19 @@ private:
 };
 
 /// An entity that can be placed in octrees.
-class OctreePlaceable : public NonCopyable
+class OctreePlaceable
 {
 	friend class Octree;
 
 public:
 	void* m_userData = nullptr;
 
+	OctreePlaceable() = default;
+
+	OctreePlaceable(const OctreePlaceable&) = delete; // Non-copyable
+
+	OctreePlaceable& operator=(const OctreePlaceable&) = delete; // Non-copyable
+
 	void reset()
 	{
 		m_visitedMask.setNonAtomically(0);

+ 1 - 1
AnKi/Script/CMakeLists.txt

@@ -1,2 +1,2 @@
 file(GLOB_RECURSE SOURCES *.cpp)
-addAnkiSourceFiles(${SOURCES})
+anki_add_source_files(${SOURCES})

+ 6 - 1
AnKi/Script/LuaBinder.h

@@ -149,12 +149,17 @@ public:
 };
 
 /// Lua binder class. A wrapper on top of LUA
-class LuaBinder : public NonCopyable
+class LuaBinder
 {
 public:
 	LuaBinder();
+
+	LuaBinder(const LuaBinder&) = delete; // Non-copyable
+
 	~LuaBinder();
 
+	LuaBinder& operator=(const LuaBinder&) = delete; // Non-copyable
+
 	ANKI_USE_RESULT Error init(ScriptAllocator alloc, LuaBinderOtherSystems* otherSystems);
 
 	lua_State* getLuaState()

+ 1 - 1
AnKi/ShaderCompiler/CMakeLists.txt

@@ -1,2 +1,2 @@
 file(GLOB SOURCES *.cpp)
-addAnkiSourceFiles(${SOURCES})
+anki_add_source_files(${SOURCES})

+ 7 - 4
AnKi/ShaderCompiler/Glslang.cpp

@@ -235,6 +235,7 @@ static ANKI_USE_RESULT Error logShaderErrorCode(CString error, CString source, G
 
 	StringAuto prettySrc(alloc);
 	StringListAuto lines(alloc);
+	StringAuto errorLineTxt(alloc);
 
 	static const char* padding = "==============================================================================";
 
@@ -249,6 +250,7 @@ static ANKI_USE_RESULT Error logShaderErrorCode(CString error, CString source, G
 		if(!it->isEmpty() && lineno == errorLineNumber)
 		{
 			tmp.sprintf(">>%8u: %s\n", lineno, &(*it)[0]);
+			errorLineTxt.sprintf("%s", &(*it)[0]);
 		}
 		else if(!it->isEmpty())
 		{
@@ -262,8 +264,9 @@ static ANKI_USE_RESULT Error logShaderErrorCode(CString error, CString source, G
 		prettySrc.append(tmp);
 	}
 
-	ANKI_SHADER_COMPILER_LOGE("Shader compilation failed:\n%s\n%s\n%s\n%s\n%s\n%s", padding, &error[0], padding,
-							  &prettySrc[0], padding, &error[0]);
+	ANKI_SHADER_COMPILER_LOGE("Shader compilation failed:\n%s\n%s\nIn: %s\n%s\n%s\n%s\n%s\nIn: %s\n", padding,
+							  &error[0], errorLineTxt.cstr(), padding, &prettySrc[0], padding, &error[0],
+							  errorLineTxt.cstr());
 
 	return Error::NONE;
 }
@@ -318,8 +321,8 @@ Error compilerGlslToSpirv(CString src, ShaderType shaderType, GenericMemoryPoolA
 	glslang::TShader shader(stage);
 	Array<const char*, 1> csrc = {&src[0]};
 	shader.setStrings(&csrc[0], 1);
-	shader.setEnvClient(glslang::EShClientVulkan, glslang::EShTargetVulkan_1_2);
-	shader.setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_5);
+	shader.setEnvClient(glslang::EShClientVulkan, glslang::EShTargetVulkan_1_1);
+	shader.setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_4);
 	if(!shader.parse(&GLSLANG_LIMITS, 100, false, messages))
 	{
 		ANKI_CHECK(logShaderErrorCode(shader.getInfoLog(), src, tmpAlloc));

+ 5 - 1
AnKi/ShaderCompiler/ShaderProgramCompiler.h

@@ -19,7 +19,7 @@ extern const U32 SHADER_BINARY_VERSION;
 
 /// A wrapper over the POD ShaderProgramBinary class.
 /// @memberof ShaderProgramCompiler
-class ShaderProgramBinaryWrapper : public NonCopyable
+class ShaderProgramBinaryWrapper
 {
 	friend Error compileShaderProgramInternal(CString fname, ShaderProgramFilesystemInterface& fsystem,
 											  ShaderProgramPostParseInterface* postParseCallback,
@@ -34,11 +34,15 @@ public:
 	{
 	}
 
+	ShaderProgramBinaryWrapper(const ShaderProgramBinaryWrapper&) = delete; // Non-copyable
+
 	~ShaderProgramBinaryWrapper()
 	{
 		cleanup();
 	}
 
+	ShaderProgramBinaryWrapper& operator=(const ShaderProgramBinaryWrapper&) = delete; // Non-copyable
+
 	ANKI_USE_RESULT Error serializeToFile(CString fname) const;
 
 	ANKI_USE_RESULT Error deserializeFromFile(CString fname);

+ 39 - 22
AnKi/ShaderCompiler/ShaderProgramParser.cpp

@@ -22,6 +22,11 @@ static const Array<CString, U32(ShaderType::COUNT)> SHADER_STAGE_NAMES = {
 
 static const char* SHADER_HEADER = R"(#version 460 core
 #define ANKI_%s_SHADER 1
+#define ANKI_OS_ANDROID %d
+#define ANKI_OS_WINDOWS %d
+#define ANKI_OS_LINUX %d
+
+#define _ANKI_SUPPORTS_64BIT !ANKI_OS_ANDROID
 
 #define gl_VertexID gl_VertexIndex
 
@@ -47,18 +52,21 @@ static const char* SHADER_HEADER = R"(#version 460 core
 #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
-#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
+
+#if _ANKI_SUPPORTS_64BIT
+#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
 #extension GL_EXT_shader_atomic_int64 : enable
 #extension GL_EXT_shader_subgroup_extended_types_int64 : enable
+#endif
 
 #extension GL_EXT_nonuniform_qualifier : enable
 #extension GL_EXT_scalar_block_layout : enable
 
-#define ANKI_MAX_BINDLESS_TEXTURES %u
-#define ANKI_MAX_BINDLESS_IMAGES %u
+#define ANKI_MAX_BINDLESS_TEXTURES %uu
+#define ANKI_MAX_BINDLESS_IMAGES %uu
 
 #if defined(ANKI_RAY_GEN_SHADER) || defined(ANKI_ANY_HIT_SHADER) || defined(ANKI_CLOSEST_HIT_SHADER) || defined(ANKI_MISS_SHADER) || defined(ANKI_INTERSECTION_SHADER) || defined(ANKI_CALLABLE_SHADER)
 #	extension GL_EXT_ray_tracing : enable
@@ -144,23 +152,25 @@ static const char* SHADER_HEADER = R"(#version 460 core
 #define IVec4 ivec4
 #define _ANKI_SIZEOF_ivec4 16u
 
-#define U64 uint64_t
-#define _ANKI_SIZEOF_uint64_t 8u
-#define U64Vec2 u64vec2
-#define _ANKI_SIZEOF_u64vec2 16u
-#define U64Vec3 u64vec3
-#define _ANKI_SIZEOF_u64vec3 24u
-#define U64Vec4 u64vec4
-#define _ANKI_SIZEOF_u64vec4 32u
-
-#define I64 int64_t
-#define _ANKI_SIZEOF_int64_t 8u
-#define I64Vec2 i64vec2
-#define _ANKI_SIZEOF_i64vec2 16u
-#define I64Vec3 i64vec3
-#define _ANKI_SIZEOF_i64vec3 24u
-#define I64Vec4 i64vec4
-#define _ANKI_SIZEOF_i64vec4 32u
+#if _ANKI_SUPPORTS_64BIT
+#	define U64 uint64_t
+#	define _ANKI_SIZEOF_uint64_t 8u
+#	define U64Vec2 u64vec2
+#	define _ANKI_SIZEOF_u64vec2 16u
+#	define U64Vec3 u64vec3
+#	define _ANKI_SIZEOF_u64vec3 24u
+#	define U64Vec4 u64vec4
+#	define _ANKI_SIZEOF_u64vec4 32u
+
+#	define I64 int64_t
+#	define _ANKI_SIZEOF_int64_t 8u
+#	define I64Vec2 i64vec2
+#	define _ANKI_SIZEOF_i64vec2 16u
+#	define I64Vec3 i64vec3
+#	define _ANKI_SIZEOF_i64vec3 24u
+#	define I64Vec4 i64vec4
+#	define _ANKI_SIZEOF_i64vec4 32u
+#endif
 
 #define Mat3 mat3
 
@@ -172,6 +182,13 @@ static const char* SHADER_HEADER = R"(#version 460 core
 
 #define Bool bool
 
+#if _ANKI_SUPPORTS_64BIT
+#	define Address U64
+#else
+#	define Address UVec2
+#endif
+#define _ANKI_SIZEOF_Address 8u
+
 #define _ANKI_CONCATENATE(a, b) a##b
 #define ANKI_CONCATENATE(a, b) _ANKI_CONCATENATE(a, b)
 
@@ -912,8 +929,8 @@ Error ShaderProgramParser::parse()
 void ShaderProgramParser::generateAnkiShaderHeader(ShaderType shaderType, const ShaderCompilerOptions& compilerOptions,
 												   StringAuto& header)
 {
-	header.sprintf(SHADER_HEADER, SHADER_STAGE_NAMES[shaderType].cstr(),
-				   compilerOptions.m_bindlessLimits.m_bindlessTextureCount,
+	header.sprintf(SHADER_HEADER, SHADER_STAGE_NAMES[shaderType].cstr(), ANKI_OS_ANDROID, ANKI_OS_WINDOWS,
+				   ANKI_OS_LINUX, compilerOptions.m_bindlessLimits.m_bindlessTextureCount,
 				   compilerOptions.m_bindlessLimits.m_bindlessImageCount);
 }
 

+ 5 - 1
AnKi/ShaderCompiler/ShaderProgramParser.h

@@ -87,14 +87,18 @@ private:
 /// #pragma anki ray_type NUMBER
 ///
 /// Only the "anki input" should be in an ifdef-like guard. For everything else it's ignored.
-class ShaderProgramParser : public NonCopyable
+class ShaderProgramParser
 {
 public:
 	ShaderProgramParser(CString fname, ShaderProgramFilesystemInterface* fsystem, GenericMemoryPoolAllocator<U8> alloc,
 						const ShaderCompilerOptions& compilerOptions);
 
+	ShaderProgramParser(const ShaderProgramParser&) = delete; // Non-copyable
+
 	~ShaderProgramParser();
 
+	ShaderProgramParser& operator=(const ShaderProgramParser&) = delete; // Non-copyable
+
 	/// Parse the file and its includes.
 	ANKI_USE_RESULT Error parse();
 

+ 3 - 6
AnKi/Shaders/ApplyIrradianceToReflection.ankiprog

@@ -23,15 +23,12 @@ void main()
 {
 	const UVec2 cubeSizeu = UVec2(imageSize(u_cubeTex));
 	const Vec2 cubeSize = Vec2(cubeSizeu);
-	if(gl_GlobalInvocationID.x >= cubeSizeu.x || gl_GlobalInvocationID.y >= cubeSizeu.y)
-	{
-		return;
-	}
+	const UVec2 globalInvocationID = min(gl_GlobalInvocationID.xy, cubeSizeu - 1u);
 
 	const U32 faceIdx = gl_LocalInvocationID.z;
 
 	// Compute the UVs to read the gbuffer from
-	Vec2 sampleUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(cubeSize);
+	Vec2 sampleUv = (Vec2(globalInvocationID) + 0.5) / Vec2(cubeSize);
 	sampleUv.x *= (1.0 / 6.0);
 	sampleUv.x += (1.0 / 6.0) * F32(faceIdx);
 
@@ -48,7 +45,7 @@ void main()
 	const Vec3 indirect = gbuffer.m_diffuse * irradiance;
 
 	// Read the prev color and apply indirect
-	const IVec3 coords = IVec3(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y, faceIdx);
+	const IVec3 coords = IVec3(globalInvocationID, faceIdx);
 	const Vec3 prevColor = imageLoad(u_cubeTex, coords).xyz;
 	const Vec3 prevColorWithIndirectDiffuse = prevColor + gbuffer.m_diffuse * indirect;
 

Some files were not shown because too many files changed in this diff