Browse Source

Replaced std::shared_ptr with dsr::Handle and introduced textures as separate types.

David Piuva 10 months ago
parent
commit
5a2eee2f5f
100 changed files with 6652 additions and 5662 deletions
  1. 3 3
      Doc/Buffers.html
  2. 3 3
      Doc/Generator/Input/Buffers.txt
  3. 25 41
      Doc/Generator/Input/Troubleshooting.txt
  4. 32 45
      Doc/Troubleshooting.html
  5. 41 1
      Source/DFPSR/History.txt
  6. 1 1
      Source/DFPSR/License.txt
  7. 42 114
      Source/DFPSR/api/bufferAPI.cpp
  8. 21 37
      Source/DFPSR/api/bufferAPI.h
  9. 895 65
      Source/DFPSR/api/drawAPI.cpp
  10. 74 29
      Source/DFPSR/api/drawAPI.h
  11. 3 3
      Source/DFPSR/api/fileAPI.cpp
  12. 2 1
      Source/DFPSR/api/fileAPI.h
  13. 769 50
      Source/DFPSR/api/filterAPI.cpp
  14. 18 12
      Source/DFPSR/api/filterAPI.h
  15. 0 4
      Source/DFPSR/api/fontAPI.cpp
  16. 7 3
      Source/DFPSR/api/fontAPI.h
  17. 21 21
      Source/DFPSR/api/guiAPI.cpp
  18. 24 2
      Source/DFPSR/api/guiAPI.h
  19. 185 462
      Source/DFPSR/api/imageAPI.cpp
  20. 390 156
      Source/DFPSR/api/imageAPI.h
  21. 14 12
      Source/DFPSR/api/mediaMachineAPI.cpp
  22. 10 1
      Source/DFPSR/api/mediaMachineAPI.h
  23. 29 35
      Source/DFPSR/api/modelAPI.cpp
  24. 19 9
      Source/DFPSR/api/modelAPI.h
  25. 216 315
      Source/DFPSR/api/stringAPI.cpp
  26. 209 96
      Source/DFPSR/api/stringAPI.h
  27. 126 0
      Source/DFPSR/api/textureAPI.cpp
  28. 530 0
      Source/DFPSR/api/textureAPI.h
  29. 0 55
      Source/DFPSR/api/types.cpp
  30. 0 214
      Source/DFPSR/api/types.h
  31. 131 0
      Source/DFPSR/base/DsrTraits.h
  32. 371 0
      Source/DFPSR/base/Handle.h
  33. 111 62
      Source/DFPSR/base/SafePointer.cpp
  34. 114 134
      Source/DFPSR/base/SafePointer.h
  35. 5 0
      Source/DFPSR/base/endian.h
  36. 350 73
      Source/DFPSR/base/heap.cpp
  37. 106 12
      Source/DFPSR/base/heap.h
  38. 25 15
      Source/DFPSR/base/memory.h
  39. 112 0
      Source/DFPSR/base/noSimd.h
  40. 454 35
      Source/DFPSR/base/simd.h
  41. 7 0
      Source/DFPSR/base/simd3D.h
  42. 21 12
      Source/DFPSR/base/virtualStack.cpp
  43. 13 13
      Source/DFPSR/base/virtualStack.h
  44. 2 2
      Source/DFPSR/font/Font.cpp
  45. 6 3
      Source/DFPSR/font/Font.h
  46. 0 1
      Source/DFPSR/gui/BackendWindow.h
  47. 11 11
      Source/DFPSR/gui/DsrWindow.cpp
  48. 10 12
      Source/DFPSR/gui/DsrWindow.h
  49. 50 59
      Source/DFPSR/gui/VisualComponent.cpp
  50. 12 11
      Source/DFPSR/gui/VisualComponent.h
  51. 9 9
      Source/DFPSR/gui/VisualTheme.cpp
  52. 6 0
      Source/DFPSR/gui/VisualTheme.h
  53. 1 1
      Source/DFPSR/gui/components/Button.cpp
  54. 1 1
      Source/DFPSR/gui/components/Label.cpp
  55. 3 3
      Source/DFPSR/gui/components/Menu.cpp
  56. 3 3
      Source/DFPSR/gui/components/TextBox.cpp
  57. 0 103
      Source/DFPSR/image/Color.cpp
  58. 97 43
      Source/DFPSR/image/Color.h
  59. 263 30
      Source/DFPSR/image/Image.h
  60. 0 39
      Source/DFPSR/image/ImageF32.cpp
  61. 0 44
      Source/DFPSR/image/ImageF32.h
  62. 3 3
      Source/DFPSR/image/ImageLoader.h
  63. 0 378
      Source/DFPSR/image/ImageRgbaU8.cpp
  64. 0 137
      Source/DFPSR/image/ImageRgbaU8.h
  65. 0 39
      Source/DFPSR/image/ImageU16.cpp
  66. 0 45
      Source/DFPSR/image/ImageU16.h
  67. 0 39
      Source/DFPSR/image/ImageU8.cpp
  68. 0 45
      Source/DFPSR/image/ImageU8.h
  69. 120 119
      Source/DFPSR/image/PackOrder.h
  70. 106 0
      Source/DFPSR/image/Texture.h
  71. 0 1576
      Source/DFPSR/image/draw.cpp
  72. 0 94
      Source/DFPSR/image/draw.h
  73. 0 82
      Source/DFPSR/image/internal/imageInternal.h
  74. 0 65
      Source/DFPSR/image/internal/imageTemplate.h
  75. 3 1
      Source/DFPSR/image/stbImage/stbImageWrapper.cpp
  76. 2 3
      Source/DFPSR/image/stbImage/stbImageWrapper.h
  77. 1 0
      Source/DFPSR/includeFramework.h
  78. 3 3
      Source/DFPSR/machine/VirtualMachine.cpp
  79. 4 4
      Source/DFPSR/machine/VirtualMachine.h
  80. 7 8
      Source/DFPSR/machine/mediaFilters.cpp
  81. 2 1
      Source/DFPSR/machine/mediaFilters.h
  82. 31 23
      Source/DFPSR/math/scalar.h
  83. 13 13
      Source/DFPSR/persistent/ClassFactory.cpp
  84. 15 19
      Source/DFPSR/persistent/ClassFactory.h
  85. 45 9
      Source/DFPSR/render/ResourcePool.cpp
  86. 17 11
      Source/DFPSR/render/ResourcePool.h
  87. 31 40
      Source/DFPSR/render/model/Model.cpp
  88. 12 14
      Source/DFPSR/render/model/Model.h
  89. 18 20
      Source/DFPSR/render/renderCore.cpp
  90. 6 14
      Source/DFPSR/render/renderCore.h
  91. 60 52
      Source/DFPSR/render/shader/RgbaMultiply.h
  92. 7 7
      Source/DFPSR/render/shader/Shader.h
  93. 19 37
      Source/DFPSR/render/shader/fillerTemplates.h
  94. 38 164
      Source/DFPSR/render/shader/shaderMethods.h
  95. 40 31
      Source/DFPSR/render/shader/shaderTypes.h
  96. 25 25
      Source/DFPSR/settings.h
  97. 7 7
      Source/SDK/SpriteEngine/lightAPI.cpp
  98. 4 4
      Source/SDK/SpriteEngine/spriteAPI.cpp
  99. 8 7
      Source/SDK/terrain/main.cpp
  100. 2 2
      Source/templates/basic3D/main.cpp

+ 3 - 3
Doc/Buffers.html

@@ -25,7 +25,7 @@ A:active { color: #FFFFFF; background: #444444; }
 <A href="Manual.html">Back to main page</A>
 </P><P>
 </P><H1> Buffers</H1><P>Every file that is saved or loaded in the framework will pass through a Buffer.
-Buffers can not refer to each other in cycles and are automatically reference counted and deleted, so that you don't have to worry about memory leaks from them unless you explicitly call buffer_replaceDestructor.
+Buffers can not refer to each other in cycles and are automatically reference counted and deleted, so that you don't have to worry about memory leaks unless something holding a buffer creates a cycle of handles.
 They store a fixed size allocation of memory padded and aligned with DSR_MAXIMUM_ALIGNMENT bytes to work well with the largest SIMD vectors without false sharing of cache lines between threads.
 
 </P><P>
@@ -41,13 +41,13 @@ The memory always start initialized to zero, which prevents random bugs.
 
 </P><P>
 If you create a buffer of size zero, it will allocate the head but not the data.
-Trying to clone an empty buffer head will just return the same handle without cloning, because empty buffers are immutable.
+Trying to clone an empty buffer will just return the same handle without cloning, because empty buffers are immutable.
 </P><IMG SRC="Images/Border.png"><P>
 </P><H2> Read and write data access</H2><P>
 </P><P>
 Trying to get the pointer of a non-existing or zero length Buffer will safely return a null pointer, no matter if you use buffer_getSafeData<type>(buffer, "Buffer name") or buffer_dangerous_getUnsafeData(buffer).
 You access the data by getting a SafePointer, which can later be sliced into smaller parts.
-Sometimes you can't use the SafePointer because an operating system wants a regular C pointer.
+Sometimes you can't use the SafePointer because an operating system wants a regular pointer.
 </P><IMG SRC="Images/Border.png"><P>
 </P>
 </BODY> </HTML>

+ 3 - 3
Doc/Generator/Input/Buffers.txt

@@ -2,7 +2,7 @@
 
 Title: Buffers
 Every file that is saved or loaded in the framework will pass through a Buffer.
-Buffers can not refer to each other in cycles and are automatically reference counted and deleted, so that you don't have to worry about memory leaks from them unless you explicitly call buffer_replaceDestructor.
+Buffers can not refer to each other in cycles and are automatically reference counted and deleted, so that you don't have to worry about memory leaks unless something holding a buffer creates a cycle of handles.
 They store a fixed size allocation of memory padded and aligned with DSR_MAXIMUM_ALIGNMENT bytes to work well with the largest SIMD vectors without false sharing of cache lines between threads.
 
 ---
@@ -15,11 +15,11 @@ To create a buffer that actually stores something, call buffer_create with the n
 The memory always start initialized to zero, which prevents random bugs.
 
 If you create a buffer of size zero, it will allocate the head but not the data.
-Trying to clone an empty buffer head will just return the same handle without cloning, because empty buffers are immutable.
+Trying to clone an empty buffer will just return the same handle without cloning, because empty buffers are immutable.
 ---
 Title2: Read and write data access
 
 Trying to get the pointer of a non-existing or zero length Buffer will safely return a null pointer, no matter if you use buffer_getSafeData<type>(buffer, "Buffer name") or buffer_dangerous_getUnsafeData(buffer).
 You access the data by getting a SafePointer, which can later be sliced into smaller parts.
-Sometimes you can't use the SafePointer because an operating system wants a regular C pointer.
+Sometimes you can't use the SafePointer because an operating system wants a regular pointer.
 ---

+ 25 - 41
Doc/Generator/Input/Troubleshooting.txt

@@ -2,31 +2,31 @@
 
 Title: Troubleshooting
 
-When using a specific framework, the common mistakes and solutions are usually very similar.
-This guide explains both the steps for finding your bugs, and reducing the risk of them comming back.
----
-Title2: To use or not use an IDE with a built-in debugger
-
 *
-If your low-level code is crashing often from advanced optimizations,
-you might need an IDE with a built-in debugger to quickly show where the crash happened.
-The IDE integration can then directly point to the code instead of showing line numbers from a separate debugger.
+If your program crashes with segmentation faults, start by replacing pointers with SafePointer and building the program in debug mode.
 
-*
-If your high-level code only crashes rarely but the amount of pixel data is too much for a debugger,
-create a debug window showing internal images or debug overlays with coordinates on top of the program's existing graphics.
+In the Builder build system, debug mode is activated by writing 'Debug' in the *.DsrProj project file, which declares the Debug variable and assigns it to one.
+
+For other build systems, give -DDEBUG to the compiler to define the DEBUG macro.
+Make sure that the release flag -DNDEBUG is not also active.
+
+Then memory.h will enable the SAFE_POINTER_CHECKS macro when it detects debug mode.
+Then SafePointer.h will store the permitted region in each SafePointer and perform bound checks when data is accessed using SafePointer.
 
 ---
-Title2: Finding the cause of bugs takes too long.
+Title2: Getting random memory crashes.
 
 *
-Unless you are profiling, test in debug mode using the -DDEBUG compiler flag.
-This catches bugs earlier with more information about the crash.
-Make sure that the release flag -DNDEBUG is not also active.
+If your program is getting random memory corruption despite using SafePointer and debug mode, continue by enabling the EXTRA_SAFE_POINTER_CHECKS macro.
+
+In the Builder build system, extra safe memory checks can be enabled for debug mode by writing 'CompilerFlag "-DEXTRA_SAFE_POINTER_CHECKS"' in the *.DsrProj project file.
+EXTRA_SAFE_POINTER_CHECKS can also be defined as a macro in settings.h or by giving -DEXTRA_SAFE_POINTER_CHECKS to a different build system.
+
+Then SafePointer will check that the allocation has not been replaced by another allocation in heap.cpp.
+SafePointer will also check that no thread is trying to access virtual stack memory allocated by another thread.
 
 *
-If using raw pointers, you might want to replace them with the SafePointer class to get tighter bound checks in debug mode.
-Debuggers will wait until your bugs write outside of the whole allocation before throwing an error.
+If your program does not crash but your image filters do not work as expected, create a debug window showing internal images or debug overlays with coordinates on top of the program's existing graphics.
 
 *
 Make sure that all your multi-threading can be turned off easily when finding the root cause.
@@ -36,11 +36,16 @@ Create a basic reference implementation without dangerous optimizations for ever
 Both for finding the cause of instability and being able to remove a feature without sending emergency patches in panic with more bugs.
 Image filters are first written using lambdas returning the color of a pixel based on the pixel coordinate and exception-free pixel sampling.
 Then one can make an optimized version using SafePointer and SIMD vectors.
+
 ---
 Title2: Getting memory leaks.
 
+In debug mode, terminating the program should print "All heap memory was freed without leaks.".
+If it does not, you might have a memory leak for memory allocated by the framework in heap.cpp.
+Due to the non-deterministic release order for global variables in C++, it is not possible to print a warning when there is a memory leak without redefining the _start function.
+
 *
-Avoid using manual memory management (malloc, free, new, delete...), because it is a waste of time unless you are writing a new abstraction layer.
+Avoid using manual memory management (malloc, free, new, delete...); use dsr::Handle for object handles instead.
 
 *
 Make sure that no reference counted object can create a cycle of reference counted pointers back to itself, because then none of them would be unused according to reference counting.
@@ -49,40 +54,19 @@ Make sure that no reference counted object can create a cycle of reference count
 Use the dsr::Buffer object instead of C allocation calls, to let it automatically free your memory when nobody keeps the reference counted handle.
 You can then work on its memory using SafePointer, which provides bound checks in debug mode, but must be kept close to the buffer's reference counted handle to keep the data it points to alive.
 <- Buffers.html | Read about the Buffer API
----
-Title2: Getting random memory crashes.
-
-*
-Check that you are not using raw C pointers by searching for any use of &, *, [] in the code and replacing them with SafePointer to get bound checks.
-
-*
-Make sure that you are using debug mode, so that outside access with SafePointer is caught with error messages.
-
-*
-Make sure that no SafePointer outlives the parent Buffer, because SafePointer is not reference counting on its own.
-If SafePointer would be reference counting, it would not be a zero overhead substitution for raw C pointers in release mode, and nobody would use it for optimizations.
 
 *
 Remember that a reference in C++ is a pointer under the C++ syntax, which can also cause crashes if taken from a location in memory that may be freed during the call.
-If you passed "const ReadableString &text" from "List<String>" to a function that can reallocate the list that the string is stored in, this can cause a crash by referring to a memory location that got replaced by the list.
-If you instead pass "ReadableString text" from "List<String>", no additional heap allocations will be made, but activating reference counting makes sure that the string can be passed around independently from where it came from without causing any crashes.
+If you passed "const SomeClass &object" from "List<SomeClass>" to a function that can reallocate the list that the object is stored in, this can cause a crash by referring to a memory location that got replaced by the list.
+If you instead pass "SomeClass object" from "List<SomeClass>", the object will be copied in the call instead of referring to freed memory.
 Returning a reference to a stack allocated variable, can also cause crashes with references.
 
-*
-Assert bounds with assertions in debug mode for fixed size and variable length (VLA) arrays, which have no bound checks in C/C++ but are much faster than heap memory for many small allocations.
-Getters and setters can make the bound checks reusable if you only have a few types with fixed size arrays.
 ---
 Title2: The application crashes, but the debugger does not detect it.
 
 Use a debugger directly on the application with debug symbols enabled when compiling.
 Connecting Valgrind to the script used to run your application will catch memory leaks, but not invalid memory access.
 Without debug symbols, you can see which method crashed, but not the line.
-
----
-Title2: Getting crashes after linking to an external library.
-
-Try disabling the library's memory recycling, by either removing the ReuseMemory flag in your DsrProj build script (cleanest way) or adding the -DDISABLE_ALLOCATOR compiler flag to define the DISABLE_ALLOCATOR macro (works with other build systems).
-This will disable the library's memory recycling at DFPSR/base/allocator.cpp in case that another library already has a memory recycler.
 ---
 
 Title2: Getting linker errors when creating a new project without a window.

+ 32 - 45
Doc/Troubleshooting.html

@@ -26,34 +26,38 @@ A:active { color: #FFFFFF; background: #444444; }
 </P><P>
 </P><H1> Troubleshooting</H1><P>
 </P><P>
-When using a specific framework, the common mistakes and solutions are usually very similar.
-This guide explains both the steps for finding your bugs, and reducing the risk of them comming back.
-</P><IMG SRC="Images/Border.png"><P>
-</P><H2> To use or not use an IDE with a built-in debugger</H2><P>
-</P><P>
 <IMG SRC="Images/SmallDot.png">
-If your low-level code is crashing often from advanced optimizations,
-you might need an IDE with a built-in debugger to quickly show where the crash happened.
-The IDE integration can then directly point to the code instead of showing line numbers from a separate debugger.
+If your program crashes with segmentation faults, start by replacing pointers with SafePointer and building the program in debug mode.
 
 </P><P>
-<IMG SRC="Images/SmallDot.png">
-If your high-level code only crashes rarely but the amount of pixel data is too much for a debugger,
-create a debug window showing internal images or debug overlays with coordinates on top of the program's existing graphics.
+In the Builder build system, debug mode is activated by writing 'Debug' in the *.DsrProj project file, which declares the Debug variable and assigns it to one.
+
+</P><P>
+For other build systems, give -DDEBUG to the compiler to define the DEBUG macro.
+Make sure that the release flag -DNDEBUG is not also active.
+
+</P><P>
+Then memory.h will enable the SAFE_POINTER_CHECKS macro when it detects debug mode.
+Then SafePointer.h will store the permitted region in each SafePointer and perform bound checks when data is accessed using SafePointer.
 
 </P><P>
 </P><IMG SRC="Images/Border.png"><P>
-</P><H2> Finding the cause of bugs takes too long.</H2><P>
+</P><H2> Getting random memory crashes.</H2><P>
 </P><P>
 <IMG SRC="Images/SmallDot.png">
-Unless you are profiling, test in debug mode using the -DDEBUG compiler flag.
-This catches bugs earlier with more information about the crash.
-Make sure that the release flag -DNDEBUG is not also active.
+If your program is getting random memory corruption despite using SafePointer and debug mode, continue by enabling the EXTRA_SAFE_POINTER_CHECKS macro.
+
+</P><P>
+In the Builder build system, extra safe memory checks can be enabled for debug mode by writing 'CompilerFlag "-DEXTRA_SAFE_POINTER_CHECKS"' in the *.DsrProj project file.
+EXTRA_SAFE_POINTER_CHECKS can also be defined as a macro in settings.h or by giving -DEXTRA_SAFE_POINTER_CHECKS to a different build system.
+
+</P><P>
+Then SafePointer will check that the allocation has not been replaced by another allocation in heap.cpp.
+SafePointer will also check that no thread is trying to access virtual stack memory allocated by another thread.
 
 </P><P>
 <IMG SRC="Images/SmallDot.png">
-If using raw pointers, you might want to replace them with the SafePointer class to get tighter bound checks in debug mode.
-Debuggers will wait until your bugs write outside of the whole allocation before throwing an error.
+If your program does not crash but your image filters do not work as expected, create a debug window showing internal images or debug overlays with coordinates on top of the program's existing graphics.
 
 </P><P>
 <IMG SRC="Images/SmallDot.png">
@@ -65,11 +69,18 @@ Create a basic reference implementation without dangerous optimizations for ever
 Both for finding the cause of instability and being able to remove a feature without sending emergency patches in panic with more bugs.
 Image filters are first written using lambdas returning the color of a pixel based on the pixel coordinate and exception-free pixel sampling.
 Then one can make an optimized version using SafePointer and SIMD vectors.
+
+</P><P>
 </P><IMG SRC="Images/Border.png"><P>
 </P><H2> Getting memory leaks.</H2><P>
+</P><P>
+In debug mode, terminating the program should print "All heap memory was freed without leaks.".
+If it does not, you might have a memory leak for memory allocated by the framework in heap.cpp.
+Due to the non-deterministic release order for global variables in C++, it is not possible to print a warning when there is a memory leak without redefining the _start function.
+
 </P><P>
 <IMG SRC="Images/SmallDot.png">
-Avoid using manual memory management (malloc, free, new, delete...), because it is a waste of time unless you are writing a new abstraction layer.
+Avoid using manual memory management (malloc, free, new, delete...); use dsr::Handle for object handles instead.
 
 </P><P>
 <IMG SRC="Images/SmallDot.png">
@@ -79,45 +90,21 @@ Make sure that no reference counted object can create a cycle of reference count
 <IMG SRC="Images/SmallDot.png">
 Use the dsr::Buffer object instead of C allocation calls, to let it automatically free your memory when nobody keeps the reference counted handle.
 You can then work on its memory using SafePointer, which provides bound checks in debug mode, but must be kept close to the buffer's reference counted handle to keep the data it points to alive.
-<A href="Buffers.html">Read about the Buffer API</A></P><IMG SRC="Images/Border.png"><P>
-</P><H2> Getting random memory crashes.</H2><P>
-</P><P>
-<IMG SRC="Images/SmallDot.png">
-Check that you are not using raw C pointers by searching for any use of &, *, [] in the code and replacing them with SafePointer to get bound checks.
-
-</P><P>
-<IMG SRC="Images/SmallDot.png">
-Make sure that you are using debug mode, so that outside access with SafePointer is caught with error messages.
-
-</P><P>
-<IMG SRC="Images/SmallDot.png">
-Make sure that no SafePointer outlives the parent Buffer, because SafePointer is not reference counting on its own.
-If SafePointer would be reference counting, it would not be a zero overhead substitution for raw C pointers in release mode, and nobody would use it for optimizations.
-
+<A href="Buffers.html">Read about the Buffer API</A>
 </P><P>
 <IMG SRC="Images/SmallDot.png">
 Remember that a reference in C++ is a pointer under the C++ syntax, which can also cause crashes if taken from a location in memory that may be freed during the call.
-If you passed "const ReadableString &text" from "List<String>" to a function that can reallocate the list that the string is stored in, this can cause a crash by referring to a memory location that got replaced by the list.
-If you instead pass "ReadableString text" from "List<String>", no additional heap allocations will be made, but activating reference counting makes sure that the string can be passed around independently from where it came from without causing any crashes.
+If you passed "const SomeClass &object" from "List<SomeClass>" to a function that can reallocate the list that the object is stored in, this can cause a crash by referring to a memory location that got replaced by the list.
+If you instead pass "SomeClass object" from "List<SomeClass>", the object will be copied in the call instead of referring to freed memory.
 Returning a reference to a stack allocated variable, can also cause crashes with references.
 
 </P><P>
-<IMG SRC="Images/SmallDot.png">
-Assert bounds with assertions in debug mode for fixed size and variable length (VLA) arrays, which have no bound checks in C/C++ but are much faster than heap memory for many small allocations.
-Getters and setters can make the bound checks reusable if you only have a few types with fixed size arrays.
 </P><IMG SRC="Images/Border.png"><P>
 </P><H2> The application crashes, but the debugger does not detect it.</H2><P>
 </P><P>
 Use a debugger directly on the application with debug symbols enabled when compiling.
 Connecting Valgrind to the script used to run your application will catch memory leaks, but not invalid memory access.
 Without debug symbols, you can see which method crashed, but not the line.
-
-</P><P>
-</P><IMG SRC="Images/Border.png"><P>
-</P><H2> Getting crashes after linking to an external library.</H2><P>
-</P><P>
-Try disabling the library's memory recycling, by either removing the ReuseMemory flag in your DsrProj build script (cleanest way) or adding the -DDISABLE_ALLOCATOR compiler flag to define the DISABLE_ALLOCATOR macro (works with other build systems).
-This will disable the library's memory recycling at DFPSR/base/allocator.cpp in case that another library already has a memory recycler.
 </P><IMG SRC="Images/Border.png"><P>
 
 </P><P>

+ 41 - 1
Source/DFPSR/History.txt

@@ -3,7 +3,7 @@ While every new feature may create naming conflicts from using the dsr namespace
 
 There are plans to create an automatic refactoring tool built into the Builder build system that could potentially do this automatically for you, but one must be very careful with overwriting people's code in case that someone does not use version control.
 
-Changes since version 0.1.0
+Changes from version 0.1.0 to version 0.2.0 (Bug fixes)
 	* simdExtra.h was removed, because such a low depth of abstraction would risk making the code slower from not fitting well with future SIMD extensions.
 		The more features you add to the badly defined extra feature set, the less systems it will work on, until it is just a slower version of specific instruction sets.
 		On missing header when including simdExtra.h:
@@ -29,3 +29,43 @@ Changes since version 0.1.0
 			Rethink the design if you relied on distinguishing between left and right control, shift or alt.
 	* If you used a custom theme before the system was finished, you will now have to add the assignment "filter = 1" for components where rounded edges became black from adding the filter setting.
 		Because one can not let default values depend on which component is used when theme classes are shared freely between components.
+
+Changes from version 0.2.0 to version 0.3.0 (Performance and safety improvements)
+	* To make SafePointer fully typesafe so that one can't accidentally give write access to write protected data, the recursive constness had to be removed.
+		Replace 'const SafePointer<' with 'SafePointer<const '
+		Replace 'const dsr::SafePointer<' with 'dsr::SafePointer<const '
+	* The function given to image_dangerous_replaceDestructor no longer frees the allocation itself, only external resources associated with the data.
+		Because heap_free is called automatically after the destructor in the new memory allocator.
+	* simd.h has moved into the dsr namespace because it was getting too big for the global namespace.
+		gather has been renamed into gather_U32, gather_I32 and gather_F32.
+			This avoids potential ambiguity.
+		The 'a == b' and 'a != b' operators have been replaced with 'allLanesEqual(a, b)' and '!allLanesEqual(a, b)'.
+			This reserves the comparison operators for future use with multiple boolean results.
+		Immediate bit shifting now uses the bitShiftLeftImmediate and bitShiftRightImmediate functions with a template argument for the number of bits to shift.
+			Replace any << or >> operator that takes a constant offset with the new functions to prevent slowing down.
+				Replace a << 3 with bitShiftLeftImmediate<3>(a).
+				Replace a >> 5 with bitShiftRightImmediate<5>(a).
+			To get a dynamic offset, cast the bit offset into a SIMD vector of unsigned integers with the same number of lanes.
+				Replace a << b with a << U32x4(b), a << U16x8(b), a << U8x16(b), a << U32x8(b), a << U16x16(b), a << U8x32(b), a << U32xX(b), a << U16xX(b) or a << U8xX(b).
+				Replace a >> b with a >> U32x4(b), a >> U16x8(b), a >> U8x16(b), a >> U32x8(b), a >> U16x16(b), a >> U8x32(b), a >> U32xX(b), a >> U16xX(b) or a >> U8xX(b).
+				The more lanes you use, the slower it becomes when not available in SIMD hardware, so try to use at least 32-bit integers for faster fallback implementations.
+			If you know that the offset is always evenly divisible by 8, you can use byteShiftLeft and byteShiftRight instead.
+				Replace a << 8 with byteShiftLeft(a, 8).
+				Replace a >> 16 with byteShiftRight(a, 16).
+			This makes sure that one does not accidentally use an immediate bit shift with a variable offset.
+				Using a template argument for the offset also allows detecting offsets outside of the deterministic range at compile time.
+	* Textures have been separated from images to allow using them as separate value types.
+		Because it was very difficult to re-use internal texture sampling methods for custom rendering pipelines.
+		  Now images and textures have immutable value allocated heads and all side-effects are in the pixel buffers.		  
+		ImageRgbaU8 has been replaced by TextureRgbaU8 for the diffuse and lightmap textures in modelAPI.h.
+		  Then you must create a texture from an image ahead of time and then give the texture to the model or drawn polygon.
+		Replace 'image_generatePyramid' with 'texture_generatePyramid'.
+		Create a texture from the image using texture_create_RgbaU8 with the image and the number of resolutions.
+		  Then assign the texture instead of the image.
+	* PackOrder.h has a new packOrder_ prefix for global functions to prevent naming conflicts.
+		Replace 'getRed' with 'packOrder_getRed'.
+		Replace 'getGreen' with 'packOrder_getGreen'.
+		Replace 'getBlue' with 'packOrder_getBlue'.
+		Replace 'getAlpha' with 'packOrder_getAlpha'.
+		Replace 'packBytes' with 'packOrder_packBytes'.
+		Replace 'floatToSaturatedByte' with 'packOrder_floatToSaturatedByte'.

+ 1 - 1
Source/DFPSR/License.txt

@@ -1,7 +1,7 @@
 Main license for David Piuva's software renderer:
 	zlib open source license
 
-	Copyright (c) 2017 to 2023 David Forsgren Piuva
+	Copyright (c) 2017 to 2025 David Forsgren Piuva
 
 	This software is provided 'as-is', without any express or implied
 	warranty. In no event will the authors be held liable for any damages

+ 42 - 114
Source/DFPSR/api/bufferAPI.cpp

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2019 to 2024 David Forsgren Piuva
+// Copyright (c) 2019 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -25,147 +25,75 @@
 #include "bufferAPI.h"
 #include "stringAPI.h"
 #include "../math/scalar.h"
+#include "../base/SafePointer.h"
 
 namespace dsr {
 
-// Hidden type
-
-class BufferImpl {
-public:
-	// A Buffer cannot have a name, because each String contains a buffer
-	const int64_t size; // The actually used data
-	const int64_t bufferSize; // The accessible data
-	uint8_t *data;
-	std::function<void(uint8_t *)> destructor;
-public:
-	// Create head without data.
-	BufferImpl();
-	// Create head with newly allocated data.
-	explicit BufferImpl(int64_t newSize);
-	// Create head with inherited data.
-	BufferImpl(int64_t newSize, uint8_t *newData);
-	~BufferImpl();
-public:
-	// No implicit copies, only pass using the Buffer handle
-	BufferImpl(const BufferImpl&) = delete;
-	BufferImpl& operator=(const BufferImpl&) = delete;
-};
-
-// Internal methods
-
-static uint8_t* buffer_allocate(int64_t newSize, std::function<void(uint8_t *)>& targetDestructor) {
-	uint8_t* allocation = heap_allocate(newSize).data;
-	targetDestructor = [](uint8_t *data) { heap_free(data); };
-	return allocation;
-}
-
-BufferImpl::BufferImpl() : size(0), bufferSize(0), data(nullptr) {}
-
-BufferImpl::BufferImpl(int64_t newSize) :
-  size(newSize),
-  bufferSize(roundUp(newSize, DSR_MAXIMUM_ALIGNMENT)) {
-	this->data = buffer_allocate(this->bufferSize, this->destructor);
-	if (this->data == nullptr) {
-		throwError(U"Failed to allocate buffer of ", newSize, " bytes!\n");
-	}
-	memset(this->data, 0, this->bufferSize);
-}
-
-BufferImpl::BufferImpl(int64_t newSize, uint8_t *newData)
-: size(newSize), bufferSize(newSize), data(newData), destructor([](uint8_t *data) { heap_free(data); }) {}
-
-BufferImpl::~BufferImpl() {
-	if (this->data) {
-		this->destructor(this->data);
-	}
-}
-
-// API
-
-Buffer buffer_clone(const Buffer &buffer) {
-	if (!buffer_exists(buffer)) {
-		// If the original buffer does not exist, just return another null handle.
-		return Buffer();
-	} else {
-		if (buffer->size <= 0) {
-			// No need to clone when there is no shared data.
-			return buffer;
-		} else {
-			// Clone the data so that content of the allocations can be modified individually without affecting each other.
-			Buffer newBuffer = std::make_shared<BufferImpl>(buffer->size);
-			memcpy(newBuffer->data, buffer->data, buffer->size);
-			return newBuffer;
-		}
-	}
-}
-
-Buffer buffer_create(int64_t newSize) {
+Buffer buffer_create(intptr_t newSize) {
 	if (newSize < 0) newSize = 0;
-	if (newSize == 0) {
-		// Allocate empty head to indicate that an empty buffer exists.
-		return std::make_shared<BufferImpl>();
-	} else {
-		// Allocate head and data.
-		return std::make_shared<BufferImpl>(newSize);
-	}
+	// Allocate head and data.
+	return handle_createArray<uint8_t>(AllocationInitialization::Zeroed, (uintptr_t)newSize);
 }
 
-Buffer buffer_create(int64_t newSize, int minimumAlignment) {
+Buffer buffer_create(intptr_t newSize, int paddToAlignment) {
 	if (newSize < 0) newSize = 0;
-	if (newSize == 0) {
-		// Allocate empty head to indicate that an empty buffer exists.
-		return std::make_shared<BufferImpl>();
-	} else if (minimumAlignment > DSR_MAXIMUM_ALIGNMENT) {
+	if (paddToAlignment > DSR_MAXIMUM_ALIGNMENT) {
 		throwError(U"Maximum alignment exceeded when creating a buffer!\n");
-		return Buffer();
+		return Handle<uint8_t>();
 	} else {
-		// Allocate head and data.
-		return std::make_shared<BufferImpl>(newSize);
+		return handle_createArray<uint8_t>(AllocationInitialization::Zeroed, memory_getPaddedSize((uintptr_t)newSize, paddToAlignment));
 	}
 }
 
-Buffer buffer_create(int64_t newSize, uint8_t *newData) {
-	if (newSize < 0) newSize = 0;
-	return std::make_shared<BufferImpl>(newSize, newData);
-}
-
-void buffer_replaceDestructor(const Buffer &buffer, const std::function<void(uint8_t *)>& newDestructor) {
+void buffer_replaceDestructor(Buffer &buffer, const HeapDestructor& newDestructor) {
 	if (!buffer_exists(buffer)) {
 		throwError(U"buffer_replaceDestructor: Cannot replace destructor for a buffer that don't exist.\n");
-	} else if (buffer->bufferSize > 0) {
-		buffer->destructor = newDestructor;
+	} else {
+		heap_setAllocationDestructor(buffer.getUnsafe(), newDestructor);
 	}
 }
 
-int64_t buffer_getSize(const Buffer &buffer) {
+// TODO: Create clone and reallocation methods in heap.h to handle object lifetime in a reusable way.
+Buffer buffer_clone(const Buffer &buffer) {
 	if (!buffer_exists(buffer)) {
-		return 0;
+		return Handle<uint8_t>();
 	} else {
-		return buffer->size;
+		uintptr_t size = buffer.getUsedSize();
+		if (size == 0) {
+			// Buffers of zero elements are reused with reference counting.
+			return buffer;
+		} else {
+			// Allocate new memory without setting it to zero, before cloning data into it.
+			Buffer result = handle_createArray<uint8_t>(AllocationInitialization::Uninitialized, size);
+			SafePointer<const uint8_t> source = buffer_getSafeData<const uint8_t>(buffer, "Buffer cloning source");
+			SafePointer<uint8_t> target = buffer_getSafeData<uint8_t>(result, "Buffer cloning target");
+			safeMemoryCopy(target, source, size);
+			return result;
+		}
 	}
 }
 
-int64_t buffer_getUseCount(const Buffer &buffer) {
-	if (!buffer_exists(buffer)) {
-		return 0;
-	} else {
-		return buffer.use_count();
-	}
+intptr_t buffer_getSize(const Buffer &buffer) {
+	// The handle reports its used size, which is converted to the signed type of the public API.
+	intptr_t usedBytes = buffer.getUsedSize();
+	return usedBytes;
+}
+
+intptr_t buffer_getUseCount(const Buffer &buffer) {
+	// Forward the use count reported by the handle, converted to the signed type of the public API.
+	intptr_t useCount = buffer.getUseCount();
+	return useCount;
+}
 
 uint8_t* buffer_dangerous_getUnsafeData(const Buffer &buffer) {
-	if (!buffer_exists(buffer)) {
-		return nullptr;
-	} else {
-		return buffer->data;
-	}
+	return buffer.getUnsafe();
 }
 
 void buffer_setBytes(const Buffer &buffer, uint8_t value) {
-	if (!buffer_exists(buffer)) {
-		throwError(U"buffer_setBytes: Cannot set bytes for a buffer that don't exist.\n");
-	} else if (buffer->bufferSize > 0) {
-		memset(buffer->data, value, buffer->bufferSize);
+	if (buffer.isNull()) {
+		throwError(U"buffer_setBytes: Can not set bytes for a buffer that does not exist.\n");
+	} else {
+		uintptr_t size = buffer.getUsedSize();
+		if (size > 0) {
+			SafePointer<uint8_t> target = buffer_getSafeData<uint8_t>(buffer, "Buffer set target");
+			safeMemorySet(target, value, size);
+		}
 	}
 }
 

+ 21 - 37
Source/DFPSR/api/bufferAPI.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2018 to 2024 David Forsgren Piuva
+// Copyright (c) 2018 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -29,9 +29,9 @@
 #include <functional>
 #include "../base/SafePointer.h"
 #include "../settings.h"
-#include "../base/heap.h"
+#include "../base/Handle.h"
 
-// The types of buffer handles to consider when designing algorithms:
+// The types of buffers to consider when designing algorithms:
 // * Null handle suggesting that there is nothing, such as when loading a file failed.
 //     Size does not exist, but is substituted with zero when asked.
 //     buffer_exists(Buffer()) == false
@@ -51,36 +51,28 @@
 //     buffer_getSize(buffer_create(bytes)) == bytes
 
 namespace dsr {
-	// A safer replacement for raw memory allocation when you don't need to resize the content.
-	// Guarantees that internal addresses will not be invalidated during its lifetime.
-	//   Just remember to always keep a handle together with any pointers to the data to prevent the buffer from being freed.
-	class BufferImpl;
-	using Buffer = std::shared_ptr<BufferImpl>;
+	using Buffer = Handle<uint8_t>;
 
-	// Side-effect: Creates a new buffer head regardless of newSize, but only allocates a zeroed data allocation if newSize > 0.
-	// Post-condition: Returns a handle to the new buffer, which is initialized to zeroes.
-	// Creating a buffer without a size will only allocate the buffer's head referring to null data with size zero.
-	Buffer buffer_create(int64_t newSize);
-	// The buffer always allocate with DSR_MAXIMUM_ALIGNMENT, but you can check that your requested alignment is not too much.
-	Buffer buffer_create(int64_t newSize, int minimumAlignment);
+	// Allocate a Buffer without padding.
+	//   The newSize argument should not include any padding.
+	//   The memory is allocated in whole aligned blocks of DSR_MAXIMUM_ALIGNMENT and buffer_getSafeData pads out the SafePointer region to the maximum alignment.
+	// Side-effect: Creates a new buffer containing newSize bytes.
+	// Post-condition: Returns the new buffer, which is initialized to zeroes.
+	Buffer buffer_create(intptr_t newSize);
 
-	// Pre-conditions:
-	//   newData must be padded and aligned by DSR_MAXIMUM_ALIGNMENT from settings.h if you plan to use it for SIMD or multi-threading.
-	//   newSize may not be larger than the size of newData in bytes.
-	//     Breaking this pre-condition may cause crashes, so only provide a newData pointer if you know what you are doing.
-	// Side-effect: Creates a new buffer of newSize bytes inheriting ownership of newData.
-	//   If the given data cannot be freed as a C allocation, replaceDestructor must be called with the special destructor.
-	// Post-condition: Returns a handle to the manually constructed buffer.
-	Buffer buffer_create(int64_t newSize, uint8_t *newData);
+	// Allocate a Buffer with padding.
+	// The buffer always aligns the start with DSR_MAXIMUM_ALIGNMENT, but this function makes sure that paddToAlignment does not exceed DSR_MAXIMUM_ALIGNMENT.
+	// Pre-condition: paddToAlignment <= DSR_MAXIMUM_ALIGNMENT
+	Buffer buffer_create(intptr_t newSize, int paddToAlignment);
 
 	// Sets the allocation's destructor, to be called when there are no more reference counted pointers to the buffer.
+	//   The destructor is not responsible for freeing the memory allocation itself, only calling destructors in the content.
 	// Pre-condition: The buffer exists.
-	//   If the buffer has a head but no data allocation, the command will be ignored because there is no allocation to delete.
-	void buffer_replaceDestructor(const Buffer &buffer, const std::function<void(uint8_t *)>& newDestructor);
+	void buffer_replaceDestructor(Buffer &buffer, const HeapDestructor& newDestructor);
 
 	// Returns true iff buffer exists, even if it is empty without any data allocation.
 	inline bool buffer_exists(const Buffer &buffer) {
-		return buffer.get() != nullptr;
+		return buffer.isNotNull();
 	}
 
 	// Returns a clone of the buffer.
@@ -91,28 +83,20 @@ namespace dsr {
 
 	// Returns the buffer's size in bytes, as given when allocating it excluding allocation padding.
 	// Returns zero if buffer doesn't exist or has no data allocated.
-	int64_t buffer_getSize(const Buffer &buffer);
+	intptr_t buffer_getSize(const Buffer &buffer);
 
 	// Returns the number of reference counted handles to the buffer, or 0 if the buffer does not exist.
-	int64_t buffer_getUseCount(const Buffer &buffer);
+	intptr_t buffer_getUseCount(const Buffer &buffer);
 
 	// Returns a raw pointer to the data.
 	// An empty handle or buffer of length zero without data will return nullptr.
 	uint8_t* buffer_dangerous_getUnsafeData(const Buffer &buffer);
 
 	// A wrapper for getting a bound-checked pointer of the correct element type.
-	//   Only cast to trivially packed types with power of two dimensions so that the compiler does not add padding.
-	// The name must be an ansi encoded constant literal, because each String contains a Buffer which would cause a cyclic dependency.
+	// The name must be an ascii encoded constant literal.
 	// Returns a safe null pointer if buffer does not exist or there is no data allocation.
 	template <typename T>
-	SafePointer<T> buffer_getSafeData(const Buffer &buffer, const char* name) {
-		if (!buffer_exists(buffer)) {
-			return SafePointer<T>();
-		} else {
-			uint8_t *data = buffer_dangerous_getUnsafeData(buffer);
-			return SafePointer<T>(name, (T*)data, buffer_getSize(buffer), (T*)data, heap_getHeader(data));
-		}
-	}
+	SafePointer<T> buffer_getSafeData(const Buffer &buffer, const char* name) { return buffer.getSafe<T>(name); }
 
 	// Set all bytes to the same value.
 	// Pre-condition: buffer exists, or else an exception is thrown to warn you.

+ 895 - 65
Source/DFPSR/api/drawAPI.cpp

@@ -1,7 +1,7 @@
 
 // zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -22,129 +22,959 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-#define DFPSR_INTERNAL_ACCESS
-
 #include "imageAPI.h"
-#include "drawAPI.h"
-#include "../image/draw.h"
+#include "../math/scalar.h"
 #include "../image/PackOrder.h"
-#include "../image/internal/imageTemplate.h"
-#include "../image/internal/imageInternal.h"
+#include <limits>
+
+namespace dsr {
+
+// Preconditions:
+//   0 <= a <= 255
+//   0 <= b <= 255
+// Postconditions:
+//   Returns the normalized multiplication of a and b, where the 0..255 range represents decimal values from 0.0 to 1.0.
+//   The result may not be less than zero or larger than any of the inputs.
+// Examples:
+//   normalizedByteMultiplication(0, 0) = 0
+//   normalizedByteMultiplication(x, 0) = 0
+//   normalizedByteMultiplication(0, x) = 0
+//   normalizedByteMultiplication(x, 255) = x
+//   normalizedByteMultiplication(255, x) = x
+//   normalizedByteMultiplication(255, 255) = 255
+inline uint32_t normalizedByteMultiplication(uint32_t a, uint32_t b) {
+	// Fixed-point approximation of the reciprocal of 255, scaled by 2^24.
+	//   256³ / 255 ≈ 65793
+	const uint32_t reciprocalOf255 = 65793;
+	// Half of 2^24, added so that the truncating shift rounds to the closest value.
+	const uint32_t roundingTerm = 8388608;
+	// The largest intermediate value still fits in an unsigned 32-bit integer.
+	//   255² * 65793 + 8388608 = 4286578433 < 2^32
+	const uint32_t scaledProduct = a * b * reciprocalOf255;
+	return (scaledProduct + roundingTerm) >> 24;
+}
 
-using namespace dsr;
+inline bool isUniformByte(uint16_t value) {
+	// True when the low and high bytes are equal, so that filling memory with that single byte reproduces the value.
+	const uint16_t lowByte = value & 0xFF;
+	const uint16_t highByte = (value >> 8) & 0xFF;
+	return lowByte == highByte;
+}
 
+inline bool isUniformByte(uint32_t value) {
+	// True when all four bytes are equal.
+	// Repeating the least significant byte in every byte position must then give back the original value.
+	//   The multiplication can not overflow, because 255 * 0x01010101 = 0xFFFFFFFF.
+	const uint32_t least = value & 0x000000FF;
+	return value == least * 0x01010101u;
+}
 
 // -------------------------------- Drawing shapes --------------------------------
 
+// TODO: Use the longest available SIMD vector to assign a color and overwrite padding when the image is not a sub-image.
+//       Create a safe and reusable 32-bit memset function in SafePointer.h.
+
+// Fills the part of the rectangle that is inside of target by assigning color to one pixel at a time.
+//   left and top are inclusive, right and bottom are exclusive.
+//   Used for colors that can not be written using a single repeated byte.
+template <typename IMAGE_TYPE, typename COLOR_TYPE>
+void drawSolidRectangleAssign(const IMAGE_TYPE &target, int32_t left, int32_t top, int32_t right, int32_t bottom, COLOR_TYPE color) {
+	// Clip the rectangle against the image bounds.
+	int32_t leftBound = max(0, left);
+	int32_t topBound = max(0, top);
+	int32_t rightBound = min(right, image_getWidth(target));
+	int32_t bottomBound = min(bottom, image_getHeight(target));
+	// Same guard as in drawSolidRectangleMemset.
+	//   When nothing remains after clipping, do not ask for a pointer to a row that may be outside of the image.
+	if (rightBound <= leftBound || bottomBound <= topBound) {
+		return;
+	}
+	int32_t stride = image_getStride(target);
+	SafePointer<COLOR_TYPE> rowData = image_getSafePointer<COLOR_TYPE>(target, topBound);
+	rowData += leftBound;
+	for (int32_t y = topBound; y < bottomBound; y++) {
+		SafePointer<COLOR_TYPE> pixelData = rowData;
+		for (int32_t x = leftBound; x < rightBound; x++) {
+			pixelData.get() = color;
+			pixelData += 1;
+		}
+		// The stride is given in bytes, so move to the next row using a byte offset.
+		rowData.increaseBytes(stride);
+	}
+}
+
+// Fills the part of the rectangle that is inside of target by setting every byte of the covered pixels to uniformByte.
+//   left and top are inclusive, right and bottom are exclusive.
+template <typename IMAGE_TYPE, typename COLOR_TYPE>
+void drawSolidRectangleMemset(const IMAGE_TYPE &target, int32_t left, int32_t top, int32_t right, int32_t bottom, uint8_t uniformByte) {
+	// Clip the rectangle against the image bounds.
+	int32_t clippedLeft = max(0, left);
+	int32_t clippedTop = max(0, top);
+	int32_t clippedRight = min(right, image_getWidth(target));
+	int32_t clippedBottom = min(bottom, image_getHeight(target));
+	if (clippedRight <= clippedLeft || clippedBottom <= clippedTop) {
+		// Nothing left to fill after clipping.
+		return;
+	}
+	int32_t stride = image_getStride(target);
+	SafePointer<COLOR_TYPE> rowData = image_getSafePointer<COLOR_TYPE>(target, clippedTop);
+	rowData += clippedLeft;
+	int32_t pixelsPerRow = clippedRight - clippedLeft;
+	int32_t bytesPerRow = pixelsPerRow * sizeof(COLOR_TYPE);
+	int32_t rowCount = clippedBottom - clippedTop;
+	bool fullWidthOfParent = (!target.impl_dimensions.isSubImage()) && pixelsPerRow == image_getWidth(target);
+	if (fullWidthOfParent) {
+		// A parent image owning the whole buffer may write over its own padding between the rows,
+		//   because no sub-image sharing the data may display the parent image's padding bytes.
+		//   The last row only covers the visible pixels.
+		safeMemorySet(rowData, uniformByte, (stride * (rowCount - 1)) + bytesPerRow);
+	} else if (bytesPerRow == stride) {
+		// A sub-image using the full width of a parent without padding.
+		//   The rows are adjacent in memory, so no unseen pixels of other images are overwritten.
+		safeMemorySet(rowData, uniformByte, bytesPerRow * rowCount);
+	} else {
+		// A sub-image that must preserve the bytes between its rows, which belong to other images.
+		//   Fill one row at a time.
+		for (int32_t row = 0; row < rowCount; row++) {
+			safeMemorySet(rowData, uniformByte, bytesPerRow);
+			rowData.increaseBytes(stride);
+		}
+	}
+}
 
-void dsr::draw_rectangle(ImageU8& image, const IRect& bound, int color) {
-	if (image) {
-		imageImpl_draw_solidRectangle(*image, bound, color);
+void draw_rectangle(const ImageU8& image, const IRect& bound, int32_t color) {
+	// Drawing to an image that does not exist is ignored.
+	if (!image_exists(image)) {
+		return;
+	}
+	// Saturate the color to the 0..255 range of an unsigned byte.
+	int32_t saturated = (color < 0) ? 0 : ((color > 255) ? 255 : color);
+	// Every pixel is a single byte, so the whole region can be filled as bytes.
+	drawSolidRectangleMemset<ImageU8, uint8_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), saturated);
+}
+// Fills the part of bound that is inside of image with color, saturated to the 0..65535 range.
+//   Drawing to an image that does not exist is ignored.
+void draw_rectangle(const ImageU16& image, const IRect& bound, int32_t color) {
+	if (image_exists(image)) {
+		if (color < 0) { color = 0; }
+		if (color > 65535) { color = 65535; }
+		uint16_t uColor = color;
+		if (isUniformByte(uColor)) {
+			// Both bytes of the color are equal, so the region can be filled using that byte.
+			//   The byte must come from the color itself, or else uniform colors such as 0x0101 and 0xFFFF would be drawn as zero.
+			drawSolidRectangleMemset<ImageU16, uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), uColor & 0xFF);
+		} else {
+			drawSolidRectangleAssign<ImageU16, uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), uColor);
+		}
+	}
+}
+void draw_rectangle(const ImageF32& image, const IRect& bound, float color) {
+	if (image_exists(image)) {
+		int32_t left = bound.left();
+		int32_t top = bound.top();
+		int32_t right = bound.right();
+		int32_t bottom = bound.bottom();
+		if (color != 0.0f) {
+			// Any other value, including NaN, is assigned one pixel at a time.
+			drawSolidRectangleAssign<ImageF32, float>(image, left, top, right, bottom, color);
+		} else {
+			// Floating-point zero is a special value where all bits are zero, which allows filling the region as bytes.
+			drawSolidRectangleMemset<ImageF32, float>(image, left, top, right, bottom, 0);
+		}
 	}
 }
-void dsr::draw_rectangle(ImageF32& image, const IRect& bound, float color) {
-	if (image) {
-		imageImpl_draw_solidRectangle(*image, bound, color);
+void draw_rectangle(const ImageRgbaU8& image, const IRect& bound, uint32_t packedColor) {
+	if (image_exists(image)) {
+		int32_t left = bound.left();
+		int32_t top = bound.top();
+		int32_t right = bound.right();
+		int32_t bottom = bound.bottom();
+		if (!isUniformByte(packedColor)) {
+			// The channels differ, so each pixel is assigned as a whole 32-bit value.
+			drawSolidRectangleAssign<ImageRgbaU8, uint32_t>(image, left, top, right, bottom, packedColor);
+		} else {
+			// All four bytes are equal, so the region can be filled using that byte.
+			drawSolidRectangleMemset<ImageRgbaU8, uint32_t>(image, left, top, right, bottom, packedColor & 0xFF);
+		}
 	}
 }
-void dsr::draw_rectangle(ImageRgbaU8& image, const IRect& bound, const ColorRgbaI32& color) {
-	if (image) {
-		imageImpl_draw_solidRectangle(*image, bound, color);
+void draw_rectangle(const ImageRgbaU8& image, const IRect& bound, const ColorRgbaI32& color) {
+	if (image_exists(image)) {
+		// Saturate and pack the color for the image,
+		//   then let the overload taking a packed color select between filling as bytes and assigning pixels.
+		uint32_t packedColor = image_saturateAndPack(image, color);
+		draw_rectangle(image, bound, packedColor);
 	}
 }
 
-void dsr::draw_line(ImageU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
-	if (image) {
-		imageImpl_draw_line(*image, x1, y1, x2, y2, color);
+// Draws a line that is one pixel wide from (x1, y1) to (x2, y2), including both end points.
+//   Each pixel is written using image_writePixel, which is called for every point along the line.
+//   NOTE(review): Points outside of the image are presumably ignored by image_writePixel, confirm in imageAPI.h.
+//   Lines with both end points outside of the same image edge are skipped entirely.
+template <typename IMAGE_TYPE, typename COLOR_TYPE>
+inline void drawLineSuper(const IMAGE_TYPE &target, int32_t x1, int32_t y1, int32_t x2, int32_t y2, COLOR_TYPE color) {
+	// Culling test to reduce wasted pixels outside of the image.
+	int32_t width = image_getWidth(target);
+	int32_t height = image_getHeight(target);
+	// Both end points must be compared against each edge,
+	//   because a line with only one end point outside may still cross the image.
+	if ((x1 < 0 && x2 < 0) || (y1 < 0 && y2 < 0) || (x1 >= width && x2 >= width) || (y1 >= height && y2 >= height)) {
+		// Skip drawing because both points are outside of the same edge.
+		return;
+	}
+	if (y1 == y2) {
+		// Sideways
+		int32_t left = min(x1, x2);
+		int32_t right = max(x1, x2);
+		for (int32_t x = left; x <= right; x++) {
+			image_writePixel(target, x, y1, color);
+		}
+	} else if (x1 == x2) {
+		// Down
+		int32_t top = min(y1, y2);
+		int32_t bottom = max(y1, y2);
+		for (int32_t y = top; y <= bottom; y++) {
+			image_writePixel(target, x1, y, color);
+		}
+	} else if (std::abs(y2 - y1) >= std::abs(x2 - x1)) {
+		// Steep line, visiting each row once from top to bottom.
+		if (y2 < y1) {
+			swap(x1, x2);
+			swap(y1, y2);
+		}
+		assert(y2 > y1);
+		// Step right or left depending on where the lower end point is.
+		int32_t xStep = (x2 > x1) ? 1 : -1;
+		int32_t tilt = std::abs(x2 - x1) * 2;
+		int32_t maxError = y2 - y1;
+		int32_t error = 0;
+		int32_t x = x1;
+		for (int32_t y = y1; y <= y2; y++) {
+			image_writePixel(target, x, y, color);
+			// Accumulate the sideways movement and take a step when passing half a pixel.
+			error += tilt;
+			if (error >= maxError) {
+				x += xStep;
+				error -= maxError * 2;
+			}
+		}
+	} else {
+		// Flat line, visiting each column once from left to right.
+		if (x2 < x1) {
+			swap(x1, x2);
+			swap(y1, y2);
+		}
+		assert(x2 > x1);
+		// Step down or up depending on where the right end point is.
+		int32_t yStep = (y2 > y1) ? 1 : -1;
+		int32_t tilt = std::abs(y2 - y1) * 2;
+		int32_t maxError = x2 - x1;
+		int32_t error = 0;
+		int32_t y = y1;
+		for (int32_t x = x1; x <= x2; x++) {
+			image_writePixel(target, x, y, color);
+			// Accumulate the vertical movement and take a step when passing half a pixel.
+			error += tilt;
+			if (error >= maxError) {
+				y += yStep;
+				error -= maxError * 2;
+			}
+		}
+	}
+}
+
+void draw_line(const ImageU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int32_t color) {
+	if (image_exists(image)) {
+		// Saturate the color to the 0..255 range of an unsigned byte before drawing.
+		int32_t saturated = (color < 0) ? 0 : ((color > 255) ? 255 : color);
+		drawLineSuper<ImageU8, uint8_t>(image, x1, y1, x2, y2, saturated);
 	}
 }
-void dsr::draw_line(ImageF32& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color) {
-	if (image) {
-		imageImpl_draw_line(*image, x1, y1, x2, y2, color);
+void draw_line(const ImageU16& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int32_t color) {
+	if (image_exists(image)) {
+		// Saturate the color to the 0..65535 range of an unsigned 16-bit integer before drawing.
+		int32_t saturated = (color < 0) ? 0 : ((color > 65535) ? 65535 : color);
+		drawLineSuper<ImageU16, uint16_t>(image, x1, y1, x2, y2, saturated);
 	}
 }
-void dsr::draw_line(ImageRgbaU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color) {
-	if (image) {
-		imageImpl_draw_line(*image, x1, y1, x2, y2, color);
+// Draws a line from (x1, y1) to (x2, y2) in a floating-point image, writing color without any saturation.
+//   Drawing to an image that does not exist is ignored.
+void draw_line(const ImageF32& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color) {
+	if (image_exists(image)) {
+		drawLineSuper<ImageF32, float>(image, x1, y1, x2, y2, color);
 	}
 }
+void draw_line(const ImageRgbaU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t packedColor) {
+	// Drawing to an image that does not exist is ignored.
+	if (!image_exists(image)) {
+		return;
+	}
+	// The color is already packed, so it is written to the pixels as is.
+	drawLineSuper<ImageRgbaU8, uint32_t>(image, x1, y1, x2, y2, packedColor);
+}
+// Draws a line from (x1, y1) to (x2, y2) using a color that is saturated and packed for the image.
+//   Drawing to an image that does not exist is ignored.
+void draw_line(const ImageRgbaU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color) {
+	// Check that the image exists before packing the color from it, just like draw_rectangle does for the same color type.
+	if (image_exists(image)) {
+		uint32_t packedColor = image_saturateAndPack(image, color);
+		draw_line(image, x1, y1, x2, y2, packedColor);
+	}
+}
 
 
 // -------------------------------- Drawing images --------------------------------
 
 
+// Unpacked image dimensions.
+struct UnpackedDimensions {
+	// The number of used pixels on each row.
+	int32_t width;
+	// The number of rows.
+	int32_t height;
+	// The byte offset from one row to another including any padding.
+	int32_t stride;
+	// The byte offset from one pixel to another from left to right.
+	int32_t pixelSize;
+	// Creates empty dimensions where everything is zero.
+	UnpackedDimensions()
+	: width(0), height(0), stride(0), pixelSize(0) {}
+	// Reads the dimensions from an image using the image API.
+	UnpackedDimensions(const Image& image)
+	: width(image_getWidth(image)),
+	  height(image_getHeight(image)),
+	  stride(image_getStride(image)),
+	  pixelSize(image_getPixelSize(image)) {}
+};
+
+// Dimensions together with a raw pointer for writing, where data refers to the first pixel of the region.
+//   The pointer is not bound checked and does not keep the pixel buffer alive.
+struct ImageWriter : public UnpackedDimensions {
+	uint8_t *data;
+	ImageWriter(const UnpackedDimensions &dimensions, uint8_t *data)
+	: UnpackedDimensions(dimensions),
+	  data(data) {}
+};
+
+// Dimensions together with a raw read-only pointer, where data refers to the first pixel of the region.
+//   The pointer is not bound checked and does not keep the pixel buffer alive.
+struct ImageReader : public UnpackedDimensions {
+	const uint8_t *data;
+	ImageReader(const UnpackedDimensions &dimensions, const uint8_t *data)
+	: UnpackedDimensions(dimensions),
+	  data(data) {}
+};
+
+static ImageWriter getWriter(const Image &image) {
+	// Find the image's first pixel by adding its byte start offset to the start of the pixel buffer.
+	uint8_t *bufferStart = buffer_dangerous_getUnsafeData(image.impl_buffer);
+	uint8_t *firstPixel = bufferStart + image.impl_dimensions.getByteStartOffset();
+	return ImageWriter(UnpackedDimensions(image), firstPixel);
+}
+
+static ImageReader getReader(const Image &image) {
+	// Find the image's first pixel by adding its byte start offset to the start of the pixel buffer.
+	const uint8_t *bufferStart = buffer_dangerous_getUnsafeData(image.impl_buffer);
+	const uint8_t *firstPixel = bufferStart + image.impl_dimensions.getByteStartOffset();
+	return ImageReader(UnpackedDimensions(image), firstPixel);
+}
+
+static Image getGenericSubImage(const Image &image, int32_t left, int32_t top, int32_t width, int32_t height) {
+	// Construct a new image from the parent image and the region to select within it.
+	IRect region(left, top, width, height);
+	return Image(image, region);
+}
+
+// The overlapping region when drawing source into target with the upper left corner at (left, top).
+//   subTarget and subSource refer to regions of the same dimensions within target and source.
+struct ImageIntersection {
+	ImageWriter subTarget;
+	ImageReader subSource;
+	ImageIntersection(const ImageWriter &subTarget, const ImageReader &subSource) :
+	  subTarget(subTarget), subSource(subSource) {}
+	// Returns true iff source placed at (left, top) overlaps with at least one pixel in target.
+	static bool canCreate(const Image &target, const Image &source, int32_t left, int32_t top) {
+		int32_t regionRight = left + image_getWidth(source);
+		int32_t regionBottom = top + image_getHeight(source);
+		bool overlapsHorizontally = left < image_getWidth(target) && regionRight > 0;
+		bool overlapsVertically = top < image_getHeight(target) && regionBottom > 0;
+		return overlapsHorizontally && overlapsVertically;
+	}
+	// Only call if canCreate passed with the same arguments
+	static ImageIntersection create(const Image &target, const Image &source, int32_t left, int32_t top) {
+		assert(ImageIntersection::canCreate(target, source, left, top));
+		int32_t sourceWidth = image_getWidth(source);
+		int32_t sourceHeight = image_getHeight(source);
+		// How many pixels of the source that end up outside of the target along each side.
+		int32_t clipLeft = max(0, -left);
+		int32_t clipTop = max(0, -top);
+		int32_t clipRight = max(0, (left + sourceWidth) - image_getWidth(target));
+		int32_t clipBottom = max(0, (top + sourceHeight) - image_getHeight(target));
+		if (clipLeft == 0 && clipTop == 0 && clipRight == 0 && clipBottom == 0) {
+			// Full drawing, where the whole source fits inside of the target.
+			Image subTarget = getGenericSubImage(target, left, top, sourceWidth, sourceHeight);
+			return ImageIntersection(getWriter(subTarget), getReader(source));
+		}
+		// Partial drawing, where the source is clipped to the part that is inside of the target.
+		int32_t newWidth = sourceWidth - (clipLeft + clipRight);
+		int32_t newHeight = sourceHeight - (clipTop + clipBottom);
+		assert(newWidth > 0 && newHeight > 0);
+		Image subTarget = getGenericSubImage(target, left + clipLeft, top + clipTop, newWidth, newHeight);
+		Image subSource = getGenericSubImage(source, clipLeft, clipTop, newWidth, newHeight);
+		return ImageIntersection(getWriter(subTarget), getReader(subSource));
+	}
+};
+
+// Executes OPERATION once for each row in READER, from the first row to the last.
+//   WRITER and READER are expressions with data, height and stride members, such as ImageWriter and ImageReader.
+//   OPERATION may use targetRow and sourceRow, which point to the start of the current row in WRITER and READER.
+//   Only READER.height limits the loop, so the caller must make sure that WRITER has at least as many rows.
+#define ITERATE_ROWS(WRITER, READER, OPERATION) \
+{ \
+	uint8_t *targetRow = WRITER.data; \
+	const uint8_t *sourceRow = READER.data; \
+	for (int32_t y = 0; y < READER.height; y++) { \
+		OPERATION; \
+		targetRow += WRITER.stride; \
+		sourceRow += READER.stride; \
+	} \
+}
+
+// Executes OPERATION once for each pixel in READER, row by row from left to right.
+//   OPERATION may use targetPixel and sourcePixel, which point to the first byte of the current pixel in WRITER and READER.
+//   The pointers move by each side's own pixelSize and stride, so the two sides may use different pixel sizes.
+//   Only READER.width and READER.height limit the loops, so the caller must make sure that WRITER is at least as large.
+#define ITERATE_PIXELS(WRITER, READER, OPERATION) \
+{ \
+	uint8_t *targetRow = WRITER.data; \
+	const uint8_t *sourceRow = READER.data; \
+	for (int32_t y = 0; y < READER.height; y++) { \
+		uint8_t *targetPixel = targetRow; \
+		const uint8_t *sourcePixel = sourceRow; \
+		for (int32_t x = 0; x < READER.width; x++) { \
+			{OPERATION;} \
+			targetPixel += WRITER.pixelSize; \
+			sourcePixel += READER.pixelSize; \
+		} \
+		targetRow += WRITER.stride; \
+		sourceRow += READER.stride; \
+	} \
+}
+
+// Executes OPERATION once for each pixel while iterating over two pairs of writers and readers at the same time.
+//   OPERATION may use targetPixel1, targetPixel2, sourcePixel1 and sourcePixel2 for the current pixel of each image.
+//   The iterated region is limited by the smallest width and the smallest height among the readers.
+//   The writers do not limit the loops, so the caller must make sure that they are at least as large as the iterated region.
+#define ITERATE_PIXELS_2(WRITER1, READER1, WRITER2, READER2, OPERATION) \
+{ \
+	uint8_t *targetRow1 = WRITER1.data; \
+	uint8_t *targetRow2 = WRITER2.data; \
+	const uint8_t *sourceRow1 = READER1.data; \
+	const uint8_t *sourceRow2 = READER2.data; \
+	int32_t minWidth = min(READER1.width, READER2.width); \
+	int32_t minHeight = min(READER1.height, READER2.height); \
+	for (int32_t y = 0; y < minHeight; y++) { \
+		uint8_t *targetPixel1 = targetRow1; \
+		uint8_t *targetPixel2 = targetRow2; \
+		const uint8_t *sourcePixel1 = sourceRow1; \
+		const uint8_t *sourcePixel2 = sourceRow2; \
+		for (int32_t x = 0; x < minWidth; x++) { \
+			{OPERATION;} \
+			targetPixel1 += WRITER1.pixelSize; \
+			targetPixel2 += WRITER2.pixelSize; \
+			sourcePixel1 += READER1.pixelSize; \
+			sourcePixel2 += READER2.pixelSize; \
+		} \
+		targetRow1 += WRITER1.stride; \
+		targetRow2 += WRITER2.stride; \
+		sourceRow1 += READER1.stride; \
+		sourceRow2 += READER2.stride; \
+	} \
+}
+
+// Executes OPERATION once for each pixel while iterating over three pairs of writers and readers at the same time.
+//   OPERATION may use targetPixel1..3 and sourcePixel1..3 for the current pixel of each image.
+//   The iterated region is limited by the smallest width and the smallest height among the readers.
+//   The writers do not limit the loops, so the caller must make sure that they are at least as large as the iterated region.
+#define ITERATE_PIXELS_3(WRITER1, READER1, WRITER2, READER2, WRITER3, READER3, OPERATION) \
+{ \
+	uint8_t *targetRow1 = WRITER1.data; \
+	uint8_t *targetRow2 = WRITER2.data; \
+	uint8_t *targetRow3 = WRITER3.data; \
+	const uint8_t *sourceRow1 = READER1.data; \
+	const uint8_t *sourceRow2 = READER2.data; \
+	const uint8_t *sourceRow3 = READER3.data; \
+	int32_t minWidth = min(min(READER1.width, READER2.width), READER3.width); \
+	int32_t minHeight = min(min(READER1.height, READER2.height), READER3.height); \
+	for (int32_t y = 0; y < minHeight; y++) { \
+		uint8_t *targetPixel1 = targetRow1; \
+		uint8_t *targetPixel2 = targetRow2; \
+		uint8_t *targetPixel3 = targetRow3; \
+		const uint8_t *sourcePixel1 = sourceRow1; \
+		const uint8_t *sourcePixel2 = sourceRow2; \
+		const uint8_t *sourcePixel3 = sourceRow3; \
+		for (int32_t x = 0; x < minWidth; x++) { \
+			{OPERATION;} \
+			targetPixel1 += WRITER1.pixelSize; \
+			targetPixel2 += WRITER2.pixelSize; \
+			targetPixel3 += WRITER3.pixelSize; \
+			sourcePixel1 += READER1.pixelSize; \
+			sourcePixel2 += READER2.pixelSize; \
+			sourcePixel3 += READER3.pixelSize; \
+		} \
+		targetRow1 += WRITER1.stride; \
+		targetRow2 += WRITER2.stride; \
+		targetRow3 += WRITER3.stride; \
+		sourceRow1 += READER1.stride; \
+		sourceRow2 += READER2.stride; \
+		sourceRow3 += READER3.stride; \
+	} \
+}
+
+// Rounds value to the closest integer and saturates the result to the 0..255 range.
+//   NaN is converted into zero.
+inline int32_t saturateFloat(float value) {
+	if (value > 254.5f) {
+		// Too large to be rounded into the range.
+		return 255;
+	}
+	if (value >= 0.5f) {
+		// Round to closest by adding half a unit before truncating.
+		return (uint8_t)(value + 0.5f);
+	}
+	// Negative, less than half a unit, or NaN for which both comparisons are false.
+	return 0;
+}
+
+// Copy data from one image region to another of the same size.
+//   Packing order is reinterpreted without conversion.
+static void copyImageData(ImageWriter writer, ImageReader reader) {
+	assert(writer.width == reader.width && writer.height == reader.height && writer.pixelSize == reader.pixelSize);
+	// Copy the used pixels of one row at a time, because the two images may have different strides.
+	uint8_t *targetRow = writer.data;
+	const uint8_t *sourceRow = reader.data;
+	for (int32_t y = 0; y < reader.height; y++) {
+		std::memcpy(targetRow, sourceRow, reader.width * reader.pixelSize);
+		targetRow += writer.stride;
+		sourceRow += reader.stride;
+	}
+}
+
+// Draws an RGBA source into an RGBA target using the offset (left, top) given to ImageIntersection.
+//   Equal pack orders are copied row by row, otherwise each pixel is repacked into the target's channel order.
+// TODO: Can SIMD be used for specific platforms where vector extract accepts a variable offset?
+static void imageImpl_drawCopy(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top) {
+	PackOrderIndex targetOrderIndex = image_getPackOrderIndex(target);
+	PackOrderIndex sourceOrderIndex = image_getPackOrderIndex(source);
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	if (targetOrderIndex == sourceOrderIndex) {
+		// Same channel layout, so the bytes can be copied without conversion
+		copyImageData(overlap.subTarget, overlap.subSource);
+		return;
+	}
+	// Different channel layouts, so read and repack each pixel
+	// TODO: Pre-compute conversions for each combination of source and target pack order.
+	//       We do not need to store the data in RGBA order, just pack from one format to another.
+	PackOrder targetOrder = PackOrder::getPackOrder(targetOrderIndex);
+	PackOrder sourceOrder = PackOrder::getPackOrder(sourceOrderIndex);
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		targetPixel[targetOrder.redIndex]   = sourcePixel[sourceOrder.redIndex];
+		targetPixel[targetOrder.greenIndex] = sourcePixel[sourceOrder.greenIndex];
+		targetPixel[targetOrder.blueIndex]  = sourcePixel[sourceOrder.blueIndex];
+		targetPixel[targetOrder.alphaIndex] = sourcePixel[sourceOrder.alphaIndex];
+	);
+}
+// Draws a U8 source into a U8 target by copying the bytes of the intersecting region.
+static void imageImpl_drawCopy(const ImageU8& target, const ImageU8& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	copyImageData(overlap.subTarget, overlap.subSource);
+}
+// Draws a U16 source into a U16 target by copying the bytes of the intersecting region.
+static void imageImpl_drawCopy(const ImageU16& target, const ImageU16& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	copyImageData(overlap.subTarget, overlap.subSource);
+}
+// Draws an F32 source into an F32 target by copying the bytes of the intersecting region.
+static void imageImpl_drawCopy(const ImageF32& target, const ImageF32& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	copyImageData(overlap.subTarget, overlap.subSource);
+}
+// Draws a monochrome U8 source into an RGBA target as opaque gray.
+//   The source value is written to red, green and blue, while alpha is set to 255.
+static void imageImpl_drawCopy(const ImageRgbaU8& target, const ImageU8& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetOrder = image_getPackOrder(target);
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		uint8_t gray = *sourcePixel;
+		targetPixel[targetOrder.redIndex]   = gray;
+		targetPixel[targetOrder.greenIndex] = gray;
+		targetPixel[targetOrder.blueIndex]  = gray;
+		targetPixel[targetOrder.alphaIndex] = 255;
+	);
+}
+// Draws a monochrome U16 source into an RGBA target as opaque gray.
+//   Source values above 255 are clamped to 255 before being written to red, green and blue.
+static void imageImpl_drawCopy(const ImageRgbaU8& target, const ImageU16& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetOrder = image_getPackOrder(target);
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		int32_t wide = *((const uint16_t*)sourcePixel);
+		int32_t gray = (wide > 255) ? 255 : wide;
+		targetPixel[targetOrder.redIndex]   = gray;
+		targetPixel[targetOrder.greenIndex] = gray;
+		targetPixel[targetOrder.blueIndex]  = gray;
+		targetPixel[targetOrder.alphaIndex] = 255;
+	);
+}
+// Draws a monochrome F32 source into an RGBA target as opaque gray.
+//   Each float is rounded and limited to 0..255 using saturateFloat before being written to red, green and blue.
+static void imageImpl_drawCopy(const ImageRgbaU8& target, const ImageF32& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetOrder = image_getPackOrder(target);
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		float intensity = *((const float*)sourcePixel);
+		int32_t gray = saturateFloat(intensity);
+		targetPixel[targetOrder.redIndex]   = gray;
+		targetPixel[targetOrder.greenIndex] = gray;
+		targetPixel[targetOrder.blueIndex]  = gray;
+		targetPixel[targetOrder.alphaIndex] = 255;
+	);
+}
+// Draws an F32 source into a U8 target, rounding and limiting each value to 0..255 using saturateFloat.
+static void imageImpl_drawCopy(const ImageU8& target, const ImageF32& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		float intensity = *((const float*)sourcePixel);
+		*targetPixel = saturateFloat(intensity);
+	);
+}
+// Draws a U16 source into a U8 target, clamping values above 255 to 255.
+static void imageImpl_drawCopy(const ImageU8& target, const ImageU16& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		int32_t wide = *((const uint16_t*)sourcePixel);
+		*targetPixel = (wide > 255) ? 255 : wide;
+	);
+}
+// Draws a U8 source into a U16 target, widening each value without scaling.
+static void imageImpl_drawCopy(const ImageU16& target, const ImageU8& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		uint16_t widened = *sourcePixel;
+		*((uint16_t*)targetPixel) = widened;
+	);
+}
+// Draws an F32 source into a U16 target, truncating each float towards zero and limiting it to 0..65535.
+//   NaN is written as zero.
+//   Fixes a bug where the clamped value was computed but the first raw byte of the source float was stored instead.
+static void imageImpl_drawCopy(const ImageU16& target, const ImageF32& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			float value = *((const float*)sourcePixel);
+			int32_t luma;
+			// Clamp before converting, because casting NaN or an out-of-range float to an integer is undefined behavior.
+			if (!(value > 0.0f)) {
+				// NaN, zero or negative
+				luma = 0;
+			} else if (value >= 65535.0f) {
+				// Too large
+				luma = 65535;
+			} else {
+				// In range, truncated towards zero
+				luma = (int32_t)value;
+			}
+			*((uint16_t*)targetPixel) = (uint16_t)luma;
+		);
+	}
+}
+// Draws a U8 source into an F32 target, converting each byte to the float of the same value.
+static void imageImpl_drawCopy(const ImageF32& target, const ImageU8& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		uint8_t byteValue = *sourcePixel;
+		*((float*)targetPixel) = (float)byteValue;
+	);
+}
+// Draws a U16 source into an F32 target, converting each value to the float of the same value.
+//   The previous clamp to 255 was removed, because a float can represent every 16-bit integer exactly.
+//   NOTE(review): The clamp looked like a leftover from the U8 target overload, confirm that no caller relied on it.
+static void imageImpl_drawCopy(const ImageF32& target, const ImageU16& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			uint16_t luma = *((const uint16_t*)sourcePixel);
+			*((float*)targetPixel) = (float)luma;
+		);
+	}
+}
+
+
+// Blends an RGBA source over an RGBA target using the source's alpha channel as opacity.
+//   Fully transparent source pixels leave the target untouched and fully opaque pixels replace it.
+static void imageImpl_drawAlphaFilter(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetOrder = image_getPackOrder(target);
+	PackOrder sourceOrder = image_getPackOrder(source);
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	// Read and repack to convert between different color formats
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		// Optimized for anti-aliasing, where most alpha values are 0 or 255
+		uint32_t opacity = sourcePixel[sourceOrder.alphaIndex];
+		if (opacity == 255) {
+			// Opaque source pixels overwrite the target
+			targetPixel[targetOrder.redIndex]   = sourcePixel[sourceOrder.redIndex];
+			targetPixel[targetOrder.greenIndex] = sourcePixel[sourceOrder.greenIndex];
+			targetPixel[targetOrder.blueIndex]  = sourcePixel[sourceOrder.blueIndex];
+			targetPixel[targetOrder.alphaIndex] = 255;
+		} else if (opacity > 0) {
+			// Partially visible source pixels are mixed with what is already in the target
+			uint32_t remaining = 255 - opacity;
+			targetPixel[targetOrder.redIndex]   = normalizedByteMultiplication(targetPixel[targetOrder.redIndex], remaining) + normalizedByteMultiplication(sourcePixel[sourceOrder.redIndex], opacity);
+			targetPixel[targetOrder.greenIndex] = normalizedByteMultiplication(targetPixel[targetOrder.greenIndex], remaining) + normalizedByteMultiplication(sourcePixel[sourceOrder.greenIndex], opacity);
+			targetPixel[targetOrder.blueIndex]  = normalizedByteMultiplication(targetPixel[targetOrder.blueIndex], remaining) + normalizedByteMultiplication(sourcePixel[sourceOrder.blueIndex], opacity);
+			targetPixel[targetOrder.alphaIndex] = normalizedByteMultiplication(targetPixel[targetOrder.alphaIndex], remaining) + opacity;
+		}
+	);
+}
+
+// Overwrites target pixels with source pixels wherever the source alpha is higher than the target alpha.
+//   A non-zero sourceAlphaOffset is added to visible source alphas before comparing,
+//   and the stored alpha is then limited to 255.
+static void imageImpl_drawMaxAlpha(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top, int32_t sourceAlphaOffset) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetOrder = image_getPackOrder(target);
+	PackOrder sourceOrder = image_getPackOrder(source);
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	// Read and repack to convert between different color formats
+	if (sourceAlphaOffset != 0) {
+		ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+			int32_t alpha = sourcePixel[sourceOrder.alphaIndex];
+			// Fully transparent source pixels are never drawn, no matter the offset
+			if (alpha > 0) {
+				alpha += sourceAlphaOffset;
+				if (alpha > targetPixel[targetOrder.alphaIndex]) {
+					targetPixel[targetOrder.redIndex]   = sourcePixel[sourceOrder.redIndex];
+					targetPixel[targetOrder.greenIndex] = sourcePixel[sourceOrder.greenIndex];
+					targetPixel[targetOrder.blueIndex]  = sourcePixel[sourceOrder.blueIndex];
+					// The alpha is above a non-negative byte here, so only the upper limit needs clamping
+					targetPixel[targetOrder.alphaIndex] = (alpha > 255) ? 255 : alpha;
+				}
+			}
+		);
+	} else {
+		// Without an offset, the source alpha is already within the range of a byte
+		ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+			int32_t alpha = sourcePixel[sourceOrder.alphaIndex];
+			if (alpha > targetPixel[targetOrder.alphaIndex]) {
+				targetPixel[targetOrder.redIndex]   = sourcePixel[sourceOrder.redIndex];
+				targetPixel[targetOrder.greenIndex] = sourcePixel[sourceOrder.greenIndex];
+				targetPixel[targetOrder.blueIndex]  = sourcePixel[sourceOrder.blueIndex];
+				targetPixel[targetOrder.alphaIndex] = alpha;
+			}
+		);
+	}
+}
+
+// Copies source pixels whose alpha is above threshold into the target as fully opaque pixels.
+//   Source pixels at or below the threshold leave the target untouched.
+static void imageImpl_drawAlphaClip(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top, int32_t threshold) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetOrder = image_getPackOrder(target);
+	PackOrder sourceOrder = image_getPackOrder(source);
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	// Read and repack to convert between different color formats
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		bool visible = sourcePixel[sourceOrder.alphaIndex] > threshold;
+		if (visible) {
+			targetPixel[targetOrder.redIndex]   = sourcePixel[sourceOrder.redIndex];
+			targetPixel[targetOrder.greenIndex] = sourcePixel[sourceOrder.greenIndex];
+			targetPixel[targetOrder.blueIndex]  = sourcePixel[sourceOrder.blueIndex];
+			targetPixel[targetOrder.alphaIndex] = 255;
+		}
+	);
+}
+
+// Blends a uniform color over an RGBA target, using a monochrome U8 source as the opacity of each pixel.
+//   FULL_ALPHA selects the variant where the source is used directly as opacity,
+//   otherwise the source is multiplied with color.alpha first.
+//   The color channels are stored directly into bytes, so color is expected to be saturated by the caller.
+template <bool FULL_ALPHA>
+void drawSilhouette_template(const ImageRgbaU8& target, const ImageU8& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
+	if (!ImageIntersection::canCreate(target, source, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetOrder = image_getPackOrder(target);
+	ImageIntersection overlap = ImageIntersection::create(target, source, left, top);
+	// Read and repack to convert between different color formats
+	ITERATE_PIXELS(overlap.subTarget, overlap.subSource,
+		uint32_t opacity;
+		if (FULL_ALPHA) {
+			opacity = *sourcePixel;
+		} else {
+			opacity = normalizedByteMultiplication(*sourcePixel, color.alpha);
+		}
+		if (opacity == 255) {
+			// Opaque, so the color replaces the target
+			targetPixel[targetOrder.redIndex]   = color.red;
+			targetPixel[targetOrder.greenIndex] = color.green;
+			targetPixel[targetOrder.blueIndex]  = color.blue;
+			targetPixel[targetOrder.alphaIndex] = 255;
+		} else if (opacity > 0) {
+			// Partially visible, so the color is mixed with what is already in the target
+			uint32_t remaining = 255 - opacity;
+			targetPixel[targetOrder.redIndex]   = normalizedByteMultiplication(targetPixel[targetOrder.redIndex], remaining) + normalizedByteMultiplication(color.red, opacity);
+			targetPixel[targetOrder.greenIndex] = normalizedByteMultiplication(targetPixel[targetOrder.greenIndex], remaining) + normalizedByteMultiplication(color.green, opacity);
+			targetPixel[targetOrder.blueIndex]  = normalizedByteMultiplication(targetPixel[targetOrder.blueIndex], remaining) + normalizedByteMultiplication(color.blue, opacity);
+			targetPixel[targetOrder.alphaIndex] = normalizedByteMultiplication(targetPixel[targetOrder.alphaIndex], remaining) + opacity;
+		}
+	);
+}
+// Draws a monochrome U8 source as a silhouette filled with color into an RGBA target.
+//   Nothing is drawn when color.alpha is zero or negative.
+//   The color is saturated before drawing, and the template variant is selected from the unsaturated alpha.
+static void imageImpl_drawSilhouette(const ImageRgbaU8& target, const ImageU8& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
+	if (color.alpha <= 0) {
+		// Invisible color
+		return;
+	}
+	ColorRgbaI32 clampedColor = color.saturate();
+	if (color.alpha >= 255) {
+		// The source can be used directly as opacity
+		drawSilhouette_template<true>(target, source, clampedColor, left, top);
+	} else {
+		// The source has to be multiplied with the color's alpha
+		drawSilhouette_template<false>(target, source, clampedColor, left, top);
+	}
+}
+
+// Raises U16 target heights to the source heights plus sourceHeightOffset, wherever that is higher.
+//   Source heights of zero are skipped, and the offset result is limited to 0..65535 before comparing.
+static void imageImpl_drawHigher(const ImageU16& targetHeight, const ImageU16& sourceHeight, int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	if (!ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlapH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+	ITERATE_PIXELS(overlapH.subTarget, overlapH.subSource,
+		int32_t candidate = *((const uint16_t*)sourcePixel);
+		if (candidate > 0) {
+			candidate += sourceHeightOffset;
+			if (candidate < 0) {
+				candidate = 0;
+			} else if (candidate > 65535) {
+				candidate = 65535;
+			}
+			// Being above an unsigned target height also implies being above zero
+			if (candidate > *((uint16_t*)targetPixel)) {
+				*((uint16_t*)targetPixel) = candidate;
+			}
+		}
+	);
+}
+// Raises U16 target heights like the height-only overload, and copies the RGBA pixel from sourceA to targetA
+//   for every pixel where the height was raised.
+//   sourceA must have the same dimensions as sourceHeight.
+//   NOTE(review): The dimensions of targetA are not asserted against targetHeight, confirm that callers guarantee a match.
+static void imageImpl_drawHigher(const ImageU16& targetHeight, const ImageU16& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+  int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	assert(image_getWidth(sourceA) == image_getWidth(sourceHeight));
+	assert(image_getHeight(sourceA) == image_getHeight(sourceHeight));
+	if (!ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetAOrder = image_getPackOrder(targetA);
+	PackOrder sourceAOrder = image_getPackOrder(sourceA);
+	ImageIntersection overlapH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+	ImageIntersection overlapA = ImageIntersection::create(targetA, sourceA, left, top);
+	ITERATE_PIXELS_2(overlapH.subTarget, overlapH.subSource, overlapA.subTarget, overlapA.subSource,
+		int32_t candidate = *((const uint16_t*)sourcePixel1);
+		if (candidate > 0) {
+			candidate += sourceHeightOffset;
+			if (candidate < 0) {
+				candidate = 0;
+			} else if (candidate > 65535) {
+				candidate = 65535;
+			}
+			if (candidate > *((uint16_t*)targetPixel1)) {
+				// The source is higher, so both height and color are replaced
+				*((uint16_t*)targetPixel1) = candidate;
+				targetPixel2[targetAOrder.redIndex]   = sourcePixel2[sourceAOrder.redIndex];
+				targetPixel2[targetAOrder.greenIndex] = sourcePixel2[sourceAOrder.greenIndex];
+				targetPixel2[targetAOrder.blueIndex]  = sourcePixel2[sourceAOrder.blueIndex];
+				targetPixel2[targetAOrder.alphaIndex] = sourcePixel2[sourceAOrder.alphaIndex];
+			}
+		}
+	);
+}
+// Raises U16 target heights like the height-only overload, and copies the RGBA pixels from sourceA to targetA
+//   and from sourceB to targetB for every pixel where the height was raised.
+//   sourceA and sourceB must have the same dimensions as sourceHeight.
+//   NOTE(review): The dimensions of targetA and targetB are not asserted against targetHeight, confirm that callers guarantee a match.
+static void imageImpl_drawHigher(const ImageU16& targetHeight, const ImageU16& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+  ImageRgbaU8& targetB, const ImageRgbaU8& sourceB, int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	assert(image_getWidth(sourceA) == image_getWidth(sourceHeight));
+	assert(image_getHeight(sourceA) == image_getHeight(sourceHeight));
+	assert(image_getWidth(sourceB) == image_getWidth(sourceHeight));
+	assert(image_getHeight(sourceB) == image_getHeight(sourceHeight));
+	if (!ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetAOrder = image_getPackOrder(targetA);
+	PackOrder targetBOrder = image_getPackOrder(targetB);
+	PackOrder sourceAOrder = image_getPackOrder(sourceA);
+	PackOrder sourceBOrder = image_getPackOrder(sourceB);
+	ImageIntersection overlapH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+	ImageIntersection overlapA = ImageIntersection::create(targetA, sourceA, left, top);
+	ImageIntersection overlapB = ImageIntersection::create(targetB, sourceB, left, top);
+	ITERATE_PIXELS_3(overlapH.subTarget, overlapH.subSource, overlapA.subTarget, overlapA.subSource, overlapB.subTarget, overlapB.subSource,
+		int32_t candidate = *((const uint16_t*)sourcePixel1);
+		if (candidate > 0) {
+			candidate += sourceHeightOffset;
+			if (candidate < 0) {
+				candidate = 0;
+			} else if (candidate > 65535) {
+				candidate = 65535;
+			}
+			if (candidate > *((uint16_t*)targetPixel1)) {
+				// The source is higher, so the height and both colors are replaced
+				*((uint16_t*)targetPixel1) = candidate;
+				targetPixel2[targetAOrder.redIndex]   = sourcePixel2[sourceAOrder.redIndex];
+				targetPixel2[targetAOrder.greenIndex] = sourcePixel2[sourceAOrder.greenIndex];
+				targetPixel2[targetAOrder.blueIndex]  = sourcePixel2[sourceAOrder.blueIndex];
+				targetPixel2[targetAOrder.alphaIndex] = sourcePixel2[sourceAOrder.alphaIndex];
+				targetPixel3[targetBOrder.redIndex]   = sourcePixel3[sourceBOrder.redIndex];
+				targetPixel3[targetBOrder.greenIndex] = sourcePixel3[sourceBOrder.greenIndex];
+				targetPixel3[targetBOrder.blueIndex]  = sourcePixel3[sourceBOrder.blueIndex];
+				targetPixel3[targetBOrder.alphaIndex] = sourcePixel3[sourceBOrder.alphaIndex];
+			}
+		}
+	);
+}
+
+// Raises F32 target heights to the source heights plus sourceHeightOffset, wherever that is higher.
+//   Source heights of negative infinity are skipped, and so is NaN because it fails the comparison.
+static void imageImpl_drawHigher(const ImageF32& targetHeight, const ImageF32& sourceHeight, int32_t left, int32_t top, float sourceHeightOffset) {
+	if (!ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	ImageIntersection overlapH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+	ITERATE_PIXELS(overlapH.subTarget, overlapH.subSource,
+		float sourceValue = *((const float*)sourcePixel);
+		if (sourceValue > -std::numeric_limits<float>::infinity()) {
+			float candidate = sourceValue + sourceHeightOffset;
+			if (candidate > *((float*)targetPixel)) {
+				*((float*)targetPixel) = candidate;
+			}
+		}
+	);
+}
+// Raises F32 target heights like the height-only overload, and copies the RGBA pixel from sourceA to targetA
+//   for every pixel where the height was raised.
+//   sourceA must have the same dimensions as sourceHeight.
+//   NOTE(review): The dimensions of targetA are not asserted against targetHeight, confirm that callers guarantee a match.
+static void imageImpl_drawHigher(const ImageF32& targetHeight, const ImageF32& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+  int32_t left, int32_t top, float sourceHeightOffset) {
+	assert(image_getWidth(sourceA) == image_getWidth(sourceHeight));
+	assert(image_getHeight(sourceA) == image_getHeight(sourceHeight));
+	if (!ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetAOrder = image_getPackOrder(targetA);
+	PackOrder sourceAOrder = image_getPackOrder(sourceA);
+	ImageIntersection overlapH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+	ImageIntersection overlapA = ImageIntersection::create(targetA, sourceA, left, top);
+	ITERATE_PIXELS_2(overlapH.subTarget, overlapH.subSource, overlapA.subTarget, overlapA.subSource,
+		float sourceValue = *((const float*)sourcePixel1);
+		if (sourceValue > -std::numeric_limits<float>::infinity()) {
+			float candidate = sourceValue + sourceHeightOffset;
+			if (candidate > *((float*)targetPixel1)) {
+				// The source is higher, so both height and color are replaced
+				*((float*)targetPixel1) = candidate;
+				targetPixel2[targetAOrder.redIndex]   = sourcePixel2[sourceAOrder.redIndex];
+				targetPixel2[targetAOrder.greenIndex] = sourcePixel2[sourceAOrder.greenIndex];
+				targetPixel2[targetAOrder.blueIndex]  = sourcePixel2[sourceAOrder.blueIndex];
+				targetPixel2[targetAOrder.alphaIndex] = sourcePixel2[sourceAOrder.alphaIndex];
+			}
+		}
+	);
+}
+// Raises F32 target heights like the height-only overload, and copies the RGBA pixels from sourceA to targetA
+//   and from sourceB to targetB for every pixel where the height was raised.
+//   sourceA and sourceB must have the same dimensions as sourceHeight.
+//   NOTE(review): The dimensions of targetA and targetB are not asserted against targetHeight, confirm that callers guarantee a match.
+static void imageImpl_drawHigher(const ImageF32& targetHeight, const ImageF32& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+  ImageRgbaU8& targetB, const ImageRgbaU8& sourceB, int32_t left, int32_t top, float sourceHeightOffset) {
+	assert(image_getWidth(sourceA) == image_getWidth(sourceHeight));
+	assert(image_getHeight(sourceA) == image_getHeight(sourceHeight));
+	assert(image_getWidth(sourceB) == image_getWidth(sourceHeight));
+	assert(image_getHeight(sourceB) == image_getHeight(sourceHeight));
+	if (!ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		// No intersection to draw within
+		return;
+	}
+	PackOrder targetAOrder = image_getPackOrder(targetA);
+	PackOrder targetBOrder = image_getPackOrder(targetB);
+	PackOrder sourceAOrder = image_getPackOrder(sourceA);
+	PackOrder sourceBOrder = image_getPackOrder(sourceB);
+	ImageIntersection overlapH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+	ImageIntersection overlapA = ImageIntersection::create(targetA, sourceA, left, top);
+	ImageIntersection overlapB = ImageIntersection::create(targetB, sourceB, left, top);
+	ITERATE_PIXELS_3(overlapH.subTarget, overlapH.subSource, overlapA.subTarget, overlapA.subSource, overlapB.subTarget, overlapB.subSource,
+		float sourceValue = *((const float*)sourcePixel1);
+		if (sourceValue > -std::numeric_limits<float>::infinity()) {
+			float candidate = sourceValue + sourceHeightOffset;
+			if (candidate > *((float*)targetPixel1)) {
+				// The source is higher, so the height and both colors are replaced
+				*((float*)targetPixel1) = candidate;
+				targetPixel2[targetAOrder.redIndex]   = sourcePixel2[sourceAOrder.redIndex];
+				targetPixel2[targetAOrder.greenIndex] = sourcePixel2[sourceAOrder.greenIndex];
+				targetPixel2[targetAOrder.blueIndex]  = sourcePixel2[sourceAOrder.blueIndex];
+				targetPixel2[targetAOrder.alphaIndex] = sourcePixel2[sourceAOrder.alphaIndex];
+				targetPixel3[targetBOrder.redIndex]   = sourcePixel3[sourceBOrder.redIndex];
+				targetPixel3[targetBOrder.greenIndex] = sourcePixel3[sourceBOrder.greenIndex];
+				targetPixel3[targetBOrder.blueIndex]  = sourcePixel3[sourceBOrder.blueIndex];
+				targetPixel3[targetBOrder.alphaIndex] = sourcePixel3[sourceBOrder.alphaIndex];
+			}
+		}
+	);
+}
+
 #define DRAW_COPY_WRAPPER(TARGET_TYPE, SOURCE_TYPE) \
-	void dsr::draw_copy(TARGET_TYPE& target, const SOURCE_TYPE& source, int32_t left, int32_t top) { \
-		if (target && source) { \
-			imageImpl_drawCopy(*target, *source, left, top); \
+	void draw_copy(const TARGET_TYPE& target, const SOURCE_TYPE& source, int32_t left, int32_t top) { \
+		if (image_exists(target) && image_exists(source)) { \
+			imageImpl_drawCopy(target, source, left, top); \
 		} \
 	}
-DRAW_COPY_WRAPPER(ImageRgbaU8, ImageRgbaU8);
 DRAW_COPY_WRAPPER(ImageU8, ImageU8);
-DRAW_COPY_WRAPPER(ImageU16, ImageU16);
-DRAW_COPY_WRAPPER(ImageF32, ImageF32);
-DRAW_COPY_WRAPPER(ImageRgbaU8, ImageU8);
-DRAW_COPY_WRAPPER(ImageRgbaU8, ImageU16);
-DRAW_COPY_WRAPPER(ImageRgbaU8, ImageF32);
 DRAW_COPY_WRAPPER(ImageU8, ImageU16);
 DRAW_COPY_WRAPPER(ImageU8, ImageF32);
 DRAW_COPY_WRAPPER(ImageU16, ImageU8);
+DRAW_COPY_WRAPPER(ImageU16, ImageU16);
 DRAW_COPY_WRAPPER(ImageU16, ImageF32);
 DRAW_COPY_WRAPPER(ImageF32, ImageU8);
 DRAW_COPY_WRAPPER(ImageF32, ImageU16);
+DRAW_COPY_WRAPPER(ImageF32, ImageF32);
+DRAW_COPY_WRAPPER(ImageRgbaU8, ImageU8);
+DRAW_COPY_WRAPPER(ImageRgbaU8, ImageU16);
+DRAW_COPY_WRAPPER(ImageRgbaU8, ImageF32);
+DRAW_COPY_WRAPPER(ImageRgbaU8, ImageRgbaU8);
 
-void dsr::draw_alphaFilter(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top) {
-	if (target && source) {
-		imageImpl_drawAlphaFilter(*target, *source, left, top);
+void draw_alphaFilter(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top) {
+	if (image_exists(target) && image_exists(source)) {
+		imageImpl_drawAlphaFilter(target, source, left, top);
 	}
 }
-void dsr::draw_maxAlpha(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top, int32_t sourceAlphaOffset) {
-	if (target && source) {
-		imageImpl_drawMaxAlpha(*target, *source, left, top, sourceAlphaOffset);
+void draw_maxAlpha(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top, int32_t sourceAlphaOffset) {
+	if (image_exists(target) && image_exists(source)) {
+		imageImpl_drawMaxAlpha(target, source, left, top, sourceAlphaOffset);
 	}
 }
-void dsr::draw_alphaClip(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top, int32_t threshold) {
-	if (target && source) {
-		imageImpl_drawAlphaClip(*target, *source, left, top, threshold);
+void draw_alphaClip(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top, int32_t threshold) {
+	if (image_exists(target) && image_exists(source)) {
+		imageImpl_drawAlphaClip(target, source, left, top, threshold);
 	}
 }
-void dsr::draw_silhouette(ImageRgbaU8& target, const ImageU8& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
-	if (target && source) {
-		imageImpl_drawSilhouette(*target, *source, color, left, top);
+void draw_silhouette(const ImageRgbaU8& target, const ImageU8& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
+	if (image_exists(target) && image_exists(source)) {
+		imageImpl_drawSilhouette(target, source, color, left, top);
 	}
 }
-void dsr::draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight, int32_t left, int32_t top, int32_t sourceHeightOffset) {
-	if (targetHeight && sourceHeight) {
-		imageImpl_drawHigher(*targetHeight, *sourceHeight, left, top, sourceHeightOffset);
+void draw_higher(const ImageU16& targetHeight, const ImageU16& sourceHeight, int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	if (image_exists(targetHeight) && image_exists(sourceHeight)) {
+		imageImpl_drawHigher(targetHeight, sourceHeight, left, top, sourceHeightOffset);
 	}
 }
-void dsr::draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+void draw_higher(const ImageU16& targetHeight, const ImageU16& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
   int32_t left, int32_t top, int32_t sourceHeightOffset) {
-	if (targetHeight && sourceHeight && targetA && sourceA) {
-		imageImpl_drawHigher(*targetHeight, *sourceHeight, *targetA, *sourceA, left, top, sourceHeightOffset);
+	if (image_exists(targetHeight) && image_exists(sourceHeight) && image_exists(targetA) && image_exists(sourceA)) {
+		imageImpl_drawHigher(targetHeight, sourceHeight, targetA, sourceA, left, top, sourceHeightOffset);
 	}
 }
-void dsr::draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+void draw_higher(const ImageU16& targetHeight, const ImageU16& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
   ImageRgbaU8& targetB, const ImageRgbaU8& sourceB, int32_t left, int32_t top, int32_t sourceHeightOffset) {
-	if (targetHeight && sourceHeight && targetA && sourceA && targetB && sourceB) {
-		imageImpl_drawHigher(*targetHeight, *sourceHeight, *targetA, *sourceA, *targetB, *sourceB, left, top, sourceHeightOffset);
+	if (image_exists(targetHeight) && image_exists(sourceHeight) && image_exists(targetA) && image_exists(sourceA) && image_exists(targetB) && image_exists(sourceB)) {
+		imageImpl_drawHigher(targetHeight, sourceHeight, targetA, sourceA, targetB, sourceB, left, top, sourceHeightOffset);
 	}
 }
-void dsr::draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight, int32_t left, int32_t top, float sourceHeightOffset) {
-	if (targetHeight && sourceHeight) {
-		imageImpl_drawHigher(*targetHeight, *sourceHeight, left, top, sourceHeightOffset);
+void draw_higher(const ImageF32& targetHeight, const ImageF32& sourceHeight, int32_t left, int32_t top, float sourceHeightOffset) {
+	if (image_exists(targetHeight) && image_exists(sourceHeight)) {
+		imageImpl_drawHigher(targetHeight, sourceHeight, left, top, sourceHeightOffset);
 	}
 }
-void dsr::draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+void draw_higher(const ImageF32& targetHeight, const ImageF32& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
   int32_t left, int32_t top, float sourceHeightOffset) {
-	if (targetHeight && sourceHeight && targetA && sourceA) {
-		imageImpl_drawHigher(*targetHeight, *sourceHeight, *targetA, *sourceA, left, top, sourceHeightOffset);
+	if (image_exists(targetHeight) && image_exists(sourceHeight) && image_exists(targetA) && image_exists(sourceA)) {
+		imageImpl_drawHigher(targetHeight, sourceHeight, targetA, sourceA, left, top, sourceHeightOffset);
 	}
 }
-void dsr::draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+void draw_higher(const ImageF32& targetHeight, const ImageF32& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
   ImageRgbaU8& targetB, const ImageRgbaU8& sourceB, int32_t left, int32_t top, float sourceHeightOffset) {
-	if (targetHeight && sourceHeight && targetA && sourceA && targetB && sourceB) {
-		imageImpl_drawHigher(*targetHeight, *sourceHeight, *targetA, *sourceA, *targetB, *sourceB, left, top, sourceHeightOffset);
+	if (image_exists(targetHeight) && image_exists(sourceHeight) && image_exists(targetA) && image_exists(sourceA) && image_exists(targetB) && image_exists(sourceB)) {
+		imageImpl_drawHigher(targetHeight, sourceHeight, targetA, sourceA, targetB, sourceB, left, top, sourceHeightOffset);
 	}
 }
 
+}

+ 74 - 29
Source/DFPSR/api/drawAPI.h

@@ -1,7 +1,7 @@
 
 // zlib open source license
 //
-// Copyright (c) 2017 to 2020 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -25,18 +25,62 @@
 #ifndef DFPSR_API_DRAW
 #define DFPSR_API_DRAW
 
-#include "types.h"
+#include "../image/Image.h"
 
 namespace dsr {
 
+// Instead of having lots of arguments for source and target regions, this library uses a system of sub-images so that any drawing method can be cropped.
+//   To limit drawing to a rectangular target region:
+//     * Create a sub-image using image_getSubImage.
+//         0-----------------------------------X
+//         | Parent-image                      |
+//         |                                   |
+//         |       -------------------         |
+//         |      | IRect             |        |
+//         |      |                   |        |
+//         |      |                   |        |
+//         |      |                   |        |
+//         |       -------------------         |
+//         |                                   |
+//         Y-----------------------------------*
+//     * Translate coordinates by subtracting the region's upper left corner.
+//         0-----------------------------------X
+//         | Parent-image                      |
+//         |                                   |
+//         |      0-------------------X        |
+//         |      | Sub-image         |        |
+//         |      |                   |        |
+//         |      |                   |        |
+//         |      |                   |        |
+//         |      Y-------------------*        |
+//         |                                   |
+//         Y-----------------------------------*
+//     * Draw to the new sub-image in the new local coordinate system.
+//         0-------------------X
+//         | Sub-image   /  |  |
+//         |            /|  |__|
+//         |   ________/ |  |  |
+//         |          /  |     |
+//         Y-------------------*
+
 // Drawing shapes
-	void draw_rectangle(ImageU8& image, const IRect& bound, int color);
-	void draw_rectangle(ImageF32& image, const IRect& bound, float color);
-	void draw_rectangle(ImageRgbaU8& image, const IRect& bound, const ColorRgbaI32& color);
+	// TODO: Create wrappers taking left, top, width, height to reduce clutter from the IRect constructor.
+	void draw_rectangle(const ImageU8& image, const IRect& bound, int color);
+	void draw_rectangle(const ImageU16& image, const IRect& bound, int color);
+	void draw_rectangle(const ImageF32& image, const IRect& bound, float color);
+	void draw_rectangle(const ImageRgbaU8& image, const IRect& bound, const ColorRgbaI32& color);
+	// Draw using a color that has been packed in advance with the same pack order using the image_saturateAndPack function.
+	//   This saves time on saturation and packing when drawing many rectangles of the same color.
+	void draw_rectangle(const ImageRgbaU8& image, const IRect& bound, uint32_t packedColor);
 
-	void draw_line(ImageU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color);
-	void draw_line(ImageF32& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color);
-	void draw_line(ImageRgbaU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color);
+	// TODO: Also take two IVector2D as inlined wrapper functions.
+	void draw_line(const ImageU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color);
+	void draw_line(const ImageU16& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color);
+	void draw_line(const ImageF32& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color);
+	void draw_line(const ImageRgbaU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color);
+	// Draw using a color that has been packed in advance with the same pack order using the image_saturateAndPack function.
+	//   This saves time on saturation and packing when drawing many lines of the same color.
+	void draw_line(const ImageRgbaU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t packedColor);
 
 // Drawing images
 	// Draw an image to another image
@@ -44,28 +88,28 @@ namespace dsr {
 	//   All image types can draw to RgbaU8
 	//   All monochrome types can draw to each other
 	//   The source and target images can be sub-images from the same atlas but only if the sub-regions are not overlapping
-	void draw_copy(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageU8& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageU16& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageF32& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageRgbaU8& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageRgbaU8& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageRgbaU8& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageU8& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageU8& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageU16& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageU16& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageF32& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
-	void draw_copy(ImageF32& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageU8& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageU16& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageF32& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageRgbaU8& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageRgbaU8& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageRgbaU8& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageU8& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageU8& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageU16& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageU16& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageF32& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(const ImageF32& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
 	// Draw one RGBA image to another using alpha filtering
 	//   Target alpha does not affect RGB blending, in case that it contains padding for opaque targets
 	//   If you really want to draw to a transparent layer, this method should not be used
-	void draw_alphaFilter(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_alphaFilter(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0);
 	// Draw one RGBA image to another using the alpha channel as height
 	//   sourceAlphaOffset is added to non-zero heights from source alpha
 	//   Writes each source pixel whose alpha value is greater than the target's
 	//   Zero alpha can be used as a mask, because no source value can be below zero in unsigned color formats
-	void draw_maxAlpha(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0, int32_t sourceAlphaOffset = 0);
+	void draw_maxAlpha(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0, int32_t sourceAlphaOffset = 0);
 
 	// Draw between multiple images using a height buffer
 	//   Each source pixel is drawn where the source height's pixel exceeds the target height's pixel
@@ -81,11 +125,11 @@ namespace dsr {
 		ImageU16& targetHeight, const ImageU16& sourceHeight,
 		int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0
 	);
-	void draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight,
+	void draw_higher(const ImageU16& targetHeight, const ImageU16& sourceHeight,
 		ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
 		int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0
 	);
-	void draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight,
+	void draw_higher(const ImageU16& targetHeight, const ImageU16& sourceHeight,
 		ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
 		ImageRgbaU8& targetB, const ImageRgbaU8& sourceB,
 		int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0
@@ -99,21 +143,22 @@ namespace dsr {
 		ImageF32& targetHeight, const ImageF32& sourceHeight,
 		int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0
 	);
-	void draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight,
+	void draw_higher(const ImageF32& targetHeight, const ImageF32& sourceHeight,
 		ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
 		int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0
 	);
-	void draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight,
+	void draw_higher(const ImageF32& targetHeight, const ImageF32& sourceHeight,
 		ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
 		ImageRgbaU8& targetB, const ImageRgbaU8& sourceB,
 		int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0
 	);
 
+	// TODO: Inlined wrappers using IVector2D.
 	// Draw one RGBA image to another using alpha clipping
 	//   Source is solid where alpha is greater than threshold, which can be used for animations
-	void draw_alphaClip(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0, int32_t threshold = 127);
+	void draw_alphaClip(const ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0, int32_t threshold = 127);
 	// Draw a uniform color using a grayscale silhouette as the alpha channel
-	void draw_silhouette(ImageRgbaU8& target, const ImageU8& silhouette, const ColorRgbaI32& color, int32_t left = 0, int32_t top = 0);
+	void draw_silhouette(const ImageRgbaU8& target, const ImageU8& silhouette, const ColorRgbaI32& color, int32_t left = 0, int32_t top = 0);
 
 }
 

+ 3 - 3
Source/DFPSR/api/fileAPI.cpp

@@ -703,7 +703,7 @@ struct DsrProcessImpl {
 };
 
 DsrProcessStatus process_getStatus(const DsrProcess &process) {
-	if (process.get() == nullptr) {
+	if (process.isNull()) {
 		return DsrProcessStatus::NotStarted;
 	} else {
 		if (!process->terminated) {
@@ -765,7 +765,7 @@ DsrProcess process_execute(const ReadableString& programPath, List<String> argum
 		memset(&processInfo, 0, sizeof(PROCESS_INFORMATION));
 		startInfo.cb = sizeof(STARTUPINFO);
 		if (CreateProcessW(nullptr, (LPWSTR)nativeArgs, nullptr, nullptr, true, 0, nullptr, nullptr, &startInfo, &processInfo)) {
-			return std::make_shared<DsrProcessImpl>(processInfo); // Success
+			return handle_create<DsrProcessImpl>(processInfo).setName("DSR Process"); // Success
 		} else {
 			return DsrProcess(); // Failure
 		}
@@ -790,7 +790,7 @@ DsrProcess process_execute(const ReadableString& programPath, List<String> argum
 		argv[currentArg] = nullptr;
 		pid_t pid = 0;
 		if (posix_spawn(&pid, nativePath, nullptr, nullptr, (char**)argv.getUnsafe(), environ) == 0) {
-			return std::make_shared<DsrProcessImpl>(pid); // Success
+			return handle_create<DsrProcessImpl>(pid).setName("DSR Process"); // Success
 		} else {
 			return DsrProcess(); // Failure
 		}

+ 2 - 1
Source/DFPSR/api/fileAPI.h

@@ -26,6 +26,7 @@
 
 #include "stringAPI.h"
 #include "bufferAPI.h"
+#include "../base/Handle.h"
 #if defined(WIN32) || defined(_WIN32)
 	#define USE_MICROSOFT_WINDOWS
 #endif
@@ -290,7 +291,7 @@ namespace dsr {
 
 	// A reference counted handle to a process, so that multiple callers can read the status at any time.
 	class DsrProcessImpl;
-	using DsrProcess = std::shared_ptr<DsrProcessImpl>;
+	using DsrProcess = Handle<DsrProcessImpl>;
 
 	// Post-condition: Returns the status of process.
 	DsrProcessStatus process_getStatus(const DsrProcess &process);

+ 769 - 50
Source/DFPSR/api/filterAPI.cpp

@@ -1,7 +1,7 @@
 
 // zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -22,53 +22,771 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-#define DFPSR_INTERNAL_ACCESS
+// TODO: Optimize and clean up using template programming to automatically unpack image data in advance for easy access.
+//       Create reusable inline functions for fast pixel sampling in a separate header while prototyping.
 
 #include <cassert>
-#include "imageAPI.h"
 #include "filterAPI.h"
-#include "../image/draw.h"
+#include "imageAPI.h"
+#include "drawAPI.h"
 #include "../image/PackOrder.h"
-#include "../image/internal/imageTemplate.h"
-#include "../image/internal/imageInternal.h"
+#include "../base/simd.h"
+
+namespace dsr {
+
+// Places the channels of color in a vector, in the order red, green, blue, alpha.
+static inline U32x4 ColorRgbaI32_to_U32x4(const ColorRgbaI32& color) {
+	U32x4 channels(color.red, color.green, color.blue, color.alpha);
+	return channels;
+}
+
+// Reads the elements of a vector back as a color, using x, y, z, w as red, green, blue, alpha.
+static inline ColorRgbaI32 U32x4_to_ColorRgbaI32(const U32x4& color) {
+	UVector4D channels = color.get();
+	ColorRgbaI32 unpackedColor(channels.x, channels.y, channels.z, channels.w);
+	return unpackedColor;
+}
+
+// Uniform linear interpolation of packed colors from a 16-bit sub-pixel weight, where all channels in all four pixels share the same weight
+// Pre-condition: 0 <= fineRatio <= 65536
+// Post-condition: Returns colorA * (1 - (fineRatio / 65536)) + colorB * (fineRatio / 65536), approximated using only the upper 8 bits of the weight
+static inline U32x4 mixColorsUniform(const U32x4 &colorA, const U32x4 &colorB, uint32_t fineRatio) {
+	uint16_t ratio = (uint16_t)bitShiftRightImmediate<8>(fineRatio); // Reduce the weight to the range 0..256.
+	uint16_t invRatio = 256 - ratio; // The two weights always sum to 256.
+	U16x8 weightA = U16x8(invRatio);
+	U16x8 weightB = U16x8(ratio);
+	U32x4 lowMask(0x00FF00FFu); // Keeps bits 0..7 and 16..23 of each 32-bit element.
+	U16x8 lowColorA = U16x8(colorA & lowMask);
+	U16x8 lowColorB = U16x8(colorB & lowMask);
+	U32x4 highMask(0xFF00FF00u); // Keeps bits 8..15 and 24..31 of each 32-bit element.
+	U16x8 highColorA = bitShiftRightImmediate<8>(U16x8((colorA & highMask))); // Move the channel down so that the multiplication has 8 bits of headroom.
+	U16x8 highColorB = bitShiftRightImmediate<8>(U16x8((colorB & highMask)));
+	U32x4 lowColor = (((lowColorA * weightA) + (lowColorB * weightB))).get_U32(); // At most 255 * 256 per 16-bit element when the pre-condition holds.
+	U32x4 highColor = (((highColorA * weightA) + (highColorB * weightB))).get_U32();
+	return ((bitShiftRightImmediate<8>(lowColor) & lowMask) | (highColor & highMask)); // Low results are divided by 256 using the shift, high results are already in place.
+}
+
+// TODO: Use wrappers around images to get the needed information unpacked in advance for faster reading of pixels.
+#define READ_RGBAU8_CLAMP(X,Y) image_readPixel_clamp(source, X, Y)
+#define READ_RGBAU8_CLAMP_SIMD(X,Y) ColorRgbaI32_to_U32x4(READ_RGBAU8_CLAMP(X,Y))
+
+// Fixed-precision decimal system with 16-bit indices and 16-bit sub-pixel weights
+static const uint32_t interpolationFullPixel = 65536;
+static const uint32_t interpolationHalfPixel = interpolationFullPixel / 2;
+// Modulo mask for values greater than or equal to 0 and lesser than interpolationFullPixel
+static const uint32_t interpolationWeightMask = interpolationFullPixel - 1;
+
+template <bool BILINEAR>
+static uint32_t samplePixel(const ImageRgbaU8& target, const ImageRgbaU8& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
+	if (BILINEAR) {
+		uint32_t upperRatio = 65536 - lowerRatio; // Vertical weights sum to 65536.
+		uint32_t leftRatio = 65536 - rightRatio; // Horizontal weights sum to 65536.
+		U32x4 vUpperLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, upperY); // Each read goes through image_readPixel_clamp on source.
+		U32x4 vUpperRightColor = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY);
+		U32x4 vLowerLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, upperY + 1);
+		U32x4 vLowerRightColor = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY + 1);
+		U32x4 vLeftRatio = U32x4(leftRatio);
+		U32x4 vRightRatio = U32x4(rightRatio);
+		U32x4 vUpperColor = bitShiftRightImmediate<16>((vUpperLeftColor * vLeftRatio) + (vUpperRightColor * vRightRatio)); // Horizontal mix of the upper row.
+		U32x4 vLowerColor = bitShiftRightImmediate<16>((vLowerLeftColor * vLeftRatio) + (vLowerRightColor * vRightRatio)); // Horizontal mix of the lower row.
+		U32x4 vCenterColor = bitShiftRightImmediate<16>((vUpperColor * upperRatio) + (vLowerColor * lowerRatio)); // Vertical mix of the two rows.
+		return image_saturateAndPack(target, U32x4_to_ColorRgbaI32(vCenterColor)); // Packed using target, not source.
+	} else {
+		return image_saturateAndPack(target, image_readPixel_clamp(source, leftX, upperY)); // Nearest neighbor ignores the weights.
+	}
+}
+
+// Samples one monochrome pixel from source.
+//   BILINEAR: Mixes the 2 x 2 pixels starting at (leftX, upperY) using rightRatio and lowerRatio as 16-bit weights.
+//             Otherwise the pixel at (leftX, upperY) is returned directly and the weights are ignored.
+//   target is not used in this overload.
+template <bool BILINEAR>
+static uint8_t samplePixel(const ImageU8& target, const ImageU8& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
+	if (!BILINEAR) {
+		return image_readPixel_clamp(source, leftX, upperY);
+	}
+	// The weights of two opposite sides always sum to 65536.
+	uint32_t weightTop = 65536 - lowerRatio;
+	uint32_t weightLeft = 65536 - rightRatio;
+	// Read the four neighbors.
+	uint32_t lumaTopLeft = image_readPixel_clamp(source, leftX, upperY);
+	uint32_t lumaTopRight = image_readPixel_clamp(source, leftX + 1, upperY);
+	uint32_t lumaBottomLeft = image_readPixel_clamp(source, leftX, upperY + 1);
+	uint32_t lumaBottomRight = image_readPixel_clamp(source, leftX + 1, upperY + 1);
+	// Mix horizontally on each row, then vertically between the rows.
+	uint32_t lumaTop = bitShiftRightImmediate<16>((lumaTopLeft * weightLeft) + (lumaTopRight * rightRatio));
+	uint32_t lumaBottom = bitShiftRightImmediate<16>((lumaBottomLeft * weightLeft) + (lumaBottomRight * rightRatio));
+	return bitShiftRightImmediate<16>((lumaTop * weightTop) + (lumaBottom * lowerRatio));
+}
+
+// BILINEAR: Enables linear interpolation
+// scaleRegion:
+//     The stretched location of the source image in the target image
+//     Making it smaller than the target image will fill the outside with stretched pixels
+//     Allowing the caller to crop away parts of the source image that aren't interesting
+//     Can be used to round the region to a multiple of the input size for a fixed pixel size
+template <bool BILINEAR, typename IMAGE_TYPE, typename PIXEL_TYPE>
+static void resize_reference(const IMAGE_TYPE& target, const IMAGE_TYPE& source, const IRect& scaleRegion) {
+	// Reference implementation sampling each target pixel individually, using source coordinates with 16 bits for whole pixels and 16 bits for sub-pixel weights
+
+	// Offset in source pixels per target pixel
+	int32_t offsetX = interpolationFullPixel * image_getWidth(source) / scaleRegion.width(); // NOTE(review): Divides by the region's width, so an empty scaleRegion is assumed to never get here - confirm against callers.
+	int32_t offsetY = interpolationFullPixel * image_getHeight(source) / scaleRegion.height();
+	int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
+	int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
+	if (BILINEAR) {
+		startX -= interpolationHalfPixel; // Interpolated sampling begins half a source pixel earlier.
+		startY -= interpolationHalfPixel;
+	}
+	SafePointer<PIXEL_TYPE> targetRow = image_getSafePointer<PIXEL_TYPE>(target);
+	int32_t readY = startY;
+	for (int32_t y = 0; y < image_getHeight(target); y++) {
+		int32_t naturalY = readY;
+		if (naturalY < 0) { naturalY = 0; } // Negative coordinates must not wrap around in the unsigned conversion below.
+		uint32_t sampleY = (uint32_t)naturalY;
+		uint32_t upperY = bitShiftRightImmediate<16>(sampleY); // Whole source row.
+		uint32_t lowerRatio = sampleY & interpolationWeightMask; // Fraction of a row, used as the weight of the lower row.
+		SafePointer<PIXEL_TYPE> targetPixel = targetRow;
+		int32_t readX = startX;
+		for (int32_t x = 0; x < image_getWidth(target); x++) {
+			int32_t naturalX = readX;
+			if (naturalX < 0) { naturalX = 0; }
+			uint32_t sampleX = (uint32_t)naturalX;
+			uint32_t leftX = bitShiftRightImmediate<16>(sampleX); // Whole source column.
+			uint32_t rightRatio = sampleX & interpolationWeightMask; // Fraction of a column, used as the weight of the right column.
+			*targetPixel = samplePixel<BILINEAR>(target, source, leftX, upperY, rightRatio, lowerRatio);
+			targetPixel += 1;
+			readX += offsetX;
+		}
+		targetRow.increaseBytes(image_getStride(target)); // Next target row.
+		readY += offsetY;
+	}
+}
 
-using namespace dsr;
+template <bool BILINEAR, bool SIMD_ALIGNED>
+static void resize_optimized(const ImageRgbaU8& target, const ImageRgbaU8& source, const IRect& scaleRegion) {
+	// Select a specialized loop when only one dimension, or none, needs to be resampled
+	// Compare dimensions
+	const bool sameWidth = image_getWidth(source) == scaleRegion.width() && scaleRegion.left() == 0;
+	const bool sameHeight = image_getHeight(source) == scaleRegion.height() && scaleRegion.top() == 0;
+	const bool samePackOrder = image_getPackOrderIndex(target) == image_getPackOrderIndex(source);
+	if (sameWidth && sameHeight) {
+		// No need to resize, just make a copy to save time
+		draw_copy(target, source);
+	} else if (sameWidth && (samePackOrder || BILINEAR)) {
+		// Only vertical interpolation
 
+		// Offset in source pixels per target pixel
+		int32_t offsetY = interpolationFullPixel * image_getHeight(source) / scaleRegion.height();
+		int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
+		if (BILINEAR) {
+			startY -= interpolationHalfPixel;
+		}
+		SafePointer<uint32_t> targetRow = image_getSafePointer<uint32_t>(target);
+		int32_t readY = startY;
+		for (int32_t y = 0; y < image_getHeight(target); y++) {
+			int32_t naturalY = readY;
+			if (naturalY < 0) { naturalY = 0; }
+			uint32_t sampleY = (uint32_t)naturalY;
+			uint32_t upperY = bitShiftRightImmediate<16>(sampleY);
+			uint32_t lowerY = upperY + 1;
+			if (upperY >= (uint32_t)image_getHeight(source)) upperY = image_getHeight(source) - 1; // Clamp both rows to the last source row.
+			if (lowerY >= (uint32_t)image_getHeight(source)) lowerY = image_getHeight(source) - 1;
+			if (BILINEAR) {
+				uint32_t lowerRatio = sampleY & interpolationWeightMask;
+				uint32_t upperRatio = 65536 - lowerRatio;
+				SafePointer<uint32_t> targetPixel = targetRow;
+				if (SIMD_ALIGNED) { // NOTE(review): Packed pixels are mixed and stored without repacking, so this branch assumes that source and target share pack order - confirm that the caller guarantees it.
+					SafePointer<const uint32_t> sourceRowUpper = image_getSafePointer<uint32_t>(source, upperY);
+					SafePointer<const uint32_t> sourceRowLower = image_getSafePointer<uint32_t>(source, lowerY);
+					for (int32_t x = 0; x < image_getWidth(target); x += 4) { // Four pixels at a time, so the last iteration can go past the width.
+						ALIGN16 U32x4 vUpperPackedColor = U32x4::readAligned(sourceRowUpper, "resize_optimized @ read vUpperPackedColor");
+						ALIGN16 U32x4 vLowerPackedColor = U32x4::readAligned(sourceRowLower, "resize_optimized @ read vLowerPackedColor");
+						ALIGN16 U32x4 vCenterColor = mixColorsUniform(vUpperPackedColor, vLowerPackedColor, lowerRatio);
+						vCenterColor.writeAligned(targetPixel, "resize_optimized @ write vCenterColor");
+						sourceRowUpper += 4;
+						sourceRowLower += 4;
+						targetPixel += 4;
+					}
+				} else {
+					for (int32_t x = 0; x < image_getWidth(target); x++) {
+						ALIGN16 U32x4 vUpperColor = READ_RGBAU8_CLAMP_SIMD(x, upperY);
+						ALIGN16 U32x4 vLowerColor = READ_RGBAU8_CLAMP_SIMD(x, lowerY);
+						ALIGN16 U32x4 vCenterColor = bitShiftRightImmediate<16>((vUpperColor * upperRatio) + (vLowerColor * lowerRatio));
+						ColorRgbaI32 finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
+						*targetPixel = image_saturateAndPack(target, finalColor); // Packing with target handles a different pack order in source.
+						targetPixel += 1;
+					}
+				}
+			} else {
+				SafePointer<const uint32_t> sourceRowUpper = image_getSafePointer<uint32_t>(source, upperY);
+				// Nearest neighbor sampling from a same width can be done using one copy per row
+				safeMemoryCopy(targetRow, sourceRowUpper, image_getWidth(source) * 4); // Four bytes per pixel, only reached with the same pack order.
+			}
+			targetRow.increaseBytes(image_getStride(target));
+			readY += offsetY;
+		}
+	} else if (sameHeight) {
+		// Only horizontal interpolation
 
-// -------------------------------- Image generation and filtering --------------------------------
+		// Offset in source pixels per target pixel
+		int32_t offsetX = interpolationFullPixel * image_getWidth(source) / scaleRegion.width();
+		int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
+		if (BILINEAR) {
+			startX -= interpolationHalfPixel;
+		}
+		SafePointer<uint32_t> targetRow = image_getSafePointer<uint32_t>(target);
+		for (int32_t y = 0; y < image_getHeight(target); y++) {
+			SafePointer<uint32_t> targetPixel = targetRow;
+			int32_t readX = startX;
+			for (int32_t x = 0; x < image_getWidth(target); x++) {
+				int32_t naturalX = readX;
+				if (naturalX < 0) { naturalX = 0; }
+				uint32_t sampleX = (uint32_t)naturalX;
+				uint32_t leftX = bitShiftRightImmediate<16>(sampleX);
+				uint32_t rightX = leftX + 1;
+				uint32_t rightRatio = sampleX & interpolationWeightMask;
+				uint32_t leftRatio = 65536 - rightRatio;
+				ColorRgbaI32 finalColor;
+				if (BILINEAR) {
+					ALIGN16 U32x4 vLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, y);
+					ALIGN16 U32x4 vRightColor = READ_RGBAU8_CLAMP_SIMD(rightX, y);
+					ALIGN16 U32x4 vCenterColor = bitShiftRightImmediate<16>((vLeftColor * leftRatio) + (vRightColor * rightRatio));
+					finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
+				} else {
+					finalColor = READ_RGBAU8_CLAMP(leftX, y);
+				}
+				*targetPixel = image_saturateAndPack(target, finalColor);
+				targetPixel += 1;
+				readX += offsetX;
+			}
+			targetRow.increaseBytes(image_getStride(target));
+		}
+	} else {
+		// Call the reference implementation
+		resize_reference<BILINEAR, ImageRgbaU8, uint32_t>(target, source, scaleRegion);
+	}
+}
 
+// Converting run-time flags into compile-time constants
+//   interpolate selects bilinear interpolation instead of nearest neighbor sampling.
+//   The SIMD_ALIGNED variant of resize_optimized is only selected when both of these are true:
+//     * Neither image is a sub-image, because whole vectors of four pixels are read and written, which can go past the visible width.
+//     * Both images have the same pack order, because the aligned bilinear loop stores mixed source pixels in the target without repacking.
+//   Selecting the SIMD variant for different pack orders would write the color channels in the wrong order.
+static void resize_aux(const ImageRgbaU8& target, const ImageRgbaU8& source, bool interpolate, const IRect& scaleRegion) {
+	// If writing to padding is allowed and both images are 16-byte aligned with the same pack order
+	const bool wholeImages = !(image_isSubImage(source) || image_isSubImage(target));
+	const bool samePackOrder = image_getPackOrderIndex(target) == image_getPackOrderIndex(source);
+	if (wholeImages && samePackOrder) {
+		// SIMD resize allowed
+		if (interpolate) {
+			resize_optimized<true, true>(target, source, scaleRegion);
+		} else {
+			resize_optimized<false, true>(target, source, scaleRegion);
+		}
+	} else {
+		// Non-SIMD resize, which unpacks from source and packs for target one pixel at a time
+		if (interpolate) {
+			resize_optimized<true, false>(target, source, scaleRegion);
+		} else {
+			resize_optimized<false, false>(target, source, scaleRegion);
+		}
+	}
+}
+
+// TODO: Optimize monochrome resizing.
+// Monochrome images always use the reference implementation, with interpolate converted into the BILINEAR template argument.
+static void resize_aux(const ImageU8& target, const ImageU8& source, bool interpolate, const IRect& scaleRegion) {
+	if (!interpolate) {
+		// Nearest neighbor
+		resize_reference<false, ImageU8, uint8_t>(target, source, scaleRegion);
+		return;
+	}
+	// Bilinear
+	resize_reference<true, ImageU8, uint8_t>(target, source, scaleRegion);
+}
+
+// Creating a new image of width x height pixels, with the same pack order as originalImage when applicable to the image format.
+static ImageRgbaU8 createWithSamePackOrder(const ImageRgbaU8& originalImage, int32_t width, int32_t height) {
+	return image_create_RgbaU8_native(width, height, image_getPackOrderIndex(originalImage));
+}
+static ImageU8 createWithSamePackOrder(const ImageU8& originalImage, int32_t width, int32_t height) { // originalImage is unused here, because no pack order is given when creating a monochrome image.
+	return image_create_U8(width, height);
+}
+
+template <typename IMAGE_TYPE>
+void resizeToTarget(IMAGE_TYPE& target, const IMAGE_TYPE& source, bool interpolate) {
+	IRect scaleRegion = image_getBound(target); // The source is stretched over the whole target.
+	if (image_getWidth(target) != image_getWidth(source) && image_getHeight(target) > image_getHeight(source)) {
+		// Upscaling is faster in two steps by both reusing the horizontal interpolation and vectorizing the vertical interpolation.
+		int tempWidth = image_getWidth(target); // The intermediate image has the target's width...
+		int tempHeight = image_getHeight(source); // ...and the source's height.
+		IRect tempScaleRegion = IRect(scaleRegion.left(), 0, scaleRegion.width(), image_getHeight(source));
+		// Create a temporary buffer using the target's pack order when the format has one.
+		IMAGE_TYPE newTempImage = createWithSamePackOrder(target, tempWidth, tempHeight);
+		resize_aux(newTempImage, source, interpolate, tempScaleRegion); // Horizontal step
+		resize_aux(target, newTempImage, interpolate, scaleRegion); // Vertical step
+	} else {
+		// Downscaling or only changing one dimension is faster in one step.
+		resize_aux(target, source, interpolate, scaleRegion);
+	}
+}
 
-static void mapRgbaU8(ImageRgbaU8Impl& target, const ImageGenRgbaU8& lambda, int startX, int startY) {
-	const int targetWidth = target.width;
-	const int targetHeight = target.height;
-	const int targetStride = target.stride;
-	SafePointer<Color4xU8> targetRow = imageInternal::getSafeData<Color4xU8>(target);
+// Reads the packed pixel at (x, y) in source using image_readPixel_clamp_packed.
+//   CONVERT_COLOR: Unpacks the color using source and packs it again using target before returning.
+//                  Otherwise the packed source value is returned as is.
+template <bool CONVERT_COLOR>
+static inline uint32_t convertRead(const ImageRgbaU8& target, const ImageRgbaU8& source, int x, int y) {
+	uint32_t packedSource = image_readPixel_clamp_packed(source, x, y);
+	if (!CONVERT_COLOR) {
+		return packedSource;
+	}
+	uint32_t packedTarget = image_truncateAndPack(target, image_unpack(source, packedSource));
+	return packedTarget;
+}
+
+// Used for drawing large pixels
+//   Writes packedColor to the columns pixelLeft to pixelRight - 1 on the rows pixelTop to pixelBottom - 1.
+//   No clipping is done here, so the caller is responsible for giving a region inside of target.
+static inline void fillRectangle(const ImageRgbaU8& target, int pixelLeft, int pixelRight, int pixelTop, int pixelBottom, const uint32_t& packedColor) {
+	SafePointer<uint32_t> rowStart = image_getSafePointer<uint32_t>(target, pixelTop) + pixelLeft;
+	int row = pixelTop;
+	while (row < pixelBottom) {
+		// Fill one row from left to right
+		SafePointer<uint32_t> cursor = rowStart;
+		int column = pixelLeft;
+		while (column < pixelRight) {
+			*cursor = packedColor;
+			cursor += 1;
+			column++;
+		}
+		// Continue from the same column on the next row
+		rowStart.increaseBytes(image_getStride(target));
+		row++;
+	}
+}
+
+// Reference implementation of block magnification, drawing each source pixel as a block of pixelWidth x pixelHeight target pixels.
+//   CONVERT_COLOR: Repack each color from the source's pack order to the target's pack order.
+//   clipWidth and clipHeight limit the written region from the target's upper left corner and are also limited to the target's dimensions.
+//   Blocks that only partially fit within the clipped region are cropped, so that nothing is written outside of it.
+//   Source coordinates are clamped, so the last source column and row are repeated if the clipped region needs more blocks than the source has pixels.
+//   Nothing is drawn when pixelWidth or pixelHeight is less than one.
+template <bool CONVERT_COLOR>
+static void blockMagnify_reference(
+  const ImageRgbaU8& target, const ImageRgbaU8& source,
+  int pixelWidth, int pixelHeight, int clipWidth, int clipHeight) {
+	// The loops would never advance using a block size below one.
+	if (pixelWidth < 1 || pixelHeight < 1) { return; }
+	int sourceY = 0;
+	int maxSourceX = image_getWidth(source) - 1;
+	int maxSourceY = image_getHeight(source) - 1;
+	if (clipWidth > image_getWidth(target)) { clipWidth = image_getWidth(target); }
+	if (clipHeight > image_getHeight(target)) { clipHeight = image_getHeight(target); }
+	for (int32_t pixelTop = 0; pixelTop < clipHeight; pixelTop += pixelHeight) {
+		// Crop the last row of blocks when clipHeight is not a multiple of pixelHeight
+		int32_t pixelBottom = pixelTop + pixelHeight;
+		if (pixelBottom > clipHeight) { pixelBottom = clipHeight; }
+		int sourceX = 0;
+		for (int32_t pixelLeft = 0; pixelLeft < clipWidth; pixelLeft += pixelWidth) {
+			// Crop the last column of blocks when clipWidth is not a multiple of pixelWidth
+			int32_t pixelRight = pixelLeft + pixelWidth;
+			if (pixelRight > clipWidth) { pixelRight = clipWidth; }
+			// Read the pixel once
+			uint32_t sourceColor = convertRead<CONVERT_COLOR>(target, source, sourceX, sourceY);
+			// Write to all target pixels within the block
+			fillRectangle(target, pixelLeft, pixelRight, pixelTop, pixelBottom, sourceColor);
+			// Iterate and clamp the read coordinate
+			sourceX++;
+			if (sourceX > maxSourceX) { sourceX = maxSourceX; }
+		}
+		// Iterate and clamp the read coordinate
+		sourceY++;
+		if (sourceY > maxSourceY) { sourceY = maxSourceY; }
+	}
+}
+
+// Draws each source pixel as a block of 2 x 2 target pixels, without converting colors.
+// Pre-condition:
+//   * The source and target images have the same pack order
+//   * Both source and target are 16-byte aligned, but do not have to own their padding
+//   * clipWidth % 2 == 0
+//   * clipHeight % 2 == 0
+static void blockMagnify_2x2(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
+	SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
+	SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
+	SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
+	int blockTargetStride = image_getStride(target) * 2; // Two target rows are completed for each source row.
+	for (int upperTargetY = 0; upperTargetY + 2 <= clipHeight; upperTargetY+=2) {
+		// Carriage return
+		SafePointer<const uint32_t> sourcePixel = sourceRow;
+		SafePointer<uint32_t> targetPixelA = targetRowA;
+		SafePointer<uint32_t> targetPixelB = targetRowB;
+		// Write whole blocks of 2 x 2 pixels
+		int writeLeftX = 0;
+		while (writeLeftX + 2 <= clipWidth) {
+			// Read one pixel at a time
+			uint32_t scalarValue = *sourcePixel;
+			sourcePixel += 1;
+			// Write to a whole block of pixels
+			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue;
+			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue;
+			targetPixelA += 2;
+			targetPixelB += 2;
+			// Count
+			writeLeftX += 2;
+		}
+		// Line feed
+		sourceRow.increaseBytes(image_getStride(source));
+		targetRowA.increaseBytes(blockTargetStride);
+		targetRowB.increaseBytes(blockTargetStride);
+	}
+}
+
+// Draws each source pixel as a block of 3 x 3 target pixels, without converting colors.
+// Pre-condition:
+//   * The source and target images have the same pack order
+//   * Both source and target are 16-byte aligned, but do not have to own their padding
+//   * clipWidth % 3 == 0
+//   * clipHeight % 3 == 0
+static void blockMagnify_3x3(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
+	SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
+	SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
+	SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
+	SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
+	int blockTargetStride = image_getStride(target) * 3; // Three target rows are completed for each source row.
+	for (int upperTargetY = 0; upperTargetY + 3 <= clipHeight; upperTargetY+=3) {
+		// Carriage return
+		SafePointer<const uint32_t> sourcePixel = sourceRow;
+		SafePointer<uint32_t> targetPixelA = targetRowA;
+		SafePointer<uint32_t> targetPixelB = targetRowB;
+		SafePointer<uint32_t> targetPixelC = targetRowC;
+		int writeLeftX = 0;
+		while (writeLeftX + 3 <= clipWidth) {
+			// Read one pixel at a time
+			uint32_t scalarValue = *sourcePixel;
+			sourcePixel += 1;
+			// Write to a whole block of pixels
+			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue;
+			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue;
+			targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue;
+			targetPixelA += 3;
+			targetPixelB += 3;
+			targetPixelC += 3;
+			// Count
+			writeLeftX += 3;
+		}
+		// Line feed
+		sourceRow.increaseBytes(image_getStride(source));
+		targetRowA.increaseBytes(blockTargetStride);
+		targetRowB.increaseBytes(blockTargetStride);
+		targetRowC.increaseBytes(blockTargetStride);
+	}
+}
+
+// Pre-condition for magnifying each source pixel into a block of 4x4 target pixels:
+//   * The source and target images have the same pack order
+//   * Both source and target are 16-byte aligned, but do not have to own their padding
+//   * clipWidth % 4 == 0
+//   * clipHeight % 4 == 0
+static void blockMagnify_4x4(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
+	SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
+	SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
+	SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
+	SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
+	SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
+	int quadTargetStride = image_getStride(target) * 4;
+	for (int upperTargetY = 0; upperTargetY + 4 <= clipHeight; upperTargetY+=4) {
+		// Carriage return, restarting the pixel iterators at the start of the current source row and target rows
+		SafePointer<const uint32_t> sourcePixel = sourceRow;
+		SafePointer<uint32_t> targetPixelA = targetRowA;
+		SafePointer<uint32_t> targetPixelB = targetRowB;
+		SafePointer<uint32_t> targetPixelC = targetRowC;
+		SafePointer<uint32_t> targetPixelD = targetRowD;
+		int writeLeftX = 0;
+		while (writeLeftX + 4 <= clipWidth) {
+			// Read one source pixel at a time
+			uint32_t scalarValue = *sourcePixel;
+			sourcePixel += 1;
+			// Convert scalar to SIMD vector of 4 repeated pixels
+			ALIGN16 U32x4 sourcePixels = U32x4(scalarValue);
+			// Write to 4x4 pixels using 4 aligned SIMD writes, one for each target row of the block
+			sourcePixels.writeAligned(targetPixelA, "blockMagnify_4x4 @ write A");
+			sourcePixels.writeAligned(targetPixelB, "blockMagnify_4x4 @ write B");
+			sourcePixels.writeAligned(targetPixelC, "blockMagnify_4x4 @ write C");
+			sourcePixels.writeAligned(targetPixelD, "blockMagnify_4x4 @ write D");
+			targetPixelA += 4;
+			targetPixelB += 4;
+			targetPixelC += 4;
+			targetPixelD += 4;
+			// Count the target pixels written along the row
+			writeLeftX += 4;
+		}
+		// Line feed, moving one row down in source and four rows down in target
+		sourceRow.increaseBytes(image_getStride(source));
+		targetRowA.increaseBytes(quadTargetStride);
+		targetRowB.increaseBytes(quadTargetStride);
+		targetRowC.increaseBytes(quadTargetStride);
+		targetRowD.increaseBytes(quadTargetStride);
+	}
+}
+
+// Pre-condition for magnifying each source pixel into a block of 5x5 target pixels:
+//   * The source and target images have the same pack order
+//   * Both source and target are 16-byte aligned, but do not have to own their padding
+//   * clipWidth % 5 == 0
+//   * clipHeight % 5 == 0
+static void blockMagnify_5x5(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
+	SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
+	SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
+	SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
+	SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
+	SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
+	SafePointer<uint32_t> targetRowE = image_getSafePointer<uint32_t>(target, 4);
+	int blockTargetStride = image_getStride(target) * 5;
+	for (int upperTargetY = 0; upperTargetY + 5 <= clipHeight; upperTargetY+=5) {
+		// Carriage return, restarting the pixel iterators at the start of the current source row and target rows
+		SafePointer<const uint32_t> sourcePixel = sourceRow;
+		SafePointer<uint32_t> targetPixelA = targetRowA;
+		SafePointer<uint32_t> targetPixelB = targetRowB;
+		SafePointer<uint32_t> targetPixelC = targetRowC;
+		SafePointer<uint32_t> targetPixelD = targetRowD;
+		SafePointer<uint32_t> targetPixelE = targetRowE;
+		int writeLeftX = 0;
+		while (writeLeftX + 5 <= clipWidth) {
+			// Read one source pixel at a time
+			uint32_t scalarValue = *sourcePixel;
+			sourcePixel += 1;
+			// Write the same value to a whole block of 5x5 target pixels
+			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue;
+			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue;
+			targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue;
+			targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue;
+			targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue;
+			targetPixelA += 5;
+			targetPixelB += 5;
+			targetPixelC += 5;
+			targetPixelD += 5;
+			targetPixelE += 5;
+			// Count the target pixels written along the row
+			writeLeftX += 5;
+		}
+		// Line feed, moving one row down in source and five rows down in target
+		sourceRow.increaseBytes(image_getStride(source));
+		targetRowA.increaseBytes(blockTargetStride);
+		targetRowB.increaseBytes(blockTargetStride);
+		targetRowC.increaseBytes(blockTargetStride);
+		targetRowD.increaseBytes(blockTargetStride);
+		targetRowE.increaseBytes(blockTargetStride);
+	}
+}
+
+// Pre-condition for magnifying each source pixel into a block of 6x6 target pixels:
+//   * The source and target images have the same pack order
+//   * Both source and target are 16-byte aligned, but do not have to own their padding
+//   * clipWidth % 6 == 0
+//   * clipHeight % 6 == 0
+static void blockMagnify_6x6(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
+	SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
+	SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
+	SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
+	SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
+	SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
+	SafePointer<uint32_t> targetRowE = image_getSafePointer<uint32_t>(target, 4);
+	SafePointer<uint32_t> targetRowF = image_getSafePointer<uint32_t>(target, 5);
+	int blockTargetStride = image_getStride(target) * 6;
+	for (int upperTargetY = 0; upperTargetY + 6 <= clipHeight; upperTargetY+=6) {
+		// Carriage return, restarting the pixel iterators at the start of the current source row and target rows
+		SafePointer<const uint32_t> sourcePixel = sourceRow;
+		SafePointer<uint32_t> targetPixelA = targetRowA;
+		SafePointer<uint32_t> targetPixelB = targetRowB;
+		SafePointer<uint32_t> targetPixelC = targetRowC;
+		SafePointer<uint32_t> targetPixelD = targetRowD;
+		SafePointer<uint32_t> targetPixelE = targetRowE;
+		SafePointer<uint32_t> targetPixelF = targetRowF;
+		int writeLeftX = 0;
+		while (writeLeftX + 6 <= clipWidth) {
+			// Read one source pixel at a time
+			uint32_t scalarValue = *sourcePixel;
+			sourcePixel += 1;
+			// Write the same value to a whole block of 6x6 target pixels
+			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue;
+			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue;
+			targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue;
+			targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue;
+			targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue;
+			targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue;
+			targetPixelA += 6;
+			targetPixelB += 6;
+			targetPixelC += 6;
+			targetPixelD += 6;
+			targetPixelE += 6;
+			targetPixelF += 6;
+			// Count the target pixels written along the row
+			writeLeftX += 6;
+		}
+		// Line feed, moving one row down in source and six rows down in target
+		sourceRow.increaseBytes(image_getStride(source));
+		targetRowA.increaseBytes(blockTargetStride);
+		targetRowB.increaseBytes(blockTargetStride);
+		targetRowC.increaseBytes(blockTargetStride);
+		targetRowD.increaseBytes(blockTargetStride);
+		targetRowE.increaseBytes(blockTargetStride);
+		targetRowF.increaseBytes(blockTargetStride);
+	}
+}
+
+// Pre-condition for magnifying each source pixel into a block of 7x7 target pixels:
+//   * The source and target images have the same pack order
+//   * Both source and target are 16-byte aligned, but do not have to own their padding
+//   * clipWidth % 7 == 0
+//   * clipHeight % 7 == 0
+static void blockMagnify_7x7(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
+	SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
+	SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
+	SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
+	SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
+	SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
+	SafePointer<uint32_t> targetRowE = image_getSafePointer<uint32_t>(target, 4);
+	SafePointer<uint32_t> targetRowF = image_getSafePointer<uint32_t>(target, 5);
+	SafePointer<uint32_t> targetRowG = image_getSafePointer<uint32_t>(target, 6);
+	int blockTargetStride = image_getStride(target) * 7;
+	for (int upperTargetY = 0; upperTargetY + 7 <= clipHeight; upperTargetY+=7) {
+		// Carriage return, restarting the pixel iterators at the start of the current source row and target rows
+		SafePointer<const uint32_t> sourcePixel = sourceRow;
+		SafePointer<uint32_t> targetPixelA = targetRowA;
+		SafePointer<uint32_t> targetPixelB = targetRowB;
+		SafePointer<uint32_t> targetPixelC = targetRowC;
+		SafePointer<uint32_t> targetPixelD = targetRowD;
+		SafePointer<uint32_t> targetPixelE = targetRowE;
+		SafePointer<uint32_t> targetPixelF = targetRowF;
+		SafePointer<uint32_t> targetPixelG = targetRowG;
+		int writeLeftX = 0;
+		while (writeLeftX + 7 <= clipWidth) {
+			// Read one source pixel at a time
+			uint32_t scalarValue = *sourcePixel;
+			sourcePixel += 1;
+			// Write the same value to a whole block of 7x7 target pixels
+			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue;
+			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue;
+			targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue;
+			targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue;
+			targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue;
+			targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue;
+			targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue;
+			targetPixelA += 7;
+			targetPixelB += 7;
+			targetPixelC += 7;
+			targetPixelD += 7;
+			targetPixelE += 7;
+			targetPixelF += 7;
+			targetPixelG += 7;
+			// Count the target pixels written along the row
+			writeLeftX += 7;
+		}
+		// Line feed, moving one row down in source and seven rows down in target
+		sourceRow.increaseBytes(image_getStride(source));
+		targetRowA.increaseBytes(blockTargetStride);
+		targetRowB.increaseBytes(blockTargetStride);
+		targetRowC.increaseBytes(blockTargetStride);
+		targetRowD.increaseBytes(blockTargetStride);
+		targetRowE.increaseBytes(blockTargetStride);
+		targetRowF.increaseBytes(blockTargetStride);
+		targetRowG.increaseBytes(blockTargetStride);
+	}
+}
+
+// Pre-condition for magnifying each source pixel into a block of 8x8 target pixels:
+//   * The source and target images have the same pack order
+//   * Both source and target are 16-byte aligned, but do not have to own their padding
+//   * clipWidth % 8 == 0
+//   * clipHeight % 8 == 0
+static void blockMagnify_8x8(const ImageRgbaU8& target, const ImageRgbaU8& source, int clipWidth, int clipHeight) {
+	SafePointer<const uint32_t> sourceRow = image_getSafePointer<uint32_t>(source);
+	SafePointer<uint32_t> targetRowA = image_getSafePointer<uint32_t>(target, 0);
+	SafePointer<uint32_t> targetRowB = image_getSafePointer<uint32_t>(target, 1);
+	SafePointer<uint32_t> targetRowC = image_getSafePointer<uint32_t>(target, 2);
+	SafePointer<uint32_t> targetRowD = image_getSafePointer<uint32_t>(target, 3);
+	SafePointer<uint32_t> targetRowE = image_getSafePointer<uint32_t>(target, 4);
+	SafePointer<uint32_t> targetRowF = image_getSafePointer<uint32_t>(target, 5);
+	SafePointer<uint32_t> targetRowG = image_getSafePointer<uint32_t>(target, 6);
+	SafePointer<uint32_t> targetRowH = image_getSafePointer<uint32_t>(target, 7);
+	int blockTargetStride = image_getStride(target) * 8;
+	for (int upperTargetY = 0; upperTargetY + 8 <= clipHeight; upperTargetY+=8) {
+		// Carriage return, restarting the pixel iterators at the start of the current source row and target rows
+		SafePointer<const uint32_t> sourcePixel = sourceRow;
+		SafePointer<uint32_t> targetPixelA = targetRowA;
+		SafePointer<uint32_t> targetPixelB = targetRowB;
+		SafePointer<uint32_t> targetPixelC = targetRowC;
+		SafePointer<uint32_t> targetPixelD = targetRowD;
+		SafePointer<uint32_t> targetPixelE = targetRowE;
+		SafePointer<uint32_t> targetPixelF = targetRowF;
+		SafePointer<uint32_t> targetPixelG = targetRowG;
+		SafePointer<uint32_t> targetPixelH = targetRowH;
+		int writeLeftX = 0;
+		while (writeLeftX + 8 <= clipWidth) {
+			// Read one source pixel at a time
+			uint32_t scalarValue = *sourcePixel;
+			sourcePixel += 1;
+			// Write the same value to a whole block of 8x8 target pixels
+			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue; targetPixelA[7] = scalarValue;
+			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue; targetPixelB[7] = scalarValue;
+			targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue; targetPixelC[7] = scalarValue;
+			targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue; targetPixelD[7] = scalarValue;
+			targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue; targetPixelE[7] = scalarValue;
+			targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue; targetPixelF[7] = scalarValue;
+			targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue; targetPixelG[7] = scalarValue;
+			targetPixelH[0] = scalarValue; targetPixelH[1] = scalarValue; targetPixelH[2] = scalarValue; targetPixelH[3] = scalarValue; targetPixelH[4] = scalarValue; targetPixelH[5] = scalarValue; targetPixelH[6] = scalarValue; targetPixelH[7] = scalarValue;
+			targetPixelA += 8;
+			targetPixelB += 8;
+			targetPixelC += 8;
+			targetPixelD += 8;
+			targetPixelE += 8;
+			targetPixelF += 8;
+			targetPixelG += 8;
+			targetPixelH += 8;
+			// Count the target pixels written along the row
+			writeLeftX += 8;
+		}
+		// Line feed, moving one row down in source and eight rows down in target
+		sourceRow.increaseBytes(image_getStride(source));
+		targetRowA.increaseBytes(blockTargetStride);
+		targetRowB.increaseBytes(blockTargetStride);
+		targetRowC.increaseBytes(blockTargetStride);
+		targetRowD.increaseBytes(blockTargetStride);
+		targetRowE.increaseBytes(blockTargetStride);
+		targetRowF.increaseBytes(blockTargetStride);
+		targetRowG.increaseBytes(blockTargetStride);
+		targetRowH.increaseBytes(blockTargetStride);
+	}
+}
+
+// Clears everything in target outside of the upper left excludedWidth x excludedHeight pixels, using color 0.
+static void blackEdges(const ImageRgbaU8& target, int excludedWidth, int excludedHeight) {
+	const int fullWidth = image_getWidth(target);
+	const int fullHeight = image_getHeight(target);
+	// The right side, next to the excluded region
+	const IRect rightSide(excludedWidth, 0, fullWidth - excludedWidth, excludedHeight);
+	// The bottom, across the whole width so that the corner is included
+	const IRect bottomAndCorner(0, excludedHeight, fullWidth, fullHeight - excludedHeight);
+	draw_rectangle(target, rightSide, 0);
+	draw_rectangle(target, bottomAndCorner, 0);
+}
+
+// Magnifies source into target, using pixelWidth x pixelHeight target pixels for each source pixel.
+//   Block dimensions smaller than one are treated as one.
+//   The part of target that is not covered by whole blocks from source is cleared by blackEdges.
+static void imageImpl_blockMagnify(const ImageRgbaU8& target, const ImageRgbaU8& source, int pixelWidth, int pixelHeight) {
+	pixelWidth = (pixelWidth < 1) ? 1 : pixelWidth;
+	pixelHeight = (pixelHeight < 1) ? 1 : pixelHeight;
+	// The upper left region of target that can be filled with whole blocks taken from source
+	const int coveredWidth = roundDown(min(image_getWidth(target), image_getWidth(source) * pixelWidth), pixelWidth);
+	const int coveredHeight = roundDown(min(image_getHeight(target), image_getHeight(source) * pixelHeight), pixelHeight);
+	if (image_getPackOrderIndex(target) != image_getPackOrderIndex(source)) {
+		// Different pack orders, handled by the reference implementation (presumably converting each pixel when given true)
+		blockMagnify_reference<true>(target, source, pixelWidth, pixelHeight, coveredWidth, coveredHeight);
+	} else if (image_isSubImage(source) || image_isSubImage(target) || pixelWidth != pixelHeight) {
+		// The specialized loops are only selected for square blocks in images that are not sub-images
+		blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, coveredWidth, coveredHeight);
+	} else {
+		// Square blocks of 2x2 to 8x8 pixels have specialized loops
+		switch (pixelWidth) {
+			case 2: blockMagnify_2x2(target, source, coveredWidth, coveredHeight); break;
+			case 3: blockMagnify_3x3(target, source, coveredWidth, coveredHeight); break;
+			case 4: blockMagnify_4x4(target, source, coveredWidth, coveredHeight); break;
+			case 5: blockMagnify_5x5(target, source, coveredWidth, coveredHeight); break;
+			case 6: blockMagnify_6x6(target, source, coveredWidth, coveredHeight); break;
+			case 7: blockMagnify_7x7(target, source, coveredWidth, coveredHeight); break;
+			case 8: blockMagnify_8x8(target, source, coveredWidth, coveredHeight); break;
+			default: blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, coveredWidth, coveredHeight); break;
+		}
+	}
+	blackEdges(target, coveredWidth, coveredHeight);
+}
+
+static void mapRgbaU8(const ImageRgbaU8& target, const ImageGenRgbaU8& lambda, int startX, int startY) {
+	const int targetWidth = image_getWidth(target);
+	const int targetHeight = image_getHeight(target);
+	const int targetStride = image_getStride(target);
+	SafePointer<uint32_t> targetRow = image_getSafePointer<uint32_t>(target);
 	for (int y = startY; y < targetHeight + startY; y++) {
-		SafePointer<Color4xU8> targetPixel = targetRow;
+		SafePointer<uint32_t> targetPixel = targetRow;
 		for (int x = startX; x < targetWidth + startX; x++) {
-			*targetPixel = target.packRgba(lambda(x, y).saturate());
+			*targetPixel = image_saturateAndPack(target, lambda(x, y));
 			targetPixel += 1;
 		}
 		targetRow.increaseBytes(targetStride);
 	}
 }
-void dsr::filter_mapRgbaU8(ImageRgbaU8 target, const ImageGenRgbaU8& lambda, int startX, int startY) {
-	if (target.get() != nullptr) {
-		mapRgbaU8(*target, lambda, startX, startY);
+void filter_mapRgbaU8(const ImageRgbaU8 target, const ImageGenRgbaU8& lambda, int startX, int startY) {
+	if (image_exists(target)) {
+		mapRgbaU8(target, lambda, startX, startY);
 	}
 }
-OrderedImageRgbaU8 dsr::filter_generateRgbaU8(int width, int height, const ImageGenRgbaU8& lambda, int startX, int startY) {
+OrderedImageRgbaU8 filter_generateRgbaU8(int width, int height, const ImageGenRgbaU8& lambda, int startX, int startY) {
 	OrderedImageRgbaU8 result = image_create_RgbaU8(width, height);
 	filter_mapRgbaU8(result, lambda, startX, startY);
 	return result;
 }
 
 template <typename IMAGE_TYPE, typename PIXEL_TYPE, int MIN_VALUE, int MAX_VALUE>
-static void mapMonochrome(IMAGE_TYPE& target, const ImageGenI32& lambda, int startX, int startY) {
-	const int targetWidth = target.width;
-	const int targetHeight = target.height;
-	const int targetStride = target.stride;
-	SafePointer<PIXEL_TYPE> targetRow = imageInternal::getSafeData<PIXEL_TYPE>(target);
+static void mapMonochrome(const IMAGE_TYPE& target, const ImageGenI32& lambda, int startX, int startY) {
+	const int targetWidth = image_getWidth(target);
+	const int targetHeight = image_getHeight(target);
+	const int targetStride = image_getStride(target);
+	SafePointer<PIXEL_TYPE> targetRow = image_getSafePointer<PIXEL_TYPE>(target);
 	for (int y = startY; y < targetHeight + startY; y++) {
 		SafePointer<PIXEL_TYPE> targetPixel = targetRow;
 		for (int x = startX; x < targetWidth + startX; x++) {
@@ -81,32 +799,32 @@ static void mapMonochrome(IMAGE_TYPE& target, const ImageGenI32& lambda, int sta
 		targetRow.increaseBytes(targetStride);
 	}
 }
-void dsr::filter_mapU8(ImageU8 target, const ImageGenI32& lambda, int startX, int startY) {
-	if (target.get() != nullptr) {
-		mapMonochrome<ImageU8Impl, uint8_t, 0, 255>(*target, lambda, startX, startY);
+void filter_mapU8(const ImageU8 target, const ImageGenI32& lambda, int startX, int startY) {
+	if (image_exists(target)) {
+		mapMonochrome<ImageU8, uint8_t, 0, 255>(target, lambda, startX, startY);
 	}
 }
-AlignedImageU8 dsr::filter_generateU8(int width, int height, const ImageGenI32& lambda, int startX, int startY) {
+AlignedImageU8 filter_generateU8(int width, int height, const ImageGenI32& lambda, int startX, int startY) {
 	AlignedImageU8 result = image_create_U8(width, height);
 	filter_mapU8(result, lambda, startX, startY);
 	return result;
 }
-void dsr::filter_mapU16(ImageU16 target, const ImageGenI32& lambda, int startX, int startY) {
-	if (target.get() != nullptr) {
-		mapMonochrome<ImageU16Impl, uint16_t, 0, 65535>(*target, lambda, startX, startY);
+void filter_mapU16(const ImageU16 target, const ImageGenI32& lambda, int startX, int startY) {
+	if (image_exists(target)) {
+		mapMonochrome<ImageU16, uint16_t, 0, 65535>(target, lambda, startX, startY);
 	}
 }
-AlignedImageU16 dsr::filter_generateU16(int width, int height, const ImageGenI32& lambda, int startX, int startY) {
+AlignedImageU16 filter_generateU16(int width, int height, const ImageGenI32& lambda, int startX, int startY) {
 	AlignedImageU16 result = image_create_U16(width, height);
 	filter_mapU16(result, lambda, startX, startY);
 	return result;
 }
 
-static void mapF32(ImageF32Impl& target, const ImageGenF32& lambda, int startX, int startY) {
-	const int targetWidth = target.width;
-	const int targetHeight = target.height;
-	const int targetStride = target.stride;
-	SafePointer<float> targetRow = imageInternal::getSafeData<float>(target);
+static void mapF32(const ImageF32& target, const ImageGenF32& lambda, int startX, int startY) {
+	const int targetWidth = image_getWidth(target);
+	const int targetHeight = image_getHeight(target);
+	const int targetStride = image_getStride(target);
+	SafePointer<float> targetRow = image_getSafePointer<float>(target);
 	for (int y = startY; y < targetHeight + startY; y++) {
 		SafePointer<float> targetPixel = targetRow;
 		for (int x = startX; x < targetWidth + startX; x++) {
@@ -116,12 +834,12 @@ static void mapF32(ImageF32Impl& target, const ImageGenF32& lambda, int startX,
 		targetRow.increaseBytes(targetStride);
 	}
 }
-void dsr::filter_mapF32(ImageF32 target, const ImageGenF32& lambda, int startX, int startY) {
-	if (target.get() != nullptr) {
-		mapF32(*target, lambda, startX, startY);
+void filter_mapF32(const ImageF32 target, const ImageGenF32& lambda, int startX, int startY) {
+	if (image_exists(target)) {
+		mapF32(target, lambda, startX, startY);
 	}
 }
-AlignedImageF32 dsr::filter_generateF32(int width, int height, const ImageGenF32& lambda, int startX, int startY) {
+AlignedImageF32 filter_generateF32(int width, int height, const ImageGenF32& lambda, int startX, int startY) {
 	AlignedImageF32 result = image_create_F32(width, height);
 	filter_mapF32(result, lambda, startX, startY);
 	return result;
@@ -131,29 +849,30 @@ AlignedImageF32 dsr::filter_generateF32(int width, int height, const ImageGenF32
 // -------------------------------- Resize --------------------------------
 
 
-OrderedImageRgbaU8 dsr::filter_resize(const ImageRgbaU8 &source, Sampler interpolation, int32_t newWidth, int32_t newHeight) {
-	if (source.get() != nullptr) {
+OrderedImageRgbaU8 filter_resize(const ImageRgbaU8 &source, Sampler interpolation, int32_t newWidth, int32_t newHeight) {
+	if (image_exists(source)) {
 		OrderedImageRgbaU8 resultImage = image_create_RgbaU8(newWidth, newHeight);
-		imageImpl_resizeToTarget(*resultImage, *source, interpolation == Sampler::Linear);
+		resizeToTarget<ImageRgbaU8>(resultImage, source, interpolation == Sampler::Linear);
 		return resultImage;
 	} else {
 		return OrderedImageRgbaU8(); // Null gives null
 	}
 }
 
-AlignedImageU8 dsr::filter_resize(const ImageU8 &source, Sampler interpolation, int32_t newWidth, int32_t newHeight) {
-	if (source.get() != nullptr) {
+AlignedImageU8 filter_resize(const ImageU8 &source, Sampler interpolation, int32_t newWidth, int32_t newHeight) {
+	if (image_exists(source)) {
 		AlignedImageU8 resultImage = image_create_U8(newWidth, newHeight);
-		imageImpl_resizeToTarget(*resultImage, *source, interpolation == Sampler::Linear);
+		resizeToTarget<ImageU8>(resultImage, source, interpolation == Sampler::Linear);
 		return resultImage;
 	} else {
 		return AlignedImageU8(); // Null gives null
 	}
 }
 
-void dsr::filter_blockMagnify(ImageRgbaU8 &target, const ImageRgbaU8& source, int pixelWidth, int pixelHeight) {
-	if (target.get() != nullptr && source.get() != nullptr) {
-		imageImpl_blockMagnify(*target, *source, pixelWidth, pixelHeight);
+void filter_blockMagnify(const ImageRgbaU8 &target, const ImageRgbaU8& source, int pixelWidth, int pixelHeight) {
+	if (image_exists(target) && image_exists(source)) {
+		imageImpl_blockMagnify(target, source, pixelWidth, pixelHeight);
 	}
 }
 
+}

+ 18 - 12
Source/DFPSR/api/filterAPI.h

@@ -1,7 +1,7 @@
 
 // zlib open source license
 //
-// Copyright (c) 2017 to 2020 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -25,11 +25,17 @@
 #ifndef DFPSR_API_FILTER
 #define DFPSR_API_FILTER
 
-#include "types.h"
+#include "../image/Image.h"
 #include <functional>
 
 namespace dsr {
 
+// Sampling modes, given to filter_resize to select how pixels are interpolated
+	enum class Sampler {
+		Nearest, // Taking the nearest value to create square pixels.
+		Linear   // Taking a linear interpolation of the nearest pixels, selected by filter_resize when given Sampler::Linear.
+	};
+
 // Image resizing
 	// Create a stretched version of the source image with the given dimensions and default RGBA pack order.
 	OrderedImageRgbaU8 filter_resize(const ImageRgbaU8 &source, Sampler interpolation, int32_t newWidth, int32_t newHeight);
@@ -39,21 +45,21 @@ namespace dsr {
 	//   If source is too small, transparent black pixels (0, 0, 0, 0) fill the outside.
 	//   If source is too large, partial pixels will be cropped away completely and replaced by the black border.
 	//   Letting the images have the same pack order and be aligned to 16 bytes will increase speed.
-	void filter_blockMagnify(ImageRgbaU8 &target, const ImageRgbaU8 &source, int pixelWidth, int pixelHeight);
+	void filter_blockMagnify(const ImageRgbaU8 &target, const ImageRgbaU8 &source, int pixelWidth, int pixelHeight);
 
 // Image generation and filtering
-//   Create new images from Lambda expressions.
-//   Useful for pre-generating images for reference implementations, fast prototyping and texture generation.
+//   Create images from Lambda expressions when speed is not critical.
+//     Capture images within [] and sample pixels from them using image_readPixel_border, image_readPixel_clamp and image_readPixel_tile.
 	// Lambda expressions for generating integer images.
-	using ImageGenRgbaU8 = std::function<ColorRgbaI32(int, int)>;
-	using ImageGenI32 = std::function<int32_t(int, int)>; // Used for U8 and U16 images using different saturations.
-	using ImageGenF32 = std::function<float(int, int)>;
+	using ImageGenRgbaU8 = std::function<ColorRgbaI32(int x, int y)>;
+	using ImageGenI32 = std::function<int32_t(int x, int y)>; // Used for U8 and U16 images using different saturations.
+	using ImageGenF32 = std::function<float(int x, int y)>;
 	// In-place image generation to an existing image.
 	//   The pixel at the upper left corner gets (startX, startY) as x and y arguments to the function.
-	void filter_mapRgbaU8(ImageRgbaU8 target, const ImageGenRgbaU8& lambda, int startX = 0, int startY = 0);
-	void filter_mapU8(ImageU8 target, const ImageGenI32& lambda, int startX = 0, int startY = 0);
-	void filter_mapU16(ImageU16 target, const ImageGenI32& lambda, int startX = 0, int startY = 0);
-	void filter_mapF32(ImageF32 target, const ImageGenF32& lambda, int startX = 0, int startY = 0);
+	void filter_mapRgbaU8(const ImageRgbaU8 target, const ImageGenRgbaU8& lambda, int startX = 0, int startY = 0);
+	void filter_mapU8(const ImageU8 target, const ImageGenI32& lambda, int startX = 0, int startY = 0);
+	void filter_mapU16(const ImageU16 target, const ImageGenI32& lambda, int startX = 0, int startY = 0);
+	void filter_mapF32(const ImageF32 target, const ImageGenF32& lambda, int startX = 0, int startY = 0);
 	// A simpler image generation that constructs the image as a result.
 	// Example:
 	//     int width = 64;

+ 0 - 4
Source/DFPSR/api/fontAPI.cpp

@@ -28,10 +28,6 @@
 
 namespace dsr {
 
-bool font_exists(const RasterFont font) {
-	return font.get() != nullptr;
-}
-
 static const RasterFont defaultFont = RasterFontImpl::createLatinOne(U"UbuntuMono", image_fromAscii(defaultFontAscii));
 
 RasterFont font_getDefault() {

+ 7 - 3
Source/DFPSR/api/fontAPI.h

@@ -24,10 +24,14 @@
 #ifndef DFPSR_API_FONT
 #define DFPSR_API_FONT
 
-#include "types.h"
-#include "../api/stringAPI.h"
+#include "../image/Image.h"
+#include "stringAPI.h"
 
 namespace dsr {
+	// A handle to a raster font
+	class RasterFontImpl;
+	using RasterFont = Handle<RasterFontImpl>;
+
 	// Get a handle to the default font
 	RasterFont font_getDefault();
 	// Create a new font mapped to the Latin-1 character sub-set using a fixed size grid of 16 x 16 sub-images
@@ -42,7 +46,7 @@ namespace dsr {
 	//     image_getHeight(atlas) >= 16
 	RasterFont font_createLatinOne(const String& name, const ImageU8& atlas);
 	// Post-condition: Returns true iff font exists
-	bool font_exists(const RasterFont font);
+	inline bool font_exists(const RasterFont font) { return font.isNotNull(); }
 	// Pre-condition: font must exist
 	// Post-condition: Returns font's name, as given on construction
 	String font_getName(const RasterFont font);

+ 21 - 21
Source/DFPSR/api/guiAPI.cpp

@@ -22,7 +22,7 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-#define DFPSR_INTERNAL_ACCESS
+#define DSR_INTERNAL_ACCESS
 
 #include "guiAPI.h"
 #include "timeAPI.h"
@@ -32,31 +32,31 @@
 using namespace dsr;
 
 // To be implemented outside of the core framework
-std::shared_ptr<dsr::BackendWindow> createBackendWindow(const dsr::String& title, int width, int height);
+Handle<dsr::BackendWindow> createBackendWindow(const dsr::String& title, int width, int height);
 
-#define MUST_EXIST(OBJECT, METHOD) if (OBJECT.get() == nullptr) { throwError("The " #OBJECT " handle was null in " #METHOD "\n"); }
+#define MUST_EXIST(OBJECT, METHOD) if (OBJECT.isNull()) { throwError("The " #OBJECT " handle was null in " #METHOD "\n"); }
 
 Window dsr::window_create(const String& title, int32_t width, int32_t height) {
 	if (width < 1) { width = 1; }
 	if (height < 1) { height = 1; }
-	std::shared_ptr<dsr::BackendWindow> backend = createBackendWindow(title, width, height);
-	if (backend.get() != nullptr) {
-		return std::make_shared<DsrWindow>(backend);
+	Handle<dsr::BackendWindow> backend = createBackendWindow(title, width, height);
+	if (backend.isNotNull()) {
+		return handle_create<DsrWindow>(backend).setName("DSR Window");
 	} else {
-		return std::shared_ptr<DsrWindow>();
+		return Handle<DsrWindow>();
 	}
 }
 
 Window dsr::window_create_fullscreen(const String& title) {
-	return std::make_shared<DsrWindow>(createBackendWindow(title, 0, 0));
+	return handle_create<DsrWindow>(createBackendWindow(title, 0, 0)).setName("DSR Window");
 }
 
 bool dsr::window_exists(const Window& window) {
-	return window.get() != nullptr;
+	return window.isNotNull();
 }
 
 bool dsr::component_exists(const Component& component) {
-	return component.get() != nullptr;
+	return component.isNotNull();
 }
 
 void dsr::window_loadInterfaceFromString(const Window& window, const String& content, const ReadableString &fromPath) {
@@ -86,8 +86,8 @@ Component dsr::window_getRoot(const Window& window) {
 
 Component dsr::component_createWithInterfaceFromString(Component& parent, const String& content, const ReadableString &fromPath) {
 	MUST_EXIST(parent, component_createWithInterfaceFromString);
-	Component result = std::dynamic_pointer_cast<VisualComponent>(createPersistentClassFromText(content, fromPath));
-	if (result.get() == nullptr) {
+	Component result = handle_dynamicCast<VisualComponent>(createPersistentClassFromText(content, fromPath));
+	if (result.isNull()) {
 		throwError(U"component_createWithInterfaceFromString: The component could not be created!\n\nLayout:\n", content, "\n");
 	}
 	parent->addChildComponent(result);
@@ -115,7 +115,7 @@ Component dsr::component_findChildByNameAndIndex(const Component& parent, const
 Component dsr::window_findComponentByName(const Window& window, const ReadableString& name, bool mustExist) {
 	MUST_EXIST(window, window_findComponentByName);
 	Component result = window->findComponentByName(name);
-	if (mustExist && result.get() == nullptr) {
+	if (mustExist && result.isNull()) {
 		throwError(U"window_findComponentByName: No child component named ", name, " found!");
 	}
 	return result;
@@ -124,14 +124,14 @@ Component dsr::window_findComponentByName(const Window& window, const ReadableSt
 Component dsr::window_findComponentByNameAndIndex(const Window& window, const ReadableString& name, int index, bool mustExist) {
 	MUST_EXIST(window, window_findComponentByNameAndIndex);
 	Component result = window->findComponentByNameAndIndex(name, index);
-	if (mustExist && result.get() == nullptr) {
+	if (mustExist && result.isNull()) {
 		throwError(U"window_findComponentByName: No child component named ", name, " with index ", index, " found!");
 	}
 	return result;
 }
 
 int dsr::component_getChildCount(const Component& parent) {
-	if (parent.get()) {
+	if (parent.getUnsafe()) {
 		return parent->getChildCount();
 	} else {
 		return -1;
@@ -139,10 +139,10 @@ int dsr::component_getChildCount(const Component& parent) {
 }
 
 Component dsr::component_getChild(const Component& parent, int childIndex) {
-	if (parent.get()) {
-		return std::dynamic_pointer_cast<VisualComponent>(parent->getChild(childIndex));
+	if (parent.getUnsafe()) {
+		return handle_dynamicCast<VisualComponent>(parent->getChild(childIndex));
 	} else {
-		return std::shared_ptr<VisualComponent>(); // Null handle
+		return Handle<VisualComponent>(); // Null handle
 	}
 }
 
@@ -481,12 +481,12 @@ Component dsr::component_create(const Component& parent, const ReadableString& c
 	// Making sure that the default components exist before trying to create a component manually.
 	gui_initialize();
 	// Creating a component from the name
-	Component child = std::dynamic_pointer_cast<VisualComponent>(createPersistentClass(className));
-	if (child) {
+	Component child = handle_dynamicCast<VisualComponent>(createPersistentClass(className));
+	if (child.isNotNull()) {
 		child->setName(identifierName);
 		child->setIndex(index);
 		// Attaching to a parent is optional, but convenient to do in the same call.
-		if (parent) {
+		if (parent.isNotNull()) {
 			parent->addChildComponent(child);
 		}
 	}

+ 24 - 2
Source/DFPSR/api/guiAPI.h

@@ -25,13 +25,15 @@
 #ifndef DFPSR_API_GUI
 #define DFPSR_API_GUI
 
-#include "types.h"
+#include "../base/Handle.h"
 #include "../api/stringAPI.h"
+#include "../image/Image.h"
 #include "../gui/InputEvent.h"
+#include "../gui/VisualTheme.h"
 
 // createBackendWindow should be implemented outside of the core framework
 //   Choose one of the window backends in SDK/native to compile and link with your application.
-// std::shared_ptr<dsr::BackendWindow> createBackendWindow(const dsr::String& title, int width, int height);
+// Handle<dsr::BackendWindow> createBackendWindow(const dsr::String& title, int width, int height);
 
 // Constness on handles doesn't propagate to any inner types
 //   "const Component&" only means that the writable Component handle can be created from a sub-expression
@@ -40,6 +42,26 @@
 
 namespace dsr {
 
+	enum class ReturnCode {
+		Good,
+		KeyNotFound,
+		ParsingFailure
+	};
+
+	// A handle to a window.
+	//  The Window wraps itself around native window backends to abstract away platform specific details.
+	//  It also makes it easy to load and use a graphical interface using the optional component system.
+	class DsrWindow;
+	using Window = Handle<DsrWindow>;
+
+	// A handle to a GUI component.
+	//   Components are an abstraction for graphical user interfaces, which might not always be powerful enough.
+	//   * If you're making something advanced that components cannot do,
+	//     you can also use draw calls and input events directly against the window without using Component.
+	class VisualComponent;
+	using Component = Handle<VisualComponent>;
+
+
 // Window Construction
 	// A portable window will be wrapped around a native window backend supplied from a call to createBackendWindow.
 	Window window_create(const dsr::String& title, int32_t width, int32_t height);

+ 185 - 462
Source/DFPSR/api/imageAPI.cpp

@@ -1,7 +1,7 @@
 
 // zlib open source license
 //
-// Copyright (c) 2017 to 2022 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -22,40 +22,68 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-#define DFPSR_INTERNAL_ACCESS
+#define DSR_INTERNAL_ACCESS
 
 #include <limits>
 #include <cassert>
 #include "imageAPI.h"
 #include "drawAPI.h"
 #include "fileAPI.h"
-#include "../image/draw.h"
-#include "../image/internal/imageInternal.h"
 #include "../image/stbImage/stbImageWrapper.h"
 #include "../math/scalar.h"
-#include "../base/simd.h"
+#include "../settings.h"
 
-using namespace dsr;
+namespace dsr {
 
-// Constructors
-AlignedImageU8 dsr::image_create_U8(int32_t width, int32_t height) {
-	return AlignedImageU8(std::make_shared<ImageU8Impl>(width, height));
+static const int32_t maximumImageWidth = 65536;
+static const int32_t maximumImageHeight = 65536;
+
+// Shared implementation for creating images of any pixel format.
+//   name is assigned to the allocated pixel buffer using setName.
+//   width and height must both be within 1..maximumImageWidth and 1..maximumImageHeight.
+//   packOrderIndex is passed on to the image's constructor.
+//   Returns a default constructed IMAGE_TYPE after sending a warning if the dimensions are out of bound.
+template <typename IMAGE_TYPE>
+IMAGE_TYPE image_create_template(const char * name, int32_t width, int32_t height, PackOrderIndex packOrderIndex) {
+	if (width < 1 || width > maximumImageWidth || height < 1 || height > maximumImageHeight) {
+		// An empty warning would not tell the caller what went wrong, so describe the rejected request.
+		sendWarning(U"image_create_template: Refused to create an image with dimensions outside of the allowed range! Returning an empty image.\n");
+		// Return an empty image on failure.
+		return IMAGE_TYPE();
+	} else {
+		static const int32_t pixelSize = image_getPixelSize<IMAGE_TYPE>();
+		// Calculate the stride by padding the size of one row of pixels to DSR_MAXIMUM_ALIGNMENT.
+		uintptr_t byteStride = memory_getPaddedSize(width * pixelSize, DSR_MAXIMUM_ALIGNMENT);
+		// The image is constructed with its stride counted in pixels.
+		uint32_t pixelStride = byteStride / pixelSize;
+		// Create the image with a new buffer holding height rows of byteStride bytes each.
+		return IMAGE_TYPE(buffer_create(byteStride * height).setName(name), 0, width, height, pixelStride, packOrderIndex);
+	}
 }
-AlignedImageU16 dsr::image_create_U16(int32_t width, int32_t height) {
-	return AlignedImageU16(std::make_shared<ImageU16Impl>(width, height));
+
+// Take the dimensions as signed integers to avoid getting extreme dimensions on underflow.
+AlignedImageU8 image_create_U8(int32_t width, int32_t height) {
+	return image_create_template<AlignedImageU8>("U8 pixel buffer", width, height, PackOrderIndex::RGBA);
 }
-AlignedImageF32 dsr::image_create_F32(int32_t width, int32_t height) {
-	return AlignedImageF32(std::make_shared<ImageF32Impl>(width, height));
+
+AlignedImageU16 image_create_U16(int32_t width, int32_t height) {
+	return image_create_template<AlignedImageU16>("U16 pixel buffer", width, height, PackOrderIndex::RGBA);
 }
-OrderedImageRgbaU8 dsr::image_create_RgbaU8(int32_t width, int32_t height) {
-	return OrderedImageRgbaU8(std::make_shared<ImageRgbaU8Impl>(width, height));
+
+AlignedImageF32 image_create_F32(int32_t width, int32_t height) {
+	return image_create_template<AlignedImageF32>("F32 pixel buffer", width, height, PackOrderIndex::RGBA);
 }
-AlignedImageRgbaU8 dsr::image_create_RgbaU8_native(int32_t width, int32_t height, PackOrderIndex packOrderIndex) {
-	return AlignedImageRgbaU8(std::make_shared<ImageRgbaU8Impl>(width, height, packOrderIndex));
+
+OrderedImageRgbaU8 image_create_RgbaU8(int32_t width, int32_t height) {
+	return image_create_template<OrderedImageRgbaU8>("RgbaU8 pixel buffer", width, height, PackOrderIndex::RGBA);
+}
+
+// Creates an RgbaU8 image of width x height pixels using the pack order given by packOrderIndex.
+//   The buffer is named "Native pixel buffer".
+// NOTE(review): The template is instantiated with OrderedImageRgbaU8 even though packOrderIndex can be something
+//   other than RGBA and the return type is AlignedImageRgbaU8. Confirm that AlignedImageRgbaU8 was not intended.
+AlignedImageRgbaU8 image_create_RgbaU8_native(int32_t width, int32_t height, PackOrderIndex packOrderIndex) {
+	return image_create_template<OrderedImageRgbaU8>("Native pixel buffer", width, height, packOrderIndex);
+}
+
+// Pre-condition: image exists.
+// Post-condition: Returns true iff the image's width multiplied by its pixel size is smaller than its stride, so that rows do not fill the whole stride.
+template <typename IMAGE_TYPE>
+inline bool imageIsPadded(const IMAGE_TYPE &image) {
+	return image_getWidth(image) * image_getPixelSize(image) < image_getStride(image);
 }
 
 // Loading from data pointer
-OrderedImageRgbaU8 dsr::image_decode_RgbaU8(const SafePointer<uint8_t> data, int size) {
+OrderedImageRgbaU8 image_decode_RgbaU8(SafePointer<const uint8_t> data, int size) {
 	if (data.isNotNull()) {
 		return image_stb_decode_RgbaU8(data, size);
 	} else {
@@ -63,11 +91,11 @@ OrderedImageRgbaU8 dsr::image_decode_RgbaU8(const SafePointer<uint8_t> data, int
 	}
 }
 // Loading from buffer
-OrderedImageRgbaU8 dsr::image_decode_RgbaU8(const Buffer& fileContent) {
+OrderedImageRgbaU8 image_decode_RgbaU8(const Buffer& fileContent) {
 	return image_decode_RgbaU8(buffer_getSafeData<uint8_t>(fileContent, "image file buffer"), buffer_getSize(fileContent));
 }
 // Loading from file
-OrderedImageRgbaU8 dsr::image_load_RgbaU8(const String& filename, bool mustExist) {
+OrderedImageRgbaU8 image_load_RgbaU8(const String& filename, bool mustExist) {
 	OrderedImageRgbaU8 result;
 	Buffer fileContent = file_loadBuffer(filename, mustExist);
 	if (buffer_exists(fileContent)) {
@@ -79,13 +107,7 @@ OrderedImageRgbaU8 dsr::image_load_RgbaU8(const String& filename, bool mustExist
 	return result;
 }
 
-// Pre-condition: image exists.
-// Post-condition: Returns true if the stride is larger than the image's width.
-static bool imageIsPadded(const ImageRgbaU8 &image) {
-	return image_getWidth(image) * 4 < image_getStride(image);
-}
-
-Buffer dsr::image_encode(const ImageRgbaU8 &image, ImageFileFormat format, int quality) {
+Buffer image_encode(const ImageRgbaU8 &image, ImageFileFormat format, int quality) {
 	if (image_exists(image)) {
 		ImageRgbaU8 orderedImage;
 		if (image_getPackOrderIndex(image) != PackOrderIndex::RGBA) {
@@ -125,7 +147,7 @@ static ImageFileFormat detectImageFileExtension(const String& filename) {
 	return result;
 }
 
-bool dsr::image_save(const ImageRgbaU8 &image, const String& filename, bool mustWork, int quality) {
+bool image_save(const ImageRgbaU8 &image, const String& filename, bool mustWork, int quality) {
 	ImageFileFormat extension = detectImageFileExtension(filename);
 	Buffer buffer;
 	if (extension == ImageFileFormat::Unknown) {
@@ -142,323 +164,140 @@ bool dsr::image_save(const ImageRgbaU8 &image, const String& filename, bool must
 	}
 }
 
-#define GET_OPTIONAL(SOURCE,DEFAULT) \
-	if (image) { \
-		return SOURCE; \
-	} else { \
-		return DEFAULT; \
-	}
-
-// Properties
-int32_t dsr::image_getWidth(const ImageU8& image)     { GET_OPTIONAL(image->width, 0); }
-int32_t dsr::image_getWidth(const ImageU16& image)    { GET_OPTIONAL(image->width, 0); }
-int32_t dsr::image_getWidth(const ImageF32& image)    { GET_OPTIONAL(image->width, 0); }
-int32_t dsr::image_getWidth(const ImageRgbaU8& image) { GET_OPTIONAL(image->width, 0); }
-
-int32_t dsr::image_getHeight(const ImageU8& image)     { GET_OPTIONAL(image->height, 0); }
-int32_t dsr::image_getHeight(const ImageU16& image)    { GET_OPTIONAL(image->height, 0); }
-int32_t dsr::image_getHeight(const ImageF32& image)    { GET_OPTIONAL(image->height, 0); }
-int32_t dsr::image_getHeight(const ImageRgbaU8& image) { GET_OPTIONAL(image->height, 0); }
-
-int32_t dsr::image_getStride(const ImageU8& image)     { GET_OPTIONAL(image->stride, 0); }
-int32_t dsr::image_getStride(const ImageU16& image)    { GET_OPTIONAL(image->stride, 0); }
-int32_t dsr::image_getStride(const ImageF32& image)    { GET_OPTIONAL(image->stride, 0); }
-int32_t dsr::image_getStride(const ImageRgbaU8& image) { GET_OPTIONAL(image->stride, 0); }
-
-IRect dsr::image_getBound(const ImageU8& image)     { GET_OPTIONAL(IRect(0, 0, image->width, image->height), IRect()); }
-IRect dsr::image_getBound(const ImageU16& image)    { GET_OPTIONAL(IRect(0, 0, image->width, image->height), IRect()); }
-IRect dsr::image_getBound(const ImageF32& image)    { GET_OPTIONAL(IRect(0, 0, image->width, image->height), IRect()); }
-IRect dsr::image_getBound(const ImageRgbaU8& image) { GET_OPTIONAL(IRect(0, 0, image->width, image->height), IRect()); }
-
-bool dsr::image_exists(const ImageU8& image)     { GET_OPTIONAL(true, false); }
-bool dsr::image_exists(const ImageU16& image)    { GET_OPTIONAL(true, false); }
-bool dsr::image_exists(const ImageF32& image)    { GET_OPTIONAL(true, false); }
-bool dsr::image_exists(const ImageRgbaU8& image) { GET_OPTIONAL(true, false); }
-
-int dsr::image_useCount(const ImageU8& image)     { return image.use_count(); }
-int dsr::image_useCount(const ImageU16& image)    { return image.use_count(); }
-int dsr::image_useCount(const ImageF32& image)    { return image.use_count(); }
-int dsr::image_useCount(const ImageRgbaU8& image) { return image.use_count(); }
-
-PackOrderIndex dsr::image_getPackOrderIndex(const ImageRgbaU8& image) {
-	GET_OPTIONAL(image->packOrder.packOrderIndex, PackOrderIndex::RGBA);
-}
-
-// Texture
-void dsr::image_makeIntoTexture(ImageRgbaU8& image) {
-	if (image) {
-		image->makeIntoTexture();
-	}
-}
-void dsr::image_generatePyramid(ImageRgbaU8& image) {
-	if (image) {
-		image->generatePyramid();
-	}
-}
-void dsr::image_removePyramid(ImageRgbaU8& image) {
-	if (image) {
-		image->removePyramid();
-	}
-}
-bool dsr::image_hasPyramid(const ImageRgbaU8& image) {
-	GET_OPTIONAL(image->texture.hasMipBuffer(), false);
-}
-bool dsr::image_isTexture(const ImageRgbaU8& image) {
-	GET_OPTIONAL(image->isTexture(), false);
-}
-
-// Pixel access
-#define INSIDE_XY (x >= 0 && x < image->width && y >= 0 && y < image->height)
-#define CLAMP_XY \
-	if (x < 0) { x = 0; } \
-	if (y < 0) { y = 0; } \
-	if (x >= image->width) { x = image->width - 1; } \
-	if (y >= image->height) { y = image->height - 1; }
-#define TILE_XY \
-	x = signedModulo(x, image->width); \
-	y = signedModulo(y, image->height);
-void dsr::image_writePixel(ImageU8& image, int32_t x, int32_t y, int32_t color) {
-	if (image) {
-		if (INSIDE_XY) {
-			if (color < 0) { color = 0; }
-			if (color > 255) { color = 255; }
-			ImageU8Impl::writePixel_unsafe(*image, x, y, color);
-		}
-	}
-}
-void dsr::image_writePixel(ImageU16& image, int32_t x, int32_t y, int32_t color) {
-	if (image) {
-		if (INSIDE_XY) {
-			if (color < 0) { color = 0; }
-			if (color > 65535) { color = 65535; }
-			ImageU16Impl::writePixel_unsafe(*image, x, y, color);
-		}
-	}
-}
-void dsr::image_writePixel(ImageF32& image, int32_t x, int32_t y, float color) {
-	if (image) {
-		if (INSIDE_XY) {
-			ImageF32Impl::writePixel_unsafe(*image, x, y, color);
-		}
-	}
-}
-void dsr::image_writePixel(ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& color) {
-	if (image) {
-		if (INSIDE_XY) {
-			ImageRgbaU8Impl::writePixel_unsafe(*image, x, y, image->packRgba(color.saturate()));
-		}
-	}
-}
-int32_t dsr::image_readPixel_border(const ImageU8& image, int32_t x, int32_t y, int32_t border) {
-	if (image) {
-		if (INSIDE_XY) {
-			return ImageU8Impl::readPixel_unsafe(*image, x, y);
-		} else {
-			return border;
-		}
-	} else {
-		return 0;
-	}
-}
-int32_t dsr::image_readPixel_border(const ImageU16& image, int32_t x, int32_t y, int32_t border) {
-	if (image) {
-		if (INSIDE_XY) {
-			return ImageU16Impl::readPixel_unsafe(*image, x, y);
-		} else {
-			return border;
-		}
-	} else {
-		return 0;
-	}
-}
-float dsr::image_readPixel_border(const ImageF32& image, int32_t x, int32_t y, float border) {
-	if (image) {
-		if (INSIDE_XY) {
-			return ImageF32Impl::readPixel_unsafe(*image, x, y);
-		} else {
-			return border;
-		}
-	} else {
-		return 0.0f;
-	}
-}
-ColorRgbaI32 dsr::image_readPixel_border(const ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& border) {
-	if (image) {
-		if (INSIDE_XY) {
-			return image->unpackRgba(ImageRgbaU8Impl::readPixel_unsafe(*image, x, y));
-		} else {
-			return border; // Can return unsaturated colors as error codes
-		}
-	} else {
-		return ColorRgbaI32();
-	}
-}
-uint8_t dsr::image_readPixel_clamp(const ImageU8& image, int32_t x, int32_t y) {
-	if (image) {
-		CLAMP_XY;
-		return ImageU8Impl::readPixel_unsafe(*image, x, y);
-	} else {
-		return 0;
-	}
-}
-uint16_t dsr::image_readPixel_clamp(const ImageU16& image, int32_t x, int32_t y) {
-	if (image) {
-		CLAMP_XY;
-		return ImageU16Impl::readPixel_unsafe(*image, x, y);
-	} else {
-		return 0;
-	}
-}
-float dsr::image_readPixel_clamp(const ImageF32& image, int32_t x, int32_t y) {
-	if (image) {
-		CLAMP_XY;
-		return ImageF32Impl::readPixel_unsafe(*image, x, y);
-	} else {
-		return 0.0f;
-	}
-}
-ColorRgbaI32 dsr::image_readPixel_clamp(const ImageRgbaU8& image, int32_t x, int32_t y) {
-	if (image) {
-		CLAMP_XY;
-		return image->unpackRgba(ImageRgbaU8Impl::readPixel_unsafe(*image, x, y));
-	} else {
-		return ColorRgbaI32();
-	}
-}
-uint8_t dsr::image_readPixel_tile(const ImageU8& image, int32_t x, int32_t y) {
-	if (image) {
-		TILE_XY;
-		return ImageU8Impl::readPixel_unsafe(*image, x, y);
-	} else {
-		return 0;
-	}
-}
-uint16_t dsr::image_readPixel_tile(const ImageU16& image, int32_t x, int32_t y) {
-	if (image) {
-		TILE_XY;
-		return ImageU16Impl::readPixel_unsafe(*image, x, y);
-	} else {
-		return 0;
-	}
-}
-float dsr::image_readPixel_tile(const ImageF32& image, int32_t x, int32_t y) {
-	if (image) {
-		TILE_XY;
-		return ImageF32Impl::readPixel_unsafe(*image, x, y);
-	} else {
-		return 0.0f;
-	}
-}
-ColorRgbaI32 dsr::image_readPixel_tile(const ImageRgbaU8& image, int32_t x, int32_t y) {
-	if (image) {
-		TILE_XY;
-		return image->unpackRgba(ImageRgbaU8Impl::readPixel_unsafe(*image, x, y));
-	} else {
-		return ColorRgbaI32();
-	}
-}
-
-void dsr::image_fill(ImageU8& image, int32_t color) {
-	if (image) {
-		imageImpl_draw_solidRectangle(*image, imageInternal::getBound(*image), color);
+void image_fill(const ImageU8& image, int32_t color) {
+	if (image_exists(image)) {
+		draw_rectangle(image, image_getBound(image), color);
 	}
 }
-void dsr::image_fill(ImageU16& image, int32_t color) {
-	if (image) {
-		imageImpl_draw_solidRectangle(*image, imageInternal::getBound(*image), color);
+void image_fill(const ImageU16& image, int32_t color) {
+	if (image_exists(image)) {
+		draw_rectangle(image, image_getBound(image), color);
 	}
 }
-void dsr::image_fill(ImageF32& image, float color) {
-	if (image) {
-		imageImpl_draw_solidRectangle(*image, imageInternal::getBound(*image), color);
+void image_fill(const ImageF32& image, float color) {
+	if (image_exists(image)) {
+		draw_rectangle(image, image_getBound(image), color);
 	}
 }
-void dsr::image_fill(ImageRgbaU8& image, const ColorRgbaI32& color) {
-	if (image) {
-		imageImpl_draw_solidRectangle(*image, imageInternal::getBound(*image), color);
+void image_fill(const ImageRgbaU8& image, const ColorRgbaI32& color) {
+	if (image_exists(image)) {
+		draw_rectangle(image, image_getBound(image), color);
 	}
 }
 
-AlignedImageU8 dsr::image_clone(const ImageU8& image) {
-	if (image) {
-		AlignedImageU8 result = image_create_U8(image->width, image->height);
+AlignedImageU8 image_clone(const ImageU8& image) {
+	if (image_exists(image)) {
+		AlignedImageU8 result = image_create_U8(image_getWidth(image), image_getHeight(image));
 		draw_copy(result, image);
 		return result;
 	} else {
 		return AlignedImageU8(); // Null gives null
 	}
 }
-AlignedImageU16 dsr::image_clone(const ImageU16& image) {
-	if (image) {
-		AlignedImageU16 result = image_create_U16(image->width, image->height);
+AlignedImageU16 image_clone(const ImageU16& image) {
+	if (image_exists(image)) {
+		AlignedImageU16 result = image_create_U16(image_getWidth(image), image_getHeight(image));
 		draw_copy(result, image);
 		return result;
 	} else {
 		return AlignedImageU16(); // Null gives null
 	}
 }
-AlignedImageF32 dsr::image_clone(const ImageF32& image) {
-	if (image) {
-		AlignedImageF32 result = image_create_F32(image->width, image->height);
+AlignedImageF32 image_clone(const ImageF32& image) {
+	if (image_exists(image)) {
+		AlignedImageF32 result = image_create_F32(image_getWidth(image), image_getHeight(image));
 		draw_copy(result, image);
 		return result;
 	} else {
 		return AlignedImageF32(); // Null gives null
 	}
 }
-OrderedImageRgbaU8 dsr::image_clone(const ImageRgbaU8& image) {
-	if (image) {
-		OrderedImageRgbaU8 result = image_create_RgbaU8(image->width, image->height);
+OrderedImageRgbaU8 image_clone(const ImageRgbaU8& image) {
+	if (image_exists(image)) {
+		OrderedImageRgbaU8 result = image_create_RgbaU8(image_getWidth(image), image_getHeight(image));
 		draw_copy(result, image);
 		return result;
 	} else {
 		return OrderedImageRgbaU8(); // Null gives null
 	}
 }
-ImageRgbaU8 dsr::image_removePadding(const ImageRgbaU8& image) {
-	if (image) {
-		// TODO: Copy the implementation of getWithoutPadding, to create ImageRgbaU8 directly
-		return ImageRgbaU8(image->getWithoutPadding());
-	} else {
+
+// Returns an image with the same pixels as image, stored without padding after each row.
+//   A null image gives a null image.
+//   An image without padding is returned as is, without copying any pixels.
+//   A padded image is copied row by row into a new tightly packed buffer, keeping the pack order.
+ImageRgbaU8 image_removePadding(const ImageRgbaU8& image) {
+	if (!image_exists(image)) {
 		return ImageRgbaU8(); // Null gives null
+	} else if (!imageIsPadded(image)) {
+		// There is no padding to remove, so only the padded case below needs a new buffer.
+		return image;
+	} else {
+		// The size of one row of pixels, which is also the distance between rows in the packed target.
+		uint32_t targetStride = image_getWidth(image) * image_getPixelSize(image);
+		int32_t sourceStride = image_getStride(image);
+		Buffer newBuffer = buffer_create(targetStride * image_getHeight(image));
+		SafePointer<const uint8_t> sourceRow = image_getSafePointer<uint8_t>(image);
+		SafePointer<uint8_t> targetRow = buffer_getSafeData<uint8_t>(newBuffer, "RgbaU8 padding removal target");
+		for (int32_t y = 0; y < image_getHeight(image); y++) {
+			// Copy the pixels of the row and leave the padding at the end of the source row behind.
+			safeMemoryCopy(targetRow, sourceRow, targetStride);
+			sourceRow.increaseBytes(sourceStride);
+			targetRow.increaseBytes(targetStride);
+		}
+		// The stride argument is counted in pixels, like pixelStride in image_create_template, so a packed image uses its width.
+		return ImageRgbaU8(newBuffer, 0, image_getWidth(image), image_getHeight(image), image_getWidth(image), image_getPackOrderIndex(image));
 	}
 }
 
-AlignedImageU8 dsr::image_get_red(const ImageRgbaU8& image) {
-	if (image) {
-		// TODO: Copy the implementation of getChannel, to create ImageU8 directly
-		return AlignedImageU8(image->getChannel(image->packOrder.redIndex));
+// Copies one 8-bit channel from each source pixel into a monochrome target of width x height elements.
+//   targetData and sourceData point to the first row of the target and the source.
+//   targetStride and sourceStride are the offsets given to increaseBytes when stepping to the next row.
+//   sourceChannels is the number of uint8_t elements to step between two neighboring source pixels.
+//   channelIndex is the offset in uint8_t elements from the start of a source pixel to the channel to read.
+static void extractChannel(SafePointer<uint8_t> targetData, int targetStride, SafePointer<const uint8_t> sourceData, int sourceStride, int sourceChannels, int channelIndex, int width, int height) {
+	SafePointer<const uint8_t> sourceRow = sourceData + channelIndex;
+	SafePointer<uint8_t> targetRow = targetData;
+	for (int y = 0; y < height; y++) {
+		SafePointer<const uint8_t> sourceElement = sourceRow;
+		SafePointer<uint8_t> targetElement = targetRow;
+		for (int x = 0; x < width; x++) {
+			*targetElement = *sourceElement; // Copy one channel from the source
+			sourceElement += sourceChannels; // Jump to the same channel in the next source pixel
+			targetElement += 1; // Jump to the next monochrome target pixel
+		}
+		sourceRow.increaseBytes(sourceStride);
+		targetRow.increaseBytes(targetStride);
+	}
+}
+
+// Creates a new U8 image of the same dimensions as image, containing the channel at channelIndex from each pixel.
+//   The image is not tested for null here, because the callers in this file check image_exists before calling.
+//   channelIndex must be within 0..3, which is only checked using assert.
+static AlignedImageU8 getChannel(const ImageRgbaU8 image, int32_t channelIndex) {
+	// The number of channels per pixel is a constant, so it should not be stored as a mutable static variable.
+	static const int channelCount = 4;
+	// Warning for debug mode
+	assert(0 <= channelIndex && channelIndex < channelCount);
+	AlignedImageU8 result = image_create_U8(image_getWidth(image), image_getHeight(image));
+	extractChannel(image_getSafePointer<uint8_t>(result), image_getStride(result), image_getSafePointer<uint8_t>(image), image_getStride(image), channelCount, channelIndex, image_getWidth(image), image_getHeight(image));
+	return result;
+}
+
+AlignedImageU8 image_get_red(const ImageRgbaU8& image) {
+	if (image_exists(image)) {
+		return getChannel(image, image_getPackOrder(image).redIndex);
 	} else {
-		return AlignedImageU8(); // Null gives null
+		return AlignedImageU8();
 	}
 }
-AlignedImageU8 dsr::image_get_green(const ImageRgbaU8& image) {
-	if (image) {
-		// TODO: Copy the implementation of getChannel, to create ImageU8 directly
-		return AlignedImageU8(image->getChannel(image->packOrder.greenIndex));
+AlignedImageU8 image_get_green(const ImageRgbaU8& image) {
+	if (image_exists(image)) {
+		return getChannel(image, image_getPackOrder(image).greenIndex);
 	} else {
 		return AlignedImageU8(); // Null gives null
 	}
 }
-AlignedImageU8 dsr::image_get_blue(const ImageRgbaU8& image) {
-	if (image) {
-		// TODO: Copy the implementation of getChannel, to create ImageU8 directly
-		return AlignedImageU8(image->getChannel(image->packOrder.blueIndex));
+AlignedImageU8 image_get_blue(const ImageRgbaU8& image) {
+	if (image_exists(image)) {
+		return getChannel(image, image_getPackOrder(image).blueIndex);
 	} else {
 		return AlignedImageU8(); // Null gives null
 	}
 }
-AlignedImageU8 dsr::image_get_alpha(const ImageRgbaU8& image) {
-	if (image) {
-		// TODO: Copy the implementation of getChannel, to create ImageU8 directly
-		return AlignedImageU8(image->getChannel(image->packOrder.alphaIndex));
+AlignedImageU8 image_get_alpha(const ImageRgbaU8& image) {
+	if (image_exists(image)) {
+		return getChannel(image, image_getPackOrder(image).alphaIndex);
 	} else {
 		return AlignedImageU8(); // Null gives null
 	}
 }
 
 static inline int32_t readColor(const ImageU8& channel, int x, int y) {
-	return ImageU8Impl::readPixel_unsafe(*channel, x, y);
+	return image_accessPixel(channel, x, y);
 }
 static inline int32_t readColor(int32_t color, int x, int y) {
 	return color;
@@ -476,60 +315,59 @@ static OrderedImageRgbaU8 pack_template(int32_t width, int32_t height, R red, G
 }
 
 #define PACK1(FIRST) \
-if (FIRST) { \
-	return pack_template(FIRST->width, FIRST->height, red, green, blue, alpha); \
+if (image_exists(FIRST)) { \
+	return pack_template(image_getWidth(FIRST), image_getHeight(FIRST), red, green, blue, alpha); \
 } else { \
 	return OrderedImageRgbaU8(); \
 }
-OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, int32_t green, int32_t blue, int32_t alpha) { PACK1(red); }
-OrderedImageRgbaU8 dsr::image_pack(int32_t red, const ImageU8& green, int32_t blue, int32_t alpha) { PACK1(green); }
-OrderedImageRgbaU8 dsr::image_pack(int32_t red, int32_t green, const ImageU8& blue, int32_t alpha) { PACK1(blue); }
-OrderedImageRgbaU8 dsr::image_pack(int32_t red, int32_t green, int32_t blue, const ImageU8& alpha) { PACK1(alpha); }
+OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, int32_t blue, int32_t alpha) { PACK1(red); }
+OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, int32_t blue, int32_t alpha) { PACK1(green); }
+OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, const ImageU8& blue, int32_t alpha) { PACK1(blue); }
+OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, int32_t blue, const ImageU8& alpha) { PACK1(alpha); }
 
 #define PACK2(FIRST,SECOND) \
-if (FIRST && SECOND) { \
-	if (FIRST->width != SECOND->width || FIRST->height != SECOND->height) { \
+if (image_exists(FIRST) && image_exists(SECOND)) { \
+	if (image_getWidth(FIRST) != image_getWidth(SECOND) || image_getHeight(FIRST) != image_getHeight(SECOND)) { \
 		throwError("Cannot pack two channels of different size!\n"); \
 	} \
-	return pack_template(FIRST->width, FIRST->height, red, green, blue, alpha); \
+	return pack_template(image_getWidth(FIRST), image_getHeight(FIRST), red, green, blue, alpha); \
 } else { \
 	return OrderedImageRgbaU8(); \
 }
-OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, int32_t alpha) { PACK2(red,green) }
-OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, int32_t alpha) { PACK2(red,blue) }
-OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, int32_t green, int32_t blue, const ImageU8& alpha) { PACK2(red,alpha) }
-OrderedImageRgbaU8 dsr::image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, int32_t alpha) { PACK2(green,blue) }
-OrderedImageRgbaU8 dsr::image_pack(int32_t red, const ImageU8& green, int32_t blue, const ImageU8& alpha) { PACK2(green,alpha) }
-OrderedImageRgbaU8 dsr::image_pack(int32_t red, int32_t green, const ImageU8& blue, const ImageU8& alpha) { PACK2(blue,alpha) }
+OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, int32_t alpha) { PACK2(red,green) }
+OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, int32_t alpha) { PACK2(red,blue) }
+OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, int32_t blue, const ImageU8& alpha) { PACK2(red,alpha) }
+OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, int32_t alpha) { PACK2(green,blue) }
+OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, int32_t blue, const ImageU8& alpha) { PACK2(green,alpha) }
+OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, const ImageU8& blue, const ImageU8& alpha) { PACK2(blue,alpha) }
 
 #define PACK3(FIRST,SECOND,THIRD) \
-if (FIRST && SECOND && THIRD) { \
-	if (FIRST->width != SECOND->width || FIRST->height != SECOND->height \
-	 || FIRST->width != THIRD->width || FIRST->height != THIRD->height) { \
+if (image_exists(FIRST) && image_exists(SECOND) && image_exists(THIRD)) { \
+	if (image_getWidth(FIRST) != image_getWidth(SECOND) || image_getHeight(FIRST) != image_getHeight(SECOND) \
+	 || image_getWidth(FIRST) != image_getWidth(THIRD) || image_getHeight(FIRST) != image_getHeight(THIRD)) { \
 		throwError("Cannot pack three channels of different size!\n"); \
 	} \
-	return pack_template(FIRST->width, FIRST->height, red, green, blue, alpha); \
+	return pack_template(image_getWidth(FIRST), image_getHeight(FIRST), red, green, blue, alpha); \
 } else { \
 	return OrderedImageRgbaU8(); \
 }
-OrderedImageRgbaU8 dsr::image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha) { PACK3(green, blue, alpha) }
-OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, const ImageU8& alpha) { PACK3(red, blue, alpha) }
-OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, const ImageU8& alpha) { PACK3(red, green, alpha) }
-OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, int32_t alpha) { PACK3(red, green, blue) }
+OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha) { PACK3(green, blue, alpha) }
+OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, const ImageU8& alpha) { PACK3(red, blue, alpha) }
+OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, const ImageU8& alpha) { PACK3(red, green, alpha) }
+OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, int32_t alpha) { PACK3(red, green, blue) }
 
-// TODO: Optimize using zip instructions
 #define PACK4(FIRST,SECOND,THIRD,FOURTH) \
-if (FIRST && SECOND && THIRD && FOURTH) { \
-	if (FIRST->width != SECOND->width || FIRST->height != SECOND->height \
-	 || FIRST->width != THIRD->width || FIRST->height != THIRD->height \
- 	 || FIRST->width != FOURTH->width || FIRST->height != FOURTH->height) { \
+if (image_exists(FIRST) && image_exists(SECOND) && image_exists(THIRD) && image_exists(FOURTH)) { \
+	if (image_getWidth(FIRST) != image_getWidth(SECOND) || image_getHeight(FIRST) != image_getHeight(SECOND) \
+	 || image_getWidth(FIRST) != image_getWidth(THIRD) || image_getHeight(FIRST) != image_getHeight(THIRD) \
+ 	 || image_getWidth(FIRST) != image_getWidth(FOURTH) || image_getHeight(FIRST) != image_getHeight(FOURTH)) { \
 		throwError("Cannot pack four channels of different size!\n"); \
 	} \
-	return pack_template(FIRST->width, FIRST->height, red, green, blue, alpha); \
+	return pack_template(image_getWidth(FIRST), image_getHeight(FIRST), red, green, blue, alpha); \
 } else { \
 	return OrderedImageRgbaU8(); \
 }
-OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha) { PACK4(red, green, blue, alpha) }
+OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha) { PACK4(red, green, blue, alpha) }
 
 // Convert a grayscale image into an ascii image using the given alphabet.
 //   Since all 256 characters cannot be in the alphabet, the encoding is lossy.
@@ -540,7 +378,7 @@ OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, const ImageU8& green, con
 //   width <= stride
 //   size of monochromeImage = height * stride
 // Example alphabet: " .,-_':;!+~=^?*abcdefghijklmnopqrstuvwxyz()[]{}|&@#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-String dsr::image_toAscii(const ImageU8& image, const String& alphabet) {
+String image_toAscii(const ImageU8& image, const String& alphabet) {
 	if (!image_exists(image)) {
 		return U"null";
 	}
@@ -574,13 +412,13 @@ String dsr::image_toAscii(const ImageU8& image, const String& alphabet) {
 	return result;
 }
 
-String dsr::image_toAscii(const ImageU8& image) {
+String image_toAscii(const ImageU8& image) {
 	return image_toAscii(image, U" .,-_':;!+~=^?*abcdefghijklmnopqrstuvwxyz()[]{}|&@#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
 }
 
 // Create a monochrome image from the ascii image in content.
 // String is used instead of ReadableString, so that the content can be decompressed from 8-bit strings in the binary.
-AlignedImageU8 dsr::image_fromAscii(const String& content) {
+AlignedImageU8 image_fromAscii(const String& content) {
 	char alphabet[128];
 	uint8_t alphabetMap[128];
 	char current;
@@ -671,59 +509,20 @@ AlignedImageU8 dsr::image_fromAscii(const String& content) {
 	return result;
 }
 
-// TODO: Try to recycle the memory to reduce overhead from heap allocating heads pointing to existing buffers
-template <typename IMAGE_TYPE, typename VALUE_TYPE>
-static inline IMAGE_TYPE subImage_template(const IMAGE_TYPE& image, const IRect& region) {
-	if (image) {
-		IRect cut = IRect::cut(imageInternal::getBound(*image), region);
-		if (cut.hasArea()) {
-			intptr_t newOffset = image->startOffset + (cut.left() * image->pixelSize) + (cut.top() * image->stride);
-			return IMAGE_TYPE(std::make_shared<VALUE_TYPE>(cut.width(), cut.height(), image->stride, image->buffer, newOffset));
-		}
-	}
-	return IMAGE_TYPE(); // Null if where are no overlapping pixels
-}
-
-template <typename IMAGE_TYPE, typename VALUE_TYPE>
-static inline IMAGE_TYPE subImage_template_withPackOrder(const IMAGE_TYPE& image, const IRect& region) {
-	if (image) {
-		IRect cut = IRect::cut(imageInternal::getBound(*image), region);
-		if (cut.hasArea()) {
-			intptr_t newOffset = image->startOffset + (cut.left() * image->pixelSize) + (cut.top() * image->stride);
-			return IMAGE_TYPE(std::make_shared<VALUE_TYPE>(cut.width(), cut.height(), image->stride, image->buffer, newOffset, image->packOrder));
-		}
-	}
-	return IMAGE_TYPE(); // Null if where are no overlapping pixels
-}
-
-ImageU8 dsr::image_getSubImage(const ImageU8& image, const IRect& region) {
-	return subImage_template<ImageU8, ImageU8Impl>(image, region);
-}
-
-ImageU16 dsr::image_getSubImage(const ImageU16& image, const IRect& region) {
-	return subImage_template<ImageU16, ImageU16Impl>(image, region);
-}
-
-ImageF32 dsr::image_getSubImage(const ImageF32& image, const IRect& region) {
-	return subImage_template<ImageF32, ImageF32Impl>(image, region);
-}
-
-ImageRgbaU8 dsr::image_getSubImage(const ImageRgbaU8& image, const IRect& region) {
-	return subImage_template_withPackOrder<ImageRgbaU8, ImageRgbaU8Impl>(image, region);
-}
-
 template <typename IMAGE_TYPE, int CHANNELS, typename ELEMENT_TYPE>
 ELEMENT_TYPE maxDifference_template(const IMAGE_TYPE& imageA, const IMAGE_TYPE& imageB) {
-	if (imageA.width != imageB.width || imageA.height != imageB.height) {
+	if (image_getWidth(imageA) != image_getWidth(imageB) || image_getHeight(imageA) != image_getHeight(imageB)) {
 		return std::numeric_limits<ELEMENT_TYPE>::max();
 	} else {
+		intptr_t strideA = image_getStride(imageA);
+		intptr_t strideB = image_getStride(imageB);
 		ELEMENT_TYPE maxDifference = 0;
-		const SafePointer<ELEMENT_TYPE> rowDataA = imageInternal::getSafeData<ELEMENT_TYPE>(imageA);
-		const SafePointer<ELEMENT_TYPE> rowDataB = imageInternal::getSafeData<ELEMENT_TYPE>(imageB);
-		for (int y = 0; y < imageA.height; y++) {
-			const SafePointer<ELEMENT_TYPE> pixelDataA = rowDataA;
-			const SafePointer<ELEMENT_TYPE> pixelDataB = rowDataB;
-			for (int x = 0; x < imageA.width; x++) {
+		SafePointer<const ELEMENT_TYPE> rowDataA = image_getSafePointer<ELEMENT_TYPE>(imageA);
+		SafePointer<const ELEMENT_TYPE> rowDataB = image_getSafePointer<ELEMENT_TYPE>(imageB);
+		for (int y = 0; y < image_getHeight(imageA); y++) {
+			SafePointer<const ELEMENT_TYPE> pixelDataA = rowDataA;
+			SafePointer<const ELEMENT_TYPE> pixelDataB = rowDataB;
+			for (int x = 0; x < image_getWidth(imageA); x++) {
 				for (int c = 0; c < CHANNELS; c++) {
 					ELEMENT_TYPE difference = absDiff(*pixelDataA, *pixelDataB);
 					if (difference > maxDifference) {
@@ -733,115 +532,39 @@ ELEMENT_TYPE maxDifference_template(const IMAGE_TYPE& imageA, const IMAGE_TYPE&
 					pixelDataB += 1;
 				}
 			}
-			rowDataA.increaseBytes(imageA.stride);
-			rowDataB.increaseBytes(imageB.stride);
+			rowDataA.increaseBytes(strideA);
+			rowDataB.increaseBytes(strideB);
 		}
 		return maxDifference;
 	}
 }
-uint8_t dsr::image_maxDifference(const ImageU8& imageA, const ImageU8& imageB) {
-	if (imageA && imageB) {
-		return maxDifference_template<ImageU8Impl, 1, uint8_t>(*imageA, *imageB);
+// Returns the largest absolute difference between corresponding pixels in imageA and imageB.
+//   Returns the highest possible value if any of the images does not exist, just like when the dimensions differ.
+uint8_t image_maxDifference(const ImageU8& imageA, const ImageU8& imageB) {
+	if (image_exists(imageA) && image_exists(imageB)) {
+		return maxDifference_template<ImageU8, 1, uint8_t>(imageA, imageB);
 	} else {
-		return std::numeric_limits<uint8_t>::infinity();
+		// Integer types have no infinity, so infinity() would return 0 and report missing images as identical.
+		return std::numeric_limits<uint8_t>::max();
 	}
 }
-uint16_t dsr::image_maxDifference(const ImageU16& imageA, const ImageU16& imageB) {
-	if (imageA && imageB) {
-		return maxDifference_template<ImageU16Impl, 1, uint16_t>(*imageA, *imageB);
+// Returns the largest absolute difference between corresponding pixels in imageA and imageB.
+//   Returns the highest possible value if any of the images does not exist, just like when the dimensions differ.
+uint16_t image_maxDifference(const ImageU16& imageA, const ImageU16& imageB) {
+	if (image_exists(imageA) && image_exists(imageB)) {
+		return maxDifference_template<ImageU16, 1, uint16_t>(imageA, imageB);
 	} else {
-		return std::numeric_limits<uint16_t>::infinity();
+		// Integer types have no infinity, so infinity() would return 0 and report missing images as identical.
+		return std::numeric_limits<uint16_t>::max();
 	}
 }
-float dsr::image_maxDifference(const ImageF32& imageA, const ImageF32& imageB) {
-	if (imageA && imageB) {
-		return maxDifference_template<ImageF32Impl, 1, float>(*imageA, *imageB);
+float image_maxDifference(const ImageF32& imageA, const ImageF32& imageB) {
+	if (image_exists(imageA) && image_exists(imageB)) {
+		return maxDifference_template<ImageF32, 1, float>(imageA, imageB);
 	} else {
 		return std::numeric_limits<float>::infinity();
 	}
 }
-uint8_t dsr::image_maxDifference(const ImageRgbaU8& imageA, const ImageRgbaU8& imageB) {
-	if (imageA && imageB) {
-		return maxDifference_template<ImageRgbaU8Impl, 4, uint8_t>(*imageA, *imageB);
+// Returns the largest absolute difference between corresponding channels in imageA and imageB.
+//   Returns the highest possible value if any of the images does not exist, just like when the dimensions differ.
+uint8_t image_maxDifference(const ImageRgbaU8& imageA, const ImageRgbaU8& imageB) {
+	if (image_exists(imageA) && image_exists(imageB)) {
+		return maxDifference_template<ImageRgbaU8, 4, uint8_t>(imageA, imageB);
 	} else {
-		return std::numeric_limits<uint8_t>::infinity();
+		// Integer types have no infinity, so infinity() would return 0 and report missing images as identical.
+		return std::numeric_limits<uint8_t>::max();
 	}
 }
 
-SafePointer<uint8_t> dsr::image_getSafePointer(const ImageU8& image, int rowIndex) {
-	if (image) {
-		return imageInternal::getSafeData<uint8_t>(image.get(), rowIndex);
-	} else {
-		return SafePointer<uint8_t>();
-	}
-}
-SafePointer<uint16_t> dsr::image_getSafePointer(const ImageU16& image, int rowIndex) {
-	if (image) {
-		return imageInternal::getSafeData<uint16_t>(image.get(), rowIndex);
-	} else {
-		return SafePointer<uint16_t>();
-	}
-}
-SafePointer<float> dsr::image_getSafePointer(const ImageF32& image, int rowIndex) {
-	if (image) {
-		return imageInternal::getSafeData<float>(image.get(), rowIndex);
-	} else {
-		return SafePointer<float>();
-	}
-}
-SafePointer<uint32_t> dsr::image_getSafePointer(const ImageRgbaU8& image, int rowIndex) {
-	if (image) {
-		return imageInternal::getSafeData<uint32_t>(image.get(), rowIndex);
-	} else {
-		return SafePointer<uint32_t>();
-	}
-}
-SafePointer<uint8_t> dsr::image_getSafePointer_channels(const ImageRgbaU8& image, int rowIndex) {
-	if (image) {
-		return imageInternal::getSafeData<uint8_t>(image.get(), rowIndex);
-	} else {
-		return SafePointer<uint8_t>();
-	}
-}
-
-void dsr::image_dangerous_replaceDestructor(ImageU8& image, const std::function<void(uint8_t *)>& newDestructor) {
-	if (image) { return buffer_replaceDestructor(image->buffer, newDestructor); }
-}
-void dsr::image_dangerous_replaceDestructor(ImageU16& image, const std::function<void(uint8_t *)>& newDestructor) {
-	if (image) { return buffer_replaceDestructor(image->buffer, newDestructor); }
-}
-void dsr::image_dangerous_replaceDestructor(ImageF32& image, const std::function<void(uint8_t *)>& newDestructor) {
-	if (image) { return buffer_replaceDestructor(image->buffer, newDestructor); }
-}
-void dsr::image_dangerous_replaceDestructor(ImageRgbaU8& image, const std::function<void(uint8_t *)>& newDestructor) {
-	if (image) { return buffer_replaceDestructor(image->buffer, newDestructor); }
-}
-
-uint8_t* dsr::image_dangerous_getData(const ImageU8& image) {
-	if (image) {
-		return imageInternal::getSafeData<uint8_t>(*image).getUnsafe();
-	} else {
-		return nullptr;
-	}
-}
-uint8_t* dsr::image_dangerous_getData(const ImageU16& image) {
-	if (image) {
-		return imageInternal::getSafeData<uint8_t>(*image).getUnsafe();
-	} else {
-		return nullptr;
-	}
-}
-uint8_t* dsr::image_dangerous_getData(const ImageF32& image) {
-	if (image) {
-		return imageInternal::getSafeData<uint8_t>(*image).getUnsafe();
-	} else {
-		return nullptr;
-	}
-}
-uint8_t* dsr::image_dangerous_getData(const ImageRgbaU8& image) {
-	if (image) {
-		return imageInternal::getSafeData<uint8_t>(*image).getUnsafe();
-	} else {
-		return nullptr;
-	}
 }

+ 390 - 156
Source/DFPSR/api/imageAPI.h

@@ -1,7 +1,7 @@
 
 // zlib open source license
 //
-// Copyright (c) 2017 to 2022 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -22,17 +22,25 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
+// Everything stored directly in the image types is immutable to allow value types to behave like reference types using the data that they point to.
+// Image types cannot be dynamically cast, because the inheritance is entirely static without any virtual functions.
+
 #ifndef DFPSR_API_IMAGE
 #define DFPSR_API_IMAGE
 
-#include "types.h"
-#include "../base/SafePointer.h"
+#include "../image/Image.h"
+#include "../image/Color.h"
+#include "../base/heap.h"
+#include "../math/scalar.h"
 
 namespace dsr {
 
 // Constructors
-	// Each row's start and stride is aligned to 16-bytes using padding at the end
-	//   This allow using in-place writing with aligned 16-byte SIMD vectors
+	// Pre-conditions:
+	//   1 <= width <= 65536
+	//   1 <= height <= 65536
+	// Post-condition:
+	//   Returns a new image of width x height pixels, or an empty image on failure.
 	AlignedImageU8 image_create_U8(int32_t width, int32_t height);
 	AlignedImageU16 image_create_U16(int32_t width, int32_t height);
 	AlignedImageF32 image_create_F32(int32_t width, int32_t height);
@@ -40,108 +48,294 @@ namespace dsr {
 	AlignedImageRgbaU8 image_create_RgbaU8_native(int32_t width, int32_t height, PackOrderIndex packOrderIndex);
 
 // Properties
-	// Returns image's width in pixels or 0 on null image
-	int32_t image_getWidth(const ImageU8& image);
-	int32_t image_getWidth(const ImageU16& image);
-	int32_t image_getWidth(const ImageF32& image);
-	int32_t image_getWidth(const ImageRgbaU8& image);
-	// Returns image's height in pixels or 0 on null image
-	int32_t image_getHeight(const ImageU8& image);
-	int32_t image_getHeight(const ImageU16& image);
-	int32_t image_getHeight(const ImageF32& image);
-	int32_t image_getHeight(const ImageRgbaU8& image);
-	// Returns image's stride in bytes or 0 on null image
-	//   Stride is the offset from the beginning of one row to another
-	//   May be larger than width times pixel size
-	//     * If padding is used to align with 16-bytes
-	//     * Or the buffer is shared with a larger image
-	int32_t image_getStride(const ImageU8& image);
-	int32_t image_getStride(const ImageU16& image);
-	int32_t image_getStride(const ImageF32& image);
-	int32_t image_getStride(const ImageRgbaU8& image);
-	// Get a rectangle from the image's dimensions with the top left corner set to (0, 0)
-	//   Useful for clipping to an image's bounds or subdividing space for a graphical user interface
-	IRect image_getBound(const ImageU8& image);
-	IRect image_getBound(const ImageU16& image);
-	IRect image_getBound(const ImageF32& image);
-	IRect image_getBound(const ImageRgbaU8& image);
-	// Returns false on null, true otherwise
-	bool image_exists(const ImageU8& image);
-	bool image_exists(const ImageU16& image);
-	bool image_exists(const ImageF32& image);
-	bool image_exists(const ImageRgbaU8& image);
-	// Returns the number of handles to the image
-	//   References to a handle doesn't count, only when a handle is stored by value
-	int image_useCount(const ImageU8& image);
-	int image_useCount(const ImageU16& image);
-	int image_useCount(const ImageF32& image);
-	int image_useCount(const ImageRgbaU8& image);
-	// Returns the image's pack order index
-	PackOrderIndex image_getPackOrderIndex(const ImageRgbaU8& image);
-
-// Texture
-	// Pre-condition: image must exist for something to happen
-	// Side-effect: If image is not a valid texture, it will be resized into a suitable power-of-two dimension.
-	// Applied automatically when calling image_generatePyramid for the first time on the image.
-	// Warning! May invalidate all SafePointers and raw pointers to the image's data.
-	void image_makeIntoTexture(ImageRgbaU8& image);
-	// Pre-condition: image must exist for something to happen
-	// Side-effects:
-	//  If the image does not have valid texture dimensions, it will be resized using image_makeIntoTexture before generating the pyramid.
-	//  Reallocates the image's buffer and uses the new memory to write smaller versions of the image.
-	// Afterwards, image_hasPyramid should return true for the image
-	// Warning! May invalidate all SafePointers and raw pointers to the image's data.
-	void image_generatePyramid(ImageRgbaU8& image);
-	// Pre-condition: image must exist
-	// Side-effect: Removes image's mip-map pyramid, including its buffer to save memory
-	// Afterwards, image_hasPyramid should return false for the image
-	void image_removePyramid(ImageRgbaU8& image);
-	// Post-condition: Returns true iff image contains a mip-map pyramid generated by image_generatePyramid
-	// Returns false without a warning if the image handle is empty
-	bool image_hasPyramid(const ImageRgbaU8& image);
-	// Post-condition:
-	//   Returns true iff image fulfills the criterias for being a texture
-	//   Returns false without a warning if the image handle is empty
-	// Texture criterias:
-	//  * Each dimension of width and height should be a power-of-two from 32 to 16384
-	//    width = 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 or 16384
-	//    height = 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 or 16384
-	//    Large enough to be padding-free with 1024-bit memory alignment.
-	//    Small enough to allow expressing the total size in bytes using a signed 32-bit integer
-	//  * If it's a sub-image, it must also consume the whole with of the original image so that width times pixel size equals the stride
-	//    Textures may not contain padding in the rows, but it's okay to use sub-images from a vertical atlas where the whole width is consumed
-	bool image_isTexture(const ImageRgbaU8& image);
+	// Returns image's width in pixels, or 0 from an empty image
+	inline int32_t image_getWidth(const Image& image) { return image.impl_dimensions.getWidth(); }
+	// Returns image's height in pixels, or 0 from an empty image
+	inline int32_t image_getHeight(const Image& image) { return image.impl_dimensions.getHeight(); }
+
+	// Stride is the offset from the beginning of one row to another.
+	//   May be larger than the image's width to align with cache lines or share pixel data with a wider image.
+	// When you add a variable offset to a pointer in C++, the added offset is multiplied by the element size because the address is always stored in bytes.
+	//   Because all pixels have a power of two size, the multiplication can be optimized into a bit shift.
+	//   On ARM, adding whole elements to a pointer is just as fast as adding bytes, by shifting and adding in the same instruction.
+	//   On Intel/AMD, shifting and adding needs two instructions, so then it makes sense to pre-calculate the stride as bytes and cast to uint8_t* when adding.
+
+	// Returns image's stride in whole pixels, or 0 from an empty image
+	//   Used when incrementing indices instead of pointers.
+	inline int32_t image_getPixelStride(const Image& image) { return (intptr_t)image.impl_dimensions.getPixelStride(); }
+	// Returns image's stride in bytes, or 0 from an empty image.
+	inline int32_t image_getStride(const Image&       image) { return (image_getPixelStride(image) << (uintptr_t)image.impl_dimensions.getLog2PixelSize()); }
+	inline int32_t image_getStride(const ImageU8&     image) { return image_getPixelStride(image);      } // pixelStride * sizeof(uint8_t )
+	inline int32_t image_getStride(const ImageU16&    image) { return image_getPixelStride(image) << 1; } // pixelStride * sizeof(uint16_t)
+	inline int32_t image_getStride(const ImageF32&    image) { return image_getPixelStride(image) << 2; } // pixelStride * sizeof(float   )
+	inline int32_t image_getStride(const ImageRgbaU8& image) { return image_getPixelStride(image) << 2; } // pixelStride * sizeof(uint32_t)
+	// Returns image's offset from the allocation start in whole pixels, or 0 from an empty image
+	inline int64_t image_getPixelStartOffset(const Image& image) { return (int64_t)image.impl_dimensions.getPixelStartOffset(); }
+	// Returns image's offset from the allocation start in bytes, or 0 from an empty image
+	inline int64_t image_getStartOffset(const ImageU8&     image) { return image_getPixelStartOffset(image);      } // pixelStartOffset * sizeof(uint8_t )
+	inline int64_t image_getStartOffset(const ImageU16&    image) { return image_getPixelStartOffset(image) << 1; } // pixelStartOffset * sizeof(uint16_t)
+	inline int64_t image_getStartOffset(const ImageF32&    image) { return image_getPixelStartOffset(image) << 2; } // pixelStartOffset * sizeof(float   )
+	inline int64_t image_getStartOffset(const ImageRgbaU8& image) { return image_getPixelStartOffset(image) << 2; } // pixelStartOffset * sizeof(uint32_t)
+
+	// Get a rectangle from the image's dimensions with the top left corner set to (0, 0).
+	//   Useful for clipping to an image's bounds or subdividing space for a graphical user interface.
+	//   Returns IRect(0, 0, 0, 0) for empty images.
+	inline IRect image_getBound(const Image& image) { return IRect(0, 0, image.impl_dimensions.getWidth(), image.impl_dimensions.getHeight()); }
+
+	// Returns false on null, true otherwise.
+	inline bool image_exists(const Image& image) { return image.impl_buffer.isNotNull(); }
+
+	// TODO: Rename into image_getUseCount for easier use.
+	// Returns the number of handles to the image.
+	//   References to a handle don't count, only handles that are stored by value.
+	inline uintptr_t image_useCount(const Image& image) { return image.impl_buffer.getUseCount(); }
+
+	// Returns the image's pack order index.
+	inline PackOrderIndex image_getPackOrderIndex(const ImageRgbaU8& image) { return image.impl_dimensions.getPackOrderIndex(); }
+	// Returns the image's pack order, containing bit masks and offsets needed to pack and unpack colors.
+	//   The pack order is looked up from the pack order index stored in the image.
+	inline PackOrder image_getPackOrder(const ImageRgbaU8& image) { return PackOrder::getPackOrder(image.impl_dimensions.getPackOrderIndex()); }
+
+	// Returns true iff the pixel at (x, y) is inside of image.
+	inline bool image_isPixelInside(const Image& image, int32_t x, int32_t y) {
+		return x >= 0 && x < image_getWidth(image) && y >= 0 && y < image_getHeight(image);
+	}
+
+	// Returns the size of one pixel in bytes dynamically by looking it up.
+	inline int32_t image_getPixelSize(const Image& image) { return image.impl_dimensions.getPixelSize(); }
+	// Returns the size of one pixel in bytes statically from the type.
+	template <typename T> int32_t image_getPixelSize() { return T::impl_pixelSize; }
+
+// Channel packing
+	// Extract one channel
+	AlignedImageU8 image_get_red  (const ImageRgbaU8& image);
+	AlignedImageU8 image_get_green(const ImageRgbaU8& image);
+	AlignedImageU8 image_get_blue (const ImageRgbaU8& image);
+	AlignedImageU8 image_get_alpha(const ImageRgbaU8& image);
+
+	// Pack one channel
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, int32_t blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, int32_t blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, const ImageU8& blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, int32_t blue, const ImageU8& alpha);
+	// Pack two channels
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, int32_t blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, int32_t blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, const ImageU8& blue, const ImageU8& alpha);
+	// Pack three channels
+	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, int32_t alpha);
+	// Pack four channels
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha);
+
+	// Pack a color to draw with using the image's pack order, as it would be represented as a pixel in the buffer.
+	inline uint32_t image_pack(const ImageRgbaU8& image, uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha) {
+		return PackOrder::getPackOrder(image.impl_dimensions.getPackOrderIndex()).packRgba(red, green, blue, alpha);
+	}
+	// Saturate and pack a color for an image's pack order, as it would be represented as a pixel in the buffer.
+	inline uint32_t image_saturateAndPack(const ImageRgbaU8& image, const ColorRgbaI32& color) {
+		return PackOrder::getPackOrder(image.impl_dimensions.getPackOrderIndex()).saturateAndPackRgba(color);
+	}
+	// Truncate and pack a color for an image's pack order, as it would be represented as a pixel in the buffer.
+	inline uint32_t image_truncateAndPack(const ImageRgbaU8& image, const ColorRgbaI32& color) {
+		return PackOrder::getPackOrder(image.impl_dimensions.getPackOrderIndex()).truncateAndPackRgba(color);
+	}
+	// Unpack a color back into an expanded and ordered RGBA format.
+	//  packedColor is expressed in image's pack order.
+	inline ColorRgbaI32 image_unpack(const ImageRgbaU8& image, uint32_t packedColor) {
+		return PackOrder::getPackOrder(image.impl_dimensions.getPackOrderIndex()).unpackRgba(packedColor);
+	}
 
 // Pixel access
+	// Pre-condition:
+	//   The pixel at (x, y) must exist within the image, or else the program may crash.
+	//   image_isPixelInside(image, x, y)
+	// Post-condition:
+	//   Returns a reference to the pixel at (x, y) in image.
+	// Returns a reference to the ImageU8 pixel at (x, y).
+	//   Pre-condition: image_isPixelInside(image, x, y), or else the program may crash.
+	inline uint8_t &image_accessPixel(const ImageU8& image, int32_t x, int32_t y) {
+		// Multiply in 64 bits, because y * pixelStride can exceed the int32_t range for images close to 65536 x 65536 pixels.
+		uintptr_t pixelOffset = image_getPixelStartOffset(image) + (int64_t)y * image_getPixelStride(image) + x;
+		return *(buffer_getSafeData<uint8_t>(image.impl_buffer, "ImageU8 pixel access buffer") + pixelOffset);
+	}
+	// Returns a reference to the ImageU16 pixel at (x, y).
+	//   Pre-condition: image_isPixelInside(image, x, y), or else the program may crash.
+	inline uint16_t &image_accessPixel(const ImageU16& image, int32_t x, int32_t y) {
+		// Multiply in 64 bits, because y * pixelStride can exceed the int32_t range for images close to 65536 x 65536 pixels.
+		uintptr_t pixelOffset = image_getPixelStartOffset(image) + (int64_t)y * image_getPixelStride(image) + x;
+		return *(buffer_getSafeData<uint16_t>(image.impl_buffer, "ImageU16 pixel access buffer") + pixelOffset);
+	}
+	// Returns a reference to the ImageF32 pixel at (x, y).
+	//   Pre-condition: image_isPixelInside(image, x, y), or else the program may crash.
+	inline float &image_accessPixel(const ImageF32& image, int32_t x, int32_t y) {
+		// Multiply in 64 bits, because y * pixelStride can exceed the int32_t range for images close to 65536 x 65536 pixels.
+		uintptr_t pixelOffset = image_getPixelStartOffset(image) + (int64_t)y * image_getPixelStride(image) + x;
+		return *(buffer_getSafeData<float>(image.impl_buffer, "ImageF32 pixel access buffer") + pixelOffset);
+	}
+	// Returns a reference to the packed ImageRgbaU8 pixel at (x, y), stored in the image's own pack order.
+	//   Pre-condition: image_isPixelInside(image, x, y), or else the program may crash.
+	inline uint32_t &image_accessPixel(const ImageRgbaU8& image, int32_t x, int32_t y) {
+		// Multiply in 64 bits, because y * pixelStride can exceed the int32_t range for images close to 65536 x 65536 pixels.
+		uintptr_t pixelOffset = image_getPixelStartOffset(image) + (int64_t)y * image_getPixelStride(image) + x;
+		return *(buffer_getSafeData<uint32_t>(image.impl_buffer, "ImageRgbaU8 pixel access buffer") + pixelOffset);
+	}
+
 	// Write a pixel to an image.
 	//   Out of bound is ignored silently without writing.
 	//   Empty images will be ignored safely.
 	//   Packed is faster if the color can be packed in advance for multiple pixels or comes directly from an image of the same rgba order.
-	void image_writePixel(ImageU8& image, int32_t x, int32_t y, int32_t color); // Saturated to 0..255
-	void image_writePixel(ImageU16& image, int32_t x, int32_t y, int32_t color); // Saturated to 0..65535
-	void image_writePixel(ImageF32& image, int32_t x, int32_t y, float color);
-	void image_writePixel(ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& color); // Saturated to 0..255
-	// Read a pixel from an image.
+	// Saturated to 0..255
+	inline void image_writePixel(const ImageU8& image, int32_t x, int32_t y, int32_t color) {
+		if (image_isPixelInside(image, x, y)) image_accessPixel(image, x, y) = clamp(0, color, 255);
+	}
+	// Saturated to 0..65535
+	inline void image_writePixel(const ImageU16& image, int32_t x, int32_t y, int32_t color) {
+		if (image_isPixelInside(image, x, y)) image_accessPixel(image, x, y) = clamp(0, color, 65535);
+	}
+	// No saturation needed
+	inline void image_writePixel(const ImageF32& image, int32_t x, int32_t y, float color) {
+		if (image_isPixelInside(image, x, y)) image_accessPixel(image, x, y) = color;
+	}
+	// Saturated to 0..255 in all channels
+	inline void image_writePixel(const ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& color) {
+		if (image_isPixelInside(image, x, y)) image_accessPixel(image, x, y) = image_saturateAndPack(image, color);
+	}
+	// Pre-packed color using image_saturateAndPack to create the pixel in advance.
+	inline void image_writePixel(const ImageRgbaU8& image, int32_t x, int32_t y, uint32_t packedColor) {
+		if (image_isPixelInside(image, x, y)) image_accessPixel(image, x, y) = packedColor;
+	}
+	// Read a pixel from an image with a solid border outside.
 	//   Out of bound will return the border color.
+	//   The border color does not have to be constrained to the limits of pixel storage.
 	//   Empty images will return zero.
-	int32_t image_readPixel_border(const ImageU8& image, int32_t x, int32_t y, int32_t border = 0); // Can have negative value as border
-	int32_t image_readPixel_border(const ImageU16& image, int32_t x, int32_t y, int32_t border = 0); // Can have negative value as border
-	float image_readPixel_border(const ImageF32& image, int32_t x, int32_t y, float border = 0.0f);
-	ColorRgbaI32 image_readPixel_border(const ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& border = ColorRgbaI32()); // Can have negative value as border
-	// Read a pixel from an image.
+	inline int32_t image_readPixel_border(const ImageU8& image, int32_t x, int32_t y, int32_t border = 0) {
+		if (!image_exists(image)) {
+			return 0;
+		} else if (image_isPixelInside(image, x, y)) {
+			return image_accessPixel(image, x, y);
+		} else {
+			return border;
+		}
+	}
+	// The packed version is identical to the unpacked version, so we make a wrapper for template functions to call.
+	//inline int32_t image_readPixel_border_packed(const ImageU8& image, int32_t x, int32_t y, int32_t border = 0) { return image_readPixel_border(image, x, y, border); }
+	inline int32_t image_readPixel_border(const ImageU16& image, int32_t x, int32_t y, int32_t border = 0) {
+		if (!image_exists(image)) {
+			return 0;
+		} else if (image_isPixelInside(image, x, y)) {
+			return image_accessPixel(image, x, y);
+		} else {
+			return border;
+		}
+	}
+	//inline int32_t image_readPixel_border_packed(const ImageU16& image, int32_t x, int32_t y, int32_t border = 0) { return image_readPixel_border(image, x, y, border); }
+	inline float image_readPixel_border(const ImageF32& image, int32_t x, int32_t y, float border = 0.0f) {
+		if (!image_exists(image)) {
+			return 0.0f;
+		} else if (image_isPixelInside(image, x, y)) {
+			return image_accessPixel(image, x, y);
+		} else {
+			return border;
+		}
+	}
+	//inline float image_readPixel_border_packed(const ImageF32& image, int32_t x, int32_t y, int32_t border = 0) { return image_readPixel_border(image, x, y, border); }
+	inline ColorRgbaI32 image_readPixel_border(const ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& border = ColorRgbaI32()) {
+		if (!image_exists(image)) {
+			return ColorRgbaI32(0, 0, 0, 0);
+		} else if (image_isPixelInside(image, x, y)) {
+			return image_unpack(image, image_accessPixel(image, x, y));
+		} else {
+			return border;
+		}
+	}
+	// Read the color directly as it is packed in image's pack order.
+	inline uint32_t image_readPixel_border_packed(const ImageRgbaU8& image, int32_t x, int32_t y, uint32_t border = 0) {
+		if (!image_exists(image)) {
+			return 0;
+		} else if (image_isPixelInside(image, x, y)) {
+			return image_accessPixel(image, x, y);
+		} else {
+			return border;
+		}
+	}
+	// Read a pixel from an image with stretched edges.
 	//   Out of bound will return the closest pixel.
 	//   Empty images will return zero.
-	uint8_t image_readPixel_clamp(const ImageU8& image, int32_t x, int32_t y);
-	uint16_t image_readPixel_clamp(const ImageU16& image, int32_t x, int32_t y);
-	float image_readPixel_clamp(const ImageF32& image, int32_t x, int32_t y);
-	ColorRgbaI32 image_readPixel_clamp(const ImageRgbaU8& image, int32_t x, int32_t y);
-	// Read a pixel from an image.
+	inline uint8_t image_readPixel_clamp(const ImageU8& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_accessPixel(image, clamp(0, x, image_getWidth(image) - 1), clamp(0, y, image_getHeight(image) - 1));
+		} else {
+			return 0;
+		}
+	}
+	//inline uint8_t image_readPixel_clamp_packed(const ImageU8& image, int32_t x, int32_t y) { return image_readPixel_clamp(image, x, y); }
+	inline uint16_t image_readPixel_clamp(const ImageU16& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_accessPixel(image, clamp(0, x, image_getWidth(image) - 1), clamp(0, y, image_getHeight(image) - 1));
+		} else {
+			return 0;
+		}
+	}
+	//inline uint16_t image_readPixel_clamp_packed(const ImageU16& image, int32_t x, int32_t y) { return image_readPixel_clamp(image, x, y); }
+	inline float image_readPixel_clamp(const ImageF32& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_accessPixel(image, clamp(0, x, image_getWidth(image) - 1), clamp(0, y, image_getHeight(image) - 1));
+		} else {
+			return 0.0f;
+		}
+	}
+	//inline float image_readPixel_clamp_packed(const ImageF32& image, int32_t x, int32_t y) { return image_readPixel_clamp(image, x, y); }
+	inline ColorRgbaI32 image_readPixel_clamp(const ImageRgbaU8& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_unpack(image, image_accessPixel(image, clamp(0, x, image_getWidth(image) - 1), clamp(0, y, image_getHeight(image) - 1)));
+		} else {
+			return ColorRgbaI32();
+		}
+	}
+	// Read the color directly as it is packed in image's pack order.
+	inline uint32_t image_readPixel_clamp_packed(const ImageRgbaU8& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_accessPixel(image, clamp(0, x, image_getWidth(image) - 1), clamp(0, y, image_getHeight(image) - 1));
+		} else {
+			return 0;
+		}
+	}
+
+	// Read a pixel from an image with tiling.
 	//   Out of bound will take the coordinates in modulo of the size.
 	//   Empty images will return zero.
-	uint8_t image_readPixel_tile(const ImageU8& image, int32_t x, int32_t y);
-	uint16_t image_readPixel_tile(const ImageU16& image, int32_t x, int32_t y);
-	float image_readPixel_tile(const ImageF32& image, int32_t x, int32_t y);
-	ColorRgbaI32 image_readPixel_tile(const ImageRgbaU8& image, int32_t x, int32_t y);
+	inline uint8_t image_readPixel_tile(const ImageU8& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_accessPixel(image, signedModulo(x, image_getWidth(image)), signedModulo(y, image_getHeight(image)));
+		} else {
+			return 0;
+		}
+	}
+	//inline uint8_t image_readPixel_tile_packed(const ImageU8& image, int32_t x, int32_t y) { return image_readPixel_tile(image, x, y); }
+	inline uint16_t image_readPixel_tile(const ImageU16& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_accessPixel(image, signedModulo(x, image_getWidth(image)), signedModulo(y, image_getHeight(image)));
+		} else {
+			return 0;
+		}
+	}
+	//inline uint16_t image_readPixel_tile_packed(const ImageU16& image, int32_t x, int32_t y) { return image_readPixel_tile(image, x, y); }
+	inline float image_readPixel_tile(const ImageF32& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_accessPixel(image, signedModulo(x, image_getWidth(image)), signedModulo(y, image_getHeight(image)));
+		} else {
+			return 0.0f;
+		}
+	}
+	//inline float image_readPixel_tile_packed(const ImageF32& image, int32_t x, int32_t y) { return image_readPixel_tile(image, x, y); }
+	inline ColorRgbaI32 image_readPixel_tile(const ImageRgbaU8& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_unpack(image, image_accessPixel(image, signedModulo(x, image_getWidth(image)), signedModulo(y, image_getHeight(image))));
+		} else {
+			return ColorRgbaI32();
+		}
+	}
+	// Read the color directly as it is packed in image's pack order.
+	inline uint32_t image_readPixel_tile_packed(const ImageRgbaU8& image, int32_t x, int32_t y) {
+		if (image_exists(image)) {
+			return image_accessPixel(image, signedModulo(x, image_getWidth(image)), signedModulo(y, image_getHeight(image)));
+		} else {
+			return 0;
+		}
+	}
 
 // Loading
 	// Load an image from a file by giving the filename including folder path and extension.
@@ -155,7 +349,7 @@ namespace dsr {
 	// A faster and more flexible way to load compressed images from memory.
 	// If you just want to point directly to a memory location to avoid allocating many small buffers, you can use a safe pointer and a size in bytes.
 	// Failure will return an empty handle.
-	OrderedImageRgbaU8 image_decode_RgbaU8(const SafePointer<uint8_t> data, int size);
+	OrderedImageRgbaU8 image_decode_RgbaU8(SafePointer<const uint8_t> data, int size);
 
 // Saving
 	// Save the image to the path specified by filename and return true iff the operation was successful.
@@ -176,10 +370,10 @@ namespace dsr {
 	Buffer image_encode(const ImageRgbaU8 &image, ImageFileFormat format, int quality = 90);
 
 // Fill all pixels with a uniform color
-	void image_fill(ImageU8& image, int32_t color);
-	void image_fill(ImageU16& image, int32_t color);
-	void image_fill(ImageF32& image, float color);
-	void image_fill(ImageRgbaU8& image, const ColorRgbaI32& color);
+	void image_fill(const ImageU8& image, int32_t color);
+	void image_fill(const ImageU16& image, int32_t color);
+	void image_fill(const ImageF32& image, float color);
+	void image_fill(const ImageRgbaU8& image, const ColorRgbaI32& color);
 
 // Clone
 	// Get a deep clone of an image's content while discarding any pack order, padding and texture pyramids.
@@ -193,32 +387,6 @@ namespace dsr {
 	// Used when external image libraries don't allow giving stride as a separate argument.
 	ImageRgbaU8 image_removePadding(const ImageRgbaU8& image);
 
-// Channel packing
-	// Extract one channel
-	AlignedImageU8 image_get_red(const ImageRgbaU8& image);
-	AlignedImageU8 image_get_green(const ImageRgbaU8& image);
-	AlignedImageU8 image_get_blue(const ImageRgbaU8& image);
-	AlignedImageU8 image_get_alpha(const ImageRgbaU8& image);
-	// Pack one channel
-	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, int32_t blue, int32_t alpha);
-	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, int32_t blue, int32_t alpha);
-	OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, const ImageU8& blue, int32_t alpha);
-	OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, int32_t blue, const ImageU8& alpha);
-	// Pack two channels
-	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, int32_t alpha);
-	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, int32_t alpha);
-	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, int32_t blue, const ImageU8& alpha);
-	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, int32_t alpha);
-	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, int32_t blue, const ImageU8& alpha);
-	OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, const ImageU8& blue, const ImageU8& alpha);
-	// Pack three channels
-	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha);
-	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, const ImageU8& alpha);
-	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, const ImageU8& alpha);
-	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, int32_t alpha);
-	// Pack four channels
-	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha);
-
 // Ascii images
 	String image_toAscii(const ImageU8& image, const String &alphabet);
 	String image_toAscii(const ImageU8& image);
@@ -227,49 +395,115 @@ namespace dsr {
 // Comparisons
 	// Get the maximum pixelwise difference between two images of the same format, or the highest possible value on failure
 	//   Useful for regression tests
-	uint8_t image_maxDifference(const ImageU8& imageA, const ImageU8& imageB);
-	uint16_t image_maxDifference(const ImageU16& imageA, const ImageU16& imageB);
-	float image_maxDifference(const ImageF32& imageA, const ImageF32& imageB);
-	uint8_t image_maxDifference(const ImageRgbaU8& imageA, const ImageRgbaU8& imageB);
+	uint8_t  image_maxDifference(const ImageU8&     imageA, const ImageU8&     imageB);
+	uint16_t image_maxDifference(const ImageU16&    imageA, const ImageU16&    imageB);
+	float    image_maxDifference(const ImageF32&    imageA, const ImageF32&    imageB);
+	uint8_t  image_maxDifference(const ImageRgbaU8& imageA, const ImageRgbaU8& imageB);
 
-// Sub-images are viewports to another image's data
+// TODO: Create sub-image constructors in the image types.
+
+// Sub-images are read/write views to a smaller region of the same pixel data.
 	// Get a sub-image sharing buffer and side-effects with the parent image
 	// Returns the overlapping region if out of bound
 	// Returns a null image if there are no overlapping pixels to return
-	ImageU8 image_getSubImage(const ImageU8& image, const IRect& region);
-	ImageU16 image_getSubImage(const ImageU16& image, const IRect& region);
-	ImageF32 image_getSubImage(const ImageF32& image, const IRect& region);
-	ImageRgbaU8 image_getSubImage(const ImageRgbaU8& image, const IRect& region);
+	inline ImageU8 image_getSubImage(const ImageU8& image, const IRect& region) {
+		static_assert(sizeof(ImageU8) == sizeof(Image)); // Fails the build if ImageU8 and Image differ in size.
+		return ImageU8(image, region); // Constructs the result from the parent image and the requested region.
+	}
+	inline ImageU16 image_getSubImage(const ImageU16& image, const IRect& region) {
+		static_assert(sizeof(ImageU16) == sizeof(Image)); // Fails the build if ImageU16 and Image differ in size.
+		return ImageU16(image, region); // Constructs the result from the parent image and the requested region.
+	}
+	inline ImageF32 image_getSubImage(const ImageF32& image, const IRect& region) {
+		static_assert(sizeof(ImageF32) == sizeof(Image)); // Fails the build if ImageF32 and Image differ in size.
+		return ImageF32(image, region); // Constructs the result from the parent image and the requested region.
+	}
+	inline ImageRgbaU8 image_getSubImage(const ImageRgbaU8& image, const IRect& region) {
+		static_assert(sizeof(ImageRgbaU8) == sizeof(Image)); // Fails the build if ImageRgbaU8 and Image differ in size.
+		return ImageRgbaU8(image, region); // Constructs the result from the parent image and the requested region.
+	}
+	// Check dynamically if the image was created as a sub-image.
+	// Returns true if the image is a sub-image, created using image_getSubImage.
+	// Returns false if the image is not a sub-image, created using image_create or default constructed as an empty image.
+	inline bool image_isSubImage(const Image& image) {
+		return image.impl_dimensions.isSubImage();
+	}
 
 // Bound-checked pointer access (relatively safe compared to a raw pointer)
-	// Returns a bound-checked pointer to the first byte at rowIndex
-	// Bound-checked safe-pointers are equally fast as raw pointers in release mode
-	// Warning! Bound-checked pointers are not reference counted, because that would be too slow for real-time graphics
-	SafePointer<uint8_t> image_getSafePointer(const ImageU8& image, int rowIndex = 0);
-	SafePointer<uint16_t> image_getSafePointer(const ImageU16& image, int rowIndex = 0);
-	SafePointer<float> image_getSafePointer(const ImageF32& image, int rowIndex = 0);
-	SafePointer<uint32_t> image_getSafePointer(const ImageRgbaU8& image, int rowIndex = 0);
-	// Get a pointer iterating over individual channels instead of whole pixels
-	SafePointer<uint8_t> image_getSafePointer_channels(const ImageRgbaU8& image, int rowIndex = 0);
+	// Returns a bound-checked pointer to the first pixel, with T (uint8_t by default) as the element type.
+	template <typename T = uint8_t>
+	inline SafePointer<T> image_getSafePointer(const ImageU8& image) {
+		return image.impl_buffer.getSafe<T>("Pointer to ImageU8 pixels").increaseBytes(image_getStartOffset(image));
+	}
+	// Returns a bound-checked pointer to the first pixel at rowIndex, with T (uint8_t by default) as the element type.
+	template <typename T = uint8_t>
+	inline SafePointer<T> image_getSafePointer(const ImageU8& image, int32_t rowIndex) {
+		return image_getSafePointer<T>(image).increaseBytes(image_getStride(image) * rowIndex); // Forward T and step rowIndex strides in bytes.
+	}
+	// Returns a bound-checked pointer to the first pixel, with T (uint16_t by default) as the element type.
+	template <typename T = uint16_t>
+	inline SafePointer<T> image_getSafePointer(const ImageU16& image) {
+		return image.impl_buffer.getSafe<T>("Pointer to ImageU16 pixels").increaseBytes(image_getStartOffset(image)); // Advance by the image's start offset in bytes.
+	}
+	// Returns a bound-checked pointer to the first pixel at rowIndex, with T (uint16_t by default) as the element type.
+	template <typename T = uint16_t>
+	inline SafePointer<T> image_getSafePointer(const ImageU16& image, int32_t rowIndex) {
+		return image_getSafePointer<T>(image).increaseBytes(image_getStride(image) * rowIndex); // Step rowIndex strides in bytes from the first pixel.
+	}
+	// Returns a bound-checked pointer to the first pixel, with T (float by default) as the element type.
+	template <typename T = float>
+	inline SafePointer<T> image_getSafePointer(const ImageF32& image) {
+		return image.impl_buffer.getSafe<T>("Pointer to ImageF32 pixels").increaseBytes(image_getStartOffset(image)); // Advance by the image's start offset in bytes.
+	}
+	// Returns a bound-checked pointer to the first pixel at rowIndex, with T (float by default) as the element type.
+	template <typename T = float>
+	inline SafePointer<T> image_getSafePointer(const ImageF32& image, int32_t rowIndex) {
+		return image_getSafePointer<T>(image).increaseBytes(image_getStride(image) * rowIndex); // Step rowIndex strides in bytes from the first pixel.
+	}
+	// Returns a bound-checked pointer to the first pixel, with T (uint32_t by default) as the element type.
+	template <typename T = uint32_t>
+	inline SafePointer<T> image_getSafePointer(const ImageRgbaU8& image) {
+		return image.impl_buffer.getSafe<T>("Pointer to ImageRgbaU8 pixels").increaseBytes(image_getStartOffset(image)); // Advance by the image's start offset in bytes.
+	}
+	// Returns a bound-checked pointer to the first pixel at rowIndex, with T (uint32_t by default) as the element type.
+	template <typename T = uint32_t>
+	inline SafePointer<T> image_getSafePointer(const ImageRgbaU8& image, int32_t rowIndex) {
+		return image_getSafePointer<T>(image).increaseBytes(image_getStride(image) * rowIndex); // Step rowIndex strides in bytes from the first pixel.
+	}
+	// Returns a bound-checked pointer to the first channel in the first pixel.
+	inline SafePointer<uint8_t> image_getSafePointer_channels(const ImageRgbaU8& image) {
+		return image.impl_buffer.getSafe<uint8_t>("Pointer to ImageRgbaU8 channels").increaseBytes(image_getStartOffset(image));
+	}
+	// Returns a bound-checked pointer to the first channel of the first pixel in the row at rowIndex.
+	inline SafePointer<uint8_t> image_getSafePointer_channels(const ImageRgbaU8& image, int32_t rowIndex) {
+		return image_getSafePointer_channels(image).increaseBytes(image_getStride(image) * rowIndex); // Start from the first pixel's channels and step rowIndex strides in bytes.
+	}
 
 // The dangerous image API
 // Use of these methods can be spotted using a search for "_dangerous_" in your code
-	// Replaces the destructor in image's buffer.
-	//   newDestructor is responsible for freeing the given data.
-	//   Use when the buffer's pointer is being sent to a function that promises to free the memory
-	//   For example: Creating buffers being wrapped as XLib images
-	void image_dangerous_replaceDestructor(ImageU8& image, const std::function<void(uint8_t *)>& newDestructor);
-	void image_dangerous_replaceDestructor(ImageU16& image, const std::function<void(uint8_t *)>& newDestructor);
-	void image_dangerous_replaceDestructor(ImageF32& image, const std::function<void(uint8_t *)>& newDestructor);
-	void image_dangerous_replaceDestructor(ImageRgbaU8& image, const std::function<void(uint8_t *)>& newDestructor);
+	// Replaces the destructor in image's buffer.
+	//   newDestructor should not free the given data, only invoke destruction of any external resources that may depend on it before the data is freed automatically.
+	inline void image_dangerous_replaceDestructor(ImageU8& image, const HeapDestructor &newDestructor) {
+		if (image_exists(image)) { buffer_replaceDestructor(image.impl_buffer, newDestructor); } // Nothing is replaced for an image that does not exist.
+	}
+	inline void image_dangerous_replaceDestructor(ImageU16& image, const HeapDestructor &newDestructor) {
+		if (image_exists(image)) { buffer_replaceDestructor(image.impl_buffer, newDestructor); } // Nothing is replaced for an image that does not exist.
+	}
+	inline void image_dangerous_replaceDestructor(ImageF32& image, const HeapDestructor &newDestructor) {
+		if (image_exists(image)) { buffer_replaceDestructor(image.impl_buffer, newDestructor); } // Nothing is replaced for an image that does not exist.
+	}
+	inline void image_dangerous_replaceDestructor(ImageRgbaU8& image, const HeapDestructor &newDestructor) {
+		if (image_exists(image)) { buffer_replaceDestructor(image.impl_buffer, newDestructor); } // Nothing is replaced for an image that does not exist.
+	}
+
 	// Returns a pointer to the image's pixels
 	// Warning! Reading elements larger than 8 bits will have lower and higher bytes stored based on local endianness
 	// Warning! Using bytes outside of the [0 .. stride * height - 1] range may cause crashes and undefined behaviour
 	// Warning! Using the pointer after the image's lifetime may cause crashes from trying to access freed memory
-	uint8_t* image_dangerous_getData(const ImageU8& image);
-	uint8_t* image_dangerous_getData(const ImageU16& image);
-	uint8_t* image_dangerous_getData(const ImageF32& image);
-	uint8_t* image_dangerous_getData(const ImageRgbaU8& image);
+	inline uint8_t* image_dangerous_getData(const ImageU8&     image) { return image.impl_buffer.getUnsafe() + image_getStartOffset(image); }
+	inline uint8_t* image_dangerous_getData(const ImageU16&    image) { return image.impl_buffer.getUnsafe() + image_getStartOffset(image); }
+	inline uint8_t* image_dangerous_getData(const ImageF32&    image) { return image.impl_buffer.getUnsafe() + image_getStartOffset(image); }
+	inline uint8_t* image_dangerous_getData(const ImageRgbaU8& image) { return image.impl_buffer.getUnsafe() + image_getStartOffset(image); }
 }
 
 #endif

+ 14 - 12
Source/DFPSR/api/mediaMachineAPI.cpp

@@ -21,12 +21,14 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-#define DFPSR_INTERNAL_ACCESS
+#define DSR_INTERNAL_ACCESS
 
 #include "mediaMachineAPI.h"
 #include "../machine/VirtualMachine.h"
 #include "../machine/mediaFilters.h"
 #include "../api/imageAPI.h"
+#include "../api/drawAPI.h"
+#include "../api/filterAPI.h"
 
 namespace dsr {
 
@@ -1042,7 +1044,7 @@ static const InsSig mediaMachineInstructions[] = {
 // API implementation
 
 static void checkMachine(const MediaMachine& machine) {
-	if (machine.get() == nullptr) {
+	if (machine.isNull()) {
 		throwError("The given media machine does not exist!");
 	}
 }
@@ -1055,10 +1057,10 @@ static void checkMethodIndex(const MediaMachine& machine, int methodIndex) {
 }
 
 MediaMachine machine_create(const ReadableString& code) {
-	std::shared_ptr<PlanarMemory> memory = std::make_shared<MediaMemory>();
+	Handle<PlanarMemory> memory = handle_create<MediaMemory>().setName("MediaMemory");
 	static const int mediaMachineInstructionCount = sizeof(mediaMachineInstructions) / sizeof(InsSig);
 	static const int mediaMachineTypeCount = sizeof(mediaMachineTypes) / sizeof(VMTypeDef);
-	return MediaMachine(std::make_shared<VirtualMachine>(code, memory, mediaMachineInstructions, mediaMachineInstructionCount, mediaMachineTypes, mediaMachineTypeCount));
+	return MediaMachine(handle_create<VirtualMachine>(code, memory, mediaMachineInstructions, mediaMachineInstructionCount, mediaMachineTypes, mediaMachineTypeCount).setName("MediaMachine"));
 }
 
 void machine_executeMethod(MediaMachine& machine, int methodIndex) {
@@ -1098,46 +1100,46 @@ void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIn
 		printText("Input ", inputIndex, " of ", machine->methods[methodIndex].inputCount, " (", machine->methods[methodIndex].locals[inputIndex].name, ") to ", machine->methods[methodIndex].name, " = ", input, "\n");
 	#endif
 	checkMethodIndex(machine, methodIndex);
-	setInputByIndex(((MediaMemory*)machine->memory.get())->FixedPointMemory, machine->memory->current.framePointer[DataType_FixedPoint], machine->methods[methodIndex], DataType_FixedPoint, inputIndex, FixedPoint::fromWhole(input));
+	setInputByIndex(((MediaMemory*)machine->memory.getUnsafe())->FixedPointMemory, machine->memory->current.framePointer[DataType_FixedPoint], machine->methods[methodIndex], DataType_FixedPoint, inputIndex, FixedPoint::fromWhole(input));
 }
 void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, const FixedPoint& input) {
 	checkMethodIndex(machine, methodIndex); // Run the check before the debug print, which already indexes machine->methods[methodIndex].
 	#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
 		printText("Input ", inputIndex, " of ", machine->methods[methodIndex].inputCount, " (", machine->methods[methodIndex].locals[inputIndex].name, ") to ", machine->methods[methodIndex].name, " = ", input, "\n");
 	#endif
-	setInputByIndex(((MediaMemory*)machine->memory.get())->FixedPointMemory, machine->memory->current.framePointer[DataType_FixedPoint], machine->methods[methodIndex], DataType_FixedPoint, inputIndex, input);
+	setInputByIndex(((MediaMemory*)machine->memory.getUnsafe())->FixedPointMemory, machine->memory->current.framePointer[DataType_FixedPoint], machine->methods[methodIndex], DataType_FixedPoint, inputIndex, input);
 }
 void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, const AlignedImageU8& input) {
 	checkMethodIndex(machine, methodIndex); // Run the check before the debug print, which already indexes machine->methods[methodIndex].
 	#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
 		printText("Input ", inputIndex, " of ", machine->methods[methodIndex].inputCount, " (", machine->methods[methodIndex].locals[inputIndex].name, ") to ", machine->methods[methodIndex].name, " = monochrome image of ", image_getWidth(input), "x", image_getHeight(input), " pixels\n");
 	#endif
-	setInputByIndex(((MediaMemory*)machine->memory.get())->AlignedImageU8Memory, machine->memory->current.framePointer[DataType_ImageU8], machine->methods[methodIndex], DataType_ImageU8, inputIndex, input);
+	setInputByIndex(((MediaMemory*)machine->memory.getUnsafe())->AlignedImageU8Memory, machine->memory->current.framePointer[DataType_ImageU8], machine->methods[methodIndex], DataType_ImageU8, inputIndex, input);
 }
 void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, const OrderedImageRgbaU8& input) {
 	checkMethodIndex(machine, methodIndex); // Run the check before the debug print, which already indexes machine->methods[methodIndex].
 	#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
 		printText("Input ", inputIndex, " of ", machine->methods[methodIndex].inputCount, " (", machine->methods[methodIndex].locals[inputIndex].name, ") to ", machine->methods[methodIndex].name, " = rgba image of ", image_getWidth(input), "x", image_getHeight(input), " pixels\n");
 	#endif
-	setInputByIndex(((MediaMemory*)machine->memory.get())->OrderedImageRgbaU8Memory, machine->memory->current.framePointer[DataType_ImageRgbaU8], machine->methods[methodIndex], DataType_ImageRgbaU8, inputIndex, input);
+	setInputByIndex(((MediaMemory*)machine->memory.getUnsafe())->OrderedImageRgbaU8Memory, machine->memory->current.framePointer[DataType_ImageRgbaU8], machine->methods[methodIndex], DataType_ImageRgbaU8, inputIndex, input);
 }
 
 // Get output by index
 FixedPoint machine_getFixedPointOutputByIndex(const MediaMachine& machine, int methodIndex, int outputIndex) {
 	checkMethodIndex(machine, methodIndex);
-	return accessOutputByIndex<FixedPoint>(((MediaMemory*)machine->memory.get())->FixedPointMemory, machine->memory->current.framePointer[DataType_FixedPoint], machine->methods[methodIndex], DataType_FixedPoint, outputIndex);
+	return accessOutputByIndex<FixedPoint>(((MediaMemory*)machine->memory.getUnsafe())->FixedPointMemory, machine->memory->current.framePointer[DataType_FixedPoint], machine->methods[methodIndex], DataType_FixedPoint, outputIndex);
 }
 AlignedImageU8 machine_getImageU8OutputByIndex(const MediaMachine& machine, int methodIndex, int outputIndex) {
 	checkMethodIndex(machine, methodIndex);
-	return accessOutputByIndex<AlignedImageU8>(((MediaMemory*)machine->memory.get())->AlignedImageU8Memory, machine->memory->current.framePointer[DataType_ImageU8], machine->methods[methodIndex], DataType_ImageU8, outputIndex);
+	return accessOutputByIndex<AlignedImageU8>(((MediaMemory*)machine->memory.getUnsafe())->AlignedImageU8Memory, machine->memory->current.framePointer[DataType_ImageU8], machine->methods[methodIndex], DataType_ImageU8, outputIndex);
 }
 OrderedImageRgbaU8 machine_getImageRgbaU8OutputByIndex(const MediaMachine& machine, int methodIndex, int outputIndex) {
 	checkMethodIndex(machine, methodIndex);
-	return accessOutputByIndex<OrderedImageRgbaU8>(((MediaMemory*)machine->memory.get())->OrderedImageRgbaU8Memory, machine->memory->current.framePointer[DataType_ImageRgbaU8], machine->methods[methodIndex], DataType_ImageRgbaU8, outputIndex);
+	return accessOutputByIndex<OrderedImageRgbaU8>(((MediaMemory*)machine->memory.getUnsafe())->OrderedImageRgbaU8Memory, machine->memory->current.framePointer[DataType_ImageRgbaU8], machine->methods[methodIndex], DataType_ImageRgbaU8, outputIndex);
 }
 
 bool machine_exists(const MediaMachine& machine) {
-	return machine.get() != nullptr;
+	return machine.isNotNull();
 }
 
 int machine_findMethod(const MediaMachine& machine, const ReadableString& methodName) {

+ 10 - 1
Source/DFPSR/api/mediaMachineAPI.h

@@ -24,11 +24,20 @@
 #ifndef DFPSR_API_MEDIA_MACHINE
 #define DFPSR_API_MEDIA_MACHINE
 
+#include "../image/Image.h"
+#include "../base/Handle.h"
 #include "../math/FixedPoint.h"
-#include "../api/types.h"
 
 namespace dsr {
 
+// A handle to a media machine.
+//   Media machines can be used to generate, filter and analyze images.
+//   Everything running in a media machine is guaranteed to be 100% deterministic to the last bit.
+//     This reduces the amount of code where maintenance has to be performed during porting.
+//     It also means that any use of float or double is forbidden.
+struct VirtualMachine;
+using MediaMachine = Handle<VirtualMachine>;
+
 // TODO: Complete VirtualMachine with conditional jumps and document the language dialect used by MediaMachine.
 // Side-effect: Creates a media machine from Media Machine Code (*.mmc file).
 // Post-condition: Returns a reference counted MediaMachine handle to the virtual machine. 

+ 29 - 35
Source/DFPSR/api/modelAPI.cpp

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2019 David Forsgren Piuva
+// Copyright (c) 2019 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -21,7 +21,7 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-#define DFPSR_INTERNAL_ACCESS
+#define DSR_INTERNAL_ACCESS
 
 #include "modelAPI.h"
 #include "imageAPI.h"
@@ -30,17 +30,17 @@
 #include <limits>
 #include "../base/virtualStack.h"
 
-#define MUST_EXIST(OBJECT, METHOD) if (OBJECT.get() == nullptr) { throwError("The " #OBJECT " handle was null in " #METHOD "\n"); }
+#define MUST_EXIST(OBJECT, METHOD) if (OBJECT.isNull()) { throwError("The " #OBJECT " handle was null in " #METHOD "\n"); }
 
 namespace dsr {
 
 Model model_create() {
-	return std::make_shared<ModelImpl>();
+	return handle_create<ModelImpl>().setName("Model");
 }
 
 Model model_clone(const Model& model) {
 	MUST_EXIST(model,model_clone);
-	return std::make_shared<ModelImpl>(model->filter, model->partBuffer, model->positionBuffer);
+	return handle_create<ModelImpl>(model->filter, model->partBuffer, model->positionBuffer).setName("Cloned Model");
 }
 
 void model_setFilter(const Model& model, Filter filter) {
@@ -54,7 +54,7 @@ Filter model_getFilter(const Model& model) {
 }
 
 bool model_exists(const Model& model) {
-	return model.get() != nullptr;
+	return model.isNotNull();
 }
 
 int model_addEmptyPart(Model& model, const String &name) {
@@ -162,12 +162,12 @@ int model_getPolygonVertexCount(const Model& model, int partIndex, int polygonIn
 	return model->getPolygonVertexCount(partIndex, polygonIndex);
 }
 
-ImageRgbaU8 model_getDiffuseMap(const Model& model, int partIndex) {
+TextureRgbaU8 model_getDiffuseMap(const Model& model, int partIndex) {
 	MUST_EXIST(model,model_getDiffuseMap);
 	return model->getDiffuseMap(partIndex);
 }
 
-void model_setDiffuseMap(Model& model, int partIndex, const ImageRgbaU8 &diffuseMap) {
+void model_setDiffuseMap(Model& model, int partIndex, const TextureRgbaU8 &diffuseMap) {
 	MUST_EXIST(model,model_setDiffuseMap);
 	model->setDiffuseMap(diffuseMap, partIndex);
 }
@@ -177,12 +177,12 @@ void model_setDiffuseMapByName(Model& model, int partIndex, ResourcePool &pool,
 	model->setDiffuseMapByName(pool, filename, partIndex);
 }
 
-ImageRgbaU8 model_getLightMap(Model& model, int partIndex) {
+TextureRgbaU8 model_getLightMap(Model& model, int partIndex) {
 	MUST_EXIST(model,model_getLightMap);
 	return model->getLightMap(partIndex);
 }
 
-void model_setLightMap(Model& model, int partIndex, const ImageRgbaU8 &lightMap) {
+void model_setLightMap(Model& model, int partIndex, const TextureRgbaU8 &lightMap) {
 	MUST_EXIST(model,model_setLightMap);
 	model->setLightMap(lightMap, partIndex);
 }
@@ -194,13 +194,13 @@ void model_setLightMapByName(Model& model, int partIndex, ResourcePool &pool, co
 
 // Single-threaded rendering for the simple cases where you just want it to work
 void model_render(const Model& model, const Transform3D &modelToWorldTransform, ImageRgbaU8& colorBuffer, ImageF32& depthBuffer, const Camera &camera) {
-	if (model.get() != nullptr) {
-		model->render((CommandQueue*)nullptr, colorBuffer, depthBuffer, modelToWorldTransform, camera);
+	if (model.isNotNull()) {
+		model->render((CommandQueue*)nullptr, image_exists(colorBuffer) ? &colorBuffer : nullptr, image_exists(depthBuffer) ? &depthBuffer : nullptr, modelToWorldTransform, camera);
 	}
 }
 void model_renderDepth(const Model& model, const Transform3D &modelToWorldTransform, ImageF32& depthBuffer, const Camera &camera) {
-	if (model.get() != nullptr) {
-		model->renderDepth(depthBuffer, modelToWorldTransform, camera);
+	if (model.isNotNull()) {
+		model->renderDepth(image_exists(depthBuffer) ? &depthBuffer : nullptr, modelToWorldTransform, camera);
 	}
 }
 
@@ -537,7 +537,7 @@ struct RendererImpl {
 		//           Because the model is being borrowed for vertex animation
 		//           To prevent the command queue from getting full hold as much as possible in a sorted list of instances
 		//           When the command queue is full, the solid instances will be drawn front to back before filtered is drawn back to front
-		model->render(&this->commandQueue, this->colorBuffer, this->depthBuffer, modelToWorldTransform, camera);
+		model->render(&this->commandQueue, image_exists(this->colorBuffer) ? &(this->colorBuffer) : nullptr, image_exists(this->depthBuffer) ? &(this->depthBuffer) : nullptr, modelToWorldTransform, camera);
 	}
 	void endFrame(bool debugWireframe) {
 		if (!this->receiving) {
@@ -668,15 +668,15 @@ struct RendererImpl {
 };
 
 Renderer renderer_create() {
-	return std::make_shared<RendererImpl>();
+	return handle_create<RendererImpl>().setName("Renderer");
 }
 
 bool renderer_exists(const Renderer& renderer) {
-	return renderer.get() != nullptr;
+	return renderer.isNotNull();
 }
 
 void renderer_begin(Renderer& renderer, ImageRgbaU8& colorBuffer, ImageF32& depthBuffer) {
-	MUST_EXIST(renderer,renderer_begin);
+	MUST_EXIST(renderer, renderer_begin);
 	renderer->beginFrame(colorBuffer, depthBuffer);
 }
 
@@ -689,8 +689,8 @@ void renderer_begin(Renderer& renderer, ImageRgbaU8& colorBuffer, ImageF32& dept
 //         This allow rendering many instances using the same model at different times
 //         Enabling vertex light, reflection maps and bone animation
 void renderer_giveTask(Renderer& renderer, const Model& model, const Transform3D &modelToWorldTransform, const Camera &camera) {
-	MUST_EXIST(renderer,renderer_giveTask);
-	if (model.get() != nullptr) {
+	MUST_EXIST(renderer, renderer_giveTask);
+	if (model.isNotNull()) {
 		renderer->giveTask(model, modelToWorldTransform, camera);
 	}
 }
@@ -699,48 +699,42 @@ void renderer_giveTask_triangle(Renderer& renderer,
   const ProjectedPoint &posA, const ProjectedPoint &posB, const ProjectedPoint &posC,
   const FVector4D &colorA, const FVector4D &colorB, const FVector4D &colorC,
   const FVector4D &texCoordA, const FVector4D &texCoordB, const FVector4D &texCoordC,
-  const ImageRgbaU8& diffuseMap, const ImageRgbaU8& lightMap,
+  const TextureRgbaU8& diffuseMap, const TextureRgbaU8& lightMap,
   Filter filter, const Camera &camera) {
 	#ifndef NDEBUG
-		if (image_exists(diffuseMap) && !image_isTexture(diffuseMap)) {
-			throwError("If renderer_addTriangle is given a diffuse map, it must be a valid texture according to the criterias of image_isTexture!");
-		}
-		if (image_exists(lightMap) && !image_isTexture(lightMap)) {
-			throwError("If renderer_addTriangle is given a light map, it must be a valid texture according to the criterias of image_isTexture!");
-		}
-		MUST_EXIST(renderer,renderer_addTriangle);
+		MUST_EXIST(renderer, renderer_giveTask_triangle); // Only checked when NDEBUG is not defined; the message names this function.
 	#endif
 	renderTriangleFromData(
-	  &(renderer->commandQueue), renderer->colorBuffer.get(), renderer->depthBuffer.get(), camera,
+	  &(renderer->commandQueue), &(renderer->colorBuffer), &(renderer->depthBuffer), camera,
 	  posA, posB, posC,
-	  filter, diffuseMap.get(), lightMap.get(),
+	  filter, &(diffuseMap), &(lightMap),
 	  TriangleTexCoords(texCoordA, texCoordB, texCoordC),
 	  TriangleColors(colorA, colorB, colorC)
 	);
 }
 
 void renderer_occludeFromBox(Renderer& renderer, const FVector3D& minimum, const FVector3D& maximum, const Transform3D &modelToWorldTransform, const Camera &camera, bool debugSilhouette) {
-	MUST_EXIST(renderer,renderer_occludeFromBox);
+	MUST_EXIST(renderer, renderer_occludeFromBox);
 	renderer->occludeFromBox(minimum, maximum, modelToWorldTransform, camera, debugSilhouette);
 }
 
 void renderer_occludeFromExistingTriangles(Renderer& renderer) {
-	MUST_EXIST(renderer,renderer_optimize);
+	MUST_EXIST(renderer, renderer_occludeFromExistingTriangles); // Throws an error naming this function if the renderer handle is null.
 	renderer->occludeFromExistingTriangles();
 }
 
 void renderer_occludeFromTopRows(Renderer& renderer, const Camera &camera) {
-	MUST_EXIST(renderer,renderer_occludeFromTopRows);
+	MUST_EXIST(renderer, renderer_occludeFromTopRows);
 	renderer->occludeFromTopRows(camera);
 }
 
 bool renderer_isBoxVisible(Renderer& renderer, const FVector3D &minimum, const FVector3D &maximum, const Transform3D &modelToWorldTransform, const Camera &camera) {
-	MUST_EXIST(renderer,renderer_isBoxVisible);
+	MUST_EXIST(renderer, renderer_isBoxVisible);
 	return !(renderer->isBoxOccluded(minimum, maximum, modelToWorldTransform, camera));
 }
 
 void renderer_end(Renderer& renderer, bool debugWireframe) {
-	MUST_EXIST(renderer,renderer_end);
+	MUST_EXIST(renderer, renderer_end);
 	renderer->endFrame(debugWireframe);
 }
 

+ 19 - 9
Source/DFPSR/api/modelAPI.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// Copyright (c) 2018 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -24,15 +24,25 @@
 #ifndef DFPSR_API_MODEL
 #define DFPSR_API_MODEL
 
-#include "types.h"
 #include "../math/FVector.h"
-
-// TODO: How should these be exposed to the caller?
+#include "../image/Texture.h"
 #include "../render/Camera.h"
 #include "../render/ResourcePool.h"
+
+// TODO: Create a folder with types.
+namespace dsr {
+	// A handle to a model.
+	class ModelImpl;
+	using Model = Handle<ModelImpl>;
+}
+
 #include "../render/model/format/dmf1.h"
 
 namespace dsr {
+	// A handle to a multi-threaded rendering context.
+	class RendererImpl;
+	using Renderer = Handle<RendererImpl>;
+
 	// Normalized texture coordinates:
 	//   (0.0f, 0.0f) is the texture coordinate for the upper left corner of the upper left pixel in the 2D texture.
 	//   (1.0f, 0.0f) is the texture coordinate for the upper right corner of the upper right pixel in the 2D texture.
@@ -200,7 +210,7 @@ namespace dsr {
 	// Post-condition:
 	//   Returns a texture handle to the diffuse texture in the part at partIndex in model.
 	//   If the part has no diffuse texture then an empty handle is returned.
-	ImageRgbaU8 model_getDiffuseMap(const Model& model, int partIndex);
+	TextureRgbaU8 model_getDiffuseMap(const Model& model, int partIndex);
 	// Set the part's diffuse texture.
 	//   A texture is just an image fulfilling the criteria of image_isTexture to allow fast texture sampling and pyramid generation.
 	// Pre-condition:
@@ -209,7 +219,7 @@ namespace dsr {
 	// Side-effect:
 	//   Sets the diffuse texture in the part at partIndex in model to diffuseMap.
 	//   If diffuseMap is an empty image handle, then the diffuse texture will be replaced by the default solid white color.
-	void model_setDiffuseMap(Model& model, int partIndex, const ImageRgbaU8 &diffuseMap);
+	void model_setDiffuseMap(Model& model, int partIndex, const TextureRgbaU8 &diffuseMap);
 	// Automatically find the diffuse texture by name in the resource pool and assign it.
 	// Pre-condition:
 	//   model must refer to an existing model.
@@ -229,7 +239,7 @@ namespace dsr {
 	// Post-condition:
 	//   Returns an image handle to the light texture in the part at partIndex in model.
 	//   If the part has no light image then an empth handle is returned.
-	ImageRgbaU8 model_getLightMap(Model& model, int partIndex);
+	TextureRgbaU8 model_getLightMap(Model& model, int partIndex);
 	// Set the part's light texture.
 	//   A texture is just an image fulfilling the criterias of image_isTexture to allow fast texture sampling.
 	//   Even though no texture-pyramid is used for light-maps, it still has to look up
@@ -240,7 +250,7 @@ namespace dsr {
 	// Side-effect:
 	//   Sets the diffuse texture in the part at partIndex in model to diffuseMap.
 	//   If diffuseMap is an empty image handle, then the diffuse texture will be replaced by the default solid white color.
-	void model_setLightMap(Model& model, int partIndex, const ImageRgbaU8 &lightMap);
+	void model_setLightMap(Model& model, int partIndex, const TextureRgbaU8 &lightMap);
 	// Automatically find the light texture by name in the resource pool and assign it.
 	// Pre-condition:
 	//   model must refer to an existing model.
@@ -357,7 +367,7 @@ namespace dsr {
 	  const ProjectedPoint &posA, const ProjectedPoint &posB, const ProjectedPoint &posC,
 	  const FVector4D &colorA, const FVector4D &colorB, const FVector4D &colorC,
 	  const FVector4D &texCoordA, const FVector4D &texCoordB, const FVector4D &texCoordC,
-	  const ImageRgbaU8& diffuseMap, const ImageRgbaU8& lightMap,
+	  const TextureRgbaU8& diffuseMap, const TextureRgbaU8& lightMap,
 	  Filter filter, const Camera &camera);
 	// Use already given triangles as occluders.
 	//   Used after calls to renderer_giveTask have filled the buffer with triangles, but before they are drawn using renderer_end.

+ 216 - 315
Source/DFPSR/api/stringAPI.cpp

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2020 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -22,24 +22,34 @@
 //    distribution.
 
 // Gets access to private members by making them public for the whole module
-#define DFPSR_INTERNAL_ACCESS
+#define DSR_INTERNAL_ACCESS
 
+#include <iostream>
+#include <sstream>
 #include <fstream>
 #include <streambuf>
-#include <cstring>
+#include <thread>
+#include <mutex>
 #include <stdexcept>
 #include "stringAPI.h"
 #include "../api/fileAPI.h"
+#include "../settings.h"
 
 using namespace dsr;
 
-static void atomic_append(String &target, const char* source);
-static void atomic_append(String &target, const ReadableString& source);
-static void atomic_append(String &target, const char32_t* source);
-static void atomic_append(String &target, const std::string& source);
+// The print buffer keeps its buffer size from previous printing to avoid reallocating memory every time something is printed.
+//   It is stored separately for each calling thread to avoid conflicts.
+static thread_local String printBuffer;
+// Returns a reference to the calling thread's print buffer.
+String &dsr::string_getPrintBuffer() {
+	return printBuffer;
+}
+
+static void atomic_append_ascii(String &target, const char* source);
+static void atomic_append_readable(String &target, const ReadableString& source);
+static void atomic_append_utf32(String &target, const DsrChar* source);
 
-static int64_t strlen_utf32(const char32_t *content) {
-	int64_t length = 0;
+static intptr_t strlen_utf32(const DsrChar *content) {
+	intptr_t length = 0;
 	while (content[length] != 0) {
 		length++;
 	}
@@ -54,68 +64,19 @@ static char toAscii(DsrChar c) {
 	}
 }
 
-ReadableString::ReadableString() {}
-
-ReadableString::~ReadableString() {}
-
 ReadableString::ReadableString(const DsrChar *content)
-: readSection(content), length(strlen_utf32(content)) {}
-
-ReadableString::ReadableString(const String& source) {
-	this->readSection = source.readSection;
-	this->length = source.length;
-	this->buffer = source.buffer;
-}
+: view(content, strlen_utf32(content)) {}
 
-// Not the fastest constructor, but won't bloat the public header
-// Hopefully most compilers know how to optimize this
-static ReadableString createSubString(const DsrChar *content, int64_t length, const Buffer &buffer) {
-	ReadableString result;
-	result.readSection = content;
-	result.length = length;
-	result.buffer = buffer;
-	return result;
-}
-
-static String createSubString_shared(const DsrChar *content, int64_t length, const Buffer &buffer, char32_t* writeSection) {
-	String result;
-	result.readSection = content;
-	result.length = length;
-	result.buffer = buffer;
-	result.writeSection = writeSection;
-	return result;
-}
+ReadableString::ReadableString(const String& source)
+: characters(source.characters), view(source.view) {}
 
 String::String() {}
-String::String(const char* source) { atomic_append(*this, source); }
-String::String(const char32_t* source) { atomic_append(*this, source); }
-String::String(const std::string& source) { atomic_append(*this, source); }
-String::String(const String& source) {
-	// Share immutable buffer
-	this->readSection = source.readSection;
-	this->length = source.length;
-	this->buffer = source.buffer;
-	this->writeSection = source.writeSection;
-}
-String::String(const ReadableString& source) {
-	if (buffer_exists(source.buffer)) {
-		this->readSection = source.readSection;
-		this->length = source.length;
-		this->buffer = source.buffer;
-		this->writeSection = const_cast<char32_t*>(source.readSection); // Still safe because of immutability
-	} else {
-		// No buffer to share, just appending the content
-		atomic_append(*this, source);
-	}
-}
-
-DsrChar ReadableString::operator[] (int64_t index) const {
-	if (index < 0 || index >= this->length) {
-		return U'\0';
-	} else {
-		return this->readSection[index];
-	}
-}
+String::String(const char* source) { atomic_append_ascii(*this, source); }
+String::String(const DsrChar* source) { atomic_append_utf32(*this, source); }
+String::String(const String& source)
+: ReadableString(source.characters, source.view){}
+String::String(const ReadableString& source)
+: ReadableString(source.characters, source.view) {}
 
 String& Printable::toStream(String& target) const {
 	return this->toStreamIndented(target, U"");
@@ -131,33 +92,14 @@ String Printable::toString() const {
 	return this->toStringIndented(U"");
 }
 
-std::ostream& Printable::toStreamIndented(std::ostream& out, const ReadableString& indentation) const {
-	String result;
-	this->toStreamIndented(result, indentation);
-	for (int64_t i = 0; i < result.length; i++) {
-		out.put(toAscii(result.readSection[i]));
-	}
-	return out;
-}
-
-std::ostream& Printable::toStream(std::ostream& out) const {
-	return this->toStreamIndented(out, U"");
-}
-
-std::string Printable::toStdString() const {
-	std::ostringstream result;
-	this->toStream(result);
-	return result.str();
-}
-
 Printable::~Printable() {}
 
 bool dsr::string_match(const ReadableString& a, const ReadableString& b) {
-	if (a.length != b.length) {
+	if (a.view.length != b.view.length) {
 		return false;
 	} else {
-		for (int64_t i = 0; i < a.length; i++) {
-			if (a.readSection[i] != b.readSection[i]) {
+		for (intptr_t i = 0; i < a.view.length; i++) {
+			if (a[i] != b[i]) {
 				return false;
 			}
 		}
@@ -166,11 +108,11 @@ bool dsr::string_match(const ReadableString& a, const ReadableString& b) {
 }
 
 bool dsr::string_caseInsensitiveMatch(const ReadableString& a, const ReadableString& b) {
-	if (a.length != b.length) {
+	if (a.view.length != b.view.length) {
 		return false;
 	} else {
-		for (int64_t i = 0; i < a.length; i++) {
-			if (towupper(a.readSection[i]) != towupper(b.readSection[i])) {
+		for (intptr_t i = 0; i < a.view.length; i++) {
+			if (towupper(a[i]) != towupper(b[i])) {
 				return false;
 			}
 		}
@@ -178,23 +120,10 @@ bool dsr::string_caseInsensitiveMatch(const ReadableString& a, const ReadableStr
 	}
 }
 
-std::ostream& ReadableString::toStream(std::ostream& out) const {
-	for (int64_t i = 0; i < this->length; i++) {
-		out.put(toAscii(this->readSection[i]));
-	}
-	return out;
-}
-
-std::string ReadableString::toStdString() const {
-	std::ostringstream result;
-	this->toStream(result);
-	return result.str();
-}
-
 String dsr::string_upperCase(const ReadableString &text) {
 	String result;
-	string_reserve(result, text.length);
-	for (int64_t i = 0; i < text.length; i++) {
+	string_reserve(result, text.view.length);
+	for (intptr_t i = 0; i < text.view.length; i++) {
 		string_appendChar(result, towupper(text[i]));
 	}
 	return result;
@@ -202,15 +131,15 @@ String dsr::string_upperCase(const ReadableString &text) {
 
 String dsr::string_lowerCase(const ReadableString &text) {
 	String result;
-	string_reserve(result, text.length);
-	for (int64_t i = 0; i < text.length; i++) {
+	string_reserve(result, text.view.length);
+	for (intptr_t i = 0; i < text.view.length; i++) {
 		string_appendChar(result, towlower(text[i]));
 	}
 	return result;
 }
 
-static int64_t findFirstNonWhite(const ReadableString &text) {
-	for (int64_t i = 0; i < text.length; i++) {
+static intptr_t findFirstNonWhite(const ReadableString &text) {
+	for (intptr_t i = 0; i < text.view.length; i++) {
 		DsrChar c = text[i];
 		if (!character_isWhiteSpace(c)) {
 			return i;
@@ -219,8 +148,8 @@ static int64_t findFirstNonWhite(const ReadableString &text) {
 	return -1;
 }
 
-static int64_t findLastNonWhite(const ReadableString &text) {
-	for (int64_t i = text.length - 1; i >= 0; i--) {
+static intptr_t findLastNonWhite(const ReadableString &text) {
+	for (intptr_t i = text.view.length - 1; i >= 0; i--) {
 		DsrChar c = text[i];
 		if (!character_isWhiteSpace(c)) {
 			return i;
@@ -231,8 +160,8 @@ static int64_t findLastNonWhite(const ReadableString &text) {
 
 // Allow passing literals without allocating heap memory for the result
 ReadableString dsr::string_removeOuterWhiteSpace(const ReadableString &text) {
-	int64_t first = findFirstNonWhite(text);
-	int64_t last = findLastNonWhite(text);
+	intptr_t first = findFirstNonWhite(text);
+	intptr_t last = findLastNonWhite(text);
 	if (first == -1) {
 		// Only white space
 		return ReadableString();
@@ -244,9 +173,9 @@ ReadableString dsr::string_removeOuterWhiteSpace(const ReadableString &text) {
 
 String dsr::string_mangleQuote(const ReadableString &rawText) {
 	String result;
-	string_reserve(result, rawText.length + 2);
+	string_reserve(result, rawText.view.length + 2);
 	string_appendChar(result, U'\"'); // Begin quote
-	for (int64_t i = 0; i < rawText.length; i++) {
+	for (intptr_t i = 0; i < rawText.view.length; i++) {
 		DsrChar c = rawText[i];
 		if (c == U'\"') { // Double quote
 			string_append(result, U"\\\"");
@@ -277,13 +206,13 @@ String dsr::string_mangleQuote(const ReadableString &rawText) {
 }
 
 String dsr::string_unmangleQuote(const ReadableString& mangledText) {
-	int64_t firstQuote = string_findFirst(mangledText, '\"');
-	int64_t lastQuote = string_findLast(mangledText, '\"');
+	intptr_t firstQuote = string_findFirst(mangledText, '\"');
+	intptr_t lastQuote = string_findLast(mangledText, '\"');
 	String result;
 	if (firstQuote == -1 || lastQuote == -1 || firstQuote == lastQuote) {
 		throwError(U"Cannot unmangle using string_unmangleQuote without beginning and ending with quote signs!\n", mangledText, "\n");
 	} else {
-		for (int64_t i = firstQuote + 1; i < lastQuote; i++) {
+		for (intptr_t i = firstQuote + 1; i < lastQuote; i++) {
 			DsrChar c = mangledText[i];
 			if (c == U'\\') { // Escape character
 				DsrChar c2 = mangledText[i + 1];
@@ -334,7 +263,7 @@ String dsr::string_unmangleQuote(const ReadableString& mangledText) {
 	return result;
 }
 
-static void uintToString_arabic(String& target, uint64_t value) {
+void dsr::string_fromUnsigned(String& target, uint64_t value) {
 	static const int bufferSize = 20;
 	DsrChar digits[bufferSize];
 	int64_t usedSize = 0;
@@ -357,12 +286,12 @@ static void uintToString_arabic(String& target, uint64_t value) {
 	}
 }
 
-static void intToString_arabic(String& target, int64_t value) {
+// Appends the textual representation of the signed integer value to target.
+//   Non-negative values are forwarded directly to string_fromUnsigned.
+//   Negative values are written as negationCharacter followed by the magnitude.
+void dsr::string_fromSigned(String& target, int64_t value, DsrChar negationCharacter) {
 	if (value >= 0) {
-		uintToString_arabic(target, (uint64_t)value);
+		string_fromUnsigned(target, (uint64_t)value);
 	} else {
-		string_appendChar(target, U'-');
-		uintToString_arabic(target, (uint64_t)(-value));
+		string_appendChar(target, negationCharacter);
+		// Negate in unsigned arithmetic, because -value overflows in signed arithmetic when value is INT64_MIN.
+		string_fromUnsigned(target, (uint64_t)0 - (uint64_t)value);
 	}
 }
 
@@ -385,7 +314,7 @@ static double decimalMultipliers[MAX_DECIMALS] = {
 	1000000000000000.0,
 	10000000000000000.0
 };
-static void doubleToString_arabic(String& target, double value, int decimalCount = 6, bool removeTrailingZeroes = true, DsrChar decimalCharacter = U'.', DsrChar negationCharacter = U'-') {
+void dsr::string_fromDouble(String& target, double value, int decimalCount, bool removeTrailingZeroes, DsrChar decimalCharacter, DsrChar negationCharacter) {
 	if (decimalCount < 1) decimalCount = 1;
 	if (decimalCount > MAX_DECIMALS) decimalCount = MAX_DECIMALS;
 	double remainder = value;
@@ -396,7 +325,7 @@ static void doubleToString_arabic(String& target, double value, int decimalCount
 	}
 	// Get whole part
 	uint64_t whole = (uint64_t)remainder;
-	uintToString_arabic(target, whole);
+	string_fromUnsigned(target, whole);
 	remainder = remainder - whole;
 	// Print the decimal
 	string_appendChar(target, decimalCharacter);
@@ -429,11 +358,11 @@ static void doubleToString_arabic(String& target, double value, int decimalCount
 }
 
 #define TO_RAW_ASCII(TARGET, SOURCE) \
-	char TARGET[SOURCE.length + 1]; \
-	for (int64_t i = 0; i < SOURCE.length; i++) { \
+	char TARGET[SOURCE.view.length + 1]; \
+	for (intptr_t i = 0; i < SOURCE.view.length; i++) { \
 		TARGET[i] = toAscii(SOURCE[i]); \
 	} \
-	TARGET[SOURCE.length] = '\0';
+	TARGET[SOURCE.view.length] = '\0';
 
 // A function definition for receiving a stream of bytes
 //   Instead of using std's messy inheritance
@@ -453,8 +382,8 @@ static void feedCharacter(const UTF32WriterFunction &receiver, DsrChar character
 // Appends the content of buffer as a BOM-free Latin-1 file into target
 // fileLength is ignored when nullTerminated is true
 template <bool nullTerminated>
-static void feedStringFromFileBuffer_Latin1(const UTF32WriterFunction &receiver, const uint8_t* buffer, int64_t fileLength = 0) {
-	for (int64_t i = 0; i < fileLength || nullTerminated; i++) {
+static void feedStringFromFileBuffer_Latin1(const UTF32WriterFunction &receiver, const uint8_t* buffer, intptr_t fileLength = 0) {
+	for (intptr_t i = 0; i < fileLength || nullTerminated; i++) {
 		DsrChar character = (DsrChar)(buffer[i]);
 		if (nullTerminated && character == 0) { return; }
 		feedCharacter(receiver, character);
@@ -463,8 +392,8 @@ static void feedStringFromFileBuffer_Latin1(const UTF32WriterFunction &receiver,
 // Appends the content of buffer as a BOM-free UTF-8 file into target
 // fileLength is ignored when nullTerminated is true
 template <bool nullTerminated>
-static void feedStringFromFileBuffer_UTF8(const UTF32WriterFunction &receiver, const uint8_t* buffer, int64_t fileLength = 0) {
-	for (int64_t i = 0; i < fileLength || nullTerminated; i++) {
+static void feedStringFromFileBuffer_UTF8(const UTF32WriterFunction &receiver, const uint8_t* buffer, intptr_t fileLength = 0) {
+	for (intptr_t i = 0; i < fileLength || nullTerminated; i++) {
 		uint8_t byteA = buffer[i];
 		if (byteA < (uint32_t)0b10000000) {
 			// Single byte (1xxxxxxx)
@@ -502,7 +431,7 @@ static void feedStringFromFileBuffer_UTF8(const UTF32WriterFunction &receiver, c
 }
 
 template <bool LittleEndian>
-uint16_t read16bits(const uint8_t* buffer, int64_t startOffset) {
+uint16_t read16bits(const uint8_t* buffer, intptr_t startOffset) {
 	uint16_t byteA = buffer[startOffset];
 	uint16_t byteB = buffer[startOffset + 1];
 	if (LittleEndian) {
@@ -515,8 +444,8 @@ uint16_t read16bits(const uint8_t* buffer, int64_t startOffset) {
 // Appends the content of buffer as a BOM-free UTF-16 file into target as UTF-32
 // fileLength is ignored when nullTerminated is true
 template <bool LittleEndian, bool nullTerminated>
-static void feedStringFromFileBuffer_UTF16(const UTF32WriterFunction &receiver, const uint8_t* buffer, int64_t fileLength = 0) {
-	for (int64_t i = 0; i < fileLength || nullTerminated; i += 2) {
+static void feedStringFromFileBuffer_UTF16(const UTF32WriterFunction &receiver, const uint8_t* buffer, intptr_t fileLength = 0) {
+	for (intptr_t i = 0; i < fileLength || nullTerminated; i += 2) {
 		// Read the first 16-bit word
 		uint16_t wordA = read16bits<LittleEndian>(buffer, i);
 		// Check if another word is needed
@@ -539,7 +468,7 @@ static void feedStringFromFileBuffer_UTF16(const UTF32WriterFunction &receiver,
 }
 // Sends the decoded UTF-32 characters from the encoded buffer into target.
 // The text encoding should be specified using a BOM at the start of buffer, otherwise Latin-1 is assumed.
-static void feedStringFromFileBuffer(const UTF32WriterFunction &receiver, const uint8_t* buffer, int64_t fileLength) {
+static void feedStringFromFileBuffer(const UTF32WriterFunction &receiver, const uint8_t* buffer, intptr_t fileLength) {
 	// After removing the BOM bytes, the rest can be seen as a BOM-free text file with a known format
 	if (fileLength >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF) { // UTF-8
 		feedStringFromFileBuffer_UTF8<false>(receiver, buffer + 3, fileLength - 3);
@@ -590,7 +519,7 @@ static void feedStringFromRawData(const UTF32WriterFunction &receiver, const uin
 String dsr::string_dangerous_decodeFromData(const void* data, CharacterEncoding encoding) {
 	String result;
 	// Measure the size of the result by scanning the content in advance
-	int64_t characterCount = 0;
+	intptr_t characterCount = 0;
 	UTF32WriterFunction measurer = [&characterCount](DsrChar character) {
 		characterCount++;
 	};
@@ -608,18 +537,18 @@ String dsr::string_dangerous_decodeFromData(const void* data, CharacterEncoding
 String dsr::string_loadFromMemory(Buffer fileContent) {
 	String result;
 	// Measure the size of the result by scanning the content in advance
-	int64_t characterCount = 0;
+	intptr_t characterCount = 0;
 	UTF32WriterFunction measurer = [&characterCount](DsrChar character) {
 		characterCount++;
 	};
-	feedStringFromFileBuffer(measurer, buffer_dangerous_getUnsafeData(fileContent), buffer_getSize(fileContent));
+	feedStringFromFileBuffer(measurer, fileContent.getUnsafe(), fileContent.getUsedSize());
 	// Pre-allocate the correct amount of memory based on the simulation
 	string_reserve(result, characterCount);
 	// Stream output to the result string
 	UTF32WriterFunction receiver = [&result](DsrChar character) {
 		string_appendChar(result, character);
 	};
-	feedStringFromFileBuffer(receiver, buffer_dangerous_getUnsafeData(fileContent), buffer_getSize(fileContent));
+	feedStringFromFileBuffer(receiver, fileContent.getUnsafe(), fileContent.getUsedSize());
 	return result;
 }
 
@@ -715,7 +644,7 @@ static void encodeText(const ByteWriterFunction &receiver, String content, bool
 		}
 	}
 	// Write encoded content
-	for (int64_t i = 0; i < string_length(content); i++) {
+	for (intptr_t i = 0; i < string_length(content); i++) {
 		DsrChar character = content[i];
 		if (character == U'\n') {
 			if (lineEncoding == LineEncoding::CrLf) {
@@ -779,12 +708,12 @@ bool dsr::string_save(const ReadableString& filename, const ReadableString& cont
 }
 
 Buffer dsr::string_saveToMemory(const ReadableString& content, CharacterEncoding characterEncoding, LineEncoding lineEncoding, bool writeByteOrderMark, bool writeNullTerminator) {
-	int64_t byteCount = 0;
+	intptr_t byteCount = 0;
 	ByteWriterFunction counter = [&byteCount](uint8_t value) {
 		byteCount++;
 	};
 	ENCODE_TEXT(counter, content, characterEncoding, lineEncoding, writeByteOrderMark, writeNullTerminator);
-	Buffer result = buffer_create(byteCount);
+	Buffer result = buffer_create(byteCount).setName("Buffer holding an encoded string");
 	SafePointer<uint8_t> byteWriter = buffer_getSafeData<uint8_t>(result, "Buffer for string encoding");
 	ByteWriterFunction receiver = [&byteWriter](uint8_t value) {
 		*byteWriter = value;
@@ -794,89 +723,79 @@ Buffer dsr::string_saveToMemory(const ReadableString& content, CharacterEncoding
 	return result;
 }
 
-static int64_t getNewBufferSize(int64_t minimumSize) {
-	if (minimumSize <= 128) {
-		return 128;
-	} else if (minimumSize <= 512) {
-		return 512;
-	} else if (minimumSize <= 2048) {
-		return 2048;
-	} else if (minimumSize <= 8192) {
-		return 8192;
-	} else if (minimumSize <= 32768) {
-		return 32768;
-	} else if (minimumSize <= 131072) {
-		return 131072;
-	} else if (minimumSize <= 524288) {
-		return 524288;
-	} else if (minimumSize <= 2097152) {
-		return 2097152;
-	} else if (minimumSize <= 8388608) {
-		return 8388608;
-	} else if (minimumSize <= 33554432) {
-		return 33554432;
-	} else if (minimumSize <= 134217728) {
-		return 134217728;
-	} else if (minimumSize <= 536870912) {
-		return 536870912;
-	} else {
-		return minimumSize;
-	}
+// Returns how many DsrChar elements the start of source's view is offset from the start of source's character allocation.
+//   Only meaningful when source has a character allocation that the view points into.
+static uintptr_t getStartOffset(const ReadableString &source) {
+	// Get the start of the allocation and the start of the view as byte pointers.
+	const uint8_t* origin = (const uint8_t*)(source.characters.getUnsafe());
+	const uint8_t* start = (const uint8_t*)(source.view.getUnchecked());
+	// The view must begin at or after the start of the allocation, otherwise the unsigned offset below would wrap around.
+	assert(start >= origin);
+	// Get the offset from the parent
+	return (start - origin) / sizeof(DsrChar);
+}
+
+// Allocates a character array requested to hold minimumLength elements, then expands its used size
+//   to whatever heap_getAllocationSize reports for the allocation and fills that memory with zeroes.
+// The array is requested as uninitialized, so the zero fill below is what gives it defined content.
+static Handle<DsrChar> allocateCharacters(intptr_t minimumLength) {
+	// Allocate memory.
+	Handle<DsrChar> result = handle_createArray<DsrChar>(AllocationInitialization::Uninitialized, minimumLength);
+	// Check how much space we got.
+	uintptr_t availableSpace = heap_getAllocationSize(result.getUnsafe());
+	// Expand to use all available memory in the allocation.
+	uintptr_t newSize = heap_setUsedSize(result.getUnsafe(), availableSpace);
+	// Clear the memory to zeroes, just to be safe against non-deterministic bugs.
+	safeMemorySet(result.getSafe("Cleared String pointer"), 0, newSize);
+	return result;
 }
-// Replaces the buffer with a new buffer holding at least newLength characters
+
+// Replaces the buffer with a new buffer holding at least minimumLength characters
 // Guarantees that the new buffer is not shared by other strings, so that it may be written to freely
-static void reallocateBuffer(String &target, int64_t newLength, bool preserve) {
+static void reallocateBuffer(String &target, intptr_t minimumLength, bool preserve) {
 	// Holding oldData alive while copying to the new buffer
-	Buffer oldBuffer = target.buffer; // Kept for reference counting only, do not remove.
-	const char32_t* oldData = target.readSection;
-	target.buffer = buffer_create(getNewBufferSize(newLength * sizeof(DsrChar)));
-	target.readSection = target.writeSection = reinterpret_cast<char32_t*>(buffer_dangerous_getUnsafeData(target.buffer));
-	if (preserve && oldData) {
-		memcpy(target.writeSection, oldData, target.length * sizeof(DsrChar));
+	Handle<DsrChar> oldBuffer = target.characters; // Kept for reference counting only, do not remove.
+	Impl_CharacterView oldData = target.view;
+	target.characters = allocateCharacters(minimumLength);
+	target.view = Impl_CharacterView(target.characters.getUnsafe(), oldData.length);
+	if (preserve && oldData.length > 0) {
+		safeMemoryCopy(target.view.getSafe("New characters being copied from an old buffer"), oldData.getSafe("Old characters being copied to a new buffer"), oldData.length * sizeof(DsrChar));
 	}
 }
-// Call before writing to the buffer
-//   This hides that Strings share buffers when assigning by value or taking partial strings
-static void cloneIfShared(String &target) {
-	if (target.buffer.use_count() > 1) {
-		reallocateBuffer(target, target.length, true);
+// Call before writing to the buffer.
+//   This hides that Strings share buffers when assigning by value or taking partial strings.
+static void cloneIfNeeded(String &target) {
+	// If there is no buffer or the buffer is shared, it needs to allocate its own buffer.
+	if (target.characters.isNull() || target.characters.getUseCount() > 1) {
+		reallocateBuffer(target, target.view.length, true);
 	}
 }
 
 void dsr::string_clear(String& target) {
-	cloneIfShared(target);
-	target.length = 0;
+	// When we start writing from the beginning, we must have our own allocation to avoid overwriting the characters in other strings.
+	cloneIfNeeded(target);
+	target.view.length = 0;
 }
 
 // The number of DsrChar characters that can be contained in the allocation before reaching the buffer's end
 //   This doesn't imply that it's always okay to write to the remaining space, because the buffer may be shared
-static int64_t getCapacity(const ReadableString &source) {
-	if (buffer_exists(source.buffer)) {
-		// Get the allocation
-		uint8_t* data = buffer_dangerous_getUnsafeData(source.buffer);
-		uint8_t* start = (uint8_t*)(source.readSection);
-		// Get the offset from the parent
-		intptr_t offset = start - data;
+static intptr_t getCapacity(const ReadableString &source) {
+	if (source.characters.isNotNull()) {
+		uintptr_t bufferElements = source.characters.getElementCount();
 		// Subtract offset from the buffer size to get the remaining space
-		return (buffer_getSize(source.buffer) - offset) / sizeof(DsrChar);
+		return bufferElements - getStartOffset(source);
 	} else {
 		return 0;
 	}
 }
 
-static void expand(String &target, int64_t newLength, bool affectUsedLength) {
-	cloneIfShared(target);
-	if (newLength > target.length) {
+static void expand(String &target, intptr_t newLength, bool affectUsedLength) {
+	cloneIfNeeded(target);
+	if (newLength > target.view.length) {
 		if (newLength > getCapacity(target)) {
 			reallocateBuffer(target, newLength, true);
 		}
 		if (affectUsedLength) {
-			target.length = newLength;
+			target.view.length = newLength;
 		}
 	}
 }
 
-void dsr::string_reserve(String& target, int64_t minimumLength) {
+void dsr::string_reserve(String& target, intptr_t minimumLength) {
 	expand(target, minimumLength, false);
 }
 
@@ -894,95 +813,77 @@ void dsr::string_reserve(String& target, int64_t minimumLength) {
 //     If it doesn't share the buffer
 //       * Then no risk of writing
 #define APPEND(TARGET, SOURCE, LENGTH, MASK) { \
-	int64_t oldLength = (TARGET).length; \
-	expand((TARGET), oldLength + (int64_t)(LENGTH), true); \
-	for (int64_t i = 0; i < (int64_t)(LENGTH); i++) { \
-		(TARGET).writeSection[oldLength + i] = ((SOURCE)[i]) & MASK; \
+	/* Appends LENGTH elements from SOURCE to the end of TARGET, masking each element with MASK. */ \
+	intptr_t oldLength = (TARGET).view.length; \
+	/* Evaluate LENGTH exactly once, so that strlen style arguments are not recomputed for every character */ \
+	/* and so that the count is taken before expand changes the length of TARGET. */ \
+	intptr_t appendedLength = (intptr_t)(LENGTH); \
+	expand((TARGET), oldLength + appendedLength, true); \
+	for (intptr_t i = 0; i < appendedLength; i++) { \
+		(TARGET).view.writeCharacter(oldLength + i, ((SOURCE)[i]) & MASK); \
 	} \
 }
 // TODO: See if ascii litterals can be checked for values above 127 in compile-time
-static void atomic_append(String &target, const char* source) { APPEND(target, source, strlen(source), 0xFF); }
+static void atomic_append_ascii(String &target, const char* source) { APPEND(target, source, strlen(source), 0xFF); }
 // TODO: Use memcpy when appending input of the same format
-static void atomic_append(String &target, const ReadableString& source) { APPEND(target, source, source.length, 0xFFFFFFFF); }
-static void atomic_append(String &target, const char32_t* source) { APPEND(target, source, strlen_utf32(source), 0xFFFFFFFF); }
-static void atomic_append(String &target, const std::string& source) { APPEND(target, source.c_str(), (int64_t)source.size(), 0xFF); }
-
+static void atomic_append_readable(String &target, const ReadableString& source) { APPEND(target, source, source.view.length, 0xFFFFFFFF); }
+static void atomic_append_utf32(String &target, const DsrChar* source) { APPEND(target, source, strlen_utf32(source), 0xFFFFFFFF); }
 void dsr::string_appendChar(String& target, DsrChar value) { APPEND(target, &value, 1, 0xFFFFFFFF); }
 
-String& dsr::string_toStreamIndented(String& target, const Printable& source, const ReadableString& indentation) {
-	return source.toStreamIndented(target, indentation);
-}
-String& dsr::string_toStreamIndented(String& target, const char* value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	atomic_append(target, value);
-	return target;
-}
-String& dsr::string_toStreamIndented(String& target, const ReadableString& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	atomic_append(target, value);
+String& dsr::string_toStreamIndented(String& target, const char *value, const ReadableString& indentation) {
+	atomic_append_readable(target, indentation);
+	atomic_append_ascii(target, value);
 	return target;
 }
-String& dsr::string_toStreamIndented(String& target, const char32_t* value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	atomic_append(target, value);
+String& dsr::string_toStreamIndented(String& target, const DsrChar *value, const ReadableString& indentation) {
+	atomic_append_readable(target, indentation);
+	atomic_append_utf32(target, value);
 	return target;
 }
-String& dsr::string_toStreamIndented(String& target, const float& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	doubleToString_arabic(target, (double)value);
-	return target;
-}
-String& dsr::string_toStreamIndented(String& target, const double& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	doubleToString_arabic(target, value);
-	return target;
-}
-String& dsr::string_toStreamIndented(String& target, const int64_t& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	intToString_arabic(target, value);
-	return target;
-}
-String& dsr::string_toStreamIndented(String& target, const uint64_t& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	uintToString_arabic(target, value);
-	return target;
-}
-String& dsr::string_toStreamIndented(String& target, const int32_t& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	intToString_arabic(target, (int64_t)value);
-	return target;
-}
-String& dsr::string_toStreamIndented(String& target, const uint32_t& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	uintToString_arabic(target, (uint64_t)value);
+String& dsr::string_toStreamIndented(String& target, const ReadableString& value, const ReadableString& indentation) {
+	atomic_append_readable(target, indentation);
+	atomic_append_readable(target, value);
 	return target;
 }
-String& dsr::string_toStreamIndented(String& target, const int16_t& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	intToString_arabic(target, (int64_t)value);
+String& dsr::string_toStreamIndented(String& target, const double &value, const ReadableString& indentation) {
+	atomic_append_readable(target, indentation);
+	string_fromDouble(target, (double)value);
 	return target;
 }
-String& dsr::string_toStreamIndented(String& target, const uint16_t& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	uintToString_arabic(target, (uint64_t)value);
+String& dsr::string_toStreamIndented(String& target, const int64_t &value, const ReadableString& indentation) {
+	atomic_append_readable(target, indentation);
+	string_fromSigned(target, value);
 	return target;
 }
-String& dsr::string_toStreamIndented(String& target, const int8_t& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	intToString_arabic(target, (int64_t)value);
+String& dsr::string_toStreamIndented(String& target, const uint64_t &value, const ReadableString& indentation) {
+	atomic_append_readable(target, indentation);
+	string_fromUnsigned(target, value);
 	return target;
 }
-String& dsr::string_toStreamIndented(String& target, const uint8_t& value, const ReadableString& indentation) {
-	atomic_append(target, indentation);
-	uintToString_arabic(target, (uint64_t)value);
-	return target;
+
+// The print mutex makes sure that messages from multiple threads don't get mixed up.
+static std::mutex printMutex;
+
+// Writes every character in source to out, converting each one using toAscii, and returns out.
+//   This function does not lock printMutex itself, so callers that need whole messages kept together must hold the lock.
+static std::ostream& toStream(std::ostream& out, const ReadableString &source) {
+	for (intptr_t i = 0; i < source.view.length; i++) {
+		out.put(toAscii(source.view[i]));
+	}
+	return out;
 }
 
 static const std::function<void(const ReadableString &message, MessageType type)> defaultMessageAction = [](const ReadableString &message, MessageType type) {
 	if (type == MessageType::Error) {
-		throw std::runtime_error(message.toStdString());
+		// Errors either terminate the program or throw, depending on DSR_HARD_EXIT_ON_ERROR.
+		#ifdef DSR_HARD_EXIT_ON_ERROR
+			// Print the error.
+			toStream(std::cerr, message);
+			// Free all heap allocations.
+			heap_hardExitCleaning();
+			// Terminate with a non-zero value to indicate failure.
+			std::exit(1);
+		#else
+			// Encode the message with a null terminator, so that the buffer can be read as a C string.
+			//   std::runtime_error copies the text, so the buffer only needs to live until the exception is constructed.
+			Buffer ascii = string_saveToMemory(message, CharacterEncoding::Raw_Latin1, LineEncoding::CrLf, false, true);
+			throw std::runtime_error((char*)ascii.getUnsafe());
+		#endif
 	} else {
-		message.toStream(std::cout);
+		// Hold the print mutex for the whole message using a scoped lock, so that it is released even if writing to the stream throws.
+		std::lock_guard<std::mutex> printLock(printMutex);
+		toStream(std::cout, message);
 	}
 };
 
@@ -990,9 +891,6 @@ static std::function<void(const ReadableString &message, MessageType type)> glob
 
 void dsr::string_sendMessage(const ReadableString &message, MessageType type) {
 	globalMessageAction(message, type);
-	if (type == MessageType::Error) {
-		throw std::runtime_error("The message handler provided using string_assignMessageHandler did not throw an exception or terminate the program for the given error!\n");
-	}
 }
 
 void dsr::string_sendMessage_default(const ReadableString &message, MessageType type) {
@@ -1008,8 +906,8 @@ void dsr::string_unassignMessageHandler() {
 }
 
 void dsr::string_split_callback(std::function<void(ReadableString separatedText)> action, const ReadableString& source, DsrChar separator, bool removeWhiteSpace) {
-	int64_t sectionStart = 0;
-	for (int64_t i = 0; i < source.length; i++) {
+	intptr_t sectionStart = 0;
+	for (intptr_t i = 0; i < source.view.length; i++) {
 		DsrChar c = source[i];
 		if (c == separator) {
 			ReadableString element = string_exclusiveRange(source, sectionStart, i);
@@ -1021,38 +919,41 @@ void dsr::string_split_callback(std::function<void(ReadableString separatedText)
 			sectionStart = i + 1;
 		}
 	}
-	if (source.length > sectionStart) {
+	if (source.view.length > sectionStart) {
 		if (removeWhiteSpace) {
-			action(string_removeOuterWhiteSpace(string_exclusiveRange(source, sectionStart, source.length)));
+			action(string_removeOuterWhiteSpace(string_exclusiveRange(source, sectionStart, source.view.length)));
 		} else {
-			action(string_exclusiveRange(source, sectionStart, source.length));
+			action(string_exclusiveRange(source, sectionStart, source.view.length));
 		}
 	}
 }
 
+// Creates a String from a character allocation handle and a view of the characters to expose.
+//   Only the handle and the view (pointer and length) are assigned, no characters are copied here.
+static String createSubString(const Handle<DsrChar> &characters, const Impl_CharacterView &view) {
+	String result;
+	result.characters = characters;
+	result.view = view;
+	return result;
+}
+
 List<String> dsr::string_split(const ReadableString& source, DsrChar separator, bool removeWhiteSpace) {
 	List<String> result;
-	String commonBuffer;
-	if (buffer_exists(source.buffer)) {
+	if (source.view.length > 0) {
 		// Re-use the existing buffer
-		commonBuffer = createSubString_shared(source.readSection, source.length, source.buffer, const_cast<char32_t*>(source.readSection));
-	} else {
-		// Clone the whole input into one allocation to avoid fragmenting the heap with many small allocations
-		commonBuffer = source;
+		String commonBuffer = createSubString(source.characters, source.view);
+		// Source is allocated as String
+		string_split_callback([&result, removeWhiteSpace](String element) {
+			if (removeWhiteSpace) {
+				result.push(string_removeOuterWhiteSpace(element));
+			} else {
+				result.push(element);
+			}
+		}, commonBuffer, separator, removeWhiteSpace);
 	}
-	// Source is allocated as String
-	string_split_callback([&result, removeWhiteSpace](String element) {
-		if (removeWhiteSpace) {
-			result.push(string_removeOuterWhiteSpace(element));
-		} else {
-			result.push(element);
-		}
-	}, commonBuffer, separator, removeWhiteSpace);
 	return result;
 }
 
-int64_t dsr::string_splitCount(const ReadableString& source, DsrChar separator) {
-	int64_t result;
+// Counts how many sections string_split_callback reports when splitting source at separator.
+intptr_t dsr::string_splitCount(const ReadableString& source, DsrChar separator) {
+	// The counter must start at zero, because incrementing an uninitialized local is undefined behavior.
+	intptr_t result = 0;
 	string_split_callback([&result](ReadableString element) {
 		result++;
 	}, source, separator);
@@ -1064,7 +965,7 @@ int64_t dsr::string_toInteger(const ReadableString& source) {
 	bool negated;
 	result = 0;
 	negated = false;
-	for (int64_t i = 0; i < source.length; i++) {
+	for (intptr_t i = 0; i < source.view.length; i++) {
 		DsrChar c = source[i];
 		if (c == '-' || c == '~') {
 			negated = !negated;
@@ -1091,7 +992,7 @@ double dsr::string_toDouble(const ReadableString& source) {
 	negated = false;
 	reachedDecimal = false;
 	digitDivider = 1;
-	for (int64_t i = 0; i < source.length; i++) {
+	for (intptr_t i = 0; i < source.view.length; i++) {
 		DsrChar c = source[i];
 		if (c == '-' || c == '~') {
 			negated = !negated;
@@ -1113,12 +1014,12 @@ double dsr::string_toDouble(const ReadableString& source) {
 	}
 }
 
-int64_t dsr::string_length(const ReadableString& source) {
-	return source.length;
+intptr_t dsr::string_length(const ReadableString& source) {
+	return source.view.length;
 }
 
-int64_t dsr::string_findFirst(const ReadableString& source, DsrChar toFind, int64_t startIndex) {
-	for (int64_t i = startIndex; i < source.length; i++) {
+intptr_t dsr::string_findFirst(const ReadableString& source, DsrChar toFind, intptr_t startIndex) {
+	for (intptr_t i = startIndex; i < source.view.length; i++) {
 		if (source[i] == toFind) {
 			return i;
 		}
@@ -1126,8 +1027,8 @@ int64_t dsr::string_findFirst(const ReadableString& source, DsrChar toFind, int6
 	return -1;
 }
 
-int64_t dsr::string_findLast(const ReadableString& source, DsrChar toFind) {
-	for (int64_t i = source.length - 1; i >= 0; i--) {
+intptr_t dsr::string_findLast(const ReadableString& source, DsrChar toFind) {
+	for (intptr_t i = source.view.length - 1; i >= 0; i--) {
 		if (source[i] == toFind) {
 			return i;
 		}
@@ -1135,33 +1036,33 @@ int64_t dsr::string_findLast(const ReadableString& source, DsrChar toFind) {
 	return -1;
 }
 
-ReadableString dsr::string_exclusiveRange(const ReadableString& source, int64_t inclusiveStart, int64_t exclusiveEnd) {
+// Returns the part of source from the character at inclusiveStart to just before the character at exclusiveEnd.
+//   Indices outside of source are clamped, and an interval without any overlap gives an empty string.
+//   The result views the same characters as source instead of copying them.
+ReadableString dsr::string_exclusiveRange(const ReadableString& source, intptr_t inclusiveStart, intptr_t exclusiveEnd) {
 	// Return empty string for each complete miss
-	if (inclusiveStart >= source.length || exclusiveEnd <= 0) { return ReadableString(); }
+	//   An inverted interval is also a miss, because it would otherwise create a view with a negative length.
+	if (inclusiveStart >= source.view.length || exclusiveEnd <= 0 || inclusiveStart > exclusiveEnd) { return ReadableString(); }
 	// Automatically clamping to valid range
 	if (inclusiveStart < 0) { inclusiveStart = 0; }
-	if (exclusiveEnd > source.length) { exclusiveEnd = source.length; }
+	if (exclusiveEnd > source.view.length) { exclusiveEnd = source.view.length; }
 	// Return the overlapping interval
-	return createSubString(&(source.readSection[inclusiveStart]), exclusiveEnd - inclusiveStart, source.buffer);
+	return createSubString(source.characters, Impl_CharacterView(source.view.getUnchecked() + inclusiveStart, exclusiveEnd - inclusiveStart));
 }
 
-ReadableString dsr::string_inclusiveRange(const ReadableString& source, int64_t inclusiveStart, int64_t inclusiveEnd) {
+ReadableString dsr::string_inclusiveRange(const ReadableString& source, intptr_t inclusiveStart, intptr_t inclusiveEnd) {
 	return string_exclusiveRange(source, inclusiveStart, inclusiveEnd + 1);
 }
 
-ReadableString dsr::string_before(const ReadableString& source, int64_t exclusiveEnd) {
+ReadableString dsr::string_before(const ReadableString& source, intptr_t exclusiveEnd) {
 	return string_exclusiveRange(source, 0, exclusiveEnd);
 }
 
-ReadableString dsr::string_until(const ReadableString& source, int64_t inclusiveEnd) {
+ReadableString dsr::string_until(const ReadableString& source, intptr_t inclusiveEnd) {
 	return string_inclusiveRange(source, 0, inclusiveEnd);
 }
 
-ReadableString dsr::string_from(const ReadableString& source, int64_t inclusiveStart) {
-	return string_exclusiveRange(source, inclusiveStart, source.length);
+ReadableString dsr::string_from(const ReadableString& source, intptr_t inclusiveStart) {
+	return string_exclusiveRange(source, inclusiveStart, source.view.length);
 }
 
-ReadableString dsr::string_after(const ReadableString& source, int64_t exclusiveStart) {
+ReadableString dsr::string_after(const ReadableString& source, intptr_t exclusiveStart) {
 	return string_from(source, exclusiveStart + 1);
 }
 
@@ -1197,7 +1098,7 @@ bool dsr::character_isWhiteSpace(DsrChar c) {
 
 // The greedy approach works here, because there's no ambiguity
 bool dsr::string_isInteger(const ReadableString& source, bool allowWhiteSpace) {
-	int64_t readIndex = 0;
+	intptr_t readIndex = 0;
 	if (allowWhiteSpace) {
 		PATTERN_STAR(WhiteSpace);
 	}
@@ -1207,7 +1108,7 @@ bool dsr::string_isInteger(const ReadableString& source, bool allowWhiteSpace) {
 	if (allowWhiteSpace) {
 		PATTERN_STAR(WhiteSpace);
 	}
-	return readIndex == source.length;
+	return readIndex == source.view.length;
 }
 
 // To avoid consuming the all digits on Digit* before reaching Digit+ when there is no decimal, whole integers are judged by string_isInteger
@@ -1217,7 +1118,7 @@ bool dsr::string_isDouble(const ReadableString& source, bool allowWhiteSpace) {
 		// No decimal detected
 		return string_isInteger(source, allowWhiteSpace);
 	} else {
-		int64_t readIndex = 0;
+		intptr_t readIndex = 0;
 		if (allowWhiteSpace) {
 			PATTERN_STAR(WhiteSpace);
 		}
@@ -1233,10 +1134,10 @@ bool dsr::string_isDouble(const ReadableString& source, bool allowWhiteSpace) {
 		if (allowWhiteSpace) {
 			PATTERN_STAR(WhiteSpace);
 		}
-		return readIndex == source.length;
+		return readIndex == source.view.length;
 	}
 }
 
-int64_t dsr::string_getBufferUseCount(const ReadableString& text) {
-	return text.buffer.use_count();
+uintptr_t dsr::string_getBufferUseCount(const ReadableString& text) {
+	return text.characters.getUseCount();
 }

+ 209 - 96
Source/DFPSR/api/stringAPI.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2020 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -25,15 +25,13 @@
 #define DFPSR_API_STRING
 
 #include <cstdint>
-#include <iostream>
-#include <sstream>
-#include <string>
 #include <functional>
 #include "bufferAPI.h"
+#include "../base/SafePointer.h"
 #include "../collection/List.h"
 
-// Define DFPSR_INTERNAL_ACCESS before any include to get internal access to exposed types
-#ifdef DFPSR_INTERNAL_ACCESS
+// Define DSR_INTERNAL_ACCESS before any include to get internal access to exposed types
+#ifdef DSR_INTERNAL_ACCESS
 	#define IMPL_ACCESS public
 #else
 	#define IMPL_ACCESS protected
@@ -61,47 +59,71 @@ enum class LineEncoding {
 
 class String;
 
-// Replacing String with a ReadableString reference for input arguments can make passing of U"" literals faster.
-//   Unlike String, it cannot be constructed from a "" literal,
-//   because it's not allowed to create a new allocation for the UTF-32 conversion.
+// Helper type for strings.
+//   Holds a pointer to the first character and the number of characters, without owning the memory.
+//   Reading and writing by index is bound checked against length.
+struct Impl_CharacterView {
+	// Pointer to the first character, or nullptr for an empty view.
+	DsrChar *data = nullptr;
+	// Number of characters that can be accessed from data.
+	intptr_t length = 0;
+	// Creates an empty view.
+	Impl_CharacterView() {}
+	// Creates a view from what the handle returns from getUnsafe and getElementCount.
+	Impl_CharacterView(Handle<DsrChar> characters)
+	: data(characters.getUnsafe()), length(characters.getElementCount()) {}
+	// Creates a view over length characters starting at data.
+	//   The const qualifier is cast away for storage, so the caller decides if writing is allowed.
+	//   A null data pointer always gives length zero.
+	Impl_CharacterView(const DsrChar *data, intptr_t length)
+	: data(const_cast<DsrChar *>(data)), length(length) {
+		if (data == nullptr) this->length = 0;
+	}
+	// Returns the raw data pointer without any bound checks.
+	inline DsrChar *getUnchecked() const {
+		return const_cast<DsrChar*>(this->data);
+	}
+	// Returns the character at index, or U'\0' when index is outside of the view.
+	inline DsrChar operator [] (intptr_t index) const {
+		if (index < 0 || index >= this->length) {
+			return U'\0';
+		} else {
+			return this->data[index];
+		}
+	}
+	// Writes character at index. Writes outside of the view are currently ignored without any error.
+	inline void writeCharacter(intptr_t index, DsrChar character) {
+		if (index < 0 || index >= this->length) {
+			// TODO: Throw an error without causing bottomless recursion.
+		} else {
+			this->data[index] = character;
+		}
+	}
+	// Wraps the viewed memory in a SafePointer with the given name.
+	//   NOTE(review): The last argument is presumably the size in bytes, confirm against the SafePointer constructor.
+	inline SafePointer<DsrChar> getSafe(const char *name) const {
+		return SafePointer<DsrChar>(name, this->getUnchecked(), this->length * sizeof(DsrChar));
+	}
+};
+
+// Replacing String with a ReadableString reference for input arguments can make passing of U"" literals faster,
+//   because String is not allowed to assume anything about how long the literal will be available.
+// Unlike String, it cannot be constructed from a "" literal, because it is not allowed to heap allocate new memory
+//   for the conversion, only hold existing buffers alive with reference counting when cast from String.
 class ReadableString {
 IMPL_ACCESS:
 	// A reference counted pointer to the buffer to allow passing strings around without having to clone the buffer each time
 	// ReadableString only uses it for reference counting but String use it for reallocating
-	Buffer buffer;
-	const char32_t* readSection = nullptr;
-	int64_t length = 0;
+	Handle<DsrChar> characters;
+	// Pointing to a subset of the buffer or memory that is not shared.
+	Impl_CharacterView view;
+	// TODO: Merge the pointer and length into a new View type for unified bound checks. Then remove the writer pointer.
+	//SafePointer<const DsrChar> reader;
+	//intptr_t length = 0;
 public:
+	// TODO: Inline the [] operator for faster reading of characters.
+	//       Use the padded read internally, because the old version was hard-coded for buffers padded to default alignment.
 	// Returning the character by value prevents writing to memory that might be a constant literal or shared with other strings
-	DsrChar operator[] (int64_t index) const;
+	inline DsrChar operator[] (intptr_t index) const {
+		return this->view[index];
+	}
 public:
 	// Empty string U""
-	ReadableString();
+	ReadableString() {}
 	// Implicit casting from U"text"
 	ReadableString(const DsrChar *content);
+	ReadableString(Handle<DsrChar> characters, Impl_CharacterView view)
+	: characters(characters), view(view) {}
 	// Create from String by sharing the buffer
 	ReadableString(const String& source);
 	// Destructor
-	virtual ~ReadableString();
-public:
-	// Converting to unknown character encoding using only the ascii character subset
-	virtual std::ostream& toStream(std::ostream& out) const;
-	virtual std::string toStdString() const;
-};
-
-// Used as format tags around numbers passed to string_append or string_combine
-// New types can implement printing to String by making wrappers from this class
-class Printable {
-public:
-	// The method for appending the printable object into the target string
-	virtual String& toStreamIndented(String& target, const ReadableString& indentation) const = 0;
-	String& toStream(String& target) const;
-	String toStringIndented(const ReadableString& indentation) const;
-	String toString() const;
-	std::ostream& toStreamIndented(std::ostream& out, const ReadableString& indentation) const;
-	std::ostream& toStream(std::ostream& out) const;
-	std::string toStdString() const;
-	virtual ~Printable();
+	~ReadableString() {} // Do not override the non-virtual destructor.
 };
 
 // A safe and simple string type
@@ -111,58 +133,73 @@ public:
 //     Endianness is native
 //     No combined characters allowed, use precomposed instead, so that the strings can guarantee a fixed character size
 class String : public ReadableString {
-IMPL_ACCESS:
-	// Same as readSection, but with write access for appending more text
-	char32_t* writeSection = nullptr;
+//IMPL_ACCESS:
+	// TODO: Have a single pointer to the data in ReadableString and let the API be responsible for type safety.
+	//SafePointer<DsrChar> writer;
 public:
 	// Constructors
 	String();
 	String(const char* source);
-	String(const char32_t* source);
-	String(const std::string& source);
+	String(const DsrChar* source);
 	String(const ReadableString& source);
 	String(const String& source);
 };
 
-// Define this overload for non-virtual source types that cannot inherit from Printable
-String& string_toStreamIndented(String& target, const Printable& source, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const char* value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const ReadableString& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const char32_t* value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const float& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const double& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const int64_t& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const uint64_t& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const int32_t& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const uint32_t& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const int16_t& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const uint16_t& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const int8_t& value, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const uint8_t& value, const ReadableString& indentation);
-
-// Templates reused for all types
-// The source must inherit from Printable or have its own string_toStreamIndented overload
-template<typename T>
-String& string_toStream(String& target, const T& source) {
-	return string_toStreamIndented(target, source, U"");
+// Used as format tags around numbers passed to string_append or string_combine
+// New types can implement printing to String by making wrappers from this class
+class Printable {
+public:
+	// The method for appending the printable object into the target string
+	virtual String& toStreamIndented(String& target, const ReadableString& indentation) const = 0;
+	String& toStream(String& target) const;
+	String toStringIndented(const ReadableString& indentation) const;
+	String toString() const;
+	virtual ~Printable();
+};
+
+String& string_toStreamIndented(String& target, const char *value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const DsrChar *value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const ReadableString &value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const double &value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const int64_t &value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const uint64_t &value, const ReadableString& indentation);
+inline String& string_toStreamIndented(String& target, const float &value, const ReadableString& indentation) {
+	return string_toStreamIndented(target, (double)value, indentation);
+}
+inline String& string_toStreamIndented(String& target, const int32_t &value, const ReadableString& indentation) {
+	return string_toStreamIndented(target, (int64_t)value, indentation);
+}
+inline String& string_toStreamIndented(String& target, const int16_t &value, const ReadableString& indentation) {
+	return string_toStreamIndented(target, (int64_t)value, indentation);
+}
+inline String& string_toStreamIndented(String& target, const int8_t &value, const ReadableString& indentation) {
+	return string_toStreamIndented(target, (int64_t)value, indentation);
 }
+inline String& string_toStreamIndented(String& target, const uint32_t &value, const ReadableString& indentation) {
+	return string_toStreamIndented(target, (uint64_t)value, indentation);
+}
+inline String& string_toStreamIndented(String& target, const uint16_t &value, const ReadableString& indentation) {
+	return string_toStreamIndented(target, (uint64_t)value, indentation);
+}
+inline String& string_toStreamIndented(String& target, const uint8_t &value, const ReadableString& indentation) {
+	return string_toStreamIndented(target, (uint64_t)value, indentation);
+}
+inline String& string_toStreamIndented(String& target, const Printable& value, const ReadableString& indentation) {
+	return value.toStreamIndented(target, indentation);
+}
+
 template<typename T>
 String string_toStringIndented(const T& source, const ReadableString& indentation) {
 	String result;
 	string_toStreamIndented(result, source, indentation);
 	return result;
 }
+
 template<typename T>
 String string_toString(const T& source) {
-	return string_toStringIndented(source, U"");
-}
-template<typename T>
-std::ostream& string_toStreamIndented(std::ostream& target, const T& source, const ReadableString& indentation) {
-	return target << string_toStringIndented(source, indentation);
-}
-template<typename T>
-std::ostream& string_toStream(std::ostream& target, const T& source) {
-	return target << string_toString(source);
+	String result;
+	string_toStreamIndented(result, source, U"");
+	return result;
 }
 
 
@@ -175,37 +212,37 @@ std::ostream& string_toStream(std::ostream& target, const T& source) {
 void string_clear(String& target);
 // Post-condition: Returns the length of source.
 //   Example: string_length(U"ABC") == 3
-int64_t string_length(const ReadableString& source);
+intptr_t string_length(const ReadableString& source);
 // Post-condition: Returns the base-zero index of source's first occurence of toFind, starting from startIndex. Returns -1 if not found.
 //   Example: string_findFirst(U"ABCABCABC", U'A') == 0
 //   Example: string_findFirst(U"ABCABCABC", U'B') == 1
 //   Example: string_findFirst(U"ABCABCABC", U'C') == 2
 //   Example: string_findFirst(U"ABCABCABC", U'D') == -1
-int64_t string_findFirst(const ReadableString& source, DsrChar toFind, int64_t startIndex = 0);
+intptr_t string_findFirst(const ReadableString& source, DsrChar toFind, intptr_t startIndex = 0);
 // Post-condition: Returns the base-zero index of source's last occurence of toFind.  Returns -1 if not found.
 //   Example: string_findLast(U"ABCABCABC", U'A') == 6
 //   Example: string_findLast(U"ABCABCABC", U'B') == 7
 //   Example: string_findLast(U"ABCABCABC", U'C') == 8
 //   Example: string_findLast(U"ABCABCABC", U'D') == -1
-int64_t string_findLast(const ReadableString& source, DsrChar toFind);
+intptr_t string_findLast(const ReadableString& source, DsrChar toFind);
 // Post-condition: Returns a sub-string of source from before the character at inclusiveStart to before the character at exclusiveEnd
 //   Example: string_exclusiveRange(U"0123456789", 2, 4) == U"23"
-ReadableString string_exclusiveRange(const ReadableString& source, int64_t inclusiveStart, int64_t exclusiveEnd);
+ReadableString string_exclusiveRange(const ReadableString& source, intptr_t inclusiveStart, intptr_t exclusiveEnd);
 // Post-condition: Returns a sub-string of source from before the character at inclusiveStart to after the character at inclusiveEnd
 //   Example: string_inclusiveRange(U"0123456789", 2, 4) == U"234"
-ReadableString string_inclusiveRange(const ReadableString& source, int64_t inclusiveStart, int64_t inclusiveEnd);
+ReadableString string_inclusiveRange(const ReadableString& source, intptr_t inclusiveStart, intptr_t inclusiveEnd);
 // Post-condition: Returns a sub-string of source from the start to before the character at exclusiveEnd
 //   Example: string_before(U"0123456789", 5) == U"01234"
-ReadableString string_before(const ReadableString& source, int64_t exclusiveEnd);
+ReadableString string_before(const ReadableString& source, intptr_t exclusiveEnd);
 // Post-condition: Returns a sub-string of source from the start to after the character at inclusiveEnd
 //   Example: string_until(U"0123456789", 5) == U"012345"
-ReadableString string_until(const ReadableString& source, int64_t inclusiveEnd);
+ReadableString string_until(const ReadableString& source, intptr_t inclusiveEnd);
 // Post-condition: Returns a sub-string of source from before the character at inclusiveStart to the end
 //   Example: string_from(U"0123456789", 5) == U"56789"
-ReadableString string_from(const ReadableString& source, int64_t inclusiveStart);
+ReadableString string_from(const ReadableString& source, intptr_t inclusiveStart);
 // Post-condition: Returns a sub-string of source from after the character at exclusiveStart to the end
 //   Example: string_after(U"0123456789", 5) == U"6789"
-ReadableString string_after(const ReadableString& source, int64_t exclusiveStart);
+ReadableString string_after(const ReadableString& source, intptr_t exclusiveStart);
 
 // Split source into a list of strings.
 // Post-condition:
@@ -226,7 +263,7 @@ inline void string_split_callback(const ReadableString& source, DsrChar separato
 }
 // Split source using separator, only to return the number of splits.
 // Useful for pre-allocation.
-int64_t string_splitCount(const ReadableString& source, DsrChar separator);
+intptr_t string_splitCount(const ReadableString& source, DsrChar separator);
 
 // Post-condition: Returns true iff c is a digit.
 //   Digit <- '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
@@ -259,10 +296,27 @@ bool string_isDouble(const ReadableString& source, bool allowWhiteSpace = true);
 // The result is signed, because the input might unexpectedly have a negation sign.
 // The result is large, so that one can easily check the range before assigning to a smaller integer type.
 int64_t string_toInteger(const ReadableString& source);
+// Side-effect: Appends value as a base ten integer at the end of target.
+void string_fromUnsigned(String& target, uint64_t value);
+// Post-condition: Returns value written as a base ten integer.
+//   The argument is unsigned like in the appending overload, so that values above the signed 64-bit range are passed through unchanged.
+inline String string_fromUnsigned(uint64_t value) {
+	String result; string_fromUnsigned(result, value); return result;
+}
+// Side-effect: Appends value as a base ten integer at the end of target.
+void string_fromSigned(String& target, int64_t value, DsrChar negationCharacter = U'-');
+// Post-condition: Returns value written as a base ten integer.
+inline String string_fromSigned(int64_t value, DsrChar negationCharacter = U'-') {
+	String result; string_fromSigned(result, value, negationCharacter); return result;
+}
 // Pre-condition: source must be a valid double according to string_isDouble. Otherwise unexpected characters are simply ignored.
 // Post-condition: Returns the double precision floating-point representation of source.
 double string_toDouble(const ReadableString& source);
-
+// Side-effect: Appends value as a base ten decimal number at the end of target.
+void string_fromDouble(String& target, double value, int decimalCount = 6, bool removeTrailingZeroes = true, DsrChar decimalCharacter = U'.', DsrChar negationCharacter = U'-');
+// Post-condition: Returns value written as a base ten decimal number.
+inline String string_fromDouble(double value, int decimalCount = 6, bool removeTrailingZeroes = true, DsrChar decimalCharacter = U'.', DsrChar negationCharacter = U'-') {
+	String result; string_fromDouble(result, value, decimalCount, removeTrailingZeroes, decimalCharacter, negationCharacter); return result;
+}
 // Loading will try to find a byte order mark and can handle UTF-8 and UTF-16.
 //   Failure to find a byte order mark will assume that the file's content is raw Latin-1,
 //   because automatic detection would cause random behaviour.
@@ -330,10 +384,10 @@ String string_mangleQuote(const ReadableString &rawText);
 String string_unmangleQuote(const ReadableString& mangledText);
 
 // Post-condition: Returns the number of strings using the same buffer, including itself.
-int64_t string_getBufferUseCount(const ReadableString& text);
+uintptr_t string_getBufferUseCount(const ReadableString& text);
 
 // Ensures safely that at least minimumLength characters can he held in the buffer
-void string_reserve(String& target, int64_t minimumLength);
+void string_reserve(String& target, intptr_t minimumLength);
 
 // Append/push one character (to avoid integer to string conversion)
 void string_appendChar(String& target, DsrChar value);
@@ -341,12 +395,12 @@ void string_appendChar(String& target, DsrChar value);
 // Append one element
 template<typename TYPE>
 inline void string_append(String& target, const TYPE &value) {
-	string_toStream(target, value);
+	string_toStreamIndented(target, value, U"");
 }
 // Append multiple elements
 template<typename HEAD, typename... TAIL>
 inline void string_append(String& target, HEAD head, TAIL&&... tail) {
-	string_append(target, head);
+	string_toStreamIndented(target, head, U"");
 	string_append(target, tail...);
 }
 // Combine a number of strings, characters and numbers
@@ -364,11 +418,11 @@ inline String string_combine(ARGS&&... args) {
 
 // Operations
 inline String operator+ (const ReadableString& a, const ReadableString& b) { return string_combine(a, b); }
-inline String operator+ (const char32_t* a, const ReadableString& b) { return string_combine(a, b); }
-inline String operator+ (const ReadableString& a, const char32_t* b) { return string_combine(a, b); }
+inline String operator+ (const DsrChar* a, const ReadableString& b) { return string_combine(a, b); }
+inline String operator+ (const ReadableString& a, const DsrChar* b) { return string_combine(a, b); }
 inline String operator+ (const String& a, const String& b) { return string_combine(a, b); }
-inline String operator+ (const char32_t* a, const String& b) { return string_combine(a, b); }
-inline String operator+ (const String& a, const char32_t* b) { return string_combine(a, b); }
+inline String operator+ (const DsrChar* a, const String& b) { return string_combine(a, b); }
+inline String operator+ (const String& a, const DsrChar* b) { return string_combine(a, b); }
 inline String operator+ (const String& a, const ReadableString& b) { return string_combine(a, b); }
 inline String operator+ (const ReadableString& a, const String& b) { return string_combine(a, b); }
 
@@ -383,6 +437,11 @@ enum class MessageType {
 	DebugPrinting // Print debug information to the terminal, if debug mode is active.
 };
 
+// Get a reference to the thread-local buffer used for printing messages.
+//   Can be combined with string_clear, string_append and string_sendMessage to send long messages in a thread-safe way.
+//   Clear, fill and send.
+String &string_getPrintBuffer();
+
 // Send a message
 void string_sendMessage(const ReadableString &message, MessageType type);
 // Send a message directly to the default message handler, ignoring string_assignMessageHandler.
@@ -403,22 +462,28 @@ void string_unassignMessageHandler();
 // Throw an error, which must terminate the application or throw an error
 template<typename... ARGS>
 void throwError(ARGS... args) {
-	String result = string_combine(args...);
-	string_sendMessage(result, MessageType::Error);
+	String *target = &(string_getPrintBuffer());
+	string_clear(*target);
+	string_append(*target, args...);
+	string_sendMessage(*target, MessageType::Error);
 }
 
 // Send a warning, which might throw an exception, terminate the application or anything else that the application requests using string_handleMessages
 template<typename... ARGS>
 void sendWarning(ARGS... args) {
-	String result = string_combine(args...);
-	string_sendMessage(result, MessageType::Warning);
+	String *target = &(string_getPrintBuffer());
+	string_clear(*target);
+	string_append(*target, args...);
+	string_sendMessage(*target, MessageType::Warning);
 }
 
 // Print information to the terminal or something else listening for messages using string_handleMessages
 template<typename... ARGS>
 void printText(ARGS... args) {
-	String result = string_combine(args...);
-	string_sendMessage(result, MessageType::StandardPrinting);
+	String *target = &(string_getPrintBuffer());
+	string_clear(*target);
+	string_append(*target, args...);
+	string_sendMessage(*target, MessageType::StandardPrinting);
 }
 
 // Debug messages are automatically disabled in release mode, so that you don't have to worry about accidentally releasing a program with poor performance from constantly printing to the terminal
@@ -432,11 +497,59 @@ void printText(ARGS... args) {
 	// Print debugText in debug mode
 	template<typename... ARGS>
 	void debugText(ARGS... args) {
-		String result = string_combine(args...);
-		string_sendMessage(result, MessageType::DebugPrinting);
+	String *target = &(string_getPrintBuffer());
+		string_clear(*target);
+		string_append(*target, args...);
+		string_sendMessage(*target, MessageType::DebugPrinting);
 	}
 #endif
 
+// Used to generate fixed size ascii strings, which is useful when heap allocations are not possible
+//   or you need a safe format until you know which encoding a system call needs to support Unicode.
+// The result is always null terminated, so at most SIZE - 1 characters are stored.
+//   Characters outside of the 7-bit ascii range are replaced with '?'.
+//   All unused elements after the text are filled with null terminators.
+template <intptr_t SIZE>
+struct FixedAscii {
+	static_assert(SIZE >= 1, "FixedAscii needs room for at least the null terminator!");
+	char characters[SIZE];
+	// Create a fixed size ascii string from a null terminated ascii string.
+	// Crops if text is too long.
+	// A null pointer is treated as an empty string.
+	FixedAscii(const char *text) {
+		intptr_t length = 0;
+		if (text != nullptr) {
+			// Stop reading at the null terminator, because the memory after it does not belong to the string.
+			while (length < SIZE - 1 && text[length] != '\0') {
+				// Compare as unsigned, because char may be signed and would then never exceed 127.
+				uint8_t c = uint8_t(text[length]);
+				this->characters[length] = (c > 127u) ? '?' : char(c);
+				length++;
+			}
+		}
+		// Fill the remainder with null terminators, including the last element.
+		for (intptr_t i = length; i < SIZE; i++) {
+			this->characters[i] = '\0';
+		}
+	}
+	// Create a fixed size ascii string from a string of wider characters.
+	// Crops if text is too long.
+	// NOTE(review): Relies on text[i] returning a null character when i is out of bound, just like the previous implementation did. Confirm against ReadableString.
+	FixedAscii(const ReadableString &text) {
+		intptr_t length = 0;
+		while (length < SIZE - 1) {
+			// Inspect the full character code before narrowing, so that non-ascii characters are replaced instead of truncated into other characters.
+			uint32_t c = uint32_t(text[length]);
+			if (c == 0u) {
+				break;
+			}
+			this->characters[length] = (c > 127u) ? '?' : char(c);
+			length++;
+		}
+		// Fill the remainder with null terminators, including the last element.
+		for (intptr_t i = length; i < SIZE; i++) {
+			this->characters[i] = '\0';
+		}
+	}
+	// Implicit access to the null terminated content for calling functions that take C strings.
+	operator const char *() const {
+		return characters;
+	}
+};
+
 }
 
 #endif

+ 126 - 0
Source/DFPSR/api/textureAPI.cpp

@@ -0,0 +1,126 @@
+
+// zlib open source license
+//
+// Copyright (c) 2025 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "textureAPI.h"
+#include "imageAPI.h"
+#include "filterAPI.h"
+			#include "drawAPI.h"
+
+namespace dsr {
+
+// Finds the smallest exponent in 0..15 for which two to the power of the exponent is at least size.
+//   Sizes that do not fit within 32768 pixels get the upper limit of 15.
+static int findLog2Size(uint32_t size) {
+	static const uint32_t maxLog2Size = 15; // 32768 pixels
+	uint32_t exponent = 0;
+	// Keep doubling the candidate until it is large enough or the upper limit is reached.
+	while (exponent < maxLog2Size && (uint32_t(1u) << exponent) < size) {
+		exponent++;
+	}
+	return exponent;
+}
+
+// Fills mip level targetLevel in texture by averaging 2x2 pixel blocks from the level above it (targetLevel - 1).
+// Pre-condition:
+//   1 <= targetLevel, because the source level is computed as targetLevel - 1 using unsigned integers.
+// Side-effect:
+//   Writes every pixel within the target level's width and height.
+// TODO: Optimize using addition and SafePointer.
+static void downsample(const TextureRgbaU8 &texture, uint32_t targetLevel) {
+	uint32_t sourceLevel = targetLevel - 1;
+	uint32_t targetWidth = texture_getWidth(texture, targetLevel);
+	uint32_t targetHeight = texture_getHeight(texture, targetLevel);
+	for (uint32_t y = 0; y < targetHeight; y++) {
+		for (uint32_t x = 0; x < targetWidth; x++) {
+			// Read the four source pixels covering the target pixel.
+			uint32_t upperLeft  = texture_readPixel(texture, x * 2    , y * 2    , sourceLevel);
+			uint32_t upperRight = texture_readPixel(texture, x * 2 + 1, y * 2    , sourceLevel);
+			uint32_t lowerLeft  = texture_readPixel(texture, x * 2    , y * 2 + 1, sourceLevel);
+			uint32_t lowerRight = texture_readPixel(texture, x * 2 + 1, y * 2 + 1, sourceLevel);
+			// Average each channel separately using integer division, which rounds down.
+			uint32_t mixedColor = packOrder_packBytes(
+			  (packOrder_getRed  (upperLeft) + packOrder_getRed  (upperRight) + packOrder_getRed  (lowerLeft) + packOrder_getRed  (lowerRight)) / 4,
+			  (packOrder_getGreen(upperLeft) + packOrder_getGreen(upperRight) + packOrder_getGreen(lowerLeft) + packOrder_getGreen(lowerRight)) / 4,
+			  (packOrder_getBlue (upperLeft) + packOrder_getBlue (upperRight) + packOrder_getBlue (lowerLeft) + packOrder_getBlue (lowerRight)) / 4,
+			  (packOrder_getAlpha(upperLeft) + packOrder_getAlpha(upperRight) + packOrder_getAlpha(lowerLeft) + packOrder_getAlpha(lowerRight)) / 4
+			);
+			texture_writePixel(texture, x, y, targetLevel, mixedColor);
+		}
+	}
+}
+
+// Allocates a texture with power of two dimensions that are large enough to store width x height pixels.
+//   Throws an error and returns an empty texture handle if the arguments are out of bound.
+TextureRgbaU8 texture_create_RgbaU8(int32_t width, int32_t height, int32_t resolutions) {
+	// At least one resolution is needed to store any pixels.
+	if (resolutions < 1) {
+		throwError(U"Tried to create a texture without any resolutions stored, which would be empty!\n");
+		return TextureRgbaU8();
+	}
+	// Both dimensions must contain at least one pixel.
+	if (width < 1 || height < 1) {
+		throwError(U"Tried to create a texture of ", width, U" x ", height, U" pixels, which would be empty!\n");
+		return TextureRgbaU8();
+	}
+	// Neither dimension may exceed the largest supported power of two.
+	if (width > 32768 || height > 32768) {
+		throwError(U"Tried to create a texture of ", width, U" x ", height, U" pixels, which exceeds the maximum texture dimensions of 32768 x 32768 pixels!\n");
+		return TextureRgbaU8();
+	}
+	// The texture stores its dimensions as base two logarithms and its depth as the index of the last mip level.
+	int log2Width = findLog2Size(width);
+	int log2Height = findLog2Size(height);
+	int lastMipLevel = resolutions - 1;
+	return TextureRgbaU8(log2Width, log2Height, lastMipLevel);
+}
+
+// Side-effect: Updates all lower resolutions in texture from the highest resolution,
+//   by generating each mip level from the level above it.
+//   Does nothing if the texture does not exist or only has one mip level.
+void texture_generatePyramid(const TextureRgbaU8& texture) {
+	uint32_t mipLevelCount = texture_getMipLevelCount(texture);
+	for (uint32_t targetLevel = 1; targetLevel < mipLevelCount; targetLevel++) {
+		downsample(texture, targetLevel);
+	}
+}
+
+// Creates a texture from image, by resizing the image to power of two dimensions and generating the lower resolutions.
+//   Returns an empty texture if the image does not exist or the texture could not be allocated.
+TextureRgbaU8 texture_create_RgbaU8(const ImageRgbaU8& image, int32_t resolutions) {
+	if (!image_exists(image)) {
+		// An empty image returns an empty pyramid.
+		return TextureRgbaU8();
+	} else {
+		// Allocate a pyramid image.
+		TextureRgbaU8 result = texture_create_RgbaU8(image_getWidth(image), image_getHeight(image), resolutions);
+		if (!texture_exists(result)) {
+			// The allocation was rejected and has already reported the error, so there is no pixel buffer to fill.
+			return result;
+		}
+		uint32_t width = texture_getMaxWidth(result);
+		uint32_t height = texture_getMaxHeight(result);
+		// Create an image of the same size as the largest resolution.
+		OrderedImageRgbaU8 resized = filter_resize(image, Sampler::Linear, width, height);
+		// Copy from the resized image to the highest resolution in the pyramid.
+		for (uint32_t y = 0; y < height; y++) {
+			SafePointer<uint32_t> source = image_getSafePointer(resized, y);
+			SafePointer<uint32_t> target = texture_getSafePointer(result, 0u, y);
+			safeMemoryCopy(target, source, width * sizeof(uint32_t));
+		}
+		texture_generatePyramid(result);
+		return result;
+	}
+}
+
+// Returns an image sharing pixel data with the layer at mipLevel in texture.
+//   Throws an error and returns an empty image handle if the texture does not exist or mipLevel is not a layer in the texture.
+ImageRgbaU8 texture_getMipLevelImage(const TextureRgbaU8& texture, int32_t mipLevel) {
+	if (!texture_exists(texture)) {
+		throwError(U"Can not get a mip level as an image from a texture that does not exist!\n");
+		return ImageRgbaU8();
+	} else if (mipLevel < 0 || mipLevel > texture_getSmallestMipLevel(texture)) {
+		// Report the error once, with the requested index and the valid range.
+		throwError(U"Can not get a non-existing mip level at index ", mipLevel, U" from a texture with layers 0..", texture_getSmallestMipLevel(texture), U"!\n");
+		return ImageRgbaU8();
+	} else {
+		// The layer's width is also used as the stride argument, because the rows of a layer are stored without padding in between.
+		return ImageRgbaU8(texture.impl_buffer, texture_getPixelOffsetToLayer<false, uint32_t>(texture, mipLevel), texture_getWidth(texture, mipLevel), texture_getHeight(texture, mipLevel), texture_getWidth(texture, mipLevel), PackOrderIndex::RGBA);
+	}
+}
+
+}

+ 530 - 0
Source/DFPSR/api/textureAPI.h

@@ -0,0 +1,530 @@
+
+// zlib open source license
+//
+// Copyright (c) 2025 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+// Everything stored directly in the image types is immutable to allow value types to behave like reference types using the data that they point to.
+// Image types can not be dynamically casted, because the inheritance is entirely static without any virtual functions.
+
+// TODO: Create a fast way to generate masks from an exponential scale floating mip level taken from sampling distances.
+// float samplingDistance (input expressed as some kind of distance in the uv coordinates between two adjacent pixels)
+// uint32_t tileXYMask (tiling should be applied to X and Y using the same mask after limiting to 16 bit integers)
+// uint32_t maxLevelMask
+// So how do we get the weights without shifting bits by the actual bit offset?
+//   Maybe add one to the mask to get a single bit and then multiply.
+/*
+	TODO: Try to handle negative texture coordinates and let positive UV be an optimization flag to enable when known to be valid.
+	      Convert to int32_t with less range and convert to unsigned correctly in modulo of 24 bits.
+
+	// Use leading zeroes to create a mask, which can be turned into a power of two by adding one.
+	// 0001000000000000 -> 0001111111111111
+	// 0001011001000100 -> 0001111111111111
+	// 0001111111111111 -> 0001111111111111
+	// 0000010000000000 -> 0000011111111111
+	// 0000010110010001 -> 0000011111111111
+	// 0000011111111111 -> 0000011111111111
+	// 0000000000100000 -> 0000000000111111
+	// 0000000000101100 -> 0000000000111111
+	// 0000000000111111 -> 0000000000111111
+	uint16_t maskFromLeadingZeroes(uint16_t value) {
+		// Turning 10 into 11
+		uint16_t result = value | (value >> 1);
+		// Turning 1100 into 1111
+		result = result | (result >> 2);
+		// Turning 11110000 into 11111111
+		result = result | (result >> 4);
+		// Turning 1111111100000000 into 1111111111111111
+		result = result | (result >> 8);
+		return result;
+	}
+
+	Generate masks for sampling a specific texture at a specific mip level.
+	  They can then be reused for multiple samples.
+	Pre-condition:
+	  0.0f < samplingDistance
+	  Use min, max, abs et cetera to create a positive sampling distance.
+	void createMasks(float samplingDistance) {
+		uint32_t density = truncateToU32(reciprocal(samplingDistance));
+		// Intel SSE2 does not have dynamic offset bit shifts, because it can only shift by constant bit offsets or dynamic byte offsets.
+		// SSE2, AVX2 and NEON have low 16-bit unsigned multiplication.
+		//   _mm_mullo_epi16, _mm256_mullo_epi16 and vmulq_u16
+		//   Using lower bits might however not be enough and might take more time than simply shifting with scalar operations.
+		//   Then we might as well use SIMD comparisons and make bit masks the way to implement it on all platforms.
+		//     Because returning 1 can be used to return a mask as a fallback.
+		//     And one can also create many overloads for direct selection without the mask in between for future optimization.
+		//   Let textures created from images have 4 mip levels by default, and allow increasing the maximum depth with an optional argument.
+		//     Then make three comparisons to select a mip level.
+		uint16_t mask = maskFromLeadingZeroes(density);
+		// scale is a power of two 16-bit integer used to multiply uv coordinates.
+		//   But SSE2 also does not have 32-bit integer multiplication, so stay in 16 bits or use bit shifts!
+		//   Split into whole pixels and weights before the multiplication somehow.
+		uint16_t scale = mask + 1;
+		// Cast directly to uint16_t with saturation.
+		tileXMask = texture.minimumWidth * scale;
+		tileYMask = texture.minimumHeight * scale;
+		startOffset = texture.startOffsetMask * scale * scale;
+	}
+*/
+
+#ifndef DFPSR_API_TEXTURE
+#define DFPSR_API_TEXTURE
+
+#include "../image/Texture.h"
+#include "../image/Image.h"
+#ifndef NDEBUG
+	#include "../api/stringAPI.h"
+#endif
+#include "../base/DsrTraits.h"
+
+namespace dsr {
+	// Post-condition: Returns true iff texture exists.
+	//   A texture is considered to exist when its pixel buffer handle is not null.
+	inline bool texture_exists(const Texture &texture) { return texture.impl_buffer.isNotNull(); }
+
+	// Post-condition: Returns the width in pixels for the highest resolution at mip level 0.
+	//   The width is stored as a base two logarithm, so it is always a power of two.
+	inline int32_t texture_getMaxWidth(const Texture &texture) { return int32_t(1) << texture.impl_log2width; }
+
+	// Pre-condition: mipLevel may not exceed the base two logarithm of the maximum width, because the difference is used as a bit shift.
+	// Post-condition: Returns the width in pixels for the resolution at mipLevel.
+	inline int32_t texture_getWidth(const Texture &texture, uint32_t mipLevel) { return int32_t(1) << (texture.impl_log2width - mipLevel); }
+
+	// Post-condition: Returns the height in pixels for the highest resolution at mip level 0.
+	//   The height is stored as a base two logarithm, so it is always a power of two.
+	inline int32_t texture_getMaxHeight(const Texture &texture) { return int32_t(1) << texture.impl_log2height; }
+
+	// Pre-condition: mipLevel may not exceed the base two logarithm of the maximum height, because the difference is used as a bit shift.
+	// Post-condition: Returns the height in pixels for the resolution at mipLevel.
+	inline int32_t texture_getHeight(const Texture &texture, uint32_t mipLevel) { return int32_t(1) << (texture.impl_log2height - mipLevel); }
+
+	// Get the maximum mip level, with zero overhead.
+	//   Does not check if the texture exists.
+	// Post-condition: Returns an index to the highest mip level.
+	inline int32_t texture_getSmallestMipLevel(const TextureRgbaU8& texture) { return texture.impl_maxMipLevel; }
+
+	// Get the number of mip levels, or zero if the texture does not exist.
+	//   Useful for looping over all mip levels in a texture, by automatically skipping texture with no mip levels.
+	// Post-condition: Returns the number of mip levels.
+	inline int32_t texture_getMipLevelCount(const TextureRgbaU8& texture) { return texture_exists(texture) ? texture.impl_maxMipLevel + 1 : 0; }
+
+	// Post-condition: Returns true iff texture has more than one mip level, so that updating the highest resolution needs to update lower layers.
+	//   Only compares the maximum mip level with zero, without checking if the texture exists.
+	inline bool texture_hasPyramid(const Texture &texture) { return texture.impl_maxMipLevel != 0; }
+
+	// Side-effect: Update all lower resolutions from the highest resolution using a basic linear average.
+	void texture_generatePyramid(const TextureRgbaU8& texture);
+
+	// mipLevel starts from 0 at the highest resolution and ends with the lowest resolution.
+	// Optimization arguments:
+	//   * HIGHEST_RESOLUTION can be set to true to ignore mipLevel and get the start offset of mip level 0.
+	// Pre-condition:
+	//   0 <= mipLevel <= 15
+	// Post-condition:
+	//   Returns the number of pixels from lower resolutions before the start of mipLevel.
+	//   Always returns 0 when there is only one mip level available.
+	template<
+	  bool HIGHEST_RESOLUTION = false,
+	  typename U, // uint32_t, U32x4, U32x8, U32xX
+	  DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))>
+	inline U texture_getPixelOffsetToLayer(const TextureRgbaU8 &texture, U mipLevel) {
+		if (HIGHEST_RESOLUTION) {
+			// The start offset stored in the texture is used directly for the highest resolution.
+			return U(texture.impl_startOffset);
+		} else {
+			// Mask out bits from the start offset using a mask that is shifted further to the right for higher mip levels.
+			//   NOTE(review): bitShiftLeftImmediate<1>(mipLevel) presumably doubles mipLevel, to shift two bits per level for the two dimensions. Confirm against the SIMD API.
+			return U(texture.impl_startOffset) & (U(texture.impl_maxLevelMask) >> bitShiftLeftImmediate<1>(mipLevel));
+		}
+	}
+
+	// mipLevel starts from 0 at the highest resolution and ends with the lowest resolution.
+	// Optimization arguments:
+	//   * SQUARE can be set to true when you know in compile time that texture has the same width and height.
+	//   * SINGLE_LAYER can be set to true when you know in compile time that there will only be a single resolution in the texture.
+	//   * XY_INSIDE can be set to true if you know that the pixel coordinates will always be within texture bounds (0 <= x < width, 0 <= y < height) without tiling.
+	//   * MIP_INSIDE can be set to true if you know that the mip level will always be within used indices (mipLevel <= texture_getSmallestMipLevel(texture)) without clamping.
+	//     Either way, mipLevel must always be within the 0..15 range, because dynamic bit shifting might truncate offsets that are too big.
+	//   * HIGHEST_RESOLUTION can be set to true if you want to ignore mipLevel and always sample the highest resolution at mipLevel 0.
+	// In debug mode, errors are thrown when the texture or arguments contradict the optimization arguments.
+	// Pre-condition:
+	//   mipLevel <= 15
+	// Post-condition:
+	//   Returns the number of pixels before the pixel at (x, y) in mipLevel.
+	template<
+	  bool SQUARE = false,             // Width and height must be the same.
+	  bool SINGLE_LAYER = false,       // Demanding that the texture only has a single layer.
+	  bool XY_INSIDE = false,          // No pixels may be sampled outside.
+	  bool MIP_INSIDE = false,         // Mip level may not go outside of existing layer indices.
+	  bool HIGHEST_RESOLUTION = false, // Ignoring any lower layers.
+	  typename U, // uint32_t, U32x4, U32x8, U32xX
+	  DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))>
+	inline U texture_getPixelOffset(const TextureRgbaU8 &texture, U x, U y, U mipLevel) {
+		// TODO: Reuse the tile masks when sampling a whole neighborhood for bi-linear sampling.
+		// Clamp the mip-level using bitwise operations in a logarithmic scale, by masking out excess bits with zeroes and filling missing bits with ones.
+		U tileMaskX = U(texture.impl_maxWidthAndMask );
+		U tileMaskY = U(texture.impl_maxHeightAndMask);
+		if (!HIGHEST_RESOLUTION) {
+			tileMaskX = tileMaskX >> mipLevel;
+			tileMaskY = tileMaskY >> mipLevel;
+		}
+		if (!MIP_INSIDE) {
+			// If the mip level index might be higher than what is used in the texture, make sure that the tile masks have at least enough bits for the lowest texture resolution.
+			tileMaskX = tileMaskX | texture.impl_minWidthOrMask;
+			if (!SQUARE) {
+				tileMaskY = tileMaskY | texture.impl_minHeightOrMask;
+			}
+		}
+		// The number of pixels per row is a power of two, so rows are selected by shifting y instead of multiplying.
+		U log2PixelStride = U(texture.impl_log2width);
+		if (!HIGHEST_RESOLUTION) {
+			log2PixelStride = log2PixelStride - mipLevel;
+		}
+		if (!XY_INSIDE) {
+			x = x & tileMaskX;
+			if (SQUARE) {
+				// Apply the same mask to both for square images, so that the other mask can be optimized away.
+				y = y & tileMaskX;
+			} else {
+				// Apply a separate mask for Y coordinates when the texture might not be square.
+				y = y & tileMaskY;
+			}
+		}
+		U coordinateOffset = ((y << log2PixelStride) | x);
+		#ifndef NDEBUG
+			// In debug mode, wrong use of optimization arguments will throw errors.
+			if (SQUARE && (texture.impl_log2width != texture.impl_log2height)) {
+				throwError(U"texture_getPixelOffset was told that the texture would have square dimensions using SQUARE, but ", texture_getMaxWidth(texture), U"x", texture_getMaxHeight(texture), U" is not square!\n");
+			}
+			if (SINGLE_LAYER && (texture_getSmallestMipLevel(texture) > 0)) {
+				throwError(U"texture_getPixelOffset was told that the texture would only have a single layer using SINGLE_LAYER, but it has ", texture_getSmallestMipLevel(texture) + 1, U" layers!\n");
+			}
+			if (XY_INSIDE && !(allLanesEqual(x & ~tileMaskX, U(0)) && allLanesEqual(y & ~tileMaskY, U(0)))) {
+				throwError(U"texture_getPixelOffset was told that the pixel coordinates would stay inside using XY_INSIDE, but the coordinate (", x, U", ", y, U") is not within ", texture_getMaxWidth(texture), U"x", texture_getMaxHeight(texture), U" pixels!\n");
+			}
+			if (!HIGHEST_RESOLUTION) {
+				if (!allLanesLesserOrEqual(mipLevel, U(15u))) {
+					throwError(U"texture_getPixelOffset got mip level ", mipLevel, U", which is not within the fixed range of 0..15!\n");
+				}
+				if (MIP_INSIDE) {
+					if (!allLanesLesserOrEqual(mipLevel, U(texture_getSmallestMipLevel(texture)))) {
+						throwError(U"texture_getPixelOffset was told that the mip level would stay within valid indices using MIP_INSIDE, but mip level ", mipLevel, U" is not within 0..", texture_getSmallestMipLevel(texture), U"!\n");
+					}
+				}
+			}
+		#endif
+		if (SINGLE_LAYER) {
+			// No lower resolutions are stored before the only layer.
+			return coordinateOffset;
+		} else {
+			U startOffset = texture_getPixelOffsetToLayer<HIGHEST_RESOLUTION, U>(texture, mipLevel);
+			return startOffset + coordinateOffset;
+		}
+	}
+
+	// Reads the packed color of the pixel at (x, y) in mipLevel, or one pixel per lane for SIMD vectors.
+	//   The optimization arguments have the same meaning as in texture_getPixelOffset, which is used to find the pixel.
+	// Pre-condition:
+	//   texture_exists(texture)
+	//   mipLevel <= 15
+	template<
+	  bool SQUARE = false,
+	  bool SINGLE_LAYER = false,
+	  bool XY_INSIDE = false,
+	  bool MIP_INSIDE = false,
+	  bool HIGHEST_RESOLUTION = false,
+	  typename U, // uint32_t, U32x4, U32x8, U32xX
+	  DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))>
+	inline U texture_readPixel(const TextureRgbaU8 &texture, U x, U y, U mipLevel) {
+		#ifndef NDEBUG
+			if (!texture_exists(texture)) {
+				throwError(U"Tried to read pixels from a texture that does not exist!\n");
+			}
+			if (!HIGHEST_RESOLUTION) {
+				if (!allLanesLesserOrEqual(mipLevel, U(15u))) {
+					throwError(U"Tried to read pixels from mip level ", mipLevel, U", which is outside of the allowed 4-bit range 0..15!\n");
+				}
+			}
+		#endif
+		SafePointer<uint32_t> data = texture.impl_buffer.getSafe<uint32_t>("RgbaU8 pyramid pixel buffer for pixel reading");
+		return gather_U32(data, texture_getPixelOffset<SQUARE, SINGLE_LAYER, XY_INSIDE, MIP_INSIDE, HIGHEST_RESOLUTION, U>(texture, x, y, mipLevel));
+	}
+
+	// Writes packedColor to the pixel at (x, y) in mipLevel.
+	//   Coordinates outside of the layer are tiled and mip levels that are not used by the texture are clamped, because no optimization arguments are given to texture_getPixelOffset.
+	// Pre-condition:
+	//   texture_exists(texture)
+	//   0 <= mipLevel <= 15
+	inline void texture_writePixel(const TextureRgbaU8 &texture, uint32_t x, uint32_t y, uint32_t mipLevel, uint32_t packedColor) {
+		#ifndef NDEBUG
+			if (!texture_exists(texture)) {
+				throwError(U"Tried to write a pixel to a texture that does not exist!\n");
+			}
+			if (mipLevel > 15u) {
+				throwError(U"Tried to write a pixel to mip level ", mipLevel, U", which is outside of the allowed 4-bit range 0..15!\n");
+			}
+		#endif
+		SafePointer<uint32_t> data = texture.impl_buffer.getSafe<uint32_t>("RgbaU8 pyramid pixel buffer for pixel writing");
+		data[texture_getPixelOffset<false, false, false, false, false, uint32_t>(texture, x, y, mipLevel)] = packedColor;
+	}
+
+	// TODO: Use these template arguments in RgbaMultiply.h to improve performance for square textures with at least 4 mip levels and UV coordinates inside of the texture.
+	// TODO: Can EXISTS be an argument to disable when non-existing images should be replaced with U(255u) for fast prototyping?
+	// Sample the nearest pixel in a normalized UV scale where one unit equals one lap around the image.
+	//   The optimization arguments are passed on to texture_readPixel, with XY_INSIDE always disabled so that coordinates are tiled.
+	// Pre-condition:
+	//   0.0f <= u, 0.0f <= v
+	//   Negative texture coordinates are not allowed, because they are converted to unsigned integers for bitwise operations.
+	//   mipLevel <= 15
+	template<
+	  bool SQUARE = false,
+	  bool SINGLE_LAYER = false,
+	  bool MIP_INSIDE = false,
+	  bool HIGHEST_RESOLUTION = false,
+	  typename U, // uint32_t, U32x4, U32x8, U32xX
+	  typename F, // float, F32x4, F32x8, F32xX, F32xF
+	  DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U) && DSR_CHECK_PROPERTY(DsrTrait_Any_F32, F))>
+	inline U texture_sample_nearest(const TextureRgbaU8 &texture, F u, F v, U mipLevel) {
+		// Start with the dimensions of the highest resolution as the scale from normalized coordinates to pixels.
+		U scaleU = U(1u) << U(texture.impl_log2width );
+		U scaleV = U(1u) << U(texture.impl_log2height);
+		if (!HIGHEST_RESOLUTION) {
+			// Halve the scale once for each mip level.
+			scaleU = scaleU >> mipLevel;
+			scaleV = scaleV >> mipLevel;
+		}
+		// Truncate to whole pixel coordinates.
+		U xPixel = truncateToU32(u * floatFromU32(scaleU));
+		U yPixel = truncateToU32(v * floatFromU32(scaleV));
+		return texture_readPixel<SQUARE, SINGLE_LAYER, false, MIP_INSIDE, HIGHEST_RESOLUTION, U>(texture, xPixel, yPixel, mipLevel);
+	}
+
+	// Returns (colorA * weightA + colorB * weightB) / 256 as bytes
+	// weightA and weightB should contain pairs of the same 16-bit weights for each of the 4 pixels in the corresponding A and B colors
+	//   NOTE(review): The 16-bit sums presumably rely on weightA + weightB not exceeding 256 to avoid overflow. Confirm with callers.
+	template <typename U32, typename U16, DSR_ENABLE_IF(
+	  DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U32) &&
+	  DSR_CHECK_PROPERTY(DsrTrait_Any_U16, U16)
+	)>
+	inline U32 weightColors(const U32 &colorA, const U16 &weightA, const U32 &colorB, const U16 &weightB) {
+		// Split each color into two halves with one byte in each 16-bit element, so that every channel has room to be multiplied by a weight.
+		U32 lowMask(0x00FF00FFu);
+		U16 lowColorA = reinterpret_U16FromU32(colorA & lowMask);
+		U16 lowColorB = reinterpret_U16FromU32(colorB & lowMask);
+		U32 highMask(0xFF00FF00u);
+		U16 highColorA = reinterpret_U16FromU32(bitShiftRightImmediate<8>(colorA & highMask));
+		U16 highColorB = reinterpret_U16FromU32(bitShiftRightImmediate<8>(colorB & highMask));
+		// Multiply by the weights and add the results as 16-bit elements.
+		U32 lowColor = reinterpret_U32FromU16(((lowColorA * weightA) + (lowColorB * weightB)));
+		U32 highColor = reinterpret_U32FromU16(((highColorA * weightA) + (highColorB * weightB)));
+		// The low half is shifted down by 8 bits to divide by 256, while the high half already has its result in the upper byte of each element.
+		return ((bitShiftRightImmediate<8>(lowColor) & lowMask) | (highColor & highMask));
+	}
+
+	// Copies the lower 16 bits of each 32-bit element into the upper 16 bits, and returns the result reinterpreted as 16-bit elements.
+	// The more significant bits must be zero so that the lower bits can fill the space.
+	//   lowBits[x] < 2^16
+	template <typename U32, DSR_ENABLE_IF(
+	  DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U32)
+	)>
+	inline auto repeatAs16Bits(const U32 &lowBits) {
+		return reinterpret_U16FromU32(lowBits | bitShiftLeftImmediate<16>(lowBits));
+	}
+
+	// Returns 256 - weight
+	//   NOTE(review): 0x01000100u looks like 256 repeated in both 16-bit halves of a 32-bit value. Confirm how each U16 type constructs from it.
+	template <typename U16, DSR_ENABLE_IF(
+	  DSR_CHECK_PROPERTY(DsrTrait_Any_U16, U16)
+	)>
+	inline U16 invertWeight(const U16 &weight) {
+		return U16(0x01000100u) - weight;
+	}
+
+	/* TODO: Use for anisotropic or tri-linear sampling.
+	template <typename U32, typename U16>
+	inline U32 mix_L(const U32 &colorA, const U32 &colorB, const U32 &weight) {
+		// Get inverse weights
+		U16 weightB = repeatAs16Bits(weight);
+		U16 weightA = invertWeight(weightB);
+		// Multiply
+		return weightColors(colorA, weightA, colorB, weightB);
+	}
+	*/
+
+	// Bi-linear interpolation between four packed colors.
+	//   colorA and colorB are mixed using weightX, colorC and colorD are mixed using weightX, and the two results are then mixed using weightY.
+	//   A weight of zero selects the first color in each mix, because the first color is multiplied by 256 - weight.
+	template <typename U32, typename U16>
+	inline U32 mix_BL(const U32 &colorA, const U32 &colorB, const U32 &colorC, const U32 &colorD, const U32 &weightX, const U32 &weightY) {
+		// Get inverse weights
+		U16 weightXR = repeatAs16Bits<U32>(weightX);
+		U16 weightYB = repeatAs16Bits<U32>(weightY);
+		U16 weightXL = invertWeight<U16>(weightXR);
+		U16 weightYT = invertWeight<U16>(weightYB);
+		// Multiply
+		return weightColors<U32, U16>(weightColors(colorA, weightXL, colorB, weightXR), weightYT, weightColors(colorC, weightXL, colorD, weightXR), weightYB);
+	}
+
+	// Sample a bi-linear interpolation of the four closest pixels in a normalized UV scale where one unit equals one lap around the image.
+	//   Pixel coordinates are tiled, so that sampling across an edge interpolates with pixels from the opposite side.
+	//   The optimization arguments have the same meaning as in texture_getPixelOffset, and wrong use throws errors in debug mode.
+	// Pre-condition:
+	//   0.0f <= u, 0.0f <= v
+	//   mipLevel <= 15
+	template<
+	  bool SQUARE = false,
+	  bool SINGLE_LAYER = false,
+	  bool MIP_INSIDE = false,
+	  bool HIGHEST_RESOLUTION = false,
+	  typename U32, // uint32_t, U32x4, U32x8, U32xX
+	  typename U16, // The 16-bit integer type matching U32, which must satisfy DsrTrait_Any_U16.
+	  typename F32, // float, F32x4, F32x8, F32xX, F32xF
+	  DSR_ENABLE_IF(
+	    DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U32) &&
+	    DSR_CHECK_PROPERTY(DsrTrait_Any_U16, U16) &&
+	    DSR_CHECK_PROPERTY(DsrTrait_Any_F32, F32)
+	  )>
+	inline U32 texture_sample_bilinear(const TextureRgbaU8 &texture, F32 u, F32 v, U32 mipLevel) {
+		U32 scaleU = U32(256u) << U32(texture.impl_log2width );
+		U32 scaleV = U32(256u) << U32(texture.impl_log2height);
+		if (!HIGHEST_RESOLUTION) {
+			scaleU = scaleU >> mipLevel;
+			scaleV = scaleV >> mipLevel;
+		}
+		// Convert from the normalized 0..1 scale to a 0..size*256 scale for 8 bits of sub-pixel precision.
+		//   Half a pixel is subtracted so that the seam between bi-linear patches end up at the center of texels.
+		U32 subCenterX = truncateToU32(u * floatFromU32(scaleU)) - U32(128);
+		U32 subCenterY = truncateToU32(v * floatFromU32(scaleV)) - U32(128);
+		// Get the remainders as interpolation weights.
+		U32 weightX = subCenterX & 0xFF;
+		U32 weightY = subCenterY & 0xFF;
+		// Divide and truncate sub-pixel coordinates to get whole pixel coordinates.
+		U32 pixelLeft = bitShiftRightImmediate<8>(subCenterX);
+		U32 pixelTop = bitShiftRightImmediate<8>(subCenterY);
+		U32 pixelRight = pixelLeft + 1;
+		U32 pixelBottom = pixelTop + 1;
+		// Generate pixel tiling masks.
+		U32 tileMaskX = U32(texture.impl_maxWidthAndMask );
+		U32 tileMaskY = U32(texture.impl_maxHeightAndMask);
+		if (!HIGHEST_RESOLUTION) {
+			tileMaskX = tileMaskX >> mipLevel;
+			tileMaskY = tileMaskY >> mipLevel;
+		}
+		if (!MIP_INSIDE) {
+			// If the mip level index might be higher than what is used in the texture, make sure that the tile masks have at least enough bits for the lowest texture resolution.
+			tileMaskX = tileMaskX | texture.impl_minWidthOrMask;
+			if (!SQUARE) {
+				tileMaskY = tileMaskY | texture.impl_minHeightOrMask;
+			}
+		}
+		// Get the stride.
+		U32 log2PixelStride = U32(texture.impl_log2width);
+		if (!HIGHEST_RESOLUTION) {
+			log2PixelStride = log2PixelStride - mipLevel;
+		}
+		// Apply tiling masks
+		pixelLeft = pixelLeft & tileMaskX;
+		pixelRight = pixelRight & tileMaskX;
+		if (SQUARE) {
+			// Apply the same mask to both for square images, so that the other mask can be optimized away.
+			pixelTop = pixelTop & tileMaskX;
+			pixelBottom = pixelBottom & tileMaskX;
+		} else {
+			// Apply a separate mask for Y coordinates when the texture might not be square.
+			pixelTop = pixelTop & tileMaskY;
+			pixelBottom = pixelBottom & tileMaskY;
+		}
+		#ifndef NDEBUG
+			// In debug mode, wrong use of optimization arguments will throw errors.
+			if (SQUARE && (texture.impl_log2width != texture.impl_log2height)) {
+				throwError(U"texture_sample_bilinear was told that the texture would have square dimensions using SQUARE, but ", texture_getMaxWidth(texture), U"x", texture_getMaxHeight(texture), U" is not square!\n");
+			}
+			if (SINGLE_LAYER && (texture_getSmallestMipLevel(texture) > 0)) {
+				throwError(U"texture_sample_bilinear was told that the texture would only have a single layer using SINGLE_LAYER, but it has ", texture_getSmallestMipLevel(texture) + 1, U" layers!\n");
+			}
+			if (!HIGHEST_RESOLUTION) {
+				if (!allLanesLesserOrEqual(mipLevel, U32(15u))) {
+					throwError(U"texture_sample_bilinear got mip level ", mipLevel, U", which is not within the fixed range of 0..15!\n");
+				}
+				if (MIP_INSIDE) {
+					if (!allLanesLesserOrEqual(mipLevel, U32(texture_getSmallestMipLevel(texture)))) {
+						throwError(U"texture_sample_bilinear was told that the mip level would stay within valid indices using MIP_INSIDE, but mip level ", mipLevel, U" is not within 0..", texture_getSmallestMipLevel(texture), U"!\n");
+					}
+				}
+			}
+		#endif
+		// Combine rows and columns into pixel offsets within the layer.
+		U32 upperOffset       = pixelTop    << log2PixelStride;
+		U32 bottomOffset      = pixelBottom << log2PixelStride;
+		U32 upperLeftOffset   = upperOffset  | pixelLeft;
+		U32 upperRightOffset  = upperOffset  | pixelRight;
+		U32 bottomLeftOffset  = bottomOffset | pixelLeft;
+		U32 bottomRightOffset = bottomOffset | pixelRight;
+		if (!SINGLE_LAYER) {
+			// Skip past the lower resolutions stored before the sampled layer.
+			U32 layerStartOffset = texture_getPixelOffsetToLayer<HIGHEST_RESOLUTION, U32>(texture, mipLevel);
+			upperLeftOffset  = upperLeftOffset  + layerStartOffset;
+			upperRightOffset = upperRightOffset + layerStartOffset;
+			bottomLeftOffset  = bottomLeftOffset  + layerStartOffset;
+			bottomRightOffset = bottomRightOffset + layerStartOffset;
+		}
+		// Read the four pixels and interpolate between them.
+		SafePointer<uint32_t> data = texture.impl_buffer.getSafe<uint32_t>("RgbaU8 pyramid pixel buffer for bi-linear pixel sampling");
+		U32 upperLeftColor   = gather_U32(data, upperLeftOffset  );
+		U32 upperRightColor  = gather_U32(data, upperRightOffset );
+		U32 bottomLeftColor  = gather_U32(data, bottomLeftOffset );
+		U32 bottomRightColor = gather_U32(data, bottomRightOffset);
+		return mix_BL<U32, U16>(upperLeftColor, upperRightColor, bottomLeftColor, bottomRightColor, weightX, weightY);
+	}
+
+	// resolutions is the maximum number of resolutions to create.
+	//   The actual number of layers in the texture is limited by the most narrow dimension.
+	//   A texture of 16x4 pixels can have up to three resolutions, 4x1, 8x2 and 16x4.
+	//   A texture of 8x8 pixels can have up to four resolutions, 1x1, 2x2, 4x4 and 8x8.
+	// Pre-condition:
+	//   1 <= width <= 32768
+	//   1 <= height <= 32768
+	//   1 <= resolutions <= 16
+	// Post-condition:
+	//   Returns a pyramid image of the smallest power of two size capable of storing width x height pixels, by scaling up the resolution with interpolation if needed.
+	TextureRgbaU8 texture_create_RgbaU8(int32_t width, int32_t height, int32_t resolutions);
+	// Pre-condition:
+	//   1 <= width <= 32768
+	//   1 <= height <= 32768
+	//   1 <= resolutions
+	// Post-condition:
+	//   Returns a pyramid image created from image, or an empty pyramid if the image is empty.
+	TextureRgbaU8 texture_create_RgbaU8(const ImageRgbaU8& image, int32_t resolutions);
+
+	// Get a layer from the texture as an image.
+	// Pre-condition:
+	//   texture_exists(texture)
+	//   0 <= mipLevel <= texture_getSmallestMipLevel(texture)
+	// Post-condition:
+	//   Returns an unaligned RGBA image sharing pixel data with the requested texture layer.
+	ImageRgbaU8 texture_getMipLevelImage(const TextureRgbaU8& texture, int32_t mipLevel);
+
+	// Selects a mip level from how far the texture coordinates move between adjacent pixels.
+	//   The first three elements of u and v are compared, where x and y are treated as horizontal neighbors and x and z as vertical neighbors.
+	// Post-condition:
+	//   Returns a mip level in the range 0..4, without checking how many mip levels the texture has.
+	// TODO: Pre-calculate the pixel offset, float scales and tile masks and merge into a reusable multi-layer sampling method.
+	//       Because dynamic bit shifts can not be vectorized on Intel processors and would be the same for 2x2 pixels anyway.
+	//       The hard part will be to implement it for ARM SVE with variable width vectors, so maybe calculate the
+	//         2x2 derivation sparsely, interpolate a floating mip level and do comparisons vectorized with blend instructions to select masks.
+	template <typename F>
+	inline uint32_t texture_getMipLevelIndex(const TextureRgbaU8 &source, const F &u, const F &v) {
+		// TODO: Support reading elements from SIMD vectors of any size somehow. Can use SVE's maximum size of 2048 bits as the space to allocate in advance to aligned stack memory.
+		// Assume that F is at least 128 bits wide and reuse the result for additional pixels if there is more.
+		auto ua = u.get();
+		auto va = v.get();
+		float offsetUX = fabs(ua.x - ua.y); // Left U - Right  U
+		float offsetUY = fabs(ua.x - ua.z); // Top  U - Bottom U
+		float offsetVX = fabs(va.x - va.y); // Left V - Right  V
+		float offsetVY = fabs(va.x - va.z); // Top  V - Bottom V
+		// Convert the largest normalized offsets into distances measured in pixels of the highest resolution.
+		float offsetU = max(offsetUX, offsetUY) * source.impl_floatMaxWidth;
+		float offsetV = max(offsetVX, offsetVY) * source.impl_floatMaxHeight;
+		float offset = max(offsetU, offsetV);
+		int result = 0;
+		// TODO: Can count leading zeroes be used with integers to use all available mip levels?
+		//       It would make MIP_INSIDE useless for optimization.
+		// Each doubling of the sampling distance selects the next lower resolution.
+		if (offset >  2.0f) { result = 1; }
+		if (offset >  4.0f) { result = 2; }
+		if (offset >  8.0f) { result = 3; }
+		if (offset > 16.0f) { result = 4; }
+		// TODO: Should it be possible to configure the number of mip levels?
+		return result;
+	}
+
+	// TODO: Optimize using template arguments.
+	// Pre-conditions:
+	//   0 <= mipLevel <= texture_getSmallestMipLevel(texture)
+	// Post-condition:
+	//   Returns a safe pointer to the first pixel at mipLevel in texture.
+	//   The pointer has element type U, while the offset to the layer is always counted in 32-bit pixels.
+	template <typename U = uint32_t>
+	inline SafePointer<U> texture_getSafePointer(const TextureRgbaU8& texture, uint32_t mipLevel) {
+		// Get a pointer to the start of the image.
+		return texture.impl_buffer.getSafe<U>("RgbaU8 pyramid pixel buffer").increaseBytes(texture_getPixelOffsetToLayer(texture, mipLevel) * sizeof(uint32_t));
+	}
+
+	// TODO: Optimize using template arguments.
+	// Pre-conditions:
+	//   0 <= mipLevel <= texture_getSmallestMipLevel(texture)
+	//   0 <= rowIndex < texture_getHeight(texture, mipLevel)
+	// Post-condition:
+	//   Returns a safe pointer to the first pixel at rowIndex in mipLevel in texture.
+	template <typename U = uint32_t>
+	inline SafePointer<U> texture_getSafePointer(const TextureRgbaU8& texture, int32_t mipLevel, int32_t rowIndex) {
+		// Rows are stored without padding, so the stride in bytes is the layer's width multiplied by the size of a 32-bit pixel.
+		return texture_getSafePointer<U>(texture, mipLevel).increaseBytes(texture_getWidth(texture, mipLevel) * sizeof(uint32_t) * rowIndex);
+	}
+}
+
+#endif

+ 0 - 55
Source/DFPSR/api/types.cpp

@@ -1,55 +0,0 @@
-
-// zlib open source license
-//
-// Copyright (c) 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#define DFPSR_INTERNAL_ACCESS
-
-#include "types.h"
-#include "../image/Image.h"
-#include "../image/ImageU8.h"
-#include "../image/ImageU16.h"
-#include "../image/ImageF32.h"
-#include "../image/ImageRgbaU8.h"
-#include "../image/PackOrder.h"
-
-using namespace dsr;
-
-// Null
-ImageU8::ImageU8() {}
-ImageU16::ImageU16() {}
-ImageF32::ImageF32() {}
-ImageRgbaU8::ImageRgbaU8() {}
-MediaMachine::MediaMachine() {}
-
-// Existing shared pointer
-ImageU8::ImageU8(const std::shared_ptr<ImageU8Impl>& image) : std::shared_ptr<ImageU8Impl>(image) {}
-ImageU16::ImageU16(const std::shared_ptr<ImageU16Impl>& image) : std::shared_ptr<ImageU16Impl>(image) {}
-ImageF32::ImageF32(const std::shared_ptr<ImageF32Impl>& image) : std::shared_ptr<ImageF32Impl>(image) {}
-ImageRgbaU8::ImageRgbaU8(const std::shared_ptr<ImageRgbaU8Impl>& image) : std::shared_ptr<ImageRgbaU8Impl>(image) {}
-MediaMachine::MediaMachine(const std::shared_ptr<VirtualMachine>& machine) : std::shared_ptr<VirtualMachine>(machine) {}
-
-// Shallow copy
-ImageU8::ImageU8(const ImageU8Impl& image) : std::shared_ptr<ImageU8Impl>(std::make_shared<ImageU8Impl>(image)) {}
-ImageU16::ImageU16(const ImageU16Impl& image) : std::shared_ptr<ImageU16Impl>(std::make_shared<ImageU16Impl>(image)) {}
-ImageF32::ImageF32(const ImageF32Impl& image) : std::shared_ptr<ImageF32Impl>(std::make_shared<ImageF32Impl>(image)) {}
-ImageRgbaU8::ImageRgbaU8(const ImageRgbaU8Impl& image) : std::shared_ptr<ImageRgbaU8Impl>(std::make_shared<ImageRgbaU8Impl>(image)) {}

+ 0 - 214
Source/DFPSR/api/types.h

@@ -1,214 +0,0 @@
-
-// zlib open source license
-//
-// Copyright (c) 2019 to 2022 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#ifndef DFPSR_API_TYPES
-#define DFPSR_API_TYPES
-
-#include <cstdint>
-#include <memory>
-#include "../image/Color.h"
-#include "../math/IRect.h"
-#include "stringAPI.h"
-
-// Define DFPSR_INTERNAL_ACCESS before any include to get internal access to exposed types
-#ifdef DFPSR_INTERNAL_ACCESS
-	#define IMPL_ACCESS public
-#else
-	#define IMPL_ACCESS protected
-#endif
-
-namespace dsr {
-
-enum class ImageFileFormat {
-	Unknown, // Used as an error code for unidentified formats.
-	JPG, // Lossy compressed image format storing brightness separated from red and blue offsets using the discrete cosine transform of each block.
-	PNG, // Lossless compressed image format. Some image editors don't save RGB values where alpha is zero, which will bleed through black edges in bi-linear interpolation when the interpolated alpha is not zero.
-	TGA, // Lossless compressed format. Applications usually give Targa better control over the alpha channel than PNG, but it's more common that the Targa specification is interpreted in incompatible ways.
-	BMP // Uncompressed image format for storing data that does not really represent an image and you just want it to be exact.
-};
-
-enum class PackOrderIndex {
-	RGBA, // Windows
-	BGRA, // Ubuntu
-	ARGB,
-	ABGR
-};
-
-enum class Sampler {
-	Nearest,
-	Linear
-};
-
-enum class ReturnCode {
-	Good,
-	KeyNotFound,
-	ParsingFailure
-};
-
-// A handle to a model.
-class ModelImpl;
-using Model = std::shared_ptr<ModelImpl>;
-
-// A handle to a multi-threaded rendering context.
-class RendererImpl;
-using Renderer = std::shared_ptr<RendererImpl>;
-
-// A handle to a window.
-//  The Window wraps itself around native window backends to abstract away platform specific details.
-//  It also makes it easy to load and use a graphical interface using the optional component system.
-class DsrWindow;
-using Window = std::shared_ptr<DsrWindow>;
-
-// A handle to a GUI component.
-//   Components are an abstraction for graphical user interfaces, which might not always be powerful enough.
-//   * If you're making something advanced that components cannot do,
-//     you can also use draw calls and input events directly against the window without using Component.
-class VisualComponent;
-using Component = std::shared_ptr<VisualComponent>;
-
-// A handle to a GUI theme.
-//   Themes describes the visual appearance of an interface.
-//   By having more than one theme for your interface, you can let the user select one.
-class VisualThemeImpl;
-using VisualTheme = std::shared_ptr<VisualThemeImpl>;
-
-// A handle to a raster font
-class RasterFontImpl;
-using RasterFont = std::shared_ptr<RasterFontImpl>;
-
-// A handle to a media machine.
-//   Media machines can be used to generate, filter and analyze images.
-//   Everything running in a media machine is guaranteed to be 100% deterministic to the last bit.
-//     This reduces the amount of code where maintenance has to be performed during porting.
-//     It also means that any use of float or double is forbidden.
-struct VirtualMachine;
-struct MediaMachine : IMPL_ACCESS std::shared_ptr<VirtualMachine> {
-	MediaMachine(); // Defaults to null
-IMPL_ACCESS:
-	explicit MediaMachine(const std::shared_ptr<VirtualMachine>& machine);
-};
-
-// Images
-// Points to a buffer region holding at least height * stride bytes.
-// Each row contains:
-//   * A number of visible pixels
-//   * A number of unused bytes
-//     New or cloned images have their stride aligned to 16-bytes
-//       Stride is the number of bytes from the start of one row to the next
-//     Sub-images have the same stride and buffer as their parent
-//       Some unused pixels may be visible somewhere else
-
-// 8-bit unsigned integer grayscale image
-class ImageU8Impl;
-struct ImageU8 : IMPL_ACCESS std::shared_ptr<ImageU8Impl> {
-	ImageU8(); // Defaults to null
-IMPL_ACCESS:
-	explicit ImageU8(const std::shared_ptr<ImageU8Impl>& image);
-	explicit ImageU8(const ImageU8Impl& image);
-};
-// Invariant:
-//    * Each row's start and stride is aligned with 16-bytes in memory (16-byte = 16 pixels)
-//      This allow reading a full SIMD vector at each row's end without violating memory bounds
-//    * No other image can displays pixels from its padding
-//      This allow writing a full SIMD vector at each row's end without making visible changes outside of the bound
-struct AlignedImageU8 : public ImageU8 {
-	AlignedImageU8() {} // Defaults to null
-IMPL_ACCESS:
-	explicit AlignedImageU8(const std::shared_ptr<ImageU8Impl>& image) : ImageU8(image) {}
-	explicit AlignedImageU8(const ImageU8Impl& image) : ImageU8(image) {}
-};
-
-// 16-bit unsigned integer grayscale image
-class ImageU16Impl;
-struct ImageU16 : IMPL_ACCESS std::shared_ptr<ImageU16Impl> {
-	ImageU16(); // Defaults to null
-IMPL_ACCESS:
-	explicit ImageU16(const std::shared_ptr<ImageU16Impl>& image);
-	explicit ImageU16(const ImageU16Impl& image);
-};
-// Invariant:
-//    * Each row's start and stride is aligned with 16-bytes in memory (16-byte = 16 pixels)
-//      This allow reading a full SIMD vector at each row's end without violating memory bounds
-//    * No other image can displays pixels from its padding
-//      This allow writing a full SIMD vector at each row's end without making visible changes outside of the bound
-struct AlignedImageU16 : public ImageU16 {
-	AlignedImageU16() {} // Defaults to null
-IMPL_ACCESS:
-	explicit AlignedImageU16(const std::shared_ptr<ImageU16Impl>& image) : ImageU16(image) {}
-	explicit AlignedImageU16(const ImageU16Impl& image) : ImageU16(image) {}
-};
-
-// 32-bit floating-point grayscale image
-class ImageF32Impl;
-struct ImageF32 : IMPL_ACCESS std::shared_ptr<ImageF32Impl> {
-	ImageF32(); // Defaults to null
-IMPL_ACCESS:
-	explicit ImageF32(const std::shared_ptr<ImageF32Impl>& image);
-	explicit ImageF32(const ImageF32Impl& image);
-};
-// Invariant:
-//    * Each row's start and stride is aligned with 16-bytes in memory (16-byte = 4 pixels)
-//      This allow reading a full SIMD vector at each row's end without violating memory bounds
-//    * No other image can displays pixels from its padding
-//      This allow writing a full SIMD vector at each row's end without making visible changes outside of the bound
-struct AlignedImageF32 : public ImageF32 {
-	AlignedImageF32() {} // Defaults to null
-IMPL_ACCESS:
-	explicit AlignedImageF32(const std::shared_ptr<ImageF32Impl>& image) : ImageF32(image) {}
-	explicit AlignedImageF32(const ImageF32Impl& image) : ImageF32(image) {}
-};
-
-// 4x8-bit unsigned integer RGBA color image
-class ImageRgbaU8Impl;
-struct ImageRgbaU8 : IMPL_ACCESS std::shared_ptr<ImageRgbaU8Impl> {
-	ImageRgbaU8(); // Defaults to null
-IMPL_ACCESS:
-	explicit ImageRgbaU8(const std::shared_ptr<ImageRgbaU8Impl>& image);
-	explicit ImageRgbaU8(const ImageRgbaU8Impl& image);
-};
-// Invariant:
-//    * Each row's start and stride is aligned with 16-bytes in memory (16-byte = 4 pixels)
-//      This allow reading a full SIMD vector at each row's end without violating memory bounds
-//    * No other image can displays pixels from its padding
-//      This allow writing a full SIMD vector at each row's end without making visible changes outside of the bound
-struct AlignedImageRgbaU8 : public ImageRgbaU8 {
-	AlignedImageRgbaU8() {} // Defaults to null
-IMPL_ACCESS:
-	explicit AlignedImageRgbaU8(const std::shared_ptr<ImageRgbaU8Impl>& image) : ImageRgbaU8(image) {}
-	explicit AlignedImageRgbaU8(const ImageRgbaU8Impl& image) : ImageRgbaU8(image) {}
-};
-// Invariant:
-//    * Using the default RGBA pack order
-//      This removes the need to implement filters for different pack orders when RGBA can be safely assumed
-//      Just use AlignedImageRgbaU8 if channels don't have to be aligned
-struct OrderedImageRgbaU8 : public AlignedImageRgbaU8 {
-	OrderedImageRgbaU8() {} // Defaults to null
-IMPL_ACCESS:
-	explicit OrderedImageRgbaU8(const std::shared_ptr<ImageRgbaU8Impl>& image) : AlignedImageRgbaU8(image) {}
-	explicit OrderedImageRgbaU8(const ImageRgbaU8Impl& image) : AlignedImageRgbaU8(image) {}
-};
-
-}
-
-#endif

+ 131 - 0
Source/DFPSR/base/DsrTraits.h

@@ -0,0 +1,131 @@
+// zlib open source license
+//
+// Copyright (c) 2025 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+// These custom traits allow implementing template functions that can work with SIMD types when needed, without exposing simd.h in headers.
+
+#ifndef DFPSR_TRAITS
+#define DFPSR_TRAITS
+
+	#include <stdint.h>
+
+	namespace dsr {
+		// Subset of std::integral_constant.
+		template <typename T, T VALUE>
+		struct DSR_PROPERTY {
+			static constexpr T value = VALUE;
+		};
+		// Custom implementation of std::false_type.
+		using DSR_TRAIT_FALSE = DSR_PROPERTY<bool, false>;
+		// Custom implementation of std::true_type.
+		using DSR_TRAIT_TRUE = DSR_PROPERTY<bool, true>;
+		// Custom implementation of std::is_same.
+		template <typename T, typename U>
+		struct DsrTrait_SameType { static const bool value = false; };
+		template <typename T>
+		struct DsrTrait_SameType<T, T> { static const bool value = true; };
+		// Custom implementation of std::enable_if.
+		template<bool B, class T = void>
+		struct DsrTrait_EnableIf;
+		template<class T>
+		struct DsrTrait_EnableIf<true, T> {
+			using type = T;
+		};
+
+		// Place this as a template argument to disable the template function when false.
+		#define DSR_ENABLE_IF(TRAIT) \
+			typename = typename DsrTrait_EnableIf<TRAIT>::type 
+
+		// Properties are given to single types.
+		#define DSR_DECLARE_PROPERTY(PROPERTY_NAME) \
+			template <typename T> struct PROPERTY_NAME : DSR_TRAIT_FALSE {};
+
+		#define DSR_APPLY_PROPERTY(PROPERTY_NAME, TYPE_NAME) \
+			template <> struct PROPERTY_NAME<TYPE_NAME> : DSR_TRAIT_TRUE  {};
+
+		#define DSR_CHECK_PROPERTY(PROPERTY_NAME, TYPE_NAME) \
+			(PROPERTY_NAME<TYPE_NAME>::value)
+
+		// Relations are given to pairs of types.
+		#define DSR_DECLARE_RELATION(RELATION_NAME) \
+			template <typename T, typename U> struct RELATION_NAME : DSR_TRAIT_FALSE {};
+
+		#define DSR_APPLY_RELATION(RELATION_NAME, TYPE_A, TYPE_B) \
+			template <> struct RELATION_NAME<TYPE_A, TYPE_B> : DSR_TRAIT_TRUE  {};
+
+		#define DSR_CHECK_RELATION(RELATION_NAME, TYPE_A, TYPE_B) \
+			(RELATION_NAME<TYPE_A, TYPE_B>::value)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Any_U8)
+		DSR_APPLY_PROPERTY(DsrTrait_Any_U8, uint8_t)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Any_U16)
+		DSR_APPLY_PROPERTY(DsrTrait_Any_U16, uint16_t)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Any_U32)
+		DSR_APPLY_PROPERTY(DsrTrait_Any_U32, uint32_t)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Any_I32)
+		DSR_APPLY_PROPERTY(DsrTrait_Any_I32, int32_t)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Any_F32)
+		DSR_APPLY_PROPERTY(DsrTrait_Any_F32, float)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Scalar_SignedInteger)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_SignedInteger,  int8_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_SignedInteger, int16_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_SignedInteger, int32_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_SignedInteger, int64_t)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Scalar_UnsignedInteger)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_UnsignedInteger,  uint8_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_UnsignedInteger, uint16_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_UnsignedInteger, uint32_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_UnsignedInteger, uint64_t)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Scalar_Floating)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Floating,  float)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Floating, double)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Scalar_Integer)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Integer,   int8_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Integer,  int16_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Integer,  int32_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Integer,  int64_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Integer,  uint8_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Integer, uint16_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Integer, uint32_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar_Integer, uint64_t)
+
+		DSR_DECLARE_PROPERTY(DsrTrait_Scalar)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar,   int8_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar,  int16_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar,  int32_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar,  int64_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar,  uint8_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar, uint16_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar, uint32_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar, uint64_t)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar,    float)
+		DSR_APPLY_PROPERTY(DsrTrait_Scalar,   double)
+	}
+#endif

+ 371 - 0
Source/DFPSR/base/Handle.h

@@ -0,0 +1,371 @@
+// zlib open source license
+//
+// Copyright (c) 2024 to 2025 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_HANDLE
+#define DFPSR_HANDLE
+
+#include "heap.h"
+#include <utility>
+
+enum class AllocationInitialization {
+	Uninitialized, // Used when the data will be instantly overwritten.
+	Zeroed,        // Used for trivial data types.
+	Constructed    // Used for a few objects.
+};
+
+namespace dsr {
+	template <typename T>
+	class Handle {
+	private:
+		// The internal pointer that reference counting is added to.
+		//   Must be allocated using heap_allocate, so that it can be freed using heap_free when the use count reaches zero.
+		T *data = nullptr;
+		#ifdef SAFE_POINTER_CHECKS
+			// The identity that should match the allocation header's identity.
+			uint64_t allocationIdentity = 0;
+			inline void validate() const {
+				if (this->data != nullptr) {
+					// Heap allocations are shared with all threads, so we only need to check the identity.
+					AllocationHeader *header = heap_getHeader(this->data);
+					if (header->allocationIdentity != this->allocationIdentity) {
+						impl_throwIdentityMismatch(header->allocationIdentity, this->allocationIdentity);
+					}
+				}
+			}
+		#endif
+	public:
+		// Default construct an empty handle.
+		Handle() {}
+
+		// Assigns a debug name to the handled heap allocation.
+		//   Returns the handle by reference to allow call chaining:
+		//     return handle_create<MyType>(some, arguments).setName("data for something specific");
+		//     return buffer_create(size).setName("data for something specific");
+		//   Should be trivially optimized away by the compiler in release mode.
+		inline Handle<T> &setName(const char *name) {
+			#ifdef SAFE_POINTER_CHECKS
+				heap_setAllocationName(this->data, name);
+			#endif
+			return *this;
+		}
+
+		// Construct from pointer.
+		//   Pre-condition: data is the data allocated with heap_allocate.
+		#ifdef SAFE_POINTER_CHECKS
+			Handle(T* data, uint64_t allocationIdentity) noexcept
+			: data(data) {
+				this->allocationIdentity = allocationIdentity;
+				if (this->data != nullptr) {
+					heap_increaseUseCount(this->data);
+				}
+				this->validate();
+			}
+			inline uint64_t getAllocationIdentity() const { return this->allocationIdentity; }
+		#else
+			Handle(T* data) noexcept
+			: data(data) {
+				if (this->data != nullptr) {
+					heap_increaseUseCount(this->data);
+				}
+			}
+		#endif
+		// Copy constructor.
+		Handle(const Handle<T> &other) noexcept
+		: data(other.getUnsafe()) {
+			if (this->data != nullptr) {
+				heap_increaseUseCount(this->data);
+			}
+			#ifdef SAFE_POINTER_CHECKS
+				this->allocationIdentity = other.getAllocationIdentity();
+				this->validate();
+			#endif
+		}
+		// Copy constructor with static cast.
+		template <typename V>
+		Handle(const Handle<V> &other) noexcept
+		: data(static_cast<T*>(other.getUnsafe())) {
+			if (this->data != nullptr) {
+				heap_increaseUseCount(this->data);
+			}
+			#ifdef SAFE_POINTER_CHECKS
+				this->allocationIdentity = other.getAllocationIdentity();
+				this->validate();
+			#endif
+		}
+		// Move constructor.
+		Handle(Handle<T> &&other) noexcept
+		: data(other.takeOwnership()) {
+			#ifdef SAFE_POINTER_CHECKS
+				this->allocationIdentity = other.getAllocationIdentity();
+				this->validate();
+			#endif
+		}
+		// Move constructor with static cast.
+		template <typename V>
+		Handle(Handle<V> &&other) noexcept
+		: data(static_cast<T*>(other.takeOwnership())) {
+			#ifdef SAFE_POINTER_CHECKS
+				this->allocationIdentity = other.getAllocationIdentity();
+				this->validate();
+			#endif
+		}
+		// Copy assignment.
+		//   Shares ownership of other's allocation and releases the allocation that this handle referred to before.
+		//   The new allocation is acquired before the old one is released, because releasing the old allocation
+		//   may free the object that other is stored in, such as when assigning node = node->next.
+		//   Returns a reference to this handle.
+		Handle<T>& operator = (const Handle<T> &other) {
+			#ifdef SAFE_POINTER_CHECKS
+				this->validate();
+			#endif
+			// Read everything that is needed from other before anything can be freed.
+			T* replacement = other.getUnsafe();
+			#ifdef SAFE_POINTER_CHECKS
+				this->allocationIdentity = other.getAllocationIdentity();
+			#endif
+			if (this->data != replacement) {
+				T* previous = this->data;
+				// Increase any new use count.
+				if (replacement != nullptr) {
+					heap_increaseUseCount(replacement);
+				}
+				this->data = replacement;
+				// Decrease any old use count last, so that other is no longer needed if it gets destroyed.
+				if (previous != nullptr) {
+					heap_decreaseUseCount(previous);
+				}
+			}
+			return *this;
+		}
+		// Copy assignment with static cast.
+		//   Shares ownership of other's allocation, statically casted from V* to T*, and releases the allocation that this handle referred to before.
+		//   The pointer is read using getUnsafe, because the private members of Handle<V> are not accessible from Handle<T>.
+		//   The new allocation is acquired before the old one is released, because releasing the old allocation
+		//   may free the object that other is stored in.
+		//   Returns a reference to this handle.
+		template <typename V>
+		Handle<T>& operator = (const Handle<V> &other) {
+			#ifdef SAFE_POINTER_CHECKS
+				this->validate();
+			#endif
+			// Read everything that is needed from other before anything can be freed.
+			T* replacement = static_cast<T*>(other.getUnsafe());
+			#ifdef SAFE_POINTER_CHECKS
+				this->allocationIdentity = other.getAllocationIdentity();
+			#endif
+			if (this->data != replacement) {
+				T* previous = this->data;
+				// Increase any new use count.
+				if (replacement != nullptr) {
+					heap_increaseUseCount(replacement);
+				}
+				this->data = replacement;
+				// Decrease any old use count last, so that other is no longer needed if it gets destroyed.
+				if (previous != nullptr) {
+					heap_decreaseUseCount(previous);
+				}
+			}
+			return *this;
+		}
+		// Move assignment.
+		//   Takes over the reference held by other, leaving other empty, and releases the reference that this handle held before.
+		//   The previous reference is released even when both handles referred to the same allocation,
+		//   because other gives up its reference without decreasing the use count.
+		//   Moving a handle to itself leaves it unchanged.
+		//   Returns a reference to this handle.
+		Handle<T>& operator = (Handle<T> &&other) {
+			T* inherited = other.takeOwnership();
+			#ifdef SAFE_POINTER_CHECKS
+				this->validate();
+				this->allocationIdentity = other.getAllocationIdentity();
+			#endif
+			T* previous = this->data;
+			this->data = inherited;
+			// Decrease any old use count last, so that other is no longer needed if it gets destroyed.
+			if (previous != nullptr) {
+				heap_decreaseUseCount(previous);
+			}
+			return *this;
+		}
+		// Move assignment with static cast.
+		//   Takes over the reference held by other, statically casted from V* to T*, leaving other empty,
+		//   and releases the reference that this handle held before.
+		//   The previous reference is released even when both handles referred to the same allocation,
+		//   because other gives up its reference without decreasing the use count.
+		//   Returns a reference to this handle.
+		template <typename V>
+		Handle<T>& operator = (Handle<V> &&other) {
+			T* inherited = static_cast<T*>(other.takeOwnership());
+			#ifdef SAFE_POINTER_CHECKS
+				this->validate();
+				this->allocationIdentity = other.getAllocationIdentity();
+			#endif
+			T* previous = this->data;
+			this->data = inherited;
+			// Decrease any old use count last, so that other is no longer needed if it gets destroyed.
+			if (previous != nullptr) {
+				heap_decreaseUseCount(previous);
+			}
+			return *this;
+		}
+		// Destructor.
+		~Handle() {
+			if (this->data != nullptr) {
+				#ifdef SAFE_POINTER_CHECKS
+					this->validate();
+				#endif
+				heap_decreaseUseCount(this->data);
+			}
+		}
+		// Take ownership of the returned pointer from this handle.
+		inline T* takeOwnership() {
+			T* result = this->data;
+			this->data = nullptr;
+			return result;
+		}
+		// Check if the handle is null, using explicit syntax to explain the code.
+		inline bool isNull() const { return this->data == nullptr; }
+		// Check if the handle points to anything, using explicit syntax to explain the code.
+		inline bool isNotNull() const { return this->data != nullptr; }
+		// Access content through the handle using the -> operator.
+		inline T* operator ->() const {
+			#ifdef SAFE_POINTER_CHECKS
+				if (this->data == nullptr) { impl_throwNullException(); }
+				this->validate();
+			#endif
+			return this->data;
+		}
+		// Returns the allocation's used size in bytes.
+		inline uintptr_t getUsedSize() const {
+			if (this->data == nullptr) {
+				return 0;
+			} else {
+				return heap_getUsedSize(this->data);
+			}
+		}
+		// Get the number of elements by dividing the total size with the element size.
+		inline uintptr_t getElementCount() const {
+			if (this->data == nullptr) {
+				return 0;
+			} else {
+				// When sizeof(T) is a power of two, this unsigned integer division will be optimized into a bit shift by the compiler.
+				return heap_getUsedSize(this->data) / sizeof(T);
+			}
+		}
+		// Get a SafePointer to the data, which is used temporarily to iterate over the content with bound checks in debug mode but no overhead in release mode.
+		// Alignment decides how many additional bytes of padding can be accessed for SIMD operations.
+		template <typename V = T>
+		SafePointer<V> getSafe(const char * name) const {
+			if (this->data == nullptr) {
+				// A null handle returns a null pointer.
+				return SafePointer<V>();
+			} else {
+				#ifdef SAFE_POINTER_CHECKS
+					AllocationHeader *header = heap_getHeader(this->data);
+					return SafePointer<V>(header, this->allocationIdentity, name, (V*)this->data, heap_getPaddedSize(this->data));
+				#else
+					return SafePointer<V>(name, (V*)this->data);
+				#endif
+			}
+		}
+		// Get an unsafe pointer.
+		inline T* getUnsafe() const {
+			#ifdef SAFE_POINTER_CHECKS
+				this->validate();
+			#endif
+			return this->data;
+		}
+		// Get a reference.
+		inline T& getReference() const {
+			#ifdef SAFE_POINTER_CHECKS
+				if (this->data == nullptr) { impl_throwNullException(); }
+				this->validate();
+			#endif
+			return *(this->data);
+		}
+		// Get the use count.
+		inline uintptr_t getUseCount() const {
+			#ifdef SAFE_POINTER_CHECKS
+				this->validate();
+			#endif
+			return this->data ? heap_getUseCount(this->data) : 0;
+		}
+	};
+
+	// Construct a new Handle<T> using the heap allocator and begin reference counting.
+	// The object is aligned by DSR_MAXIMUM_ALIGNMENT.
+	template<typename T, typename... ARGS>
+	static Handle<T> handle_create(ARGS&&...args) {
+		// Reset the memory to zero before construction, in case that something was forgotten.
+		// TODO: Should debug mode set the memory to a deterministic pattern to simplify detection of uninitialized variables?
+		UnsafeAllocation allocation = heap_allocate(sizeof(T), true);
+		// Construction from pointer increases the allocation's use count to 1.
+		#ifdef SAFE_POINTER_CHECKS
+			Handle<T> result((T*)(allocation.data), allocation.header->allocationIdentity);
+		#else
+			Handle<T> result((T*)(allocation.data));
+		#endif
+		if (result.isNull()) {
+			impl_throwAllocationFailure();
+		} else {
+			new (result.getUnsafe()) T(std::forward<ARGS>(args)...);
+			if (!std::is_trivially_destructible<T>::value) {
+				heap_setAllocationDestructor(result.getUnsafe(), HeapDestructor([](void *toDestroy, void *externalResource) {
+					// Destroy one object.
+					((T*)toDestroy)->~T();
+				}));
+			}
+		}
+		return std::move(result.setName("Nameless handle object"));
+	}
+
+	// Construct an array of objects with a shared handle pointing to the first element.
+	// The first element is aligned by DSR_MAXIMUM_ALIGNMENT and the rest are following directly according to sizeof(T).
+	//   This allows tight packing of data for SIMD vectorization, because aligning with a SIMD vector would be pointless if each vector only contained one useful lane.
+	// initialization selects what is done with the memory:
+	//   Uninitialized leaves the memory as returned by heap_allocate.
+	//   Zeroed requests zeroed memory from heap_allocate.
+	//   Constructed calls T's constructor with args for each of the elementCount elements.
+	// args are given as lvalues to each constructor call, because forwarding an rvalue within the loop
+	//   would let the first element move from it and leave moved-from arguments to the remaining elements.
+	// Throws an allocation failure using impl_throwAllocationFailure if no memory was returned by heap_allocate.
+	// Pre-condition:
+	//   sizeof(T) % alignof(T) == 0
+	// NOTE(review): The destructor is registered for every T that is not trivially destructible, also when the elements are not constructed here,
+	//               so the caller is presumably expected to construct all elements before the allocation is freed.
+	template<typename T, typename... ARGS>
+	static Handle<T> handle_createArray(AllocationInitialization initialization, uintptr_t elementCount, ARGS&&...args) {
+		UnsafeAllocation allocation = heap_allocate(sizeof(T) * elementCount, initialization == AllocationInitialization::Zeroed);
+		// Construction from pointer increases the allocation's use count to 1.
+		#ifdef SAFE_POINTER_CHECKS
+			Handle<T> result((T*)(allocation.data), allocation.header->allocationIdentity);
+		#else
+			Handle<T> result((T*)(allocation.data));
+		#endif
+		if (result.isNull()) {
+			impl_throwAllocationFailure();
+		} else {
+			if (initialization == AllocationInitialization::Constructed) {
+				T *elements = result.getUnsafe();
+				for (uintptr_t i = 0; i < elementCount; i++) {
+					// The arguments are reused for every element, so they must not be moved from.
+					new (elements + i) T(args...);
+				}
+			}
+			if (!std::is_trivially_destructible<T>::value) {
+				heap_setAllocationDestructor(result.getUnsafe(), HeapDestructor([](void *toDestroy, void *externalResource) {
+					// Calculate the number of elements from the size.
+					uintptr_t elementCount = heap_getUsedSize(toDestroy) / sizeof(T);
+					// Destroy each element.
+					for (uintptr_t i = 0; i < elementCount; i++) {
+						((T*)toDestroy)[i].~T();
+					}
+				}));
+			}
+		}
+		// setName returns the handle by reference, so it is moved explicitly to avoid copying it.
+		return std::move(result.setName("Nameless handle array"));
+	}
+
+	// Dynamic casting of handles.
+	//   Attempts to cast from a base class to a specific class inheriting from the old type.
+	//   OLD_TYPE does not have to be stated explicitly in the call, because it is provided by oldHandle.
+	//   Example:
+	//     Handle<TypeB> = handle_dynamicCast<TypeB>(handle_create<TypeA>(1, 2, 3));
+	//   Pre-condition:
+	//     The old handle must refer to a single element or nullptr, no arrays allowed.
+	//   Post-condition:
+	//     Returns oldHandle dynamically casted to NEW_TYPE.
+	//     Returns an empty handle if the conversion failed.
+	template <typename NEW_TYPE, typename OLD_TYPE>
+	Handle<NEW_TYPE> handle_dynamicCast(const Handle<OLD_TYPE> &oldHandle) {
+		#ifdef SAFE_POINTER_CHECKS
+			return Handle<NEW_TYPE>(dynamic_cast<NEW_TYPE*>(oldHandle.getUnsafe()), oldHandle.getAllocationIdentity());
+		#else
+			return Handle<NEW_TYPE>(dynamic_cast<NEW_TYPE*>(oldHandle.getUnsafe()));
+		#endif
+	}
+}
+
+#endif

+ 111 - 62
Source/DFPSR/base/SafePointer.cpp

@@ -23,6 +23,7 @@
 
 #include "SafePointer.h"
 #include "../api/stringAPI.h"
+#include "../settings.h"
 
 #ifdef SAFE_POINTER_CHECKS
 	#include <thread>
@@ -31,10 +32,10 @@
 
 using namespace dsr;
 
-// Thread hash of memory without any specific owner.
-static uint64_t ANY_THREAD_HASH = 0xF986BA1496E872A5;
-
 #ifdef SAFE_POINTER_CHECKS
+	// Thread hash of memory without any specific owner.
+	static uint64_t ANY_THREAD_HASH = 0xF986BA1496E872A5;
+
 	// A primitive hash function that assumes that all compared objects have the same length, so that trailing zeroes can be ignored.
 	static uint64_t hash(const uint8_t *bytes, size_t size) {
 		uint64_t result = 527950984572370412;
@@ -94,84 +95,132 @@ static uint64_t ANY_THREAD_HASH = 0xF986BA1496E872A5;
 	AllocationHeader::AllocationHeader()
 	: totalSize(0), threadHash(0), allocationIdentity(0) {}
 
-	AllocationHeader::AllocationHeader(uintptr_t totalSize, bool threadLocal)
-	: totalSize(totalSize), threadHash(threadLocal ? currentThreadHash : ANY_THREAD_HASH), allocationIdentity(createIdentity()) {}
+	AllocationHeader::AllocationHeader(uintptr_t totalSize, bool threadLocal, const char *name)
+	: totalSize(totalSize), name(name), threadHash(threadLocal ? currentThreadHash : ANY_THREAD_HASH), allocationIdentity(createIdentity()) {}
+
+	// Prepares an existing header for a new allocation in the same memory.
+	//   threadLocal selects between the calling thread's hash and the hash shared by all threads.
+	//   name is stored as the new debug name of the allocation.
+	//   A new allocation identity is created, while totalSize is left as it was.
+	void AllocationHeader::reuse(bool threadLocal, const char *name) {
+		if (threadLocal) {
+			this->threadHash = currentThreadHash;
+		} else {
+			this->threadHash = ANY_THREAD_HASH;
+		}
+		this->allocationIdentity = createIdentity();
+		this->name = name;
+	}
 #else
 	AllocationHeader::AllocationHeader()
 	: totalSize(0) {}
 
-	AllocationHeader::AllocationHeader(uintptr_t totalSize, bool threadLocal)
+	// TODO: Avoid passing the debug name in release mode by placing these functions in memory.h.
+	//       Create separate methods for getting the thread hash and the next allocation nonce.
+	AllocationHeader::AllocationHeader(uintptr_t totalSize, bool threadLocal, const char *name)
 	: totalSize(totalSize) {}
+
+	void AllocationHeader::reuse(bool threadLocal, const char *name) {}
 #endif
 
 #ifdef SAFE_POINTER_CHECKS
-	void dsr::assertNonNegativeSize(intptr_t size) {
+	void dsr::impl_assertNonNegativeSize(intptr_t size) {
 		if (size < 0) {
 			throwError(U"Negative size of SafePointer!\n");
 		}
 	}
 
-	void dsr::assertInsideSafePointer(const char* method, const char* name, const uint8_t* pointer, const uint8_t* data, const uint8_t* regionStart, const uint8_t* regionEnd, const AllocationHeader *header, uint64_t allocationIdentity, intptr_t claimedSize, intptr_t elementSize) {
-		if (regionStart == nullptr) {
-			throwError(U"SafePointer exception! Tried to use a null pointer!\n");
+	// Returns true if any part of the claimed region from claimedStart to claimedEnd is outside of the permitted region from permittedStart to permittedEnd.
+	//   A claimed region that ends exactly at permittedEnd is still accepted, because the end pointers refer to the first byte after each region.
+	static bool isOutOfBound(const uint8_t* claimedStart, const uint8_t* claimedEnd, const uint8_t* permittedStart, const uint8_t* permittedEnd) {
+		const bool startsInside = claimedStart >= permittedStart;
+		const bool endsInside = claimedEnd <= permittedEnd;
+		return !(startsInside && endsInside);
+	}
+
+	// Builds a report about a failed SafePointer check in the print buffer and sends it as an error message.
+	//   title is the first line of the message, naming which check failed.
+	//   methodName and pointerName identify the SafePointer operation and the pointer that it was called on.
+	//   allocationName, pointerHeader, allocationIdentity, headerIdentity and headerHash are only printed when EXTRA_SAFE_POINTER_CHECKS is defined and pointerHeader is not null.
+	//   claimedStart to claimedEnd is the requested memory region and permittedStart to permittedEnd is the region that the pointer may access.
+	//   elementSize is the size of the pointer's element type in bytes.
+	static void throwPointerError(const ReadableString &title, const char* methodName, const char* pointerName, const FixedAscii<256> &allocationName, const uint8_t* claimedStart, const uint8_t* claimedEnd, intptr_t elementSize, const uint8_t* permittedStart, const uint8_t* permittedEnd, const AllocationHeader *pointerHeader, uint64_t allocationIdentity, uint64_t headerIdentity, uint64_t headerHash) {
+		bool outOfBound = isOutOfBound(claimedStart, claimedEnd, permittedStart, permittedEnd);
+		String *target = &(string_getPrintBuffer());
+		string_clear(*target);
+		string_append(*target, title, U"\n");
+		string_append(*target, U" _______________________________________________________________________\n");
+		string_append(*target, U"/\n");
+		string_append(*target, U"|  SafePointer operation: ", methodName, U"\n");
+		string_append(*target, U"|  Pointer name: ", pointerName, U"\n");
+		#ifdef EXTRA_SAFE_POINTER_CHECKS
+			if (pointerHeader != nullptr) {
+				string_append(*target, U"|  Allocation name    : ", allocationName, U"\n");
+				string_append(*target, U"|  Thread hash:\n");
+				if (headerHash == ANY_THREAD_HASH) {
+					string_append(*target, U"|    Shared with all threads\n");
+				} else {
+					string_append(*target, U"|    Owner thread     : ", headerHash, U"\n");
+					string_append(*target, U"|    Calling thread   : ", currentThreadHash, U"\n");
+				}
+				string_append(*target, U"|  Identity:\n");
+				string_append(*target, U"|    Found            : ", headerIdentity, U"\n");
+				string_append(*target, U"|    Expected         : ", allocationIdentity, U"\n");
+				// TODO: Check if the requested data is outside of the memory allocation's used size or just the permitted region within the allocation.
+				// TODO: Iterate over allocations until the same header address as in the pointer is found:
+				heap_forAllHeapAllocations([target, pointerHeader](AllocationHeader * header, void * allocation) {
+					// We found the allocation in the heap, so we know that it is an active heap allocation.
+					if (pointerHeader == header) {
+						// The allocation size is the space that can be expanded into without having to reallocate.
+						string_append(*target, U"|    Allocation size  : ", heap_getAllocationSize(allocation), U" bytes\n");
+						// The used size is what the application asked for from the allocator.
+						//   The permitted region often includes the whole used size and some padding for aligned memory reads.
+						string_append(*target, U"|    Used size        : ", heap_getUsedSize(allocation), U" bytes\n");
+					}
+				});
+			}
+		#endif
+		if (outOfBound) {
+			string_append(*target, U"|  Claimed memory is outside of the pointer's permitted memory region!\n");
+		} else {
+			string_append(*target, U"|  Claimed memory is safely within the permitted memory region.\n");
+		}
+		string_append(*target, U"|    Permitted region : ", (uintptr_t)permittedStart, U" to ", (uintptr_t)permittedEnd, U" of ", (intptr_t)(permittedEnd - permittedStart), U" bytes\n");
+		// The requested size is printed as a signed integer like the permitted size, so that a negative size is shown as negative instead of as a huge unsigned number.
+		string_append(*target, U"|    Requested region : ", (uintptr_t)claimedStart, U" to ", (uintptr_t)claimedEnd, U" of ", (intptr_t)(claimedEnd - claimedStart), U" bytes\n");
+		string_append(*target, U"|    Element size     : ", elementSize, U" bytes\n");
+		string_append(*target, U"\\_______________________________________________________________________\n\n");
+		string_sendMessage(*target, MessageType::Error);
+	}
+
+	// True while the calling thread is executing a SafePointer check, so that checks triggered by the error printing itself can be skipped.
+	static thread_local bool inside = false;
+	// Marks the calling thread as being inside of a check for as long as the object exists.
+	//   The destructor clears the mark on every way out of the check, including early returns and errors thrown while reporting.
+	//   Without this, the first failed check would leave the mark set and silently disable all later checks on the same thread.
+	struct InsideCheckScope {
+		InsideCheckScope() { inside = true; }
+		~InsideCheckScope() { inside = false; }
+	};
+	// Checks that the memory from claimedStart to claimedEnd may be accessed using a SafePointer, and reports an error otherwise.
+	//   methodName and pointerName are only used for the error message.
+	//   permittedStart to permittedEnd is the region that the pointer is allowed to access, where a null permittedStart is reported as a null pointer.
+	//   header and allocationIdentity are only compared against the allocation when EXTRA_SAFE_POINTER_CHECKS is defined and header is not null.
+	void dsr::impl_assertInsideSafePointer(const char* methodName, const char* pointerName, const uint8_t* claimedStart, const uint8_t* claimedEnd, intptr_t elementSize, const uint8_t* permittedStart, const uint8_t* permittedEnd, const AllocationHeader *header, uint64_t allocationIdentity) {
+		// Abort to avoid infinite recursion from printing text if we are already inside of another check.
+		if (inside) return;
+		InsideCheckScope insideScope;
+		if (permittedStart == nullptr) {
+			throwPointerError(U"SafePointer identity exception! Tried to use a null pointer.", methodName, pointerName, "(null)", claimedStart, claimedEnd, elementSize, permittedStart, permittedEnd, header, allocationIdentity, 0, 0);
 			return;
 		}
 		// If the pointer has an allocation header, check that the identity matches the one stored in the pointer.
-		if (header != nullptr) {
-			uint64_t headerIdentity, headerHash;
-			try {
-				// Both allocation identity and thread hash may match by mistake, but in most of the cases this will give more information about why it happened.
-				headerIdentity = header->allocationIdentity;
-				headerHash = header->threadHash;
-			} catch(...) {
-				throwError(U"SafePointer exception! Tried to access memory not available to the application!\n");
-				return;
-			}
-			if (headerIdentity != allocationIdentity) {
-				throwError(U"SafePointer exception! Accessing freed memory or corrupted allocation header!\n  headerIdentity = ", headerIdentity, U"\n  allocationIdentity = ", allocationIdentity, U"");
-				return;
-			} else if (headerHash != ANY_THREAD_HASH && headerHash != currentThreadHash) {
-				throwError(U"SafePointer exception! Accessing another thread's private memory!\n  headerHash = ", headerHash, U"\n  currentThreadHash = ", currentThreadHash, U"\n");
-				return;
-			}
-		}
-		if (pointer < regionStart || pointer + claimedSize > regionEnd) {
-			String message;
-			string_append(message, U"\n _________________ SafePointer out of bound exception! _________________\n");
-			string_append(message, U"/\n");
-			string_append(message, U"|  Name: ", name, U"\n");
-			string_append(message, U"|  Method: ", method, U"\n");
-			string_append(message, U"|  Region: ", (uintptr_t)regionStart, U" to ", (uintptr_t)regionEnd, U"\n");
-			string_append(message, U"|  Region size: ", (intptr_t)(regionEnd - regionStart), U" bytes\n");
-			string_append(message, U"|  Base pointer: ", (uintptr_t)data, U"\n");
-			string_append(message, U"|  Requested pointer: ", (uintptr_t)pointer, U"\n");
-			string_append(message, U"|  Requested size: ", claimedSize, U" bytes\n");
-
-			intptr_t startOffset = (intptr_t)pointer - (intptr_t)regionStart;
-			intptr_t baseOffset = (intptr_t)pointer - (intptr_t)data;
-
-			// Index relative to allocation start
-			//   regionStart is the start of the accessible memory region
-			if (startOffset != baseOffset) {
-				string_append(message, U"|  Start offset: ", startOffset, U" bytes\n");
-				if (startOffset % elementSize == 0) {
-					intptr_t index = startOffset / elementSize;
-					intptr_t elementCount = ((intptr_t)regionEnd - (intptr_t)regionStart) / elementSize;
-					string_append(message, U"|    Start index: ", index, U" [0..", (elementCount - 1), U"]\n");
+		uint64_t headerIdentity = 0;
+		uint64_t headerHash = 0;
+		FixedAscii<256> allocationName("(null)");
+		#ifdef EXTRA_SAFE_POINTER_CHECKS
+			if (header != nullptr) {
+				#ifndef DSR_HARD_EXIT_ON_ERROR
+				// This only works if the application has registered a signal handler throwing an error on SIGSEGV, like in the regression tests.
+				try {
+				#endif
+					// Both allocation identity and thread hash may match by mistake, but in most of the cases this will give more information about why it happened.
+					headerIdentity = header->allocationIdentity;
+					headerHash = header->threadHash;
+					if (header->name != nullptr) {
+						// Clone into fixed size memory when we do not know if the memory is corrupted.
+						allocationName = FixedAscii<256>(header->name);
+					}
+				#ifndef DSR_HARD_EXIT_ON_ERROR
+				} catch(...) {
+					headerIdentity = 0;
+					headerHash = 0;
+					throwPointerError(U"SafePointer exception! Tried to access memory not available to the application.", methodName, pointerName, "(invalid)", claimedStart, claimedEnd, elementSize, permittedStart, permittedEnd, header, allocationIdentity, headerIdentity, headerHash);
+					return;
+				}
+				#endif
+				if (headerIdentity != allocationIdentity) {
+					throwPointerError(U"SafePointer identity exception!", methodName, pointerName, allocationName, claimedStart, claimedEnd, elementSize, permittedStart, permittedEnd, header, allocationIdentity, headerIdentity, headerHash);
+					return;
+				} else if (headerHash != ANY_THREAD_HASH && headerHash != currentThreadHash) {
+					throwPointerError(U"SafePointer thread hash exception!", methodName, pointerName, allocationName, claimedStart, claimedEnd, elementSize, permittedStart, permittedEnd, header, allocationIdentity, headerIdentity, headerHash);
+					return;
 				}
 			}
-
-			// Base index relative to the stored pointer within the region
-			//   data is the base of the allocation at index zero
-			string_append(message, U"|  Base offset: ", baseOffset, U" bytes\n");
-			if (baseOffset % elementSize == 0) {
-				intptr_t index = baseOffset / elementSize;
-				intptr_t elementCount = ((intptr_t)regionEnd - (intptr_t)data) / elementSize;
-				string_append(message, U"|    Base index: ", index, U" [0..", (elementCount - 1), U"]\n");
-			}
-			string_append(message, U"\\_______________________________________________________________________\n\n");
-			throwError(message);
+		#endif
+		if (isOutOfBound(claimedStart, claimedEnd, permittedStart, permittedEnd)) {
+			throwPointerError(U"SafePointer out of bound exception!", methodName, pointerName, allocationName, claimedStart, claimedEnd, elementSize, permittedStart, permittedEnd, header, allocationIdentity, headerIdentity, headerHash);
 			return;
 		}
 	}
 #endif

+ 114 - 134
Source/DFPSR/base/SafePointer.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2024 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -21,22 +21,12 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-// If you get segmentation faults despite using SafePointer, then check the following.
-// * Are you compiling all of your code in debug mode?
-//   The release mode does not perform SafePointer checks, because it is supposed to be zero overhead by letting the compiler inline the pointers.
-// * Did you create a SafePointer from a memory region that you do not have access to, expired stack memory, or a region larger than the allocation?
-//   SafePointer can not know which memory is safe to call if you do not give it correct information.
-//   If the pointer was created without an allocation, make sure that regionStart is nullptr and claimedSize is zero.
-// * Did you deallocate the memory before using the SafePointer?
-//   SafePointer can not keep the allocation alive, because that would require counting references in both debug and release.
+// If you get segmentation faults despite using SafePointer, make sure to compile a debug version of the program to activate safety checks.
+//   In debug mode, bound checks make sure that memory accesses do not go a single byte outside of the allowed region.
 
-// To stay safe when using SafePointer:
-// * Compile in debug mode by habit, until it is time for profiling or relase.
-//   The operating system can not detect out of bound access in stack memory or arena allocations, so it may silently corrupt the memory without being caught if safety is disabled.
-// * Let the Buffer create the safe pointer for you to prevent accidentally giving the wrong size, or use the default constructor for expressing null.
-//   If you only need a part of the buffer's memory, use the slice function to get a subset of the memory with bound checks on construction.
-// * Either create a SafePointer when needed within the buffer's scope, or store both in the same structure.
-//   This makes sure that the allocation is not freed while the pointer still exists.
+// If SafePointer is constructed with a pointer to the allocation head and its allocation identity (when the memory is allocated by the framework), more safety checks are done in debug mode.
+//   The allocation identity is a 64-bit nonce stored in both the allocation's head and SafePointer, making sure that the memory accessed has not been freed or reused for something else.
+//   The 64-bit thread hash prevents access to another thread's private memory, for consistent access rights when the virtual stack may allocate in either thread local or heap memory.
 
 #ifndef DFPSR_SAFE_POINTER
 #define DFPSR_SAFE_POINTER
@@ -45,95 +35,86 @@
 #include <cassert>
 #include <cstdint>
 #include "memory.h"
+#include "DsrTraits.h"
 
 namespace dsr {
 
 #ifdef SAFE_POINTER_CHECKS
-	void assertInsideSafePointer(const char* method, const char* name, const uint8_t* pointer, const uint8_t* data, const uint8_t* regionStart, const uint8_t* regionEnd, const AllocationHeader *header, uint64_t allocationIdentity, intptr_t claimedSize, intptr_t elementSize);
-	void assertNonNegativeSize(intptr_t size);
+	void impl_assertInsideSafePointer(const char* methodName, const char* pointerName, const uint8_t* claimedStart, const uint8_t* claimedEnd, intptr_t elementSize, const uint8_t* permittedStart, const uint8_t* permittedEnd, const AllocationHeader *header, uint64_t allocationIdentity);
+	void impl_assertNonNegativeSize(intptr_t size);
 #endif
 
 template<typename T>
 class SafePointer {
 private:
-	// A pointer from regionStart to regionEnd
+	// A pointer from permittedStart to permittedEnd
 	//   No longer declared mutable, so the methods that move the pointer cannot be called on a const SafePointer.
-	mutable T *data;
+	T *data;
+public:
 	#ifdef SAFE_POINTER_CHECKS
 		// Points to the first accessible byte, which should have the same alignment as the data pointer.
-		mutable T *regionStart;
+		T *permittedStart;
 		// Marks the end of the allowed region, pointing to the first byte that is not accessible.
-		mutable T *regionEnd;
+		T *permittedEnd;
 		// Pointer to an ascii literal containing the name for improving error messages for crashes in debug mode.
-		mutable const char *name;
+		const char *name;
 		// Optional pointer to an allocation header to know if it still exists and which threads are allowed to access it.
-		mutable AllocationHeader *header = nullptr;
+		AllocationHeader *header = nullptr;
 		// The identity that should match the allocation header's identity.
-		mutable uint64_t allocationIdentity = 0;
+		uint64_t allocationIdentity = 0;
 	#endif
 public:
 	#ifdef SAFE_POINTER_CHECKS
-	SafePointer() : data(nullptr), regionStart(nullptr), regionEnd(nullptr), name("Unnamed null pointer") {}
-	explicit SafePointer(const char* name) : data(nullptr), regionStart(nullptr), regionEnd(nullptr), name(name) {}
-	SafePointer(const char* name, T* regionStart, intptr_t regionByteSize = sizeof(T), AllocationHeader *header = nullptr)
-	: data(regionStart), regionStart(regionStart), regionEnd((T*)(((uint8_t*)regionStart) + (intptr_t)regionByteSize)), name(name), header(header) {
-		assertNonNegativeSize(regionByteSize);
-		// If the pointer has a header, then store the allocation's identity in the pointer.
-		if (header != nullptr) {
-			this->allocationIdentity = header->allocationIdentity;
+		// Create a null pointer.
+		SafePointer() : data(nullptr), permittedStart(nullptr), permittedEnd(nullptr), name("Unnamed null pointer") {}
+		explicit SafePointer(const char* name) : data(nullptr), permittedStart(nullptr), permittedEnd(nullptr), name(name) {}
+		SafePointer(const char* name, T* permittedStart, intptr_t permittedByteSize = sizeof(T))
+		: data(permittedStart), permittedStart(permittedStart), permittedEnd((T*)(((uint8_t*)permittedStart) + (intptr_t)permittedByteSize)), name(name) {
+			impl_assertNonNegativeSize(permittedByteSize);
 		}
-	}
-	SafePointer(const char* name, T* regionStart, intptr_t regionByteSize, T* data, AllocationHeader *header = nullptr)
-	: data(data), regionStart(regionStart), regionEnd((T*)(((uint8_t*)regionStart) + (intptr_t)regionByteSize)), name(name), header(header) {
-		assertNonNegativeSize(regionByteSize);
-		// If the pointer has a header, then store the allocation's identity in the pointer.
-		if (header != nullptr) {
-			this->allocationIdentity = header->allocationIdentity;
+		SafePointer(const char* name, T* permittedStart, intptr_t permittedByteSize, T* data)
+		: data(data), permittedStart(permittedStart), permittedEnd((T*)(((uint8_t*)permittedStart) + (intptr_t)permittedByteSize)), name(name) {
+			impl_assertNonNegativeSize(permittedByteSize);
+		}
+		SafePointer(AllocationHeader *header, uint64_t allocationIdentity, const char* name, T* permittedStart, intptr_t permittedByteSize = sizeof(T))
+		: data(permittedStart), permittedStart(permittedStart), permittedEnd((T*)(((uint8_t*)permittedStart) + (intptr_t)permittedByteSize)), name(name), header(header), allocationIdentity(allocationIdentity) {
+			impl_assertNonNegativeSize(permittedByteSize);
+		}
+		SafePointer(AllocationHeader *header, uint64_t allocationIdentity, const char* name, T* permittedStart, intptr_t permittedByteSize, T* data)
+		: data(data), permittedStart(permittedStart), permittedEnd((T*)(((uint8_t*)permittedStart) + (intptr_t)permittedByteSize)), name(name), header(header), allocationIdentity(allocationIdentity) {
+			impl_assertNonNegativeSize(permittedByteSize);
 		}
-	}
 	#else
-	SafePointer() : data(nullptr) {}
-	explicit SafePointer(const char* name) : data(nullptr) {}
-	SafePointer(const char* name, T* regionStart, intptr_t regionByteSize = sizeof(T), AllocationHeader *header = nullptr) : data(regionStart) {}
-	SafePointer(const char* name, T* regionStart, intptr_t regionByteSize, T* data, AllocationHeader *header = nullptr) : data(data) {}
+		SafePointer() : data(nullptr) {}
+		explicit SafePointer(const char* name) : data(nullptr) {}
+		SafePointer(const char* name, T* permittedStart, intptr_t permittedByteSize = sizeof(T)) : data(permittedStart) {}
+		SafePointer(const char* name, T* permittedStart, intptr_t permittedByteSize, T* data) : data(data) {}
+		SafePointer(AllocationHeader *header, uint64_t allocationIdentity, const char* name, T* permittedStart, intptr_t permittedByteSize = sizeof(T)) : data(permittedStart) {}
+		SafePointer(AllocationHeader *header, uint64_t allocationIdentity, const char* name, T* permittedStart, intptr_t permittedByteSize, T* data) : data(data) {}
 	#endif
 public:
 	#ifdef SAFE_POINTER_CHECKS
-	inline void assertInside(const char* method, const T* pointer, intptr_t size = (intptr_t)sizeof(T)) const {
-		assertInsideSafePointer(method, this->name, (const uint8_t*)pointer, (const uint8_t*)this->data, (const uint8_t*)this->regionStart, (const uint8_t*)this->regionEnd, this->header, this->allocationIdentity, size, sizeof(T));
+	inline void assertInside(const char* methodName, const T* claimedStart, intptr_t size = (intptr_t)sizeof(T)) const {
+		impl_assertInsideSafePointer(methodName, this->name, (const uint8_t*)claimedStart, ((const uint8_t*)claimedStart) + size, sizeof(T), (const uint8_t*)this->permittedStart, (const uint8_t*)this->permittedEnd, this->header, this->allocationIdentity);
 	}
-	inline void assertInside(const char* method) const {
-		this->assertInside(method, this->data);
+	inline void assertInside(const char* methodName) const {
+		this->assertInside(methodName, this->data);
 	}
 	#endif
 public:
 	// Back to unsafe pointer with a clearly visible method name as a warning
 	// The same can be done by mistake using the & operator on a reference
 	// p.getUnsafe() = &(*p) = &(p[0])
-	inline T* getUnsafe() {
-		#ifdef SAFE_POINTER_CHECKS
-		this->assertInside("getUnsafe");
-		#endif
-		return this->data;
-	}
-	inline const T* getUnsafe() const {
+	inline T* getUnsafe() const {
 		#ifdef SAFE_POINTER_CHECKS
 		this->assertInside("getUnsafe");
 		#endif
 		return this->data;
 	}
 	// Get unsafe pointer without bound checks for implementing your own safety
-	inline T* getUnchecked() {
-		return this->data;
-	}
-	inline const T* getUnchecked() const {
+	inline T* getUnchecked() const {
 		return this->data;
 	}
-	// Returns the pointer in modulo byteAlignment
-	// Returns 0 if the pointer is aligned with byteAlignment
-	inline int32_t getAlignmentOffset(int32_t byteAlignment) const {
-		return ((uintptr_t)this->data) % byteAlignment;
-	}
 	inline bool isNull() const {
 		return this->data == nullptr;
 	}
@@ -147,72 +128,41 @@ public:
 		T *newStart = (T*)(((uint8_t*)(this->data)) + byteOffset);
 		#ifdef SAFE_POINTER_CHECKS
 		assertInside("getSlice", newStart, size);
-		return SafePointer<T>(name, newStart, size, this->header);
-		#else
-		return SafePointer<T>(name, newStart);
-		#endif
-	}
-	inline const SafePointer<T> slice(const char* name, intptr_t byteOffset, intptr_t size) const {
-		T *newStart = (T*)(((uint8_t*)(this->data)) + byteOffset);
-		#ifdef SAFE_POINTER_CHECKS
-		assertInside("getSlice", newStart, size);
-		return SafePointer<T>(name, newStart, size, this->header);
+		return SafePointer<T>(this->header, this->allocationIdentity, name, newStart, size);
 		#else
 		return SafePointer<T>(name, newStart);
 		#endif
 	}
 	// Dereference
 	template <typename S = T>
-	inline S& get() {
+	inline S& get() const {
 		#ifdef SAFE_POINTER_CHECKS
 		assertInside("get", this->data, sizeof(S));
 		#endif
 		return *((S*)this->data);
 	}
-	template <typename S = T>
-	inline const S& get() const {
-		#ifdef SAFE_POINTER_CHECKS
-		assertInside("get", this->data, sizeof(S));
-		#endif
-		return *((const S*)this->data);
-	}
-	inline T& operator*() {
+	inline T& operator*() const {
 		#ifdef SAFE_POINTER_CHECKS
 		assertInside("operator*");
 		#endif
 		return *(this->data);
 	}
-	inline const T& operator*() const {
-		#ifdef SAFE_POINTER_CHECKS
-		assertInside("operator*");
-		#endif
-		return *(this->data);
-	}
-	inline T& operator[] (intptr_t index) {
+	inline T& operator[] (intptr_t index) const {
 		T* address = this->data + index;
 		#ifdef SAFE_POINTER_CHECKS
 		assertInside("operator[]", address);
 		#endif
 		return *address;
 	}
-	inline const T& operator[] (intptr_t index) const {
-		T* address = this->data + index;
-		#ifdef SAFE_POINTER_CHECKS
-		assertInside("operator[]", address);
-		#endif
-		return *address;
-	}
-	inline void increaseBytes(intptr_t byteOffset) const {
+	inline SafePointer<T> &increaseBytes(intptr_t byteOffset) {
 		this->data = (T*)(((uint8_t*)(this->data)) + byteOffset);
+		return *this;
 	}
-	inline void increaseElements(intptr_t elementOffset) const {
-		this->data += elementOffset;
-	}
-	inline SafePointer<T>& operator+=(intptr_t elementOffset) {
+	inline SafePointer<T> &increaseElements(intptr_t elementOffset) {
 		this->data += elementOffset;
 		return *this;
 	}
-	inline const SafePointer<T>& operator+=(intptr_t elementOffset) const {
+	inline SafePointer<T>& operator+=(intptr_t elementOffset) {
 		this->data += elementOffset;
 		return *this;
 	}
@@ -220,66 +170,96 @@ public:
 		this->data -= elementOffset;
 		return *this;
 	}
-	inline const SafePointer<T>& operator-=(intptr_t elementOffset) const {
-		this->data -= elementOffset;
-		return *this;
-	}
-	inline SafePointer<T> operator+(intptr_t elementOffset) {
-		SafePointer<T> result = *this;
-		result += elementOffset;
-		return result;
-	}
-	inline const SafePointer<T> operator+(intptr_t elementOffset) const {
+	inline SafePointer<T> operator+(intptr_t elementOffset) const {
 		SafePointer<T> result = *this;
 		result += elementOffset;
 		return result;
 	}
-	inline SafePointer<T> operator-(intptr_t elementOffset) {
+	inline SafePointer<T> operator-(intptr_t elementOffset) const {
 		SafePointer<T> result = *this;
 		result -= elementOffset;
 		return result;
 	}
-	inline const SafePointer<T> operator-(intptr_t elementOffset) const {
-		SafePointer<T> result = *this;
-		result -= elementOffset;
-		return result;
+	// Copy constructor.
+	SafePointer(const SafePointer<T> &other) noexcept
+	: data(other.getUnchecked()) {
+		#ifdef SAFE_POINTER_CHECKS
+			this->header = other.header;
+			this->allocationIdentity = other.allocationIdentity;
+			this->permittedStart = other.permittedStart;
+			this->permittedEnd = other.permittedEnd;
+			this->name = other.name;
+		#endif
 	}
-	inline const SafePointer<T>& operator=(const SafePointer<T>& source) const {
-		this->data = source.data;
+	// Copy constructor from non-const to const.
+	template <typename U, DSR_ENABLE_IF(DSR_CHECK_RELATION(DsrTrait_SameType, T, const U))>
+    SafePointer(const SafePointer<U> &other) noexcept
+	: data(other.getUnchecked()) {
 		#ifdef SAFE_POINTER_CHECKS
-			this->header = source.header;
-			this->allocationIdentity = source.allocationIdentity;
-			this->regionStart = source.regionStart;
-			this->regionEnd = source.regionEnd;
-			this->name = source.name;
+			this->header = other.header;
+			this->allocationIdentity = other.allocationIdentity;
+			this->permittedStart = other.permittedStart;
+			this->permittedEnd = other.permittedEnd;
+			this->name = other.name;
+		#endif
+	}
+	// Assignment.
+	SafePointer<T>& operator = (const SafePointer<T> &other) noexcept {
+		this->data = other.getUnchecked();
+		#ifdef SAFE_POINTER_CHECKS
+			this->header = other.header;
+			this->allocationIdentity = other.allocationIdentity;
+			this->permittedStart = other.permittedStart;
+			this->permittedEnd = other.permittedEnd;
+			this->name = other.name;
+		#endif
+		return *this;
+	}
+	// Assignment from non-const to const.
+	template <typename U, DSR_ENABLE_IF(DSR_CHECK_RELATION(DsrTrait_SameType, T, const U))>
+	SafePointer<T>& operator = (const SafePointer<U> &other) noexcept {
+		this->data = other.getUnchecked();
+		#ifdef SAFE_POINTER_CHECKS
+			this->header = other.header;
+			this->allocationIdentity = other.allocationIdentity;
+			this->permittedStart = other.permittedStart;
+			this->permittedEnd = other.permittedEnd;
+			this->name = other.name;
 		#endif
 		return *this;
 	}
 };
 
 template <typename T, typename S>
-inline void safeMemoryCopy(SafePointer<T> target, const SafePointer<S>& source, intptr_t byteSize) {
+inline void safeMemoryCopy(SafePointer<T> target, SafePointer<S> source, intptr_t byteSize) {
+	T *targetPointer = target.getUnchecked();
+	const T *sourcePointer = source.getUnchecked();
 	#ifdef SAFE_POINTER_CHECKS
 		// Both target and source must be in valid memory
-		target.assertInside("memoryCopy (target)", target.getUnchecked(), (size_t)byteSize);
-		source.assertInside("memoryCopy (source)", source.getUnchecked(), (size_t)byteSize);
+		target.assertInside("memoryCopy (target)", targetPointer, (size_t)byteSize);
+		source.assertInside("memoryCopy (source)", sourcePointer, (size_t)byteSize);
 		// memcpy doesn't allow pointer aliasing
 		// TODO: Make a general assertion with the same style as out of bound exceptions
 		assert(((const uint8_t*)target.getUnchecked()) + byteSize <= (uint8_t*)source.getUnchecked() || ((const uint8_t*)source.getUnchecked()) + byteSize <= (uint8_t*)target.getUnchecked());
+		assert(targetPointer != nullptr);
+		assert(sourcePointer != nullptr);
+		assert(byteSize > 0);
 	#endif
-	std::memcpy(target.getUnchecked(), source.getUnchecked(), (size_t)byteSize);
+	std::memcpy(targetPointer, sourcePointer, (size_t)byteSize);
 }
 
 template <typename T>
-inline void safeMemorySet(SafePointer<T>& target, uint8_t value, intptr_t byteSize) {
+inline void safeMemorySet(SafePointer<T> target, uint8_t value, intptr_t byteSize) {
+	T *targetPointer = target.getUnchecked();
 	#ifdef SAFE_POINTER_CHECKS
 		// Target must be in valid memory
-		target.assertInside("memoryCopy (target)", target.getUnchecked(), byteSize);
+		target.assertInside("memoryCopy (target)", targetPointer, byteSize);
+		assert(targetPointer != nullptr);
+		assert(byteSize > 0);
 	#endif
-	std::memset((char*)(target.getUnchecked()), value, (size_t)byteSize);
+	std::memset(targetPointer, value, (size_t)byteSize);
 }
 
 }
 
 #endif
-

+ 5 - 0
Source/DFPSR/base/endian.h

@@ -40,13 +40,18 @@
 #ifndef DFPSR_ENDIAN
 #define DFPSR_ENDIAN
 	#include <cstdint>
+	#include "noSimd.h"
 	#ifdef DSR_BIG_ENDIAN
 		// TODO: Not yet tested on a big-endian machine!
+		#define ENDIAN_POS_ADDR_IMM(VALUE,OFFSET) (bitShiftRightImmediate<OFFSET>(VALUE))
+		#define ENDIAN_NEG_ADDR_IMM(VALUE,OFFSET) (bitShiftLeftImmediate<OFFSET>(VALUE))
 		#define ENDIAN_POS_ADDR(VALUE,OFFSET) ((VALUE) >> (OFFSET))
 		#define ENDIAN_NEG_ADDR(VALUE,OFFSET) ((VALUE) << (OFFSET))
 		#define ENDIAN32_BYTE_0 0xFF000000u
 		static_assert(false, "Big-endian mode has not been officially tested!");
 	#else
+		#define ENDIAN_POS_ADDR_IMM(VALUE,OFFSET) (bitShiftLeftImmediate<OFFSET>(VALUE))
+		#define ENDIAN_NEG_ADDR_IMM(VALUE,OFFSET) (bitShiftRightImmediate<OFFSET>(VALUE))
 		#define ENDIAN_POS_ADDR(VALUE,OFFSET) ((VALUE) << (OFFSET))
 		#define ENDIAN_NEG_ADDR(VALUE,OFFSET) ((VALUE) >> (OFFSET))
 		#define ENDIAN32_BYTE_0 0x000000FFu

+ 350 - 73
Source/DFPSR/base/heap.cpp

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2024 David Forsgren Piuva
+// Copyright (c) 2024 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -22,6 +22,7 @@
 //    distribution.
 
 #include "heap.h"
+#include "../api/stringAPI.h"
 #include <mutex>
 #include <thread>
 #include <stdio.h>
@@ -31,10 +32,22 @@
 #include "../settings.h"
 
 namespace dsr {
+	using HeapFlag = uint16_t;
+	using BinIndex = uint16_t;
+
 	// The framework's maximum memory alignment is either the largest float SIMD vector or the thread safe alignment.
 	static const uintptr_t heapAlignment = DSR_MAXIMUM_ALIGNMENT;
 	static const uintptr_t heapAlignmentAndMask = memory_createAlignmentAndMask(heapAlignment);
 
+	// Because locking is recursive, it is safest to just have one global mutex for allocating, freeing and manipulating use counters.
+	//   Otherwise each use counter would need to store the thread identity and recursive depth for each heap allocation.
+	static thread_local intptr_t lockDepth = 0;
+	std::mutex memoryLock;
+
+	// The free function is hidden, because all allocations are forced to use reference counting,
+	//   so that a hard exit can disable recursive calls to heap_free by incrementing all reference counters.
+	static void heap_free(void * const allocation);
+
 	// Calculates the largest power of two allocation size that does not overflow a pointer on the target platform.
 	constexpr int calculateBinCount() {
 		intptr_t p = 0;
@@ -47,12 +60,19 @@ namespace dsr {
 			p++;
 		}
 	}
+	// TODO: Leave a few bins empty in the beginning until reaching heapAlignment from a minimum alignment.
+	static const int LOWEST_BIN_INDEX = 0;
 	static const int MAX_BIN_COUNT = calculateBinCount();
 
-	static int32_t getBinIndex(uintptr_t minimumSize) {
+	inline uintptr_t getBinSize(BinIndex index) { // Size in bytes of the bin at index: heapAlignment multiplied by two to the power of index.
+		return (((uintptr_t)1) << ((uintptr_t)index)) * heapAlignment;
+	}
+
+	static BinIndex getBinIndex(uintptr_t minimumSize) {
 		for (intptr_t p = 0; p < MAX_BIN_COUNT; p++) {
-			uintptr_t result = ((uintptr_t)1 << p) * heapAlignment;
+			uintptr_t result = getBinSize(p);
 			if (result >= minimumSize) {
+				//printf("getBinIndex %i from %i\n", (int)p, (int)minimumSize);
 				return p;
 			}
 		}
@@ -60,36 +80,184 @@ namespace dsr {
 		return -1;
 	}
 
-	static const uint16_t heapFlag_recycled = 1 << 0;
+	static const HeapFlag heapFlag_recycled = 1 << 0;
 	struct HeapHeader : public AllocationHeader {
-		HeapHeader *nextRecycled = nullptr;
-		//uint64_t useCount;
-		uint16_t flags = 0;
-		uint8_t binIndex;
-		HeapHeader(uintptr_t totalSize, uint8_t binIndex)
-		: AllocationHeader(totalSize, false), binIndex(binIndex) {}
+		// Because nextRecycled and usedSize have mutually exclusive lifetimes, they can share memory location.
+		union {
+			// When allocated
+			uintptr_t usedSize; // The actual size requested.
+			// When recycled
+			HeapHeader *nextRecycled = nullptr;
+		};
+		HeapDestructor destructor;
+		uintptr_t useCount = 0; // How many handles that point to the data.
+		// TODO: Allow placing a pointer to a singleton in another heap allocation, which will simply be freed using heap_free when the owner is freed.
+		//       This allow accessing any amount of additional information shared between all handles to the same payload.
+		//       Useful when you in hindsight realize that you need more information attached to something that is already created and shared, like a value allocated image needing a texture.
+		//       Check if it already exists. If it does not exist, lock the allocation mutex and check again if it still does not exist, before allocating the singleton.
+		//         Then there will only be a mutex overhead for accessing the singleton when accessed for the first time.
+		//         Once created, it can not go away until the allocation that knows about it is gone.
+		//       If using the reference counting, the singleton could also be reused between different allocations.
+		// void *singleton = nullptr;
+		// TODO: Allow the caller to access custom bit flags in allocations.
+		// uint32_t userFlags = 0;
+		HeapFlag flags = 0; // Flags use the heapFlag_ prefix.
+		BinIndex binIndex = 0; // Recycling bin index to use when freeing the allocation.
+		HeapHeader(uintptr_t totalSize)
+		: AllocationHeader(totalSize, false, "Nameless heap allocation") {}
+		inline uintptr_t getAllocationSize() {
+			return getBinSize(this->binIndex);
+		}
+		inline uintptr_t getUsedSize() {
+			if (this->isRecycled()) {
+				return 0;
+			} else {
+				return this->usedSize;
+			}
+		}
+		inline uintptr_t setUsedSize(uintptr_t size) {
+			//printf("setUsedSize: try %i\n", (int)size);
+			//printf("  MAX_BIN_COUNT: %i\n", (int)MAX_BIN_COUNT);
+			if (!(this->isRecycled())) {
+				uintptr_t allocationSize = getAllocationSize();
+				//printf("  binIndex: %i\n", (int)this->binIndex);
+				//printf("  allocationSize: %i\n", (int)allocationSize);
+				if (size > allocationSize) {
+					//printf("  too big!\n");
+					size = allocationSize;
+				}
+				this->usedSize = size;
+				//printf("  assigned size: %i\n", (int)this->usedSize);
+				return size;
+			} else {
+				return 0;
+			}
+		}
+		inline bool isRecycled() const {
+			return (this->flags & heapFlag_recycled) != 0;
+		}
+		inline void makeRecycled() {
+			this->flags |= heapFlag_recycled;
+		}
+		inline void makeUsed() {
+			this->flags &= ~heapFlag_recycled;
+		}
 	};
 	static const uintptr_t heapHeaderPaddedSize = memory_getPaddedSize(sizeof(HeapHeader), heapAlignment);
 
-	AllocationHeader *heap_getHeader(uint8_t const * const allocation) {
-		return (AllocationHeader*)(allocation - heapHeaderPaddedSize);
+	AllocationHeader *heap_getHeader(void * const allocation) {
+		return (AllocationHeader*)((uint8_t*)allocation - heapHeaderPaddedSize);
+	}
+
+	inline HeapHeader *headerFromAllocation(void const * const allocation) { // The header begins heapHeaderPaddedSize bytes before the payload. The returned pointer is not const, even though the argument is.
+		return (HeapHeader *)((uint8_t*)allocation - heapHeaderPaddedSize);
+	}
+
+	inline void *allocationFromHeader(HeapHeader * const header) { // The payload begins heapHeaderPaddedSize bytes after the start of the header.
+		return ((uint8_t*)header) + heapHeaderPaddedSize;
+	}
+	inline void const *allocationFromHeader(HeapHeader const * const header) { // Read-only overload using the same offset.
+		return ((uint8_t const *)header) + heapHeaderPaddedSize;
+	}
+
+	#ifdef SAFE_POINTER_CHECKS
+		void heap_setAllocationName(void * const allocation, const char *name) { // Stores the name pointer in the header without copying the text. Does nothing for nullptr.
+			if (allocation != nullptr) {
+				HeapHeader *header = headerFromAllocation(allocation);
+				header->name = name;
+			}
+		}
+
+		const char * heap_getAllocationName(void * const allocation) { // Returns the name pointer stored in the header, or "none" for nullptr.
+			if (allocation == nullptr) {
+				return "none";
+			} else {
+				HeapHeader *header = headerFromAllocation(allocation);
+				return header->name;
+			}
+		}
+
+		uintptr_t heap_getPaddedSize(void const * const allocation) { // Returns the used size rounded up with heapAlignmentAndMask, or 0 for nullptr.
+			if (allocation == nullptr) {
+				return 0;
+			} else {
+				HeapHeader *header = headerFromAllocation(allocation);
+				return memory_getPaddedSize_usingAndMask(header->getUsedSize(), heapAlignmentAndMask); // getUsedSize gives 0 for a recycled allocation.
+			}
+		}
+	#endif
+
+	uintptr_t heap_getAllocationSize(void const * const allocation) { // Returns the size of the bin that the allocation belongs to. No nullptr check is made here, unlike in heap_getUsedSize.
+		HeapHeader *header = headerFromAllocation(allocation);
+		return getBinSize(header->binIndex);
+	}
+
+	uintptr_t heap_getUsedSize(void const * const allocation) { // Returns the used size stored in the header, or 0 for nullptr.
+		uintptr_t result = 0;
+		if (allocation != nullptr) {
+			HeapHeader *header = headerFromAllocation(allocation);
+			result = header->getUsedSize(); // Also 0 when the allocation is marked as recycled.
+			//printf("  heap_getUsedSize: get %i\n", (int)result);
+		}
+		return result;
+	}
+
+	uintptr_t heap_setUsedSize(void * const allocation, uintptr_t size) { // Assigns a new used size and returns the size that was actually stored, or 0 for nullptr.
+		uintptr_t result = 0;
+		if (allocation != nullptr) {
+			//uintptr_t allocationSize = heap_getAllocationSize(allocation);
+			HeapHeader *header = headerFromAllocation(allocation);
+			result = header->setUsedSize(size); // The header limits size to the allocation size and returns 0 without storing anything for a recycled allocation.
+			//printf("  heap_setUsedSize: try %i get %i\n", (int)size, (int)result);
+		}
+		return result;
 	}
 
-	inline HeapHeader *headerFromAllocation(uint8_t const * const allocation) {
-		return (HeapHeader*)(allocation - heapHeaderPaddedSize);
+	void heap_increaseUseCount(void const * const allocation) { // Adds one to the use count in the header. Does nothing for nullptr.
+		if (allocation != nullptr) {
+			HeapHeader *header = headerFromAllocation(allocation);
+			if (lockDepth == 0) memoryLock.lock(); // lockDepth is above zero while this thread is inside heap_free or a destructor, where locking is skipped.
+			//printf("heap_increaseUseCount called for allocation @ %ld\n", (uintptr_t)allocation);
+			//printf("    Use count: %ld -> %ld\n", header->useCount, header->useCount + 1);
+			//#ifdef SAFE_POINTER_CHECKS
+			//	printf("    ID: %lu\n", header->allocationIdentity);
+			//	printf("    Name: %s\n", header->name ? header->name : "<nameless>");
+			//#endif
+			header->useCount++;
+			if (lockDepth == 0) memoryLock.unlock();
+		}
 	}
 
-	inline uint8_t *allocationFromHeader(HeapHeader const * const header) {
-		return (uint8_t*)header + heapHeaderPaddedSize;
+	// Decreases the use count of allocation and frees the allocation when the count reaches zero.
+	//   Does nothing if allocation is nullptr.
+	//   Reports an error through throwError if the count was already zero.
+	void heap_decreaseUseCount(void const * const allocation) {
+		if (allocation != nullptr) {
+			HeapHeader *header = headerFromAllocation(allocation);
+			// An underflow is remembered and reported after unlocking, so that memoryLock is not left locked if throwError does not return.
+			bool alreadyZero = false;
+			if (lockDepth == 0) memoryLock.lock();
+			//printf("heap_decreaseUseCount called for allocation @ %ld\n", (uintptr_t)allocation);
+			//printf("    Use count: %ld -> %ld\n", header->useCount, header->useCount - 1);
+			//#ifdef SAFE_POINTER_CHECKS
+			//	printf("    ID: %lu\n", header->allocationIdentity);
+			//	printf("    Name: %s\n", header->name ? header->name : "<nameless>");
+			//#endif
+			if (header->useCount == 0) {
+				alreadyZero = true;
+			} else {
+				header->useCount--;
+				if (header->useCount == 0) {
+					// The last use is gone. lockDepth is raised so that heap_free and any destructor it calls do not try to lock memoryLock again.
+					lockDepth++;
+					heap_free((void*)allocation);
+					lockDepth--;
+				}
+			}
+			if (lockDepth == 0) memoryLock.unlock();
+			if (alreadyZero) {
+				throwError(U"Heap error: Decreasing a count that is already zero!\n");
+			}
+		}
 	}
 
-	uint64_t heap_getAllocationSize(uint8_t const * const allocation) {
-		return headerFromAllocation(allocation)->totalSize - heapHeaderPaddedSize;
+	uintptr_t heap_getUseCount(void const * const allocation) { // Reads the use count without taking memoryLock and without checking for nullptr.
+		return headerFromAllocation(allocation)->useCount;
 	}
 
 	// A block of memory where heap data can be allocated.
 	struct HeapMemory {
-		// TODO: Recycle memory using groups of allocations in power of two bytes.
 		HeapMemory *prevHeap = nullptr;
 		uint8_t *top = nullptr; // The start of the arena, where the allocation pointer is when full.
 		uint8_t *allocationPointer = nullptr; // The allocation pointer that moves from bottom to top when filling the arena.
@@ -109,27 +277,42 @@ namespace dsr {
 		}
 	};
 
+	// The total number of used heap allocations, excluding recycled memory.
+	// Only accessed when memoryLock is locked.
+	static intptr_t allocationCount = 0;
+
 	// The heap can have memory freed after its own destruction by telling the remaining allocations to clean up after themselves.
 	struct HeapPool {
-		std::mutex poolLock;
 		HeapMemory *lastHeap = nullptr;
 		HeapHeader *recyclingBin[MAX_BIN_COUNT] = {};
 		bool terminating = false;
+		void cleanUp() {
+			// If memory safety checks are enabled, then we should indicate that everything is fine with the memory once cleaning up.
+			//   There is however no way to distinguish between leaking memory and not yet having terminated everything, so there is no leak warning to print.
+			#ifdef SAFE_POINTER_CHECKS
+				// Can't allocate more memory after freeing all memory.
+				printf("All heap memory was freed without leaks.\n");
+			#endif
+			HeapMemory *nextHeap = this->lastHeap;
+			while (nextHeap != nullptr) {
+				HeapMemory *currentHeap = nextHeap;
+				nextHeap = currentHeap->prevHeap;
+				operator delete(currentHeap);
+			}
+			this->lastHeap = nullptr;
+		}
 		HeapPool() {}
 		~HeapPool() {
-			this->poolLock.lock();
+			memoryLock.lock();
 				this->terminating = true;
-				HeapMemory *nextHeap = this->lastHeap;
-				while (nextHeap != nullptr) {
-					HeapMemory *currentHeap = nextHeap;
-					nextHeap = currentHeap->prevHeap;
-					operator delete(currentHeap);
+				if (allocationCount == 0) {
+					this->cleanUp();
 				}
-			this->poolLock.unlock();
+			memoryLock.unlock();
 		}
 	};
 
-	static UnsafeAllocation tryToAllocate(HeapMemory &heap, uintptr_t paddedSize, uintptr_t alignmentAndMask, uint8_t binIndex) {
+	static UnsafeAllocation tryToAllocate(HeapMemory &heap, uintptr_t paddedSize, uintptr_t alignmentAndMask) {
 		UnsafeAllocation result(nullptr, nullptr);
 			uint8_t *dataPointer = (uint8_t*)(((uintptr_t)(heap.allocationPointer) - paddedSize) & alignmentAndMask);
 			AllocationHeader *headerPointer = (AllocationHeader*)(dataPointer - heapHeaderPaddedSize);
@@ -137,18 +320,18 @@ namespace dsr {
 				// There is enough space, so confirm the allocation.
 				result = UnsafeAllocation(dataPointer, headerPointer);
 				// Write data to the header.
-				*headerPointer = HeapHeader((uintptr_t)heap.allocationPointer - (uintptr_t)headerPointer, binIndex);
+				*headerPointer = HeapHeader((uintptr_t)heap.allocationPointer - (uintptr_t)headerPointer);
 				// Reserve the data in the heap by moving the allocation pointer.
 				heap.allocationPointer = (uint8_t*)headerPointer;
 			}
 		return result;
 	}
 
-	static UnsafeAllocation tryToAllocate(HeapPool &pool, uintptr_t paddedSize, uintptr_t alignmentAndMask, uint8_t binIndex) {
+	static UnsafeAllocation tryToAllocate(HeapPool &pool, uintptr_t paddedSize, uintptr_t alignmentAndMask) {
 		// Start with the most recent heap, which is most likely to have enough space.
 		HeapMemory *currentHeap = pool.lastHeap;
 		while (currentHeap != nullptr) {
-			UnsafeAllocation result = tryToAllocate(*currentHeap, paddedSize, heapAlignmentAndMask, binIndex);
+			UnsafeAllocation result = tryToAllocate(*currentHeap, paddedSize, heapAlignmentAndMask);
 			if (result.data != nullptr) {
 				return result;
 			}
@@ -163,69 +346,163 @@ namespace dsr {
 		pool.lastHeap = new HeapMemory(allocationSize);
 		pool.lastHeap->prevHeap = previousHeap;
 		// Make one last attempt at allocating the memory.
-		return tryToAllocate(*(pool.lastHeap), paddedSize, heapAlignmentAndMask, binIndex);
+		return tryToAllocate(*(pool.lastHeap), paddedSize, heapAlignmentAndMask);
 	}
 
 	static HeapPool defaultHeap;
 
 	UnsafeAllocation heap_allocate(uintptr_t minimumSize, bool zeroed) {
-		int32_t binIndex = getBinIndex(minimumSize);
 		UnsafeAllocation result(nullptr, nullptr);
+		int32_t binIndex = getBinIndex(minimumSize);
+		//printf("heap_allocate\n");
+		//printf("  minimumSize: %i\n", (int)minimumSize);
+		//printf("  binIndex: %i\n", (int)binIndex);
-		if (binIndex == -1) {
+		// getBinIndex returns the unsigned BinIndex type, so its -1 for failure arrives here as a large positive value.
+		//   Checking against MAX_BIN_COUNT catches that value before it is used for shifting or indexing.
+		if (binIndex < 0 || binIndex >= MAX_BIN_COUNT) {
 			// If the requested allocation is so big that there is no power of two that can contain it without overflowing the address space, then it can not be allocated.
-			printf("Heap error: Exceeded the maximum size when trying to allocate!\n");
+			throwError(U"Heap error: Exceeded the maximum size when trying to allocate!\n");
 		} else {
 			uintptr_t paddedSize = ((uintptr_t)1 << binIndex) * heapAlignment;
-			defaultHeap.poolLock.lock();
-			if (!(defaultHeap.terminating)) {
-				// Look for pre-existing allocations in the recycling bins.
-				HeapHeader *binHeader = defaultHeap.recyclingBin[binIndex];
-				if (binHeader != nullptr) {
-					// Make the recycled allocation's tail into the new head.
-					defaultHeap.recyclingBin[binIndex] = binHeader->nextRecycled;
-					// Mark the allocation as not recycled. (assume that it was recycled when found in the bin)
-					binHeader->flags &= ~heapFlag_recycled;
-					result = UnsafeAllocation(allocationFromHeader(binHeader), binHeader);
-				} else {
-					// Look for a heap with enough space for a new allocation.
-					result = tryToAllocate(defaultHeap, paddedSize, heapAlignmentAndMask, binIndex);
-					if (result.data == nullptr) {
-						printf("Heap error: Failed to allocate more memory!\n");
-					}
+			if (lockDepth == 0) memoryLock.lock();
+			allocationCount++;
+			// Look for pre-existing allocations in the recycling bins.
+			HeapHeader *binHeader = defaultHeap.recyclingBin[binIndex];
+			if (binHeader != nullptr) {
+				// Make the recycled allocation's tail into the new head.
+				defaultHeap.recyclingBin[binIndex] = binHeader->nextRecycled;
+				// Clear the pointer to make room for the allocation's size in the union.
+				binHeader->nextRecycled = nullptr;
+				// Mark the allocation as not recycled. (assume that it was recycled when found in the bin)
+				binHeader->makeUsed();
+				binHeader->reuse(false, "Nameless reused allocation");
+				result = UnsafeAllocation((uint8_t*)allocationFromHeader(binHeader), binHeader);
+			} else {
+				// Look for a heap with enough space for a new allocation.
+				result = tryToAllocate(defaultHeap, paddedSize, heapAlignmentAndMask);
+				if (result.data == nullptr) {
+					// Nothing was allocated, so take back the count that was added above.
+					allocationCount--;
 				}
 			}
-			defaultHeap.poolLock.unlock();
+			if (lockDepth == 0) memoryLock.unlock();
+			if (result.data == nullptr) {
+				// The failure is reported after unlocking, so that memoryLock is not left locked if throwError does not return.
+				throwError(U"Heap error: Failed to allocate more memory!\n");
+			}
 			if (zeroed && result.data != nullptr) {
 				memset(result.data, 0, paddedSize);
 			}
 		}
+		if (result.data != nullptr) {
+			// Get the header.
+			HeapHeader *head = (HeapHeader*)(result.header);
+			// Tell the allocation where it should be recycled when freed.
+			head->binIndex = binIndex;
+			// Tell the allocation how many of the bytes are used.
+			head->setUsedSize(minimumSize);
+			// Give a debug name to the allocation if we are debugging.
+			//printf("Allocated memory @ %ld\n", (uintptr_t)result.data);
+			//printf("    Use count = %ld\n", head->useCount);
+			//#ifdef SAFE_POINTER_CHECKS
+			//	printf("    ID = %lu\n", head->allocationIdentity);
+			//#endif
+		}
 		return result;
 	}
 
-	void heap_free(uint8_t * const allocation) {
-		defaultHeap.poolLock.lock();
-		if (!(defaultHeap.terminating)) {
-			HeapHeader *header = (HeapHeader*)(allocation - heapHeaderPaddedSize);
-			// Get the recycled allocation's header and its bin index.
-			HeapHeader *newHeader = headerFromAllocation(allocation);
-			if (newHeader->flags & heapFlag_recycled) {
-				printf("Heap error: A heap allocation was freed twice!\n");
+	void heap_setAllocationDestructor(void * const allocation, const HeapDestructor &destructor) { // Overwrites the destructor stored in the allocation's header. No nullptr check is made.
+		HeapHeader *header = headerFromAllocation(allocation);		
+		header->destructor = destructor;
+	}
+
+	static void heap_free(void * const allocation) {
+		if (lockDepth == 0) memoryLock.lock();
+		// Get the recycled allocation's header.
+		HeapHeader *header = headerFromAllocation(allocation);
+		if (header->isRecycled()) {
+			throwError(U"Heap error: A heap allocation was freed twice!\n");
+		} else {
+			// Call the destructor without using the mutex (lockDepth > 0).
+			lockDepth++;
+			//printf("heap_free called for allocation @ %ld\n", (uintptr_t)allocation);
+			//printf("    Use count: %ld\n", header->useCount);
+			//#ifdef SAFE_POINTER_CHECKS
+			//	printf("    ID: %lu\n", header->allocationIdentity);
+			//	printf("    Name: %s\n", header->name ? header->name : "<nameless>");
+			//#endif
+			//printf("    Calling destructor\n");
+			// Call the destructor provided with any external resource that also needs to be freed.
+			if (header->destructor.destructor) {
+				header->destructor.destructor(allocation, header->destructor.externalResource);
+			}
+			//printf("    Finished destructor\n");
+			lockDepth--;
+			assert(lockDepth >= 0);
+			// Remove the destructor so that it is not called again for the next allocation.
+			header->destructor = HeapDestructor();
+			int binIndex = header->binIndex;
+			if (binIndex >= MAX_BIN_COUNT) {
+				throwError(U"Heap error: Out of bound recycling bin index in corrupted head of freed allocation!\n");
 			} else {
-				int binIndex = header->binIndex;
-				if (binIndex >= MAX_BIN_COUNT) {
-					printf("Heap error: Out of bound recycling bin index in corrupted head of freed allocation!\n");
-				} else {
-					// Make any previous head from the bin into the new tail.
-					HeapHeader *oldHeader = defaultHeap.recyclingBin[binIndex];
-					newHeader->nextRecycled = oldHeader;
-					// Mark the allocation as recycled.
-					newHeader->flags |= heapFlag_recycled;
-					// Store the newly recycled allocation in the bin.
-					defaultHeap.recyclingBin[binIndex] = newHeader;
-					newHeader->nextRecycled = oldHeader;
-				}
+				// Make any previous head from the bin into the new tail.
+				HeapHeader *oldHeader = defaultHeap.recyclingBin[binIndex];
+				header->nextRecycled = oldHeader;
+				// Mark the allocation as recycled.
+				header->makeRecycled();
+				#ifdef SAFE_POINTER_CHECKS
+					// Remove the allocation identity, so that use of freed memory can be detected in SafePointer and Handle.
+					header->allocationIdentity = 0;
+					header->threadHash = 0;
+				#endif
+				// Store the newly recycled allocation in the bin.
+				defaultHeap.recyclingBin[binIndex] = header;
+				header->nextRecycled = oldHeader;
+			}
+		}
+		// By decreasing the count after recursive calls to destructors, we can make sure that the arena is freed last.
+		// If a destructor allocates new memory, it will have to allocate a new arena and then clean it up again.
+		allocationCount--;
+		// If the heap has been told to terminate and we reached zero allocations, we can tell it to clean up.
+		if (defaultHeap.terminating && allocationCount == 0) {
+			defaultHeap.cleanUp();
+		}
+		if (lockDepth == 0) memoryLock.unlock();
+	}
+
+	static void forAllHeapAllocations(HeapMemory &heap, std::function<void(AllocationHeader * header, void * allocation)> callback) {
+		uint8_t * current = heap.allocationPointer;
+		while (current < heap.bottom) {
+			HeapHeader *header = (HeapHeader*)current;
+			void *payload = allocationFromHeader(header);
+			if (!(header->isRecycled())) {
+				callback(header, payload);
 			}
+			current += header->totalSize;
 		}
-		defaultHeap.poolLock.unlock();
+	}
+
+	void heap_forAllHeapAllocations(std::function<void(AllocationHeader * header, void * allocation)> callback) { // Calls callback for each allocation not marked as recycled, in every arena. memoryLock is not taken here.
+		HeapMemory *currentHeap = defaultHeap.lastHeap; // Start with the last created arena and walk backwards through prevHeap.
+		while (currentHeap != nullptr) {
+			forAllHeapAllocations(*currentHeap, callback);
+			currentHeap = currentHeap->prevHeap;
+		}
+	}
+
+	void heap_hardExitCleaning() { // Deletes all arenas at once. Destructors of remaining allocations are not called yet, see the TODO below.
+		// TODO:
+		// * Implement a function that iterates over all heap allocations.
+		// * Increment the use count for each allocation, to prevent recursive freeing of resources.
+		// * Call the destructor on each allocation without freeing any memory, while all memory is still available.
+		// Then the arenas can safely be deallocated without looking at individual allocations again.
+		allocationCount = 0; // Forget about all remaining allocations, leaked or not.
+		defaultHeap.terminating = true;
+		defaultHeap.cleanUp(); // Deletes every arena, so no heap allocation may be accessed after this.
+	}
+
+	void impl_throwAllocationFailure() { // Reports a failed object allocation through throwError.
+		throwError(U"Failed to allocate memory for a new object!\n");
+	}
+
+	void impl_throwNullException() { // Reports use of a null handle through throwError.
+		throwError(U"Null handle exception!\n");
+	}
+
+	void impl_throwIdentityMismatch(uint64_t allocationIdentity, uint64_t pointerIdentity) { // Reports through throwError that the allocation's identity differs from the one the pointer expected.
+		throwError(U"Identity mismatch! The allocation pointed to had identity ", allocationIdentity, U" but ", pointerIdentity, U" was expected by the pointer from when it was allocated.\n");
 	}
 }

+ 106 - 12
Source/DFPSR/base/heap.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2024 David Forsgren Piuva
+// Copyright (c) 2024 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -21,36 +21,130 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
+// An arena memory allocator with recycling bins to provide heap memory.
+//   All allocations are reference counted, because the memory allocator itself may increase the reference count as needed.
+//     * An allocation with use count 0 will remain until the use count changes and reaches zero again.
+//   All allocations are aligned to DSR_MAXIMUM_ALIGNMENT to prevent false sharing of cache lines between threads.
+//   The space in front of each allocation contains a HeapHeader including:
+//     * The total size of the allocation including padding and the header.
+//     * How many of the allocated bytes are actually used.
+//     * A reference counter.
+//     * A destructor to allow freeing the memory no matter where the reference counter decreases to zero.
+//     * A bin index for fast recycling of memory.
+//     * Bit flags for keeping track of the allocation's state.
+//   In debug mode, the header also contains:
+//     * A thread hash to keep track of data ownership.
+//       All heap allocations are currently shared among all threads unlike virtual stack memory.
+//     * An allocation identity that is unique for each new allocation.
+//       When freed, the header's allocation identity is set to zero to prevent accidental use of freed memory.
+//       When recycled as a new allocation, the same address gets a new identity, which invalidates any old SafePointer to the same address.
+//     * An ascii name to make memory debugging easier.
+
+// Dimensions
+//   used size <= padded size <= allocation size
+// * Size defines the used bytes that represent something, which affects destruction of elements.
+//   Size can change from 0 to allocation size without having to move the data.
+// * The padded size defines the region where memory access is allowed for SafePointer.
+//   Padded size is computed by rounding up to whole blocks of DSR_MAXIMUM_ALIGNMENT.
+// * Allocation size is the available space to work with.
+//   To change the allocation's size, one must move to another memory location.
+
 #ifndef DFPSR_HEAP
 #define DFPSR_HEAP
 
 #include "SafePointer.h"
+#include <functional>
 
 namespace dsr {
+	// TODO: Replace with a lambda printing the name from capture and optional serialized content, because memory efficiency is not required in debug mode.
+	#ifdef SAFE_POINTER_CHECKS
+		// Assign a debug name to the allocation.
+		//   Does nothing if allocation is nullptr.
+		//   Only assign constant ascii string literals.
+		void heap_setAllocationName(void * const allocation, const char *name);
+		// Get the ascii name of allocation, or "none" if allocation is nullptr.
+		const char * heap_getAllocationName(void * const allocation);
+		// Gets the size padded out to whole blocks of DSR_MAXIMUM_ALIGNMENT, for constructing a SafePointer.
+		uintptr_t heap_getPaddedSize(void const * const allocation);
+	#endif
+
+	// TODO: Allow allocating fixed size allocations using a typename and pre-calculate the bin index in compile time.
+	//       This requires the bin sizes to be independent of DSR_MAXIMUM_ALIGNMENT, possibly by leaving a few bins
+	//       unused in the beginning and start counting with the header size as the smallest allocation size.
 	// Allocate memory in the heap.
 	//   The minimumSize argument is the minimum number of bytes to allocate, but the result may give you more than you asked for.
+	//   To allow representing empty files using buffers, it is allowed to create an allocation of zero bytes.
 	//   When zeroed is true, the new memory will be zeroed. Otherwise it may contain uninitialized data.
 	// Post-condition: Returns pointers to the payload and header.
-	UnsafeAllocation heap_allocate(uint64_t minimumSize, bool zeroed = true);
+	UnsafeAllocation heap_allocate(uintptr_t minimumSize, bool zeroed = true);
+
+	// Increase the use count of an allocation.
+	//   Does nothing if the allocation is nullptr.
+	void heap_increaseUseCount(void const * const allocation);
+
+	// Decrease the use count of an allocation and recycle it when reaching zero.
+	//   Does nothing if the allocation is nullptr.
+	void heap_decreaseUseCount(void const * const allocation);
+
+	// Pre-condition:
+	//   allocation points to memory allocated as heap_allocate(...).data because this feature is specific to this allocator.
+	// Post-condition:
+	//   Returns the number of bytes in the allocation that are actually used, which is used for tight bound checks and knowing how large a buffer is.
+	//   Returns 0 if allocation is nullptr.
+	uintptr_t heap_getUsedSize(void const * const allocation);
+
+	// Side-effect:
+	//   Assigns a new used size to allocation.
+	//   Has no effect if allocation is nullptr.
+	// Pre-condition:
+	//   You may not reserve more memory than what is available in total.
+	//   size <= heap_getAllocationSize(allocation)
+	//   If exceeded, size will be limited by the allocation's size.
+	// Post-condition:
+	//   Returns the assigned size, which is the given size, the allocation size if the given size exceeded it, or zero for an allocation that does not exist.
+	uintptr_t heap_setUsedSize(void * const allocation, uintptr_t size);
+
+	// A function pointer for destructors.
+	using HeapDestructorPointer = void(*)(void *toDestroy, void *externalResource);
+	struct HeapDestructor { // Pairs a destructor function pointer with the external resource pointer that is passed as its second argument.
+		// A function pointer to a method taking toDestroy and externalResource as arguments.
+		HeapDestructorPointer destructor = nullptr; // When left as nullptr, there is nothing to call.
+		// A pointer for freeing external resources owning the allocation.
+		void *externalResource = nullptr;
+		// Constructor. Both arguments default to nullptr, which gives an empty destructor.
+		HeapDestructor(HeapDestructorPointer destructor = nullptr, void *externalResource = nullptr)
+		: destructor(destructor), externalResource(externalResource) {}
+	};
+
+	// Register a destructor function pointer to be called automatically when the allocation is freed.
+	//   externalResource is the second argument that will be given to destructor together with the freed memory to destruct.
+	void heap_setAllocationDestructor(void * const allocation, const HeapDestructor &destructor);
+
+	// Get the use count outside of transactions without locking.
+	uintptr_t heap_getUseCount(void const * const allocation);
 
 	// Pre-condition: The allocation pointer must point to the start of a payload allocated using heap_allocate, no offsets nor other allocators allowed.
 	// Post-condition: Returns the number of available bytes in the allocation.
 	//                 You may not read a single byte outside of it, because it might include padding that ends at uneven addresses.
 	//                 To use more memory than requested, you must round it down to whole elements.
 	//                 If the element's size is a power of two, you can pre-compute a bit mask using memory_createAlignmentAndMask for rounding down.
-	uint64_t heap_getAllocationSize(uint8_t const * const allocation);
+	uintptr_t heap_getAllocationSize(void const * const allocation);
 
 	// Pre-condition: The allocation pointer must point to the start of a payload allocated using heap_allocate, no offsets nor other allocators allowed.
 	// Post-condition: Returns a pointer to the heap allocation's header, which is used to construct safe pointers.
-	AllocationHeader *heap_getHeader(uint8_t const * const allocation);
-
-	// Only a pointer is needed, so that it can be sent as a function pointer to X11.
-	// TODO: Use the allocation head's alignment as the minimum alignment by combining the masks in compile time.
-	//       Then it is possible to place the padded allocation header for heap memory at a fixed offset from the allocation start, so that the head can be accessed.
-	//       No extra offsets are allowed on the pointer used to free the memory.
-	// TODO: Have a global variable containing the default memory pool.
-	//       When it is destructed, all allocations that are empty will be freed and a termination flag will be enabled so that any more allocations being freed after it will free the memory themselves.
-	void heap_free(uint8_t * const allocation);
+	AllocationHeader *heap_getHeader(void * const allocation);
+
+	// Call back for each used heap allocation.
+	//   Recycled allocations are not included.
+	void heap_forAllHeapAllocations(std::function<void(AllocationHeader * header, void * allocation)> callback);
+
+	// If terminating the program using std::exit, you can call this first to free all heap memory in the allocator, leaked or not.
+	void heap_hardExitCleaning();
+
+	// Helper methods to prevent cyclic dependencies between strings and buffers when handles must be inlined for performance. Do not call these yourself.
+	void impl_throwAllocationFailure();
+	void impl_throwNullException();
+	void impl_throwIdentityMismatch(uint64_t allocationIdentity, uint64_t pointerIdentity);
 }
 
 #endif

+ 25 - 15
Source/DFPSR/base/memory.h

@@ -36,6 +36,10 @@ namespace dsr {
 	struct AllocationHeader {
 		uintptr_t totalSize; // Size of both header and payload.
 		#ifdef SAFE_POINTER_CHECKS
+			// TODO: Replace the name with a function pointer serializing the buffer's data into a human readable format.
+			//       Because it is only for the debug version, lambdas with capture may be used to store additional information.
+			//       If string_toStreamIndented has been defined for the type, it should try to use it if no serialization function was provided manually.
+			const char *name = nullptr; // Debug name of the allocation.
 			uint64_t threadHash; // Hash of the owning thread identity for thread local memory, 0 for shared memory.
 			uint64_t allocationIdentity; // Rotating identity of the allocation, to know if the memory has been freed and reused within a memory allocator.
 		#endif
@@ -43,32 +47,38 @@ namespace dsr {
 		AllocationHeader();
 		// Header for allocated memory.
 		// threadLocal should be true iff the memory may not be accessed from other threads, such as virtual stack memory.
-		AllocationHeader(uintptr_t totalSize, bool threadLocal);
+		AllocationHeader(uintptr_t totalSize, bool threadLocal, const char *name);
+		// Give a new identity to a reused allocation header.
+		void reuse(bool threadLocal, const char *name);
 	};
 
 	// A structure used to allocate memory before placing the content in SafePointer.
 	struct UnsafeAllocation {
 		uint8_t *data;
-		#ifdef SAFE_POINTER_CHECKS
-			AllocationHeader *header;
-			UnsafeAllocation(uint8_t *data, AllocationHeader *header)
-			: data(data), header(header) {}
-		#else
-			UnsafeAllocation(uint8_t *data, AllocationHeader *header)
-			: data(data) {}
-		#endif
+		AllocationHeader *header; // Stored unconditionally, no longer only when SAFE_POINTER_CHECKS is defined.
+		UnsafeAllocation(uint8_t *data, AllocationHeader *header)
+		: data(data), header(header) {}
 	};
 
+	// Post-condition: Returns size rounded up to the closest multiple of the alignment, where alignment = (~alignmentAndMask) + 1.
+	constexpr inline uintptr_t memory_getPaddedSize_usingAndMask(uintptr_t size, uintptr_t alignmentAndMask) {
+		// The bitwise negation of alignmentAndMask equals the alignment minus one, which is just what we need to add before truncating down using the and mask.
+		return (size + ~alignmentAndMask) & alignmentAndMask; // A size that is already a multiple of the alignment is returned unchanged.
+	}
+
+	// Pre-condition: The alignment argument must be a power of two (1, 2, 4, 8, 16, 32, 64...).
 	// Post-condition: Returns size rounded up by alignment.
-	constexpr uint64_t memory_getPaddedSize(uint64_t size, uint64_t alignment) {
-		// Round up with unsigned integers.
-		return size + (alignment - 1) - ((size - 1) % alignment);
+	constexpr inline uintptr_t memory_getPaddedSize(uintptr_t size, uintptr_t alignment) {
+		// For integers, you can round up to multiples of alignment, by adding alignment - 1 and rounding down.
+		// When rounding down for a power of two, you can bit mask away the least significant bits.
+		uintptr_t roundedBits = alignment - 1; // Ones for the remainder bits, such as ...00000111 for alignment 8.
+		return (size + roundedBits) & ~roundedBits; // Example: size 13 with alignment 8 gives (13 + 7) & ~7 = 16.
 	}
 
 	// Post-condition: Returns the size of T rounded up by T's own alignment, which becomes the stride between elements in a memory aligned array.
 	template <typename T>
-	constexpr uint64_t memory_getPaddedSize() {
-		return memory_getPaddedSize((uint64_t)sizeof(T), (uint64_t)alignof(T));
+	constexpr inline uintptr_t memory_getPaddedSize() {
+		return memory_getPaddedSize((uintptr_t)sizeof(T), (uintptr_t)alignof(T)); // Forwards to the two argument overload using T's own size and alignment.
 	}
 
 	// Create a mask for aligning memory in descending address space.
@@ -79,7 +89,7 @@ namespace dsr {
 	//   alignment is a power of two (1, 2, 4, 8, 16, 32, 64...)
 	// Post-condition:
 	//   Returns a bit mask for rounding an integer down to the closest multiple of alignment.
-	constexpr uintptr_t memory_createAlignmentAndMask(uintptr_t alignment) {
+	constexpr inline uintptr_t memory_createAlignmentAndMask(uintptr_t alignment) {
 		// alignment = ...00001000...
 		// Subtracting one from a power of two gives a mask with ones for the remainder bits.
 		// remainder = ...00000111...

+ 112 - 0
Source/DFPSR/base/noSimd.h

@@ -0,0 +1,112 @@
+// zlib open source license
+//
+// Copyright (c) 2025 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+// Functions used to simplify template programming when using functions both with and without simd.h.
+
+#ifndef DFPSR_NO_SIMD
+#define DFPSR_NO_SIMD
+
+#include <stdint.h>
+#include "SafePointer.h"
+
+namespace dsr {
+	// Scalar type conversions using plain casts. Converting from float to an integer truncates toward zero.
+	inline int32_t truncateToI32(float value) { return (int32_t)value; }
+	inline uint32_t truncateToU32(float value) { return (uint32_t)value; }
+	inline float floatFromI32(int32_t value) { return (float)value; }
+	inline float floatFromU32(uint32_t value) { return (float)value; }
+	inline int32_t I32FromU32(uint32_t value) { return (int32_t)value; }
+	inline uint32_t U32FromI32(int32_t value) { return (uint32_t)value; }
+
+	// Memory read operations. The scalar gather functions read the single element at index elementOffset from data.
+	inline uint32_t gather_U32(dsr::SafePointer<const uint32_t> data, const uint32_t &elementOffset) { return data[elementOffset]; }
+	inline int32_t gather_I32(dsr::SafePointer<const int32_t> data, const uint32_t &elementOffset) { return data[elementOffset]; }
+	inline float gather_F32(dsr::SafePointer<const float> data, const uint32_t &elementOffset) { return data[elementOffset]; }
+
+	// Comparisons between all lanes, which is one lane for scalar types. Float equality is tolerant, so values less than 0.0001 apart count as equal.
+	inline bool allLanesEqual         (const  uint8_t& left,  const uint8_t& right) { return left == right; }
+	inline bool allLanesEqual         (const uint16_t& left, const uint16_t& right) { return left == right; }
+	inline bool allLanesEqual         (const uint32_t& left, const uint32_t& right) { return left == right; }
+	inline bool allLanesEqual         (const  int32_t& left, const  int32_t& right) { return left == right; }
+	inline bool allLanesEqual         (const    float& left, const    float& right) { float difference = left - right; return difference < 0.0001f && difference > -0.0001f; } // Explicit range check, because an unqualified abs may select the integer overload and truncate the difference.
+	inline bool allLanesNotEqual      (const  uint8_t& left, const  uint8_t& right) { return left != right; }
+	inline bool allLanesNotEqual      (const uint16_t& left, const uint16_t& right) { return left != right; }
+	inline bool allLanesNotEqual      (const uint32_t& left, const uint32_t& right) { return left != right; }
+	inline bool allLanesNotEqual      (const  int32_t& left, const  int32_t& right) { return left != right; }
+	inline bool allLanesNotEqual      (const    float& left, const    float& right) { float difference = left - right; return difference >= 0.0001f || difference <= -0.0001f; } // Same tolerance as allLanesEqual, without depending on abs.
+	inline bool allLanesGreater       (const  uint8_t& left, const  uint8_t& right) { return left >  right; }
+	inline bool allLanesGreater       (const uint16_t& left, const uint16_t& right) { return left >  right; }
+	inline bool allLanesGreater       (const uint32_t& left, const uint32_t& right) { return left >  right; }
+	inline bool allLanesGreater       (const  int32_t& left, const  int32_t& right) { return left >  right; }
+	inline bool allLanesGreater       (const    float& left, const    float& right) { return left >  right; }
+	inline bool allLanesGreaterOrEqual(const  uint8_t& left, const  uint8_t& right) { return left >= right; }
+	inline bool allLanesGreaterOrEqual(const uint16_t& left, const uint16_t& right) { return left >= right; }
+	inline bool allLanesGreaterOrEqual(const uint32_t& left, const uint32_t& right) { return left >= right; }
+	inline bool allLanesGreaterOrEqual(const  int32_t& left, const  int32_t& right) { return left >= right; }
+	inline bool allLanesGreaterOrEqual(const    float& left, const    float& right) { return left >= right; }
+	inline bool allLanesLesser        (const  uint8_t& left, const  uint8_t& right) { return left <  right; }
+	inline bool allLanesLesser        (const uint16_t& left, const uint16_t& right) { return left <  right; }
+	inline bool allLanesLesser        (const uint32_t& left, const uint32_t& right) { return left <  right; }
+	inline bool allLanesLesser        (const  int32_t& left, const  int32_t& right) { return left <  right; }
+	inline bool allLanesLesser        (const    float& left, const    float& right) { return left <  right; }
+	inline bool allLanesLesserOrEqual (const  uint8_t& left, const  uint8_t& right) { return left <= right; }
+	inline bool allLanesLesserOrEqual (const uint16_t& left, const uint16_t& right) { return left <= right; }
+	inline bool allLanesLesserOrEqual (const uint32_t& left, const uint32_t& right) { return left <= right; }
+	inline bool allLanesLesserOrEqual (const  int32_t& left, const  int32_t& right) { return left <= right; }
+	inline bool allLanesLesserOrEqual (const    float& left, const    float& right) { return left <= right; }
+
+	template <uint32_t bitOffset> // Scalar bit shifts taking the offset as a template argument, like the vector overloads in simd.h.
+	inline uint32_t bitShiftLeftImmediate(const uint32_t& left) {
+		static_assert(bitOffset < 32u); // The offset must be smaller than the 32-bit width.
+		return left << bitOffset;
+	}
+	template <uint32_t bitOffset>
+	inline uint32_t bitShiftRightImmediate(const uint32_t& left) {
+		static_assert(bitOffset < 32u); // The offset must be smaller than the 32-bit width.
+		return left >> bitOffset;
+	}
+	template <uint16_t bitOffset>
+	inline uint16_t bitShiftLeftImmediate(const uint16_t& left) {
+		static_assert(bitOffset < 16u); // The offset must be smaller than the 16-bit width.
+		return left << bitOffset; // The promoted result is truncated back to 16 bits by the return type.
+	}
+	template <uint16_t bitOffset>
+	inline uint16_t bitShiftRightImmediate(const uint16_t& left) {
+		static_assert(bitOffset < 16u); // The offset must be smaller than the 16-bit width.
+		return left >> bitOffset;
+	}
+	template <uint8_t bitOffset>
+	inline uint8_t bitShiftLeftImmediate(const uint8_t& left) {
+		static_assert(bitOffset < 8u); // The offset must be smaller than the 8-bit width.
+		return left << bitOffset; // The promoted result is truncated back to 8 bits by the return type.
+	}
+	template <uint8_t bitOffset>
+	inline uint8_t bitShiftRightImmediate(const uint8_t& left) {
+		static_assert(bitOffset < 8u); // The offset must be smaller than the 8-bit width.
+		return left >> bitOffset;
+	}
+
+	// TODO: Add more functions from simd.h.
+}
+
+#endif

+ 454 - 35
Source/DFPSR/base/simd.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2023 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -87,14 +87,19 @@
 	#include "../math/FVector.h"
 	#include "../math/IVector.h"
 	#include "../math/UVector.h"
-
-	// Get settings from here.
+	#include "DsrTraits.h"
 	#include "../settings.h"
+	#include "../base/noSimd.h"
 
 	// Alignment in bytes
 	#define ALIGN_BYTES(SIZE)  __attribute__((aligned(SIZE)))
 	#define ALIGN16 ALIGN_BYTES(16) // 128-bit alignment
 	#define ALIGN32 ALIGN_BYTES(32) // 256-bit alignment
+	#define ALIGN64 ALIGN_BYTES(64) // 512-bit alignment
+	#define ALIGN128 ALIGN_BYTES(128) // 1024-bit alignment
+	#define ALIGN256 ALIGN_BYTES(256) // 2048-bit alignment
+
+	namespace dsr {
 
 	// Everything declared in here handles things specific for SSE.
 	// Direct use of the macros will not provide portability to all hardware.
@@ -507,7 +512,7 @@
 			}
 		#endif
 		// Bound and alignment checked reading
-		static inline F32x4 readAligned(const dsr::SafePointer<float> data, const char* methodName) {
+		static inline F32x4 readAligned(dsr::SafePointer<const float> data, const char* methodName) {
 			const float* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 15) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -727,7 +732,7 @@
 			}
 		#endif
 		// Bound and alignment checked reading
-		static inline I32x4 readAligned(const dsr::SafePointer<int32_t> data, const char* methodName) {
+		static inline I32x4 readAligned(dsr::SafePointer<const int32_t> data, const char* methodName) {
 			const int32_t* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 15) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -824,7 +829,7 @@
 			}
 		#endif
 		// Bound and alignment checked reading
-		static inline U32x4 readAligned(const dsr::SafePointer<uint32_t> data, const char* methodName) {
+		static inline U32x4 readAligned(dsr::SafePointer<const uint32_t> data, const char* methodName) {
 			const uint32_t* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 15) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -975,7 +980,7 @@
 			#endif
 		}
 		// Bound and alignment checked reading
-		static inline U16x8 readAligned(const dsr::SafePointer<uint16_t> data, const char* methodName) {
+		static inline U16x8 readAligned(dsr::SafePointer<const uint16_t> data, const char* methodName) {
 			const uint16_t* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 15) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -1127,7 +1132,7 @@
 			#endif
 		}
 		// Bound and alignment checked reading
-		static inline U8x16 readAligned(const dsr::SafePointer<uint8_t> data, const char* methodName) {
+		static inline U8x16 readAligned(dsr::SafePointer<const uint8_t> data, const char* methodName) {
 			const uint8_t* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 15) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -1236,7 +1241,7 @@
 			#endif
 		}
 		// Bound and alignment checked reading
-		static inline F32x8 readAligned(const dsr::SafePointer<float> data, const char* methodName) {
+		static inline F32x8 readAligned(dsr::SafePointer<const float> data, const char* methodName) {
 			const float* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 31) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -1488,7 +1493,7 @@
 			#endif
 		}
 		// Bound and alignment checked reading
-		static inline I32x8 readAligned(const dsr::SafePointer<int32_t> data, const char* methodName) {
+		static inline I32x8 readAligned(dsr::SafePointer<const int32_t> data, const char* methodName) {
 			const int32_t* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 31) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -1597,7 +1602,7 @@
 			#endif
 		}
 		// Bound and alignment checked reading
-		static inline U32x8 readAligned(const dsr::SafePointer<uint32_t> data, const char* methodName) {
+		static inline U32x8 readAligned(dsr::SafePointer<const uint32_t> data, const char* methodName) {
 			const uint32_t* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 31) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -1803,7 +1808,7 @@
 			#endif
 		}
 		// Bound and alignment checked reading
-		static inline U16x16 readAligned(const dsr::SafePointer<uint16_t> data, const char* methodName) {
+		static inline U16x16 readAligned(dsr::SafePointer<const uint16_t> data, const char* methodName) {
 			const uint16_t* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 31) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -1960,7 +1965,7 @@
 			#endif
 		}
 		// Bound and alignment checked reading
-		static inline U8x32 readAligned(const dsr::SafePointer<uint8_t> data, const char* methodName) {
+		static inline U8x32 readAligned(dsr::SafePointer<const uint8_t> data, const char* methodName) {
 			const uint8_t* pointer = data.getUnsafe();
 			assert(((uintptr_t)pointer & 31) == 0);
 			#if defined SAFE_POINTER_CHECKS
@@ -2032,9 +2037,9 @@
 		FOR_ALL_VECTOR_TYPES(CREATE_METHOD_PRINT)
 	#undef CREATE_METHOD_PRINT
 
-	// Whole comparisons returning a single boolean, mainly for regression tests.
+	// Integer equality returns true iff all lanes are identical.
 	#define CREATE_EXACT_EQUALITY(VECTOR_TYPE, ELEMENT_TYPE, LANE_COUNT) \
-		inline bool operator==(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
+		inline bool allLanesEqual(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
 			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE a[LANE_COUNT]; \
 			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE b[LANE_COUNT]; \
 			left.writeAlignedUnsafe(a); \
@@ -2043,13 +2048,24 @@
 				if (a[i] != b[i]) return false; \
 			} \
 			return true; \
+		} \
+		inline bool allLanesNotEqual(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE a[LANE_COUNT]; \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE b[LANE_COUNT]; \
+			left.writeAlignedUnsafe(a); \
+			right.writeAlignedUnsafe(b); \
+			for (int i = 0; i < LANE_COUNT; i++) { \
+				if (a[i] == b[i]) return false; \
+			} \
+			return true; \
 		}
 		// Integer SIMD vectors have exact equality.
 		FOR_INTEGER_VECTOR_TYPES(CREATE_EXACT_EQUALITY)
 	#undef CREATE_EXACT_EQUALITY
 
+	// Float equality returns true iff all lanes differ by less than 0.0001.
 	#define CREATE_TOLERANT_EQUALITY(VECTOR_TYPE, ELEMENT_TYPE, LANE_COUNT) \
-		inline bool operator==(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
+		inline bool allLanesEqual(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
 			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE a[LANE_COUNT]; \
 			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE b[LANE_COUNT]; \
 			left.writeAlignedUnsafe(a); \
@@ -2058,18 +2074,64 @@
 				if (fabs(a[i] - b[i]) >= 0.0001f) return false; \
 			} \
 			return true; \
+		} \
+		inline bool allLanesNotEqual(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE a[LANE_COUNT]; \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE b[LANE_COUNT]; \
+			left.writeAlignedUnsafe(a); \
+			right.writeAlignedUnsafe(b); \
+			for (int i = 0; i < LANE_COUNT; i++) { \
+				if (fabs(a[i] - b[i]) < 0.0001f) return false; \
+			} \
+			return true; \
 		}
 		// Float SIMD vectors have inexact equality.
 		FOR_FLOAT_VECTOR_TYPES(CREATE_TOLERANT_EQUALITY)
 	#undef CREATE_TOLERANT_EQUALITY
 
-	#define CREATE_INEQUALITY(VECTOR_TYPE, ELEMENT_TYPE, LANE_COUNT) \
-		inline bool operator!=(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
-			return !(left == right); \
+	#define CREATE_COMPARISONS(VECTOR_TYPE, ELEMENT_TYPE, LANE_COUNT) \
+		inline bool allLanesGreater(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE a[LANE_COUNT]; \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE b[LANE_COUNT]; \
+			left.writeAlignedUnsafe(a); \
+			right.writeAlignedUnsafe(b); \
+			for (int i = 0; i < LANE_COUNT; i++) { \
+				if (!(a[i] > b[i])) return false; \
+			} \
+			return true; \
+		} \
+		inline bool allLanesGreaterOrEqual(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE a[LANE_COUNT]; \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE b[LANE_COUNT]; \
+			left.writeAlignedUnsafe(a); \
+			right.writeAlignedUnsafe(b); \
+			for (int i = 0; i < LANE_COUNT; i++) { \
+				if (!(a[i] >= b[i])) return false; \
+			} \
+			return true; \
+		} \
+		inline bool allLanesLesser(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE a[LANE_COUNT]; \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE b[LANE_COUNT]; \
+			left.writeAlignedUnsafe(a); \
+			right.writeAlignedUnsafe(b); \
+			for (int i = 0; i < LANE_COUNT; i++) { \
+				if (!(a[i] < b[i])) return false; \
+			} \
+			return true; \
+		} \
+		inline bool allLanesLesserOrEqual(const VECTOR_TYPE& left, const VECTOR_TYPE& right) { \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE a[LANE_COUNT]; \
+			ALIGN_BYTES(sizeof(VECTOR_TYPE)) ELEMENT_TYPE b[LANE_COUNT]; \
+			left.writeAlignedUnsafe(a); \
+			right.writeAlignedUnsafe(b); \
+			for (int i = 0; i < LANE_COUNT; i++) { \
+				if (!(a[i] <= b[i])) return false; \
+			} \
+			return true; \
 		}
-		// All SIMD vectors have inequality.
-		FOR_ALL_VECTOR_TYPES(CREATE_INEQUALITY)
-	#undef CREATE_INEQUALITY
+		FOR_ALL_VECTOR_TYPES(CREATE_COMPARISONS) // Each comparison is true iff the relation holds in every lane, so a NaN lane gives false just like the scalar overloads.
+	#undef CREATE_COMPARISONS
 
 	inline F32x4 operator+(const F32x4& left, const F32x4& right) {
 		#if defined USE_BASIC_SIMD
@@ -2149,7 +2211,7 @@
 	inline I32x4 operator*(const I32x4& left, const I32x4& right) {
 		#if defined USE_BASIC_SIMD
 			#if defined USE_SSE2
-				// Emulate a NEON instruction
+				// TODO: Use AVX2 for 32-bit integer multiplication when available.
 				return I32x4(left.scalars[0] * right.scalars[0], left.scalars[1] * right.scalars[1], left.scalars[2] * right.scalars[2], left.scalars[3] * right.scalars[3]);
 			#elif defined USE_NEON
 				return I32x4(MUL_I32_NEON(left.v, right.v));
@@ -2158,6 +2220,7 @@
 			return I32x4(left.scalars[0] * right.scalars[0], left.scalars[1] * right.scalars[1], left.scalars[2] * right.scalars[2], left.scalars[3] * right.scalars[3]);
 		#endif
 	}
+	// TODO: Specify the behavior of truncated unsigned integer overflow and add it to the tests.
 	inline U32x4 operator+(const U32x4& left, const U32x4& right) {
 		#if defined USE_BASIC_SIMD
 			return U32x4(ADD_U32_SIMD(left.v, right.v));
@@ -2184,6 +2247,7 @@
 			return U32x4(left.scalars[0] * right.scalars[0], left.scalars[1] * right.scalars[1], left.scalars[2] * right.scalars[2], left.scalars[3] * right.scalars[3]);
 		#endif
 	}
+	// Bitwise and
 	inline U32x4 operator&(const U32x4& left, const U32x4& right) {
 		#if defined USE_BASIC_SIMD
 			return U32x4(BITWISE_AND_U32_SIMD(left.v, right.v));
@@ -2191,6 +2255,7 @@
 			return U32x4(left.scalars[0] & right.scalars[0], left.scalars[1] & right.scalars[1], left.scalars[2] & right.scalars[2], left.scalars[3] & right.scalars[3]);
 		#endif
 	}
+	// Bitwise or
 	inline U32x4 operator|(const U32x4& left, const U32x4& right) {
 		#if defined USE_BASIC_SIMD
 			return U32x4(BITWISE_OR_U32_SIMD(left.v, right.v));
@@ -2198,6 +2263,7 @@
 			return U32x4(left.scalars[0] | right.scalars[0], left.scalars[1] | right.scalars[1], left.scalars[2] | right.scalars[2], left.scalars[3] | right.scalars[3]);
 		#endif
 	}
+	// Bitwise xor
 	inline U32x4 operator^(const U32x4& left, const U32x4& right) {
 		#if defined USE_BASIC_SIMD
 			return U32x4(BITWISE_XOR_U32_SIMD(left.v, right.v));
@@ -2205,7 +2271,47 @@
 			return U32x4(left.scalars[0] ^ right.scalars[0], left.scalars[1] ^ right.scalars[1], left.scalars[2] ^ right.scalars[2], left.scalars[3] ^ right.scalars[3]);
 		#endif
 	}
-	inline U32x4 operator<<(const U32x4& left, uint32_t bitOffset) {
+	// Bitwise negation, inverting every bit in all four lanes.
+	inline U32x4 operator~(const U32x4& value) {
+		#if defined USE_NEON
+			return U32x4(vmvnq_u32(value.v));
+		#elif defined USE_BASIC_SIMD
+			// Fall back on xor against all ones.
+			return value ^ U32x4(~uint32_t(0));
+		#else
+			return U32x4(~value.scalars[0], ~value.scalars[1], ~value.scalars[2], ~value.scalars[3]);
+		#endif
+	}
+	inline U32x4 operator<<(const U32x4& left, const U32x4 &bitOffsets) { // Shifts each lane left by the offset in the same lane of bitOffsets.
+		assert(allLanesLesser(bitOffsets, U32x4(32u))); // Every offset must be below the 32-bit lane width.
+		#if defined USE_NEON
+			return U32x4(vshlq_u32(left.v, vreinterpretq_s32_u32(bitOffsets.v)));
+		#else
+			return U32x4(
+			  left.scalars[0] << bitOffsets.scalars[0],
+			  left.scalars[1] << bitOffsets.scalars[1],
+			  left.scalars[2] << bitOffsets.scalars[2],
+			  left.scalars[3] << bitOffsets.scalars[3]
+			);
+		#endif
+	}
+	inline U32x4 operator>>(const U32x4& left, const U32x4 &bitOffsets) { // Shifts each lane right by the offset in the same lane of bitOffsets.
+		assert(allLanesLesser(bitOffsets, U32x4(32u))); // Every offset must be below the 32-bit lane width.
+		#if defined USE_NEON
+			return U32x4(vshlq_u32(left.v, vnegq_s32(vreinterpretq_s32_u32(bitOffsets.v)))); // NEON only shifts right by a vector of offsets through vshl with negated offsets.
+		#else
+			return U32x4(
+			  left.scalars[0] >> bitOffsets.scalars[0],
+			  left.scalars[1] >> bitOffsets.scalars[1],
+			  left.scalars[2] >> bitOffsets.scalars[2],
+			  left.scalars[3] >> bitOffsets.scalars[3]
+			);
+		#endif
+	}
+	// bitOffset must be an immediate constant, so a template argument is used.
+	template <uint32_t bitOffset>
+	inline U32x4 bitShiftLeftImmediate(const U32x4& left) {
+		static_assert(bitOffset < 32u);
 		#if defined USE_SSE2
 			return U32x4(_mm_slli_epi32(left.v, bitOffset));
 		#else
@@ -2216,17 +2322,211 @@
 			#endif
 		#endif
 	}
-	inline U32x4 operator>>(const U32x4& left, uint32_t bitOffset) {
+	// Shifts all lanes right by the same offset. bitOffset must be an immediate constant, so a template argument is used.
+	template <uint32_t bitOffset>
+	inline U32x4 bitShiftRightImmediate(const U32x4& left) {
+		static_assert(bitOffset < 32u);
 		#if defined USE_SSE2
 			return U32x4(_mm_srli_epi32(left.v, bitOffset));
 		#else
 			#if defined USE_NEON
-				return U32x4(vshlq_u32(left.v, LOAD_SCALAR_I32_SIMD(-bitOffset)));
+				return U32x4(vshlq_u32(left.v, LOAD_SCALAR_I32_SIMD(-int32_t(bitOffset)))); // A negative vshl offset shifts right, and unlike vshrq_n_u32 it also accepts a zero offset.
 			#else
 				return U32x4(left.scalars[0] >> bitOffset, left.scalars[1] >> bitOffset, left.scalars[2] >> bitOffset, left.scalars[3] >> bitOffset);
 			#endif
 		#endif
 	}
+	
+	inline U16x8 operator<<(const U16x8& left, const U16x8 &bitOffsets) { // Shifts each lane left by the offset in the same lane of bitOffsets.
+		assert(allLanesLesser(bitOffsets, U16x8(16u))); // Every offset must be below the 16-bit lane width.
+		#if defined USE_NEON
+			return U16x8(vshlq_u16(left.v, vreinterpretq_s16_u16(bitOffsets.v)));
+		#else
+			return U16x8(
+			  left.scalars[0] << bitOffsets.scalars[0],
+			  left.scalars[1] << bitOffsets.scalars[1],
+			  left.scalars[2] << bitOffsets.scalars[2],
+			  left.scalars[3] << bitOffsets.scalars[3],
+			  left.scalars[4] << bitOffsets.scalars[4],
+			  left.scalars[5] << bitOffsets.scalars[5],
+			  left.scalars[6] << bitOffsets.scalars[6],
+			  left.scalars[7] << bitOffsets.scalars[7]
+			);
+		#endif
+	}
+	inline U16x8 operator>>(const U16x8& left, const U16x8 &bitOffsets) { // Shifts each lane right by the offset in the same lane of bitOffsets.
+		assert(allLanesLesser(bitOffsets, U16x8(16u))); // Every offset must be below the 16-bit lane width.
+		#if defined USE_NEON
+			return U16x8(vshlq_u16(left.v, vnegq_s16(vreinterpretq_s16_u16(bitOffsets.v)))); // NEON only shifts right by a vector of offsets through vshl with negated offsets.
+		#else
+			return U16x8(
+			  left.scalars[0] >> bitOffsets.scalars[0],
+			  left.scalars[1] >> bitOffsets.scalars[1],
+			  left.scalars[2] >> bitOffsets.scalars[2],
+			  left.scalars[3] >> bitOffsets.scalars[3],
+			  left.scalars[4] >> bitOffsets.scalars[4],
+			  left.scalars[5] >> bitOffsets.scalars[5],
+			  left.scalars[6] >> bitOffsets.scalars[6],
+			  left.scalars[7] >> bitOffsets.scalars[7]
+			);
+		#endif
+	}
+	// Shifts all eight 16-bit lanes left by the same offset. bitOffset must be an immediate constant, so a template argument is used.
+	template <uint32_t bitOffset>
+	inline U16x8 bitShiftLeftImmediate(const U16x8& left) {
+		static_assert(bitOffset < 16u);
+		#if defined USE_SSE2
+			return U16x8(_mm_slli_epi16(left.v, bitOffset));
+		#else
+			#if defined USE_NEON
+				return U16x8(vshlq_u16(left.v, vdupq_n_s16(int16_t(bitOffset)))); // Must use the 16-bit lane variant to match the vector type.
+			#else
+				return U16x8(
+				  left.scalars[0] << bitOffset,
+				  left.scalars[1] << bitOffset,
+				  left.scalars[2] << bitOffset,
+				  left.scalars[3] << bitOffset,
+				  left.scalars[4] << bitOffset,
+				  left.scalars[5] << bitOffset,
+				  left.scalars[6] << bitOffset,
+				  left.scalars[7] << bitOffset
+				);
+			#endif
+		#endif
+	}
+	// Shifts all eight 16-bit lanes right by the same offset. bitOffset must be an immediate constant.
+	template <uint32_t bitOffset>
+	inline U16x8 bitShiftRightImmediate(const U16x8& left) {
+		static_assert(bitOffset < 16u);
+		#if defined USE_SSE2
+			return U16x8(_mm_srli_epi16(left.v, bitOffset));
+		#else
+			#if defined USE_NEON
+				return U16x8(vshlq_u16(left.v, vdupq_n_s16(-int16_t(bitOffset)))); // A negative vshl offset shifts right in 16-bit lanes, and also accepts a zero offset.
+			#else
+				return U16x8(
+				  left.scalars[0] >> bitOffset,
+				  left.scalars[1] >> bitOffset,
+				  left.scalars[2] >> bitOffset,
+				  left.scalars[3] >> bitOffset,
+				  left.scalars[4] >> bitOffset,
+				  left.scalars[5] >> bitOffset,
+				  left.scalars[6] >> bitOffset,
+				  left.scalars[7] >> bitOffset
+				);
+			#endif
+		#endif
+	}
+
+	inline U8x16 operator<<(const U8x16& left, const U8x16 &bitOffsets) { // Shifts each lane left by the offset in the same lane of bitOffsets.
+		assert(allLanesLesser(bitOffsets, U8x16(8u))); // Every offset must be below the 8-bit lane width.
+		#if defined USE_NEON
+			return U8x16(vshlq_u8(left.v, vreinterpretq_s8_u8(bitOffsets.v))); // Must use the 8-bit lane variant to match the vector type.
+		#else
+			return U8x16(
+			  left.scalars[ 0] << bitOffsets.scalars[ 0],
+			  left.scalars[ 1] << bitOffsets.scalars[ 1],
+			  left.scalars[ 2] << bitOffsets.scalars[ 2],
+			  left.scalars[ 3] << bitOffsets.scalars[ 3],
+			  left.scalars[ 4] << bitOffsets.scalars[ 4],
+			  left.scalars[ 5] << bitOffsets.scalars[ 5],
+			  left.scalars[ 6] << bitOffsets.scalars[ 6],
+			  left.scalars[ 7] << bitOffsets.scalars[ 7],
+			  left.scalars[ 8] << bitOffsets.scalars[ 8],
+			  left.scalars[ 9] << bitOffsets.scalars[ 9],
+			  left.scalars[10] << bitOffsets.scalars[10],
+			  left.scalars[11] << bitOffsets.scalars[11],
+			  left.scalars[12] << bitOffsets.scalars[12],
+			  left.scalars[13] << bitOffsets.scalars[13],
+			  left.scalars[14] << bitOffsets.scalars[14],
+			  left.scalars[15] << bitOffsets.scalars[15]
+			);
+		#endif
+	}
+	inline U8x16 operator>>(const U8x16& left, const U8x16 &bitOffsets) { // Shifts each lane right by the offset in the same lane of bitOffsets.
+		assert(allLanesLesser(bitOffsets, U8x16(8u))); // Every offset must be below the 8-bit lane width.
+		#if defined USE_NEON
+			return U8x16(vshlq_u8(left.v, vnegq_s8(vreinterpretq_s8_u8(bitOffsets.v)))); // NEON only shifts right by a vector of offsets through vshl with negated offsets.
+		#else
+			return U8x16(
+			  left.scalars[ 0] >> bitOffsets.scalars[ 0],
+			  left.scalars[ 1] >> bitOffsets.scalars[ 1],
+			  left.scalars[ 2] >> bitOffsets.scalars[ 2],
+			  left.scalars[ 3] >> bitOffsets.scalars[ 3],
+			  left.scalars[ 4] >> bitOffsets.scalars[ 4],
+			  left.scalars[ 5] >> bitOffsets.scalars[ 5],
+			  left.scalars[ 6] >> bitOffsets.scalars[ 6],
+			  left.scalars[ 7] >> bitOffsets.scalars[ 7],
+			  left.scalars[ 8] >> bitOffsets.scalars[ 8],
+			  left.scalars[ 9] >> bitOffsets.scalars[ 9],
+			  left.scalars[10] >> bitOffsets.scalars[10],
+			  left.scalars[11] >> bitOffsets.scalars[11],
+			  left.scalars[12] >> bitOffsets.scalars[12],
+			  left.scalars[13] >> bitOffsets.scalars[13],
+			  left.scalars[14] >> bitOffsets.scalars[14],
+			  left.scalars[15] >> bitOffsets.scalars[15]
+			);
+		#endif
+	}
+	// Shifts all sixteen 8-bit lanes left by the same offset. bitOffset must be an immediate constant, so a template argument is used.
+	template <uint32_t bitOffset>
+	inline U8x16 bitShiftLeftImmediate(const U8x16& left) {
+		static_assert(bitOffset < 8u);
+		#if defined USE_SSE2
+			return U8x16(_mm_and_si128(_mm_slli_epi16(left.v, bitOffset), _mm_set1_epi8(char(uint8_t(0xFFu << bitOffset))))); // SSE2 has no 8-bit shift, so shift 16-bit lanes and mask away the bits that leaked in from the neighboring byte.
+		#elif defined USE_NEON
+			return U8x16(vshlq_u8(left.v, vdupq_n_s8(int8_t(bitOffset)))); // Must use the 8-bit lane variant to match the vector type.
+		#else
+			return U8x16(
+			  left.scalars[ 0] << bitOffset,
+			  left.scalars[ 1] << bitOffset,
+			  left.scalars[ 2] << bitOffset,
+			  left.scalars[ 3] << bitOffset,
+			  left.scalars[ 4] << bitOffset,
+			  left.scalars[ 5] << bitOffset,
+			  left.scalars[ 6] << bitOffset,
+			  left.scalars[ 7] << bitOffset,
+			  left.scalars[ 8] << bitOffset,
+			  left.scalars[ 9] << bitOffset,
+			  left.scalars[10] << bitOffset,
+			  left.scalars[11] << bitOffset,
+			  left.scalars[12] << bitOffset,
+			  left.scalars[13] << bitOffset,
+			  left.scalars[14] << bitOffset,
+			  left.scalars[15] << bitOffset
+			);
+		#endif
+	}
+	// Shifts all sixteen 8-bit lanes right by the same offset. bitOffset must be an immediate constant.
+	template <uint32_t bitOffset>
+	inline U8x16 bitShiftRightImmediate(const U8x16& left) {
+		static_assert(bitOffset < 8u);
+		#if defined USE_SSE2
+			return U8x16(_mm_and_si128(_mm_srli_epi16(left.v, bitOffset), _mm_set1_epi8(char(0xFFu >> bitOffset)))); // SSE2 has no 8-bit shift, so shift 16-bit lanes and mask away the bits that leaked in from the neighboring byte.
+		#elif defined USE_NEON
+			return U8x16(vshlq_u8(left.v, vdupq_n_s8(-int8_t(bitOffset)))); // A negative vshl offset shifts right in 8-bit lanes, and also accepts a zero offset.
+		#else
+			return U8x16(
+			  left.scalars[ 0] >> bitOffset,
+			  left.scalars[ 1] >> bitOffset,
+			  left.scalars[ 2] >> bitOffset,
+			  left.scalars[ 3] >> bitOffset,
+			  left.scalars[ 4] >> bitOffset,
+			  left.scalars[ 5] >> bitOffset,
+			  left.scalars[ 6] >> bitOffset,
+			  left.scalars[ 7] >> bitOffset,
+			  left.scalars[ 8] >> bitOffset,
+			  left.scalars[ 9] >> bitOffset,
+			  left.scalars[10] >> bitOffset,
+			  left.scalars[11] >> bitOffset,
+			  left.scalars[12] >> bitOffset,
+			  left.scalars[13] >> bitOffset,
+			  left.scalars[14] >> bitOffset,
+			  left.scalars[15] >> bitOffset
+			);
+		#endif
+	}
+
 	inline U16x8 operator+(const U16x8& left, const U16x8& right) {
 		#if defined USE_BASIC_SIMD
 			return U16x8(ADD_U16_SIMD(left.v, right.v));
@@ -2414,6 +2714,26 @@
 			return U32x4(source[0], source[1], source[2], source[3]);
 		#endif
 	}
+	// Reinterprets the bits of four 32-bit lanes as eight 16-bit lanes without converting any values. Warning! Behavior depends on endianness.
+	inline U16x8 reinterpret_U16FromU32(const U32x4& vector) {
+		#if defined USE_BASIC_SIMD
+			return U16x8(REINTERPRET_U32_TO_U16_SIMD(vector.v));
+		#else
+			const uint16_t *source = (const uint16_t*)vector.scalars; // Reads the same memory as 16-bit elements.
+			return U16x8(
+			  source[0], source[1], source[2], source[3], source[4], source[5], source[6], source[7]
+			);
+		#endif
+	}
+	// Reinterprets the bits of eight 16-bit lanes as four 32-bit lanes without converting any values. Warning! Behavior depends on endianness.
+	inline U32x4 reinterpret_U32FromU16(const U16x8& vector) {
+		#if defined USE_BASIC_SIMD
+			return U32x4(REINTERPRET_U16_TO_U32_SIMD(vector.v));
+		#else
+			const uint32_t *source = (const uint32_t*)vector.scalars; // Reads the same memory as 32-bit elements.
+			return U32x4(source[0], source[1], source[2], source[3]);
+		#endif
+	}
 
 	// Unpacking to larger integers
 	inline U32x4 lowerToU32(const U16x8& vector) {
@@ -2592,7 +2912,7 @@
 		#define GATHER_U32x4_AVX2(SOURCE, FOUR_OFFSETS, SCALE) _mm_i32gather_epi32((const int32_t*)(SOURCE), FOUR_OFFSETS, SCALE)
 		#define GATHER_F32x4_AVX2(SOURCE, FOUR_OFFSETS, SCALE) _mm_i32gather_ps((const float*)(SOURCE), FOUR_OFFSETS, SCALE)
 	#endif
-	static inline U32x4 gather(const dsr::SafePointer<uint32_t> data, const U32x4 &elementOffset) {
+	static inline U32x4 gather_U32(dsr::SafePointer<const uint32_t> data, const U32x4 &elementOffset) {
 		#if defined USE_AVX2
 			// TODO: Implement safety checks for debug mode.
 			return U32x4(GATHER_U32x4_AVX2(data.getUnsafe(), elementOffset.v, 4));
@@ -2607,7 +2927,7 @@
 			);
 		#endif
 	}
-	static inline I32x4 gather(const dsr::SafePointer<int32_t> data, const U32x4 &elementOffset) {
+	static inline I32x4 gather_I32(dsr::SafePointer<const int32_t> data, const U32x4 &elementOffset) {
 		#if defined USE_AVX2
 			// TODO: Implement safety checks for debug mode.
 			return I32x4(GATHER_U32x4_AVX2(data.getUnsafe(), elementOffset.v, 4));
@@ -2622,7 +2942,7 @@
 			);
 		#endif
 	}
-	static inline F32x4 gather(const dsr::SafePointer<float> data, const U32x4 &elementOffset) {
+	static inline F32x4 gather_F32(dsr::SafePointer<const float> data, const U32x4 &elementOffset) {
 		#if defined USE_AVX2
 			// TODO: Implement safety checks for debug mode.
 			return F32x4(GATHER_F32x4_AVX2(data.getUnsafe(), elementOffset.v, 4));
@@ -2826,7 +3146,7 @@
 		#endif
 	}
 	inline U32x8 operator*(const U32x8& left, const U32x8& right) {
-		#if defined USE_AVX2
+		#if defined USE_256BIT_X_SIMD
 			return U32x8(MUL_U32_SIMD256(left.v, right.v));
 		#else
 			return U32x8(
@@ -2889,7 +3209,54 @@
 			);
 		#endif
 	}
-	inline U32x8 operator<<(const U32x8& left, uint32_t bitOffset) {
+	// Bitwise negation, flipping every bit in all eight 32-bit lanes.
+	inline U32x8 operator~(const U32x8& value) {
+		#if defined USE_BASIC_SIMD
+			// Exclusive or with all ones inverts every bit, reusing the ^ operator for U32x8.
+			const U32x8 allOnes(~uint32_t(0));
+			return value ^ allOnes;
+		#else
+			const uint32_t *lane = value.scalars;
+			return U32x8(
+			  ~lane[0], ~lane[1], ~lane[2], ~lane[3],
+			  ~lane[4], ~lane[5], ~lane[6], ~lane[7]
+			);
+		#endif
+	}
+
+	// Shifts each 32-bit lane in left to the left by the number of bits given in the same lane of bitOffsets.
+	//   Every lane in bitOffsets must be less than 32.
+	//   AVX2 has variable shifts per 32-bit lane. ARM NEON does not support 256-bit vectors, so other targets use the scalar fallback.
+	inline U32x8 operator<<(const U32x8& left, const U32x8 &bitOffsets) {
+		assert(allLanesLesser(bitOffsets, U32x8(32u)));
+		#if defined USE_AVX2
+			return U32x8(_mm256_sllv_epi32(left.v, bitOffsets.v));
+		#else
+			return U32x8(
+			  left.scalars[0] << bitOffsets.scalars[0],
+			  left.scalars[1] << bitOffsets.scalars[1],
+			  left.scalars[2] << bitOffsets.scalars[2],
+			  left.scalars[3] << bitOffsets.scalars[3],
+			  left.scalars[4] << bitOffsets.scalars[4],
+			  left.scalars[5] << bitOffsets.scalars[5],
+			  left.scalars[6] << bitOffsets.scalars[6],
+			  left.scalars[7] << bitOffsets.scalars[7]
+			);
+		#endif
+	}
+	// Shifts each 32-bit lane in left to the right by the number of bits given in the same lane of bitOffsets, filling with zeroes.
+	//   Every lane in bitOffsets must be less than 32.
+	//   AVX2 has variable logical shifts per 32-bit lane, so other targets use the scalar fallback.
+	inline U32x8 operator>>(const U32x8& left, const U32x8 &bitOffsets) {
+		assert(allLanesLesser(bitOffsets, U32x8(32u)));
+		#if defined USE_AVX2
+			return U32x8(_mm256_srlv_epi32(left.v, bitOffsets.v));
+		#else
+			return U32x8(
+			  left.scalars[0] >> bitOffsets.scalars[0],
+			  left.scalars[1] >> bitOffsets.scalars[1],
+			  left.scalars[2] >> bitOffsets.scalars[2],
+			  left.scalars[3] >> bitOffsets.scalars[3],
+			  left.scalars[4] >> bitOffsets.scalars[4],
+			  left.scalars[5] >> bitOffsets.scalars[5],
+			  left.scalars[6] >> bitOffsets.scalars[6],
+			  left.scalars[7] >> bitOffsets.scalars[7]
+			);
+		#endif
+	}
+	// bitOffset must be an immediate constant from 0 to 31, so a template argument is used.
+	template <uint32_t bitOffset>
+	inline U32x8 bitShiftLeftImmediate(const U32x8& left) {
+		static_assert(bitOffset < 32u);
 		#if defined USE_AVX2
 			return U32x8(_mm256_slli_epi32(left.v, bitOffset));
 		#else
@@ -2905,7 +3272,10 @@
 			);
 		#endif
 	}
-	inline U32x8 operator>>(const U32x8& left, uint32_t bitOffset) {
+	// bitOffset must be an immediate constant from 0 to 31, so a template argument is used.
+	template <uint32_t bitOffset>
+	inline U32x8 bitShiftRightImmediate(const U32x8& left) {
+		static_assert(bitOffset < 32u);
 		#if defined USE_AVX2
 			return U32x8(_mm256_srli_epi32(left.v, bitOffset));
 		#else
@@ -2921,6 +3291,7 @@
 			);
 		#endif
 	}
+
 	inline U16x16 operator+(const U16x16& left, const U16x16& right) {
 		#if defined USE_256BIT_X_SIMD
 			return U16x16(ADD_U16_SIMD256(left.v, right.v));
@@ -3120,6 +3491,27 @@
 			return U32x8(source[0], source[1], source[2], source[3], source[4], source[5], source[6], source[7]);
 		#endif
 	}
+	// Reads the bits of eight 32-bit lanes as sixteen 16-bit lanes.
+	// Warning! Behavior depends on endianness, because it decides which half of a 32-bit lane comes first.
+	inline U16x16 reinterpret_U16FromU32(const U32x8& vector) {
+		#if !defined USE_256BIT_X_SIMD
+			// Scalar fallback: view the 32-bit lanes in memory as consecutive 16-bit elements.
+			const uint16_t *halves = reinterpret_cast<const uint16_t*>(vector.scalars);
+			return U16x16(
+			  halves[0], halves[1], halves[2], halves[3], halves[4], halves[5], halves[6], halves[7],
+			  halves[8], halves[9], halves[10], halves[11], halves[12], halves[13], halves[14], halves[15]
+			);
+		#else
+			return U16x16(REINTERPRET_U32_TO_U16_SIMD256(vector.v));
+		#endif
+	}
+	// Reads the bits of sixteen 16-bit lanes as eight 32-bit lanes.
+	// Warning! Behavior depends on endianness.
+	inline U32x8 reinterpret_U32FromU16(const U16x16& vector) {
+		#if defined USE_256BIT_X_SIMD
+			// The result is a 256-bit vector, so it must be wrapped in U32x8 to match the return type.
+			return U32x8(REINTERPRET_U16_TO_U32_SIMD256(vector.v));
+		#else
+			const uint32_t *source = (const uint32_t*)vector.scalars;
+			return U32x8(source[0], source[1], source[2], source[3], source[4], source[5], source[6], source[7]);
+		#endif
+	}
 
 	// Unpacking to larger integers
 	inline U32x8 lowerToU32(const U16x16& vector) {
@@ -3255,6 +3647,8 @@
 	//   To get elements from the right side, combine the center vector with the right vector and shift one element to the left using vectorExtract_1 for the given type.
 	//   To get elements from the left side, combine the left vector with the center vector and shift one element to the right using vectorExtract_15 for 16 lanes, vectorExtract_7 for 8 lanes, or vectorExtract_3 for 4 lanes.
 
+	// TODO: Also allow using a template argument as the element offset, with a static assert on the offset, which might be useful in template programming.
+
 	U8x32 inline vectorExtract_0(const U8x32 &a, const U8x32 &b) { return a; }
 	U8x32 inline vectorExtract_1(const U8x32 &a, const U8x32 &b) { VECTOR_EXTRACT_GENERATOR_256_U8(1) }
 	U8x32 inline vectorExtract_2(const U8x32 &a, const U8x32 &b) { VECTOR_EXTRACT_GENERATOR_256_U8(2) }
@@ -3344,7 +3738,7 @@
 		#define GATHER_U32x8_AVX2(SOURCE, EIGHT_OFFSETS, SCALE) _mm256_i32gather_epi32((const int32_t*)(SOURCE), EIGHT_OFFSETS, SCALE)
 		#define GATHER_F32x8_AVX2(SOURCE, EIGHT_OFFSETS, SCALE) _mm256_i32gather_ps((const float*)(SOURCE), EIGHT_OFFSETS, SCALE)
 	#endif
-	static inline U32x8 gather(const dsr::SafePointer<uint32_t> data, const U32x8 &elementOffset) {
+	static inline U32x8 gather_U32(dsr::SafePointer<const uint32_t> data, const U32x8 &elementOffset) {
 		#if defined USE_AVX2
 			// TODO: Implement safety checks for debug mode.
 			return U32x8(GATHER_I32x8_AVX2(data.getUnsafe(), elementOffset.v, 4));
@@ -3363,7 +3757,7 @@
 			);
 		#endif
 	}
-	static inline I32x8 gather(const dsr::SafePointer<int32_t> data, const U32x8 &elementOffset) {
+	static inline I32x8 gather_I32(dsr::SafePointer<const int32_t> data, const U32x8 &elementOffset) {
 		#if defined USE_AVX2
 			// TODO: Implement safety checks for debug mode.
 			return I32x8(GATHER_U32x8_AVX2(data.getUnsafe(), elementOffset.v, 4));
@@ -3382,7 +3776,7 @@
 			);
 		#endif
 	}
-	static inline F32x8 gather(const dsr::SafePointer<float> data, const U32x8 &elementOffset) {
+	static inline F32x8 gather_F32(dsr::SafePointer<const float> data, const U32x8 &elementOffset) {
 		#if defined USE_AVX2
 			// TODO: Implement safety checks for debug mode.
 			return F32x8(GATHER_F32x8_AVX2(data.getUnsafe(), elementOffset.v, 4));
@@ -3447,6 +3841,7 @@
 	#undef FOR_SIGNED_VECTOR_TYPES
 	#undef FOR_UNSIGNED_VECTOR_TYPES
 
+	// TODO: Let SVE define completely separate types for dynamic vectors.
 	// The X vectors using the longest SIMD length that is efficient to use for both floating-point and integer types.
 	//   DSR_DEFAULT_ALIGNMENT
 	//     The number of bytes memory should be aligned with by default when creating buffers and images.
@@ -3487,6 +3882,7 @@
 	static const int laneCountX_16Bit = DSR_DEFAULT_VECTOR_SIZE / 2;
 	static const int laneCountX_8Bit = DSR_DEFAULT_VECTOR_SIZE;
 
+	// TODO: Let SVE define completely separate types for dynamic vectors.
 	// The F vector using the longest SIMD length that is efficient to use when only processing float values, even if no integer types are available in the same size.
 	//   Used when you know that your algorithm is only going to work with float types and you need the extra performance.
 	//     Some processors have AVX but not AVX2, meaning that it has 256-bit SIMD for floats, but only 128-bit SIMD for integers.
@@ -3504,4 +3900,27 @@
 	#endif
 	// Used to iterate over float pointers when using F32xF.
 	static const int laneCountF = DSR_FLOAT_VECTOR_SIZE / 4;
+
+	// Define traits.
+	//   Each fixed-size vector type is tagged with the DsrTrait_Any_* property matching its element type,
+	//   so that the 128-bit and 256-bit versions of the same element type share one trait.
+	//   NOTE(review): DSR_APPLY_PROPERTY is defined outside of this view, presumably in base/DsrTraits.h. Confirm what it expands to.
+	DSR_APPLY_PROPERTY(DsrTrait_Any_U8 , U8x16)
+	DSR_APPLY_PROPERTY(DsrTrait_Any_U8 , U8x32)
+	DSR_APPLY_PROPERTY(DsrTrait_Any_U16, U16x8)
+	DSR_APPLY_PROPERTY(DsrTrait_Any_U16, U16x16)
+	DSR_APPLY_PROPERTY(DsrTrait_Any_U32, U32x4)
+	DSR_APPLY_PROPERTY(DsrTrait_Any_U32, U32x8)
+	DSR_APPLY_PROPERTY(DsrTrait_Any_I32, I32x4)
+	DSR_APPLY_PROPERTY(DsrTrait_Any_I32, I32x8)
+	DSR_APPLY_PROPERTY(DsrTrait_Any_F32, F32x4)
+	DSR_APPLY_PROPERTY(DsrTrait_Any_F32, F32x8)
+
+	// TODO: Use as independent types when the largest vector lengths are not known at compile time on ARM SVE.
+	//DSR_APPLY_PROPERTY(DsrTrait_Any_U8 , U8xX)
+	//DSR_APPLY_PROPERTY(DsrTrait_Any_U16, U16xX)
+	//DSR_APPLY_PROPERTY(DsrTrait_Any_U32, U32xX)
+	//DSR_APPLY_PROPERTY(DsrTrait_Any_I32, I32xX)
+	//DSR_APPLY_PROPERTY(DsrTrait_Any_F32, F32xX)
+	//DSR_APPLY_PROPERTY(DsrTrait_Any_F32, F32xF)
+
+	}
+
 #endif

+ 7 - 0
Source/DFPSR/base/simd3D.h

@@ -31,6 +31,8 @@
 #ifndef DFPSR_SIMD_3D
 #define DFPSR_SIMD_3D
 
+namespace dsr {
+
 // These are the infix operations for 2D SIMD vectors F32x4x2, F32x8x2...
 #define SIMD_VECTOR_INFIX_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
 inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
@@ -273,5 +275,10 @@ SIMD_VECTOR_INFIX_OPERATORS_3D(F32x8x3, F32x8, float)
 	using F32xFx2 = F32x8x2;
 #endif
 
+#undef SIMD_VECTOR_MEMBER_OPERATORS_2D
+#undef SIMD_VECTOR_MEMBER_OPERATORS_3D
+
+}
+
 #endif
 

+ 21 - 12
Source/DFPSR/base/virtualStack.cpp

@@ -21,8 +21,9 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-#include "virtualStack.h"
 #include <thread>
+#include "virtualStack.h"
+#include "heap.h"
 #include "../api/stringAPI.h"
 
 namespace dsr {
@@ -53,7 +54,7 @@ namespace dsr {
 	struct DynamicStackMemory : public StackMemory {
 		~DynamicStackMemory() {
 			if (this->top != nullptr) {
-				free(this->top);
+				heap_decreaseUseCount(this->top);
 			}
 		}
 	};
@@ -72,7 +73,7 @@ namespace dsr {
 		pointer += totalSize;
 	}
 
-	static UnsafeAllocation stackAllocate(StackMemory& stack, uint64_t paddedSize, uintptr_t alignmentAndMask) {
+	static UnsafeAllocation stackAllocate(StackMemory& stack, uint64_t paddedSize, uintptr_t alignmentAndMask, const char *name) {
 		uint8_t *newStackPointer = stack.stackPointer;
 		// Allocate memory for payload.
 		uint64_t payloadTotalSize = increaseStackPointer(newStackPointer, paddedSize, alignmentAndMask);
@@ -88,7 +89,7 @@ namespace dsr {
 			stack.stackPointer = newStackPointer;
 			// Write the header to memory.
 			AllocationHeader *header = (AllocationHeader*)(stack.stackPointer);
-			*header = AllocationHeader(payloadTotalSize + headerTotalSize, true);
+			*header = AllocationHeader(payloadTotalSize + headerTotalSize, true, name);
 			// Clear the new allocation for determinism.
 			std::memset((void*)data, 0, payloadTotalSize);
 			// Return a pointer to the payload.
@@ -100,10 +101,10 @@ namespace dsr {
 	thread_local DynamicStackMemory dynamicMemory[MAX_EXTRA_STACKS]; // Index 0..MAX_EXTRA_STACKS-1
 	thread_local int32_t stackIndex = -1;
 
-	UnsafeAllocation virtualStack_push(uint64_t paddedSize, uintptr_t alignmentAndMask) {
+	UnsafeAllocation virtualStack_push(uint64_t paddedSize, uintptr_t alignmentAndMask, const char *name) {
 		// TODO: Assert that the alignment mask begins with ones and ends with zeroes, in case that the caller accidentally truncated the beginning of the mask.
 		if (stackIndex < 0) {
-			UnsafeAllocation result = stackAllocate(fixedMemory, paddedSize, alignmentAndMask);
+			UnsafeAllocation result = stackAllocate(fixedMemory, paddedSize, alignmentAndMask, name);
 			// Check that we did not run out of memory.
 			if (result.data == nullptr) {
 				// Not enough space in thread local memory. Moving to the first dynamic stack.
@@ -121,25 +122,33 @@ namespace dsr {
 		assert(stackIndex < MAX_EXTRA_STACKS);
 		// Allocate memory in the dynamic stack if not yet allocated.
 		if (dynamicMemory[stackIndex].top == nullptr) {
+			// Decide a minimum size to allocate.
 			uint64_t regionSize = 16777216 * (1 << stackIndex);
 			if (paddedSize * 4 > regionSize) {
 				regionSize = paddedSize * 4;
 			}
-			uint8_t *newMemory = (uint8_t*)malloc(regionSize);
-			if (newMemory == nullptr) {
+			// Allocate the memory.
+			UnsafeAllocation newAllocation = heap_allocate(regionSize, true);
+			// Only query and modify the allocation if it exists, so that a failed allocation reaches the error below without a null pointer being passed to the heap.
+			if (newAllocation.data != nullptr) {
+				// Ask how large the allocation actually is.
+				regionSize = heap_getAllocationSize(newAllocation.data);
+				// Use the whole allocation.
+				heap_setUsedSize(newAllocation.data, regionSize);
+				// Tell the allocation that we are using it.
+				heap_increaseUseCount(newAllocation.data);
+			}
+			if (newAllocation.data == nullptr) {
 				throwError(U"Failed to allocate ", regionSize, U" bytes of heap memory for expanding the virtual stack when trying to allocate ", paddedSize, " bytes!\n");
 				return UnsafeAllocation(nullptr, nullptr);
 			} else {
 				// Keep the new allocation.
-				dynamicMemory[stackIndex].top = newMemory;
+				dynamicMemory[stackIndex].top = newAllocation.data;
 				// Start from the back of the new allocation.
-				dynamicMemory[stackIndex].stackPointer = newMemory + regionSize;
-				dynamicMemory[stackIndex].bottom = newMemory + regionSize;
+				dynamicMemory[stackIndex].stackPointer = newAllocation.data + regionSize;
+				dynamicMemory[stackIndex].bottom = newAllocation.data + regionSize;
 			}
 		}
 		assert(dynamicMemory[stackIndex].stackPointer != nullptr);
 		// Allocate memory.
-		UnsafeAllocation result = stackAllocate(dynamicMemory[stackIndex], paddedSize, alignmentAndMask);
+		UnsafeAllocation result = stackAllocate(dynamicMemory[stackIndex], paddedSize, alignmentAndMask, name);
 		if (result.data == nullptr) {
 			if (stackIndex >= MAX_EXTRA_STACKS - 1) {
 				throwError(U"Exceeded MAX_EXTRA_STACKS to allocate more heap memory for a thread local virtual stack!\n");

+ 13 - 13
Source/DFPSR/base/virtualStack.h

@@ -30,24 +30,24 @@ namespace dsr {
 	// Allocate memory in the virtual stack owned by the current thread.
 	//   paddedSize is the number of bytes to allocate including all elements and internal padding.
 	//     paddedSize must be at least 1, but has no rounding requirements.
-	//   alignmentMask should only contain zeroes at the bits to round away for alignment.
-	//     alignmentMask should be the bitwise negation of the alignment minus one, where the alignment is a power of two.
+	//   The start of the allocation is aligned according to alignmentAndMask.
+	//     alignmentAndMask should only contain zeroes at the bits to round away for alignment.
+	//     alignmentAndMask should be the bitwise negation of the alignment minus one, where the alignment is a power of two.
 	//     ~(alignment - 1)
-	UnsafeAllocation virtualStack_push(uint64_t paddedSize, uintptr_t alignmentAndMask);
+	UnsafeAllocation virtualStack_push(uint64_t paddedSize, uintptr_t alignmentAndMask, const char *name = "Nameless virtual stack allocation");
 
 	// A simpler way to get the correct alignment is to allocate a number of elements with a specific type.
-	// TODO: Create another function for manual alignment exceeding the type's alignment using another template argument.
-	// TODO: Let the address offset be negated and start with the allocation size going down to zero,
-	//       so that rounding up addresses can be done by simply masking the least significant bits.
+	// Pre-condition:
+	//   sizeof(T) % alignof(T) == 0
 	template <typename T>
-	SafePointer<T> virtualStack_push(uint64_t elementCount, const char *name) {
+	SafePointer<T> virtualStack_push(uint64_t elementCount, const char *name = "Nameless virtual stack allocation") {
 		// Calculate element size and multiply by element count to get the total size.
-		uint64_t paddedSize = memory_getPaddedSize<T>() * elementCount;
+		uint64_t paddedSize = sizeof(T) * elementCount;
 		// Allocate the data with the amount of alignment requested by the element type T.
-		UnsafeAllocation result = virtualStack_push(paddedSize, memory_createAlignmentAndMask((uintptr_t)alignof(T)));
+		UnsafeAllocation result = virtualStack_push(paddedSize, memory_createAlignmentAndMask((uintptr_t)alignof(T)), name);
 		// Return a safe pointer to the allocated data.
 		#ifdef SAFE_POINTER_CHECKS
-			return SafePointer<T>(name, (T*)(result.data), (intptr_t)paddedSize, result.header);
+			return SafePointer<T>(result.header, result.header->allocationIdentity, name, (T*)(result.data), (intptr_t)paddedSize);
 		#else
 			return SafePointer<T>(name, (T*)(result.data), (intptr_t)paddedSize);
 		#endif
@@ -58,12 +58,12 @@ namespace dsr {
 	void virtualStack_pop();
 
 	// Allocate this array on the stack to automatically free the memory when the scope ends.
-	//   Replaces VLA or alloca.
+	//   Replaces Variable Length Arrays (VLA) or alloca.
 	template <typename T>
 	class VirtualStackAllocation : public SafePointer<T> {
 	public:
-		VirtualStackAllocation(uint64_t elementCount)
-		: SafePointer<T>(virtualStack_push<T>(elementCount, "virtual stack allocation")) {}
+		VirtualStackAllocation(uint64_t elementCount, const char *name = "Nameless virtual stack allocation")
+		: SafePointer<T>(virtualStack_push<T>(elementCount, name)) {}
 		~VirtualStackAllocation() {
 			virtualStack_pop();
 		}

+ 2 - 2
Source/DFPSR/font/Font.cpp

@@ -40,9 +40,9 @@ RasterFontImpl::RasterFontImpl(const String& name, int32_t size, int32_t spacing
 
 RasterFontImpl::~RasterFontImpl() {}
 
-std::shared_ptr<RasterFontImpl> RasterFontImpl::createLatinOne(const String& name, const ImageU8& atlas) {
+Handle<RasterFontImpl> RasterFontImpl::createLatinOne(const String& name, const ImageU8& atlas) {
 	int32_t size = image_getHeight(atlas) / 16;
-	std::shared_ptr<RasterFontImpl> result = std::make_shared<RasterFontImpl>(name, size, size / 16, size / 2);
+	Handle<RasterFontImpl> result = handle_create<RasterFontImpl>(name, size, size / 16, size / 2);
 	result->registerLatinOne16x16(atlas);
 	return result;
 }

+ 6 - 3
Source/DFPSR/font/Font.h

@@ -24,8 +24,11 @@
 #ifndef DFPSR_GUI_FONT
 #define DFPSR_GUI_FONT
 
-#include "../api/types.h"
-#include "../collection/List.h"
+#include "../base/Handle.h"
+#include "../api/stringAPI.h"
+#include "../api/imageAPI.h"
+#include "../math/IRect.h"
+#include "../math/IVector.h"
 
 namespace dsr {
 
@@ -67,7 +70,7 @@ public:
 public:
 	// Constructor
 	RasterFontImpl(const String& name, int32_t size, int32_t spacing, int32_t spaceWidth);
-	static std::shared_ptr<RasterFontImpl> createLatinOne(const String& name, const ImageU8& atlas);
+	static Handle<RasterFontImpl> createLatinOne(const String& name, const ImageU8& atlas);
 	// Destructor
 	~RasterFontImpl();
 public:

+ 0 - 1
Source/DFPSR/gui/BackendWindow.h

@@ -27,7 +27,6 @@
 #include <cstdint>
 #include <memory>
 #include "InputEvent.h"
-#include "../image/ImageRgbaU8.h"
 #include "../api/imageAPI.h"
 #include "../api/stringAPI.h"
 #include "../collection/List.h"

+ 11 - 11
Source/DFPSR/gui/DsrWindow.cpp

@@ -59,7 +59,7 @@ void dsr::gui_initialize() {
 	}
 }
 
-DsrWindow::DsrWindow(std::shared_ptr<BackendWindow> backend)
+DsrWindow::DsrWindow(Handle<BackendWindow> backend)
  : backend(backend), innerWidth(backend->getWidth()), innerHeight(backend->getHeight()) {
 	// Initialize the GUI system if needed
 	gui_initialize();
@@ -75,7 +75,7 @@ DsrWindow::DsrWindow(std::shared_ptr<BackendWindow> backend)
 	};
 	// Receiving notifications about resizing should be done in the main panel
 	this->backend->resizeEvent() = [this](int width, int height) {
-		BackendWindow *backend = this->backend.get();
+		BackendWindow *backend = this->backend.getUnsafe();
 		ImageRgbaU8 canvas = backend->getCanvas();
 		this->innerWidth = width;
 		this->innerHeight = height;
@@ -90,7 +90,7 @@ DsrWindow::DsrWindow(std::shared_ptr<BackendWindow> backend)
 	this->resetInterface();
 }
 
-static void setBackendWindowHandle(std::shared_ptr<VisualComponent> component, std::shared_ptr<BackendWindow> windowHandle) {
+static void setBackendWindowHandle(Handle<VisualComponent> component, Handle<BackendWindow> windowHandle) {
 	component->window = windowHandle;
 	for (int c = 0; c < component->children.length(); c++) {
 		setBackendWindowHandle(component->children[c], windowHandle);
@@ -99,14 +99,14 @@ static void setBackendWindowHandle(std::shared_ptr<VisualComponent> component, s
 
 DsrWindow::~DsrWindow() {
 	// Disconnect the backend window from all components, so that handles to components without a DsrWindow will not prevent the BackendWindow from being freed.
-	setBackendWindowHandle(this->mainPanel, std::shared_ptr<BackendWindow>());
+	setBackendWindowHandle(this->mainPanel, Handle<BackendWindow>());
 }
 
 void DsrWindow::applyLayout() {
 	this->mainPanel->applyLayout(IRect(0, 0, this->getCanvasWidth(), this->getCanvasHeight()));
 }
 
-std::shared_ptr<VisualComponent> DsrWindow::findComponentByName(ReadableString name) const {
+Handle<VisualComponent> DsrWindow::findComponentByName(ReadableString name) const {
 	if (string_match(this->mainPanel->getName(), name)) {
 		return this->mainPanel;
 	} else {
@@ -114,7 +114,7 @@ std::shared_ptr<VisualComponent> DsrWindow::findComponentByName(ReadableString n
 	}
 }
 
-std::shared_ptr<VisualComponent> DsrWindow::findComponentByNameAndIndex(ReadableString name, int index) const {
+Handle<VisualComponent> DsrWindow::findComponentByNameAndIndex(ReadableString name, int index) const {
 	if (string_match(this->mainPanel->getName(), name) && this->mainPanel->getIndex() == index) {
 		return this->mainPanel;
 	} else {
@@ -122,14 +122,14 @@ std::shared_ptr<VisualComponent> DsrWindow::findComponentByNameAndIndex(Readable
 	}
 }
 
-std::shared_ptr<VisualComponent> DsrWindow::getRootComponent() const {
+Handle<VisualComponent> DsrWindow::getRootComponent() const {
 	return this->mainPanel;
 }
 
 void DsrWindow::resetInterface() {
 	// Create an empty main panel
-	this->mainPanel = std::dynamic_pointer_cast<VisualComponent>(createPersistentClass("Panel"));
-	if (this->mainPanel.get() == nullptr) {
+	this->mainPanel = handle_dynamicCast<VisualComponent>(createPersistentClass("Panel"));
+	if (this->mainPanel.isNull()) {
 		throwError(U"DsrWindow::resetInterface: The window's Panel could not be created!");
 	}
 	this->mainPanel->setName("mainPanel");
@@ -140,10 +140,10 @@ void DsrWindow::resetInterface() {
 
 void DsrWindow::loadInterfaceFromString(String layout, const ReadableString &fromPath) {
 	// Load a tree structure of visual components from text
-	this->mainPanel = std::dynamic_pointer_cast<VisualComponent>(createPersistentClassFromText(layout, fromPath));
+	this->mainPanel = handle_dynamicCast<VisualComponent>(createPersistentClassFromText(layout, fromPath));
 	// Re-assign the backend window handle
 	setBackendWindowHandle(this->mainPanel, this->backend);
-	if (this->mainPanel.get() == nullptr) {
+	if (this->mainPanel.isNull()) {
 		throwError(U"DsrWindow::loadInterfaceFromString: The window's root component could not be created!\n\nLayout:\n", layout, "\n");
 	}
 	this->applyLayout();

+ 10 - 12
Source/DFPSR/gui/DsrWindow.h

@@ -25,11 +25,9 @@
 #ifndef DFPSR_GUI_DSRWINDOW
 #define DFPSR_GUI_DSRWINDOW
 
-#include <memory>
 #include "../gui/VisualComponent.h"
 #include "../gui/BackendWindow.h"
 #include "../api/stringAPI.h"
-#include "../api/types.h"
 
 // The DSR window is responsible for connecting visual interfaces with the backend window.
 //   An optional depth buffer is allocated on demand when requested, and kept until the window resizes.
@@ -44,10 +42,10 @@ public:
 	// TODO: Should there be a separate interface context object to reduce the number of variables placed in each component?
 	//       If they all store a handle to the backend window, they could instead have a generic interface object storing pointers to the window, root, active components, et cetera...
 	// Window backend, which the API is allowed to call directly to bypass DsrWindow for trivial operations.
-	std::shared_ptr<BackendWindow> backend;
+	Handle<BackendWindow> backend;
 private:
 	// The root component
-	std::shared_ptr<VisualComponent> mainPanel;
+	Handle<VisualComponent> mainPanel;
 	AlignedImageF32 depthBuffer;
 	// The inner window dimensions that are synchronized with the canvas.
 	//   The backend on the contrary may have its size changed before the resize event has been fetched.
@@ -57,7 +55,7 @@ private:
 	IVector2D lastMousePosition;
 public:
 	// Constructor
-	explicit DsrWindow(std::shared_ptr<BackendWindow> backend);
+	explicit DsrWindow(Handle<BackendWindow> backend);
 	// Destructor
 	virtual ~DsrWindow();
 public:
@@ -65,19 +63,19 @@ public:
 		void applyLayout();
 
 		// Component getters
-		std::shared_ptr<VisualComponent> findComponentByName(ReadableString name) const;
+		Handle<VisualComponent> findComponentByName(ReadableString name) const;
 		template <typename T>
-		std::shared_ptr<T> findComponentByName(ReadableString name) const {
-			return std::dynamic_pointer_cast<T>(this->findComponentByName(name));
+		Handle<T> findComponentByName(ReadableString name) const {
+			return handle_dynamicCast<T>(this->findComponentByName(name));
 		}
-		std::shared_ptr<VisualComponent> findComponentByNameAndIndex(ReadableString name, int index) const;
+		Handle<VisualComponent> findComponentByNameAndIndex(ReadableString name, int index) const;
 		template <typename T>
-		std::shared_ptr<T> findComponentByNameAndIndex(ReadableString name, int index) const {
-			return std::dynamic_pointer_cast<T>(this->findComponentByNameAndIndex(name, index));
+		Handle<T> findComponentByNameAndIndex(ReadableString name, int index) const {
+			return handle_dynamicCast<T>(this->findComponentByNameAndIndex(name, index));
 		}
 
 		// Get the root component that contains all other components in the window
-		std::shared_ptr<VisualComponent> getRootComponent() const;
+		Handle<VisualComponent> getRootComponent() const;
 		void resetInterface();
 		void loadInterfaceFromString(String layout, const ReadableString &fromPath);
 		String saveInterfaceToString();

+ 50 - 59
Source/DFPSR/gui/VisualComponent.cpp

@@ -23,7 +23,6 @@
 
 #include <cstdint>
 #include "VisualComponent.h"
-#include "../image/internal/imageInternal.h"
 
 using namespace dsr;
 
@@ -75,12 +74,12 @@ Persistent* VisualComponent::findAttribute(const ReadableString &name) {
 
 // Pre-condition: component != nullptr
 // Post-condition: Returns the root of component
-static VisualComponent *getRoot(VisualComponent *component) {
+static Handle<VisualComponent> getRoot(VisualComponent *component) {
 	assert(component != nullptr);
 	while (component->parent != nullptr) {
 		component = component->parent;
 	}
-	return component;
+	return component->getHandle();
 }
 
 IVector2D VisualComponent::getDesiredDimensions() {
@@ -198,7 +197,7 @@ static void drawOverlays(ImageRgbaU8& targetImage, VisualComponent &component, c
 		}
 		// Draw overlays in each child component on top.
 		for (int i = 0; i < component.getChildCount(); i++) {
-			drawOverlays(targetImage, *(component.children[i]), offset + component.children[i]->location.upperLeft());
+			drawOverlays(targetImage, component.children[i].getReference(), offset + component.children[i]->location.upperLeft());
 		}
 	}
 }
@@ -246,10 +245,10 @@ void VisualComponent::drawSelf(ImageRgbaU8& targetImage, const IRect &relativeLo
 void VisualComponent::drawOverlay(ImageRgbaU8& targetImage, const IVector2D &absoluteOffset) {}
 
 // Manual use with the correct type
-void VisualComponent::addChildComponent(std::shared_ptr<VisualComponent> child) {
+void VisualComponent::addChildComponent(Handle<VisualComponent> child) {
 	if (!this->isContainer()) {
 		sendWarning(U"Cannot attach a child to a non-container parent component!\n");
-	} else if (child.get() == this) {
+	} else if (child.getUnsafe() == this) {
 		sendWarning(U"Cannot attach a component to itself!\n");
 	} else if (child->hasChild(this)) {
 		sendWarning(U"Cannot attach to its own parent as a child component!\n");
@@ -267,10 +266,10 @@ void VisualComponent::addChildComponent(std::shared_ptr<VisualComponent> child)
 }
 
 // Automatic insertion from loading
-bool VisualComponent::addChild(std::shared_ptr<Persistent> child) {
+bool VisualComponent::addChild(Handle<Persistent> child) {
 	// Try to cast from base class Persistent to derived class VisualComponent
-	std::shared_ptr<VisualComponent> visualComponent = std::dynamic_pointer_cast<VisualComponent>(child);
-	if (visualComponent.get() == nullptr) {
+	Handle<VisualComponent> visualComponent = handle_dynamicCast<VisualComponent>(child);
+	if (visualComponent.isNull()) {
 		return false; // Wrong type!
 	} else {
 		this->addChildComponent(visualComponent);
@@ -282,16 +281,16 @@ int VisualComponent::getChildCount() const {
 	return this->children.length();
 }
 
-std::shared_ptr<Persistent> VisualComponent::getChild(int index) const {
+Handle<Persistent> VisualComponent::getChild(int index) const {
 	if (index >= 0 && index < this->children.length()) {
 		return this->children[index];
 	} else {
-		return std::shared_ptr<Persistent>(); // Null handle for out of bound.
+		return Handle<Persistent>(); // Null handle for out of bound.
 	}
 }
 
-static void detachFromWindow(std::shared_ptr<VisualComponent> component) {
-	component->window = std::shared_ptr<BackendWindow>();
+static void detachFromWindow(Handle<VisualComponent> component) {
+	component->window = Handle<BackendWindow>();
 	for (int c = 0; c < component->children.length(); c++) {
 		detachFromWindow(component->children[c]);
 	}
@@ -304,8 +303,8 @@ void VisualComponent::detachFromParent() {
 		parent->childChanged = true;
 		// Find the component to detach among the child components.
 		for (int i = 0; i < parent->getChildCount(); i++) {
-			std::shared_ptr<VisualComponent> current = parent->children[i];
-			if (current.get() == this) {
+			Handle<VisualComponent> current = parent->children[i];
+			if (current.getUnsafe() == this) {
 				// Disconnect child from backend window.
 				detachFromWindow(parent->children[i]);
 				// Disconnect parent from child.
@@ -324,8 +323,8 @@ void VisualComponent::detachFromParent() {
 
 bool VisualComponent::hasChild(VisualComponent *child) const {
 	for (int i = 0; i < this->getChildCount(); i++) {
-		std::shared_ptr<VisualComponent> current = this->children[i];
-		if (current.get() == child) {
+		Handle<VisualComponent> current = this->children[i];
+		if (current.getUnsafe() == child) {
 			return true; // Found the component
 		} else {
 			if (current->hasChild(child)) {
@@ -336,38 +335,38 @@ bool VisualComponent::hasChild(VisualComponent *child) const {
 	return false; // Could not find the component
 }
 
-bool VisualComponent::hasChild(std::shared_ptr<VisualComponent> child) const {
-	return this->hasChild(child.get());
+bool VisualComponent::hasChild(Handle<VisualComponent> child) const {
+	return this->hasChild(child.getUnsafe());
 }
 
-std::shared_ptr<VisualComponent> VisualComponent::findChildByName(ReadableString name) const {
+Handle<VisualComponent> VisualComponent::findChildByName(ReadableString name) const {
 	for (int i = 0; i < this->getChildCount(); i++) {
-		std::shared_ptr<VisualComponent> current = this->children[i];
+		Handle<VisualComponent> current = this->children[i];
 		if (string_match(current->getName(), name)) {
 			return current; // Found the component
 		} else {
-			std::shared_ptr<VisualComponent> searchResult = current->findChildByName(name);
-			if (searchResult.get() != nullptr) {
+			Handle<VisualComponent> searchResult = current->findChildByName(name);
+			if (searchResult.isNotNull()) {
 				return searchResult; // Found the component recursively
 			}
 		}
 	}
-	return std::shared_ptr<VisualComponent>(); // Could not find the component
+	return Handle<VisualComponent>(); // Could not find the component
 }
 
-std::shared_ptr<VisualComponent> VisualComponent::findChildByNameAndIndex(ReadableString name, int index) const {
+Handle<VisualComponent> VisualComponent::findChildByNameAndIndex(ReadableString name, int index) const {
 	for (int i = 0; i < this->getChildCount(); i++) {
-		std::shared_ptr<VisualComponent> current = this->children[i];
+		Handle<VisualComponent> current = this->children[i];
 		if (string_match(current->getName(), name) && current->getIndex() == index) {
 			return current; // Found the component
 		} else {
-			std::shared_ptr<VisualComponent> searchResult = current->findChildByNameAndIndex(name, index);
-			if (searchResult.get() != nullptr) {
+			Handle<VisualComponent> searchResult = current->findChildByNameAndIndex(name, index);
+			if (searchResult.isNotNull()) {
 				return searchResult; // Found the component recursively
 			}
 		}
 	}
-	return std::shared_ptr<VisualComponent>(); // Could not find the component
+	return Handle<VisualComponent>(); // Could not find the component
 }
 
 bool VisualComponent::pointIsInside(const IVector2D& pixelPosition) {
@@ -384,34 +383,26 @@ bool VisualComponent::pointIsInsideOfHover(const IVector2D& pixelPosition) {
 }
 
 // Non-recursive top-down search
-std::shared_ptr<VisualComponent> VisualComponent::getDirectChild(const IVector2D& pixelPosition) {
+Handle<VisualComponent> VisualComponent::getDirectChild(const IVector2D& pixelPosition) {
 	// Iterate child components in reverse drawing order
 	for (int i = this->getChildCount() - 1; i >= 0; i--) {
-		std::shared_ptr<VisualComponent> currentChild = this->children[i];
+		Handle<VisualComponent> currentChild = this->children[i];
 		// Check if the point is inside the child component
 		if (currentChild->getVisible() && currentChild->pointIsInside(pixelPosition)) {
 			return currentChild;
 		}
 	}
 	// Return nothing if the point missed all child components
-	return std::shared_ptr<VisualComponent>();
+	return Handle<VisualComponent>();
 }
 
-// TODO: Store a pointer to the window in each visual component, so that one can get the shared pointer to the root and get access to clipboard functionality.
-std::shared_ptr<VisualComponent> VisualComponent::getShared() {
-	VisualComponent *parent = this->parent;
-	if (parent == nullptr) {
-		// Not working for the root component, because that would require access to the window.
-		return std::shared_ptr<VisualComponent>();
-	} else {
-		for (int c = 0; c < parent->children.length(); c++) {
-			if (parent->children[c].get() == this) {
-				return parent->children[c];
-			}
-		}
-		// Not found in its own parent if the component tree is broken.
-		return std::shared_ptr<VisualComponent>();
-	}
+// Create a Handle to the component.
+Handle<VisualComponent> VisualComponent::getHandle() {
+	#ifdef SAFE_POINTER_CHECKS
+		return Handle<VisualComponent>(this, heap_getHeader(this)->allocationIdentity);
+	#else
+		return Handle<VisualComponent>(this);
+	#endif
 }
 
 void VisualComponent::updateStateEvent(ComponentState oldState, ComponentState newState) {}
@@ -433,7 +424,7 @@ void VisualComponent::sendNotifications() {
 	//   Run the loop backwards, so that no components are missed when once is detached.
 	for (int i = this->getChildCount() - 1; i >= 0; i--) {
 		// Use a reference counted pointer to the child, so that it can be removed safely outside of custom events.
-		std::shared_ptr<VisualComponent> child = this->children[i];
+		Handle<VisualComponent> child = this->children[i];
 		if (child->detach) {
 			child->detach = false;
 			child->detachFromParent();
@@ -455,7 +446,7 @@ static VisualComponent *getTopmostOverlay(VisualComponent *component, const IVec
 	if (component->getVisible()) {
 		// Go through child components in reverse draw order to stop when reaching the one that is visible.
 		for (int i = component->getChildCount() - 1; i >= 0; i--) {
-			VisualComponent *result = getTopmostOverlay(component->children[i].get(), point - component->children[i]->location.upperLeft());
+			VisualComponent *result = getTopmostOverlay(component->children[i].getUnsafe(), point - component->children[i]->location.upperLeft());
 			if (result != nullptr) return result;
 		}
 		// Check itself behind child overlays.
@@ -488,7 +479,7 @@ void VisualComponent::defocusChildren() {
 }
 
 void VisualComponent::addStateBits(ComponentState directStates, bool unique) {
-	VisualComponent *root = getRoot(this);
+	Handle<VisualComponent> root = getRoot(this);
 	// Remove all focus in the window if unique.
 	if (unique) root->applyStateAndMask(~directStates);
 	// Apply state directly to itself and indirectly to parents.
@@ -498,7 +489,7 @@ void VisualComponent::addStateBits(ComponentState directStates, bool unique) {
 }
 
 void VisualComponent::removeStateBits(ComponentState directStates) {
-	VisualComponent *root = getRoot(this);
+	Handle<VisualComponent> root = getRoot(this);
 	// Remove state directly from itself and indirectly from parents.
 	this->currentState &= ~directStates;
 	// Update indirect states, so that parent components know what happens to their child components.
@@ -550,7 +541,7 @@ void VisualComponent::sendMouseEvent(const MouseEvent& event, bool recursive) {
 	//   Grabbing with the dragComponent pointer makes sure that move and up events can be given even if the cursor moves outside of the component.
 	VisualComponent *childComponent = nullptr;
 	// Find the component to interact with.
-	if (event.mouseEventType == MouseEventType::MouseDown || this->dragComponent.get() == nullptr) {
+	if (event.mouseEventType == MouseEventType::MouseDown || this->dragComponent.isNull()) {
 		// Check the overlays first when getting mouse events to the root component.
 		if (this->parent == nullptr) {
 			childComponent = getTopmostOverlay(this, event.position);
@@ -559,19 +550,19 @@ void VisualComponent::sendMouseEvent(const MouseEvent& event, bool recursive) {
 		//   The sendMouseEvent method can be called recursively from a member of an overlay, so we can't know
 		//   which component is at the top without asking the components that manage interaction with their children.
 		if (childComponent == nullptr && !this->managesChildren()) {
-			std::shared_ptr<VisualComponent> nextContainer = this->getDirectChild(event.position);
-			if (nextContainer.get() != nullptr) {
-				childComponent = nextContainer.get();
+			Handle<VisualComponent> nextContainer = this->getDirectChild(event.position);
+			if (nextContainer.isNotNull()) {
+				childComponent = nextContainer.getUnsafe();
 			}
 		}
-	} else if (dragComponent.get() != nullptr) {
+	} else if (dragComponent.isNotNull()) {
 		// If we're grabbing a component, keep sending events to it.
-		childComponent = this->dragComponent.get();
+		childComponent = this->dragComponent.getUnsafe();
 	}
 	// Grab any detected component on mouse down events.
 	if (event.mouseEventType == MouseEventType::MouseDown && childComponent != nullptr) {
 		childComponent->makeFocused();
-		this->dragComponent = childComponent->getShared();
+		this->dragComponent = childComponent->getHandle();
 		this->holdCount++;
 	}
 	// Send the signal to a child component or itself.
@@ -598,7 +589,7 @@ void VisualComponent::sendMouseEvent(const MouseEvent& event, bool recursive) {
 	if (event.mouseEventType == MouseEventType::MouseUp) {
 		this->holdCount--;
 		if (this->holdCount <= 0) {
-			this->dragComponent = std::shared_ptr<VisualComponent>(); // Abort drag.
+			this->dragComponent = Handle<VisualComponent>(); // Abort drag.
 			// Reset when we had more up than down events, in case that the root panel was created with a button already pressed.
 			this->holdCount = 0;
 		}

+ 12 - 11
Source/DFPSR/gui/VisualComponent.h

@@ -42,20 +42,21 @@ class VisualComponent : public Persistent {
 PERSISTENT_DECLARATION(VisualComponent)
 public: // Relations
 	// Handle to the backend window.
-	std::shared_ptr<BackendWindow> window;
+	Handle<BackendWindow> window;
+	// TODO: Should a weak handle be implemented to safely avoid cycles, or is this safe enough?
 	// Parent component
 	VisualComponent *parent = nullptr;
 	IRect givenSpace; // Remembering the local region that was reserved inside of the parent component.
 	bool regionAccessed = false; // If someone requested access to the region, remember to update layout in case of new settings.
 	// Child components
-	List<std::shared_ptr<VisualComponent>> children;
+	List<Handle<VisualComponent>> children;
 	// Remember the component used for a drag event.
 	//   Ensures that mouse down events are followed by mouse up events on the same component.
 	int holdCount = 0;
 	// Marked for removal from the parent when set to true.
 	bool detach = false;
 	// Remember the pressed component for sending mouse move events outside of its region.
-	std::shared_ptr<VisualComponent> dragComponent;
+	Handle<VisualComponent> dragComponent;
 private: // States
 	// Use methods to set the current state, then have it copied to previousState after calling updateStateEvent in sendNotifications.
 	ComponentState currentState = 0;
@@ -156,10 +157,10 @@ public: // Callbacks that the application use by assigning lambdas to specific c
 public:
 	// Returning a shader pointer to the topmost direct visible child that contains pixelPosition.
 	//   The pixelPosition is relative to the called component's upper left corner.
-	std::shared_ptr<VisualComponent> getDirectChild(const IVector2D& pixelPosition);
+	Handle<VisualComponent> getDirectChild(const IVector2D& pixelPosition);
 	// Returning a shared pointer to itself.
 	//   Currently not working for the root component because of limitations in C++.
-	std::shared_ptr<VisualComponent> getShared();
+	Handle<VisualComponent> getHandle();
 public:
 	// Draw the component
 	//   The component is responsible for drawing the component at this->location + offset.
@@ -195,22 +196,22 @@ public:
 	//   Preconditions:
 	//     The parent's component type is a container.
 	//     The child does not already have a parent.
-	void addChildComponent(std::shared_ptr<VisualComponent> child);
+	void addChildComponent(Handle<VisualComponent> child);
 	// Called with any persistent type when constructing child components from text
-	bool addChild(std::shared_ptr<Persistent> child) override;
+	bool addChild(Handle<Persistent> child) override;
 	// Called when saving to text
 	int getChildCount() const override;
-	std::shared_ptr<Persistent> getChild(int index) const override;
+	Handle<Persistent> getChild(int index) const override;
 
 	// Returns true iff child is a member of the component
 	//   Searches recursively
 	bool hasChild(VisualComponent *child) const;
-	bool hasChild(std::shared_ptr<VisualComponent> child) const;
+	bool hasChild(Handle<VisualComponent> child) const;
 
 	// Find the first child component with the requested name using a case sensitive match.
 	//   Returns: A shared pointer to the child or null if not found.
-	std::shared_ptr<VisualComponent> findChildByName(ReadableString name) const;
-	std::shared_ptr<VisualComponent> findChildByNameAndIndex(ReadableString name, int index) const;
+	Handle<VisualComponent> findChildByName(ReadableString name) const;
+	Handle<VisualComponent> findChildByNameAndIndex(ReadableString name, int index) const;
 	// Detach the component from any parent
 	void detachFromParent();
 

+ 9 - 9
Source/DFPSR/gui/VisualTheme.cpp

@@ -337,22 +337,22 @@ public:
 
 static VisualTheme defaultTheme;
 VisualTheme theme_getDefault() {
-	if (!(defaultTheme.get())) {
+	if (!(defaultTheme.getUnsafe())) {
 		defaultTheme = theme_createFromText(machine_create(defaultMediaMachineCode), defaultStyleSettings, file_getCurrentPath());
 	}
 	return defaultTheme;
 }
 
 VisualTheme theme_createFromText(const MediaMachine &machine, const ReadableString &styleSettings, const ReadableString &fromPath) {
-	return std::make_shared<VisualThemeImpl>(machine, styleSettings, fromPath);
+	return handle_create<VisualThemeImpl>(machine, styleSettings, fromPath).setName("Visual Theme");
 }
 
 VisualTheme theme_createFromFile(const MediaMachine &machine, const ReadableString &styleFilename) {
-	return theme_createFromText(machine, string_load(styleFilename), file_getRelativeParentFolder(styleFilename));
+	return theme_createFromText(machine, string_load(styleFilename), file_getRelativeParentFolder(styleFilename)).setName("Visual Theme");
 }
 
 bool theme_exists(const VisualTheme &theme) {
-	return theme.get() != nullptr;
+	return theme.isNotNull();
 }
 
 int theme_getClassIndex(const VisualTheme &theme, const ReadableString &className) {
@@ -375,7 +375,7 @@ String theme_selectClass(const VisualTheme &theme, const ReadableString &suggest
 }
 
 OrderedImageRgbaU8 theme_getImage(const VisualTheme &theme, const ReadableString &className, const ReadableString &settingName) {
-	if (!theme.get()) {
+	if (!theme.getUnsafe()) {
 		return OrderedImageRgbaU8();
 	}
 	int classIndex = theme->getClassIndex(className);
@@ -390,7 +390,7 @@ OrderedImageRgbaU8 theme_getImage(const VisualTheme &theme, const ReadableString
 }
 
 FixedPoint theme_getFixedPoint(const VisualTheme &theme, const ReadableString &className, const ReadableString &settingName, const FixedPoint &defaultValue) {
-	if (!theme.get()) {
+	if (!theme.getUnsafe()) {
 		return defaultValue;
 	}
 	int classIndex = theme->getClassIndex(className);
@@ -409,7 +409,7 @@ int theme_getInteger(const VisualTheme &theme, const ReadableString &className,
 }
 
 ReadableString theme_getString(const VisualTheme &theme, const ReadableString &className, const ReadableString &settingName, const ReadableString &defaultValue) {
-	if (!theme.get()) {
+	if (!theme.getUnsafe()) {
 		return defaultValue;
 	}
 	int classIndex = theme->getClassIndex(className);
@@ -424,7 +424,7 @@ ReadableString theme_getString(const VisualTheme &theme, const ReadableString &c
 }
 
 MediaMethod theme_getScalableImage(const VisualTheme &theme, const ReadableString &className) {
-	if (!theme.get()) {
+	if (!theme.getUnsafe()) {
 		throwError(U"theme_getScalableImage: Can't get scalable image of class ", className, U" from a non-existing theme!\n");
 	}
 	int classIndex = theme->getClassIndex(className);
@@ -459,7 +459,7 @@ static bool assignMediaMachineArguments(ClassSettings settings, MediaMachine &ma
 }
 
 bool theme_assignMediaMachineArguments(const VisualTheme &theme, int contextIndex, MediaMachine &machine, int methodIndex, int inputIndex, const ReadableString &argumentName) {
-	if (!theme.get()) { return false; }
+	if (!theme.getUnsafe()) { return false; }
 	// Check in the context first, and then in the default settings.
 	return (contextIndex > 0 && assignMediaMachineArguments(theme->settings[contextIndex], machine, methodIndex, inputIndex, argumentName))
 	                         || assignMediaMachineArguments(theme->settings[0],            machine, methodIndex, inputIndex, argumentName);

+ 6 - 0
Source/DFPSR/gui/VisualTheme.h

@@ -43,6 +43,12 @@ namespace dsr {
 
 // TODO: Move to the API folder once complete.
 
+// A handle to a GUI theme.
+//   Themes describe the visual appearance of an interface.
+//   By having more than one theme for your interface, you can let the user select one.
+class VisualThemeImpl;
+using VisualTheme = Handle<VisualThemeImpl>;
+
 // Create a theme using a virtual machine with functions to call, style settings telling which functions to call with what arguments, and a path to load any non-embedded images from.
 VisualTheme theme_createFromText(const MediaMachine &machine, const ReadableString &styleSettings, const ReadableString &fromPath);
 // Create a theme using a virtual machine with functions to call, and a path to the style settings to load.

+ 1 - 1
Source/DFPSR/gui/components/Button.cpp

@@ -133,7 +133,7 @@ void Button::completeAssets() {
 	if (this->button.methodIndex == -1) {
 		this->loadTheme(theme_getDefault());
 	}
-	if (this->font.get() == nullptr) {
+	if (this->font.isNull()) {
 		this->font = font_getDefault();
 	}
 }

+ 1 - 1
Source/DFPSR/gui/components/Label.cpp

@@ -73,7 +73,7 @@ bool Label::pointIsInside(const IVector2D& pixelPosition) {
 }
 
 void Label::completeAssets() {
-	if (this->font.get() == nullptr) {
+	if (this->font.isNull()) {
 		this->font = font_getDefault();
 	}
 }

+ 3 - 3
Source/DFPSR/gui/components/Menu.cpp

@@ -197,7 +197,7 @@ void Menu::completeAssets() {
 	if (this->headImageMethod.methodIndex == -1) {
 		this->loadTheme(theme_getDefault());
 	}
-	if (this->font.get() == nullptr) {
+	if (this->font.isNull()) {
 		this->font = font_getDefault();
 	}
 }
@@ -290,7 +290,7 @@ void Menu::receiveMouseEvent(const MouseEvent& event) {
 	bool inHead = this->pointIsInside(event.position);
 	if (event.mouseEventType == MouseEventType::MouseUp) {
 		// Pass on mouse up events to dragged components, even if not inside of them.
-		if (this->dragComponent.get() != nullptr) {
+		if (this->dragComponent.isNotNull()) {
 			MouseEvent childEvent = localEvent;
 			childEvent.position -= this->dragComponent->location.upperLeft();
 			this->dragComponent->sendMouseEvent(childEvent, true);
@@ -366,7 +366,7 @@ void Menu::receiveMouseEvent(const MouseEvent& event) {
 	}
 	// Releasing anywhere should stop pressing.
 	if (event.mouseEventType == MouseEventType::MouseUp) {
-		this->dragComponent = std::shared_ptr<VisualComponent>();
+		this->dragComponent = Handle<VisualComponent>();
 		if (this->pressed) {
 			// No longer pressed.
 			this->pressed = false;

+ 3 - 3
Source/DFPSR/gui/components/TextBox.cpp

@@ -417,7 +417,7 @@ void TextBox::receiveKeyboardEvent(const KeyboardEvent& event) {
 				this->placeBeamAtCharacter(getLineEnd(this->text.value, this->beamLocation), removeSelection);
 			} else if (event.dsrKey == DsrKey_X) {
 				// Cut selection using Ctrl + X
-				if (this->window.get()) {
+				if (this->window.getUnsafe()) {
 					this->window->saveToClipboard(this->getSelectedText());
 					this->replaceSelection(U"");
 				} else {
@@ -425,14 +425,14 @@ void TextBox::receiveKeyboardEvent(const KeyboardEvent& event) {
 				}
 			} else if (event.dsrKey == DsrKey_C) {
 				// Copy selection using Ctrl + C
-				if (this->window.get()) {
+				if (this->window.getUnsafe()) {
 					this->window->saveToClipboard(this->getSelectedText());
 				} else {
 					sendWarning(U"No window handle found in TextBox when trying to copy text!");
 				}
 			} else if (event.dsrKey == DsrKey_V) {
 				// Paste selection using Ctrl + V
-				if (this->window.get()) {
+				if (this->window.getUnsafe()) {
 					this->replaceSelection(this->window->loadFromClipboard());
 				} else {
 					sendWarning(U"No window handle found in TextBox when trying to paste text!");

+ 0 - 103
Source/DFPSR/image/Color.cpp

@@ -1,103 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2018 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#include "Color.h"
-
-using namespace dsr;
-
-ColorRgbI32 ColorRgbI32::saturate() const {
-	int32_t red = this->red;
-	int32_t green = this->green;
-	int32_t blue = this->blue;
-	if (red < 0) { red = 0; }
-	if (red > 255) { red = 255; }
-	if (green < 0) { green = 0; }
-	if (green > 255) { green = 255; }
-	if (blue < 0) { blue = 0; }
-	if (blue > 255) { blue = 255; }
-	return ColorRgbI32(red, green, blue);
-}
-ColorRgbI32 ColorRgbI32::mix(const ColorRgbI32& colorA, const ColorRgbI32& colorB, float weight) {
-	float invWeight = 1.0f - weight;
-	return (colorA * invWeight) + (colorB * weight);
-}
-ColorRgbI32::ColorRgbI32(const ReadableString &content) : red(0), green(0), blue(0) {
-	int givenChannels = 0;
-	string_split_callback([this, &givenChannels](ReadableString channelValue) {
-		if (givenChannels == 0) {
-			this->red = string_toInteger(channelValue);
-		} else if (givenChannels == 1) {
-			this->green = string_toInteger(channelValue);
-		} else if (givenChannels == 2) {
-			this->blue = string_toInteger(channelValue);
-		}
-		givenChannels++;
-	}, content, U',');
-}
-ColorRgbaI32 ColorRgbaI32::saturate() const {
-	int32_t red = this->red;
-	int32_t green = this->green;
-	int32_t blue = this->blue;
-	int32_t alpha = this->alpha;
-	if (red < 0) { red = 0; }
-	if (red > 255) { red = 255; }
-	if (green < 0) { green = 0; }
-	if (green > 255) { green = 255; }
-	if (blue < 0) { blue = 0; }
-	if (blue > 255) { blue = 255; }
-	if (alpha < 0) { alpha = 0; }
-	if (alpha > 255) { alpha = 255; }
-	return ColorRgbaI32(red, green, blue, alpha);
-}
-ColorRgbaI32 ColorRgbaI32::mix(const ColorRgbaI32& colorA, const ColorRgbaI32& colorB, float weight) {
-	float invWeight = 1.0f - weight;
-	return (colorA * invWeight) + (colorB * weight);
-}
-ColorRgbaI32::ColorRgbaI32(const ReadableString &content) : red(0), green(0), blue(0), alpha(255) {
-	int givenChannels = 0;
-	string_split_callback([this, &givenChannels](ReadableString channelValue) {
-		if (givenChannels == 0) {
-			this->red = string_toInteger(channelValue);
-		} else if (givenChannels == 1) {
-			this->green = string_toInteger(channelValue);
-		} else if (givenChannels == 2) {
-			this->blue = string_toInteger(channelValue);
-		} else if (givenChannels == 3) {
-			this->alpha = string_toInteger(channelValue);
-		}
-		givenChannels++;
-	}, content, U',');
-}
-
-String& dsr::string_toStreamIndented(String& target, const ColorRgbI32& source, const ReadableString& indentation) {
-	string_append(target, indentation, source.red, U",", source.green, U",", source.blue);
-	return target;
-}
-String& dsr::string_toStreamIndented(String& target, const ColorRgbaI32& source, const ReadableString& indentation) {
-	string_append(target, indentation, source.red, U",", source.green, U",", source.blue, U",", source.alpha);
-	return target;
-}
-String& dsr::string_toStreamIndented(String& target, const Color4xU8& source, const ReadableString& indentation) {
-	string_append(target, indentation, source.channels[0], U",", source.channels[1], U",", source.channels[2], U",", source.channels[3]);
-	return target;
-}

+ 97 - 43
Source/DFPSR/image/Color.h

@@ -29,6 +29,19 @@
 
 namespace dsr {
 
+struct ColorRgbI32;
+struct ColorRgbaI32;
+inline ColorRgbI32 operator * (const ColorRgbI32& left, float right);
+inline ColorRgbI32 operator * (const ColorRgbI32& left, int32_t right);
+inline ColorRgbI32 operator + (const ColorRgbI32& left, const ColorRgbI32& right);
+inline bool operator == (const ColorRgbI32& a, const ColorRgbI32& b);
+inline bool operator != (const ColorRgbI32& a, const ColorRgbI32& b);
+inline ColorRgbaI32 operator *( const ColorRgbaI32& left, float right);
+inline ColorRgbaI32 operator * (const ColorRgbaI32& left, int32_t right);
+inline ColorRgbaI32 operator + (const ColorRgbaI32& left, const ColorRgbaI32& right);
+inline bool operator == (const ColorRgbaI32& a, const ColorRgbaI32& b);
+inline bool operator != (const ColorRgbaI32& a, const ColorRgbaI32& b);
+
 // RGB color with 32 bits per channel
 //   Values outside of the 0..255 byte range may cause unexpected behaviour
 struct ColorRgbI32 {
@@ -36,25 +49,51 @@ struct ColorRgbI32 {
 	ColorRgbI32() : red(0), green(0), blue(0) {}
 	explicit ColorRgbI32(int32_t uniform) : red(uniform), green(uniform), blue(uniform) {}
 	ColorRgbI32(int32_t red, int32_t green, int32_t blue) : red(red), green(green), blue(blue) {}
-	// Clamp to the valid range
-	ColorRgbI32 saturate() const;
-	static ColorRgbI32 mix(const ColorRgbI32& colorA, const ColorRgbI32& colorB, float weight);
-	// Create a color from a string
-	explicit ColorRgbI32(const ReadableString &content);
+	// Get the color clamped to the visible range.
+	ColorRgbI32 saturate() const {
+		int32_t red = this->red;
+		int32_t green = this->green;
+		int32_t blue = this->blue;
+		if (red < 0) { red = 0; }
+		if (red > 255) { red = 255; }
+		if (green < 0) { green = 0; }
+		if (green > 255) { green = 255; }
+		if (blue < 0) { blue = 0; }
+		if (blue > 255) { blue = 255; }
+		return ColorRgbI32(red, green, blue);
+	}
+	static ColorRgbI32 mix(const ColorRgbI32& colorA, const ColorRgbI32& colorB, float weight) {
+		float invWeight = 1.0f - weight;
+		return (colorA * invWeight) + (colorB * weight);
+	}
+	// Create a color from a string.
+	explicit ColorRgbI32(const ReadableString &content) : red(0), green(0), blue(0) {
+		int givenChannels = 0;
+		string_split_callback([this, &givenChannels](ReadableString channelValue) {
+			if (givenChannels == 0) {
+				this->red = string_toInteger(channelValue);
+			} else if (givenChannels == 1) {
+				this->green = string_toInteger(channelValue);
+			} else if (givenChannels == 2) {
+				this->blue = string_toInteger(channelValue);
+			}
+			givenChannels++;
+		}, content, U',');
+	}
 };
-inline ColorRgbI32 operator*(const ColorRgbI32& left, float right) {
+inline ColorRgbI32 operator * (const ColorRgbI32& left, float right) {
 	return ColorRgbI32((float)left.red * right, (float)left.green * right, (float)left.blue * right);
 }
-inline ColorRgbI32 operator*(const ColorRgbI32& left, int32_t right) {
+inline ColorRgbI32 operator * (const ColorRgbI32& left, int32_t right) {
 	return ColorRgbI32(left.red * right, left.green * right, left.blue * right);
 }
-inline ColorRgbI32 operator+(const ColorRgbI32& left, const ColorRgbI32& right) {
+inline ColorRgbI32 operator + (const ColorRgbI32& left, const ColorRgbI32& right) {
 	return ColorRgbI32(left.red + right.red, left.green + right.green, left.blue + right.blue);
 }
-inline bool operator== (const ColorRgbI32& a, const ColorRgbI32& b) {
+inline bool operator == (const ColorRgbI32& a, const ColorRgbI32& b) {
 	return a.red == b.red && a.green == b.green && a.blue == b.blue;
 }
-inline bool operator!= (const ColorRgbI32& a, const ColorRgbI32& b) {
+inline bool operator != (const ColorRgbI32& a, const ColorRgbI32& b) {
 	return !(a == b);
 }
 
@@ -66,54 +105,69 @@ struct ColorRgbaI32 {
 	ColorRgbaI32(ColorRgbI32 rgb, int32_t alpha) : red(rgb.red), green(rgb.green), blue(rgb.blue), alpha(alpha) {}
 	explicit ColorRgbaI32(int32_t uniform) : red(uniform), green(uniform), blue(uniform), alpha(uniform) {}
 	ColorRgbaI32(int32_t red, int32_t green, int32_t blue, int32_t alpha) : red(red), green(green), blue(blue), alpha(alpha) {}
-	// Clamp to the valid range
-	ColorRgbaI32 saturate() const;
-	static ColorRgbaI32 mix(const ColorRgbaI32& colorA, const ColorRgbaI32& colorB, float weight);
-	// Create a color from a string
-	explicit ColorRgbaI32(const ReadableString &content);
+	// Get the color clamped to the visible range.
+	ColorRgbaI32 saturate() const {
+		int32_t red = this->red;
+		int32_t green = this->green;
+		int32_t blue = this->blue;
+		int32_t alpha = this->alpha;
+		if (red < 0) { red = 0; }
+		if (red > 255) { red = 255; }
+		if (green < 0) { green = 0; }
+		if (green > 255) { green = 255; }
+		if (blue < 0) { blue = 0; }
+		if (blue > 255) { blue = 255; }
+		if (alpha < 0) { alpha = 0; }
+		if (alpha > 255) { alpha = 255; }
+		return ColorRgbaI32(red, green, blue, alpha);
+	}
+	static ColorRgbaI32 mix(const ColorRgbaI32& colorA, const ColorRgbaI32& colorB, float weight) {
+		float invWeight = 1.0f - weight;
+		return (colorA * invWeight) + (colorB * weight);
+	}
+	// Create a color from a string.
+	explicit ColorRgbaI32(const ReadableString &content) : red(0), green(0), blue(0), alpha(255) {
+		int givenChannels = 0;
+		string_split_callback([this, &givenChannels](ReadableString channelValue) {
+			if (givenChannels == 0) {
+				this->red = string_toInteger(channelValue);
+			} else if (givenChannels == 1) {
+				this->green = string_toInteger(channelValue);
+			} else if (givenChannels == 2) {
+				this->blue = string_toInteger(channelValue);
+			} else if (givenChannels == 3) {
+				this->alpha = string_toInteger(channelValue);
+			}
+			givenChannels++;
+		}, content, U',');
+	}
 };
-inline ColorRgbaI32 operator*(const ColorRgbaI32& left, float right) {
+inline ColorRgbaI32 operator *( const ColorRgbaI32& left, float right) {
 	return ColorRgbaI32((float)left.red * right, (float)left.green * right, (float)left.blue * right, (float)left.alpha * right);
 }
-inline ColorRgbaI32 operator*(const ColorRgbaI32& left, int32_t right) {
+inline ColorRgbaI32 operator * (const ColorRgbaI32& left, int32_t right) {
 	return ColorRgbaI32(left.red * right, left.green * right, left.blue * right, left.alpha * right);
 }
-inline ColorRgbaI32 operator+(const ColorRgbaI32& left, const ColorRgbaI32& right) {
+inline ColorRgbaI32 operator + (const ColorRgbaI32& left, const ColorRgbaI32& right) {
 	return ColorRgbaI32(left.red + right.red, left.green + right.green, left.blue + right.blue, left.alpha + right.alpha);
 }
-inline bool operator== (const ColorRgbaI32& a, const ColorRgbaI32& b) {
+inline bool operator == (const ColorRgbaI32& a, const ColorRgbaI32& b) {
 	return a.red == b.red && a.green == b.green && a.blue == b.blue && a.alpha == b.alpha;
 }
-inline bool operator!= (const ColorRgbaI32& a, const ColorRgbaI32& b) {
+inline bool operator != (const ColorRgbaI32& a, const ColorRgbaI32& b) {
 	return !(a == b);
 }
 
-// TODO: Can this type be hidden from the external API?
-// RGBA color in arbitrary pack order for speed
-// Use ImageRgbaU8Impl::packRgba to construct for a specific pack order
-union Color4xU8 {
-	uint32_t packed;
-	uint8_t channels[4];
-	Color4xU8() : packed(0) {}
-	explicit Color4xU8(uint32_t packed) : packed(packed) {}
-	Color4xU8(uint8_t first, uint8_t second, uint8_t third, uint8_t fourth) : channels{first, second, third, fourth} {}
-	bool isUniformByte() {
-		int first = this->channels[0];
-		return this->channels[1] == first && this->channels[2] == first && this->channels[3] == first;
-	}
-};
-inline bool operator== (const Color4xU8& a, const Color4xU8& b) {
-	return a.packed == b.packed;
+// Serialization
+inline String& string_toStreamIndented(String& target, const ColorRgbI32& source, const ReadableString& indentation) {
+	string_append(target, indentation, source.red, U",", source.green, U",", source.blue);
+	return target;
 }
-inline bool operator!= (const Color4xU8& a, const Color4xU8& b) {
-	return !(a == b);
+inline String& string_toStreamIndented(String& target, const ColorRgbaI32& source, const ReadableString& indentation) {
+	string_append(target, indentation, source.red, U",", source.green, U",", source.blue, U",", source.alpha);
+	return target;
 }
 
-// Serialization
-String& string_toStreamIndented(String& target, const ColorRgbI32& source, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const ColorRgbaI32& source, const ReadableString& indentation);
-String& string_toStreamIndented(String& target, const Color4xU8& source, const ReadableString& indentation);
-
 }
 
 #endif

+ 263 - 30
Source/DFPSR/image/Image.h

@@ -1,6 +1,7 @@
-// zlib open source license
+
+// zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2019 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -21,43 +22,275 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-#ifndef DFPSR_IMAGE
-#define DFPSR_IMAGE
+#ifndef DFPSR_IMAGE_TYPES
+#define DFPSR_IMAGE_TYPES
 
-#include <cassert>
-#include <cstdint>
-#include "../base/SafePointer.h"
-#include "../api/bufferAPI.h"
-#include "../math/scalar.h"
-#include "../math/IRect.h"
 #include "PackOrder.h"
+#include "../math/IRect.h"
+#include "../api/bufferAPI.h"
 
 namespace dsr {
 
-// See imageAPI.h for public methods
-// See imageInternal.h for protected methods
-class ImageImpl {
-public:
-	int32_t width, height, stride, pixelSize;
-	Buffer buffer; // Content
-	intptr_t startOffset; // Byte offset of the first pixel
-	bool isSubImage = false;
+enum class ImageFileFormat {
+	Unknown, // Used as an error code for unidentified formats.
+	JPG, // Lossy compressed image format storing brightness separated from red and blue offsets using the discrete cosine transform of each block.
+	PNG, // Lossless compressed image format. Some image editors don't save RGB values where alpha is zero, which will bleed through black edges in bi-linear interpolation when the interpolated alpha is not zero.
+	TGA, // Lossless compressed format. Applications usually give Targa better control over the alpha channel than PNG, but it's more common that the Targa specification is interpreted in incompatible ways.
+	BMP // Uncompressed image format for storing data that does not really represent an image and you just want it to be exact.
+};
+
+// Packed into 2 bits in ImageDimensions, so these four values use up every available index.
+enum class PixelFormat : uint32_t {
+	MonoU8, // Gray-scale image of 8 bits per pixel (0..255).
+	MonoU16, // Single channel image of 16 bits per pixel (ImageDimensions::getPixelSize returns 2 bytes).
+	MonoF32, // Single channel image of 32 bits per pixel (ImageDimensions::getPixelSize returns 4 bytes). Presumably 32-bit floating-point, confirm in imageAPI.
+	RgbaU8 // RGBA colors in any order. 8 bits per channel (0..255). 32 bits per pixel.
+};
+
+// Start offset and stride are stored in pixels, and the getters in imageAPI can automatically convert them into byte offsets as needed.
+// Maximum image dimensions are 65536 x 65536, because that will precisely fit the worst case start offset into uint32_t.
+//   maxPixelCount = 65536²             = 4294967296
+//   maxStartOffset = maxPixelCount - 1 = 4294967295
+//   largest uint32_t         = 2³² - 1 = 4294967295
+
+// Because the computer will do bitwise operations to read and write small integers anyway,
+//   there is usually no performance penalty for choosing an odd number of bits to pack more information.
+class ImageDimensions {
+private:
+	// Bit masks and offsets for the properties that are packed into the same 64-bit integer. From the most significant bit: width (17 bits), height (17), stride (17), pack order (2), pixel format (2) and the sub-image flag (1). The lowest 8 bits are not covered by any mask here.
+	static const uint64_t  readMask_width         = 0b1111111111111111100000000000000000000000000000000000000000000000; //  0 zeroes, 17 ones, 47 zeroes
+	static const uint32_t  inputMask_width        = 0b11111111111111111                                               ; //            17 ones
+	static const int       bitOffset_width        =                    47                                             ;
+	static const uint64_t  readMask_height        = 0b0000000000000000011111111111111111000000000000000000000000000000; // 17 zeroes, 17 ones, 30 zeroes
+	static const uint32_t  inputMask_height       =                  0b11111111111111111                              ; //            17 ones
+	static const int       bitOffset_height       =                                     30                            ;
+	static const uint64_t  readMask_stride        = 0b0000000000000000000000000000000000111111111111111110000000000000; // 34 zeroes, 17 ones, 13 zeroes
+	static const uint32_t  inputMask_stride       =                                   0b11111111111111111             ; //            17 ones
+	static const int       bitOffset_stride       =                                                      13           ;
+	static const uint64_t  readMask_packOrder     = 0b0000000000000000000000000000000000000000000000000001100000000000; // 51 zeroes,  2 ones, 11 zeroes
+	static const uint32_t  inputMask_packOrder    =                                                    0b11           ; //             2 ones
+	static const int       bitOffset_packOrder    =                                                        11         ;
+	static const uint64_t  readMask_format        = 0b0000000000000000000000000000000000000000000000000000011000000000; // 53 zeroes,  2 ones,  9 zeroes
+	static const uint32_t  inputMask_format       =                                                      0b11         ; //             2 ones
+	static const int       bitOffset_format       =                                                          9        ;
+	static const uint64_t  readMask_subImage      = 0b0000000000000000000000000000000000000000000000000000000100000000; // 55 zeroes,  1 ones,  8 zeroes
+private:
+	// Actual members.
+	uint64_t data = 0;
+	uint32_t pixelStartOffset = 0; // This one fits exactly into 32 bits, but we will have 32 more bits of padding.
+private:
+	// Helper functions. readFrom64 isolates one property with its read mask and shifts it down to start at bit zero.
+	static inline uint64_t readFrom64(const uint64_t &source, uint64_t readMask, uint32_t bitOffset) {
+		return (source & readMask) >> bitOffset;
+	}
 public:
-	// Sub-images
-	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, Buffer buffer, intptr_t startOffset) :
-	  width(width), height(height), stride(stride), pixelSize(pixelSize), buffer(buffer), startOffset(startOffset), isSubImage(true) {}
-	// New images
-	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize) :
-	  width(width), height(height), stride(stride), pixelSize(pixelSize), buffer(buffer_create(stride * height)), startOffset(0), isSubImage(false) {}
+	// Access to the data.
+	inline uint32_t getWidth() const {
+		return ImageDimensions::readFrom64(this->data, readMask_width, bitOffset_width);
+	}
+	inline uint32_t getHeight() const {
+		return ImageDimensions::readFrom64(this->data, readMask_height, bitOffset_height);
+	}
+	inline uint32_t getPixelStride() const {
+		return ImageDimensions::readFrom64(this->data, readMask_stride, bitOffset_stride);
+	}
+	inline PackOrderIndex getPackOrderIndex() const {
+		return (PackOrderIndex)readFrom64(this->data, readMask_packOrder, bitOffset_packOrder);
+	}
+	inline PixelFormat getPixelFormat() const {
+		return (PixelFormat)readFrom64(this->data, readMask_format, bitOffset_format);
+	}
+	inline bool isSubImage() const {
+		// No need to shift the bit before normalizing, because anything else than zero becomes 1.
+		return (this->data & readMask_subImage) != 0;
+	}
+	inline uint32_t getLog2PixelSize() const {
+		// Returns the base two logarithm of the pixel size in bytes. Shift the constants instead of the index to save a cycle.
+		uint64_t shifterPixelFormatIndex = this->data & readMask_format;
+		if        (shifterPixelFormatIndex == ((uint64_t)PixelFormat::MonoU8  << bitOffset_format)) {
+			return 0;
+		} else if (shifterPixelFormatIndex == ((uint64_t)PixelFormat::MonoU16 << bitOffset_format)) {
+			return 1;
+		} else if (shifterPixelFormatIndex == ((uint64_t)PixelFormat::MonoF32 << bitOffset_format)) {
+			return 2;
+		} else if (shifterPixelFormatIndex == ((uint64_t)PixelFormat::RgbaU8  << bitOffset_format)) {
+			return 2;
+		} else {
+			return 0; // Unknown pixel format! Not reachable while all four 2-bit values are named in PixelFormat.
+		}
+	}
+	inline uint32_t getPixelSize() const {
+		// Returns the pixel size in bytes. Shift the constants instead of the index to save a cycle.
+		uint64_t shifterPixelFormatIndex = this->data & readMask_format;
+		if        (shifterPixelFormatIndex == ((uint64_t)PixelFormat::MonoU8  << bitOffset_format)) {
+			return 1;
+		} else if (shifterPixelFormatIndex == ((uint64_t)PixelFormat::MonoU16 << bitOffset_format)) {
+			return 2;
+		} else if (shifterPixelFormatIndex == ((uint64_t)PixelFormat::MonoF32 << bitOffset_format)) {
+			return 4;
+		} else if (shifterPixelFormatIndex == ((uint64_t)PixelFormat::RgbaU8  << bitOffset_format)) {
+			return 4;
+		} else {
+			return 0; // Unknown pixel format! Not reachable while all four 2-bit values are named in PixelFormat.
+		}
+	}
+	inline uint32_t getPixelStartOffset() const {
+		return this->pixelStartOffset;
+	}
+	inline uintptr_t getByteStartOffset() const {
+		return uintptr_t(this->getPixelStartOffset()) << this->getLog2PixelSize(); // Widened to uintptr_t before shifting from pixels to bytes.
+	}
+	inline uintptr_t getByteStride() const {
+		return uintptr_t(this->getPixelStride()) << this->getLog2PixelSize(); // Widened to uintptr_t before shifting from pixels to bytes.
+	}
+	// Construction that truncates individual inputs in modulo, just to make sure that too large values do not affect other values and make debugging into a nightmare. The sub-image flag starts cleared and pixelStartOffset is stored as given.
+	ImageDimensions(uint32_t width, uint32_t height, uint32_t pixelStride, PackOrderIndex packOrderIndex, PixelFormat pixelFormat, uint32_t pixelStartOffset) noexcept
+	: data(((uint64_t)(           width           & inputMask_width       ) << bitOffset_width)
+	     | ((uint64_t)(           height          & inputMask_height      ) << bitOffset_height)
+	     | ((uint64_t)(           pixelStride     & inputMask_stride      ) << bitOffset_stride)
+	     | ((uint64_t)(((uint32_t)packOrderIndex) & inputMask_packOrder   ) << bitOffset_packOrder)
+	     | ((uint64_t)(((uint32_t)pixelFormat)    & inputMask_format      ) << bitOffset_format)
+	), pixelStartOffset(pixelStartOffset) {}
+	ImageDimensions() {}
+	void setWidthHeightStartSubImage(uint32_t width, uint32_t height, uint32_t pixelStartOffset) { // Replaces width, height and start offset, keeps stride, pack order and format, and sets the sub-image flag.
+		this->data = (this->data & ~(readMask_width | readMask_height))
+		           | ((uint64_t)(width  & inputMask_width  ) << bitOffset_width)
+	               | ((uint64_t)(height & inputMask_height ) << bitOffset_height)
+				   | readMask_subImage;
+		this->pixelStartOffset = pixelStartOffset;
+	}
+};
+
+#define IMPL_IMAGE_CONSTRUCTORS(NEW_TYPE, BASE_TYPE) /* Declares the empty, buffer plus dimensions, copy and sub-image constructors of NEW_TYPE, each forwarding to BASE_TYPE(buffer, dimensions). */ \
+	NEW_TYPE() {} \
+	NEW_TYPE(const Buffer &buffer, ImageDimensions dimensions) : BASE_TYPE(buffer, dimensions) {} \
+	NEW_TYPE(const NEW_TYPE &source) : BASE_TYPE(source.impl_buffer, source.impl_dimensions) {} \
+	NEW_TYPE(const NEW_TYPE &source, const IRect &region) \
+	: BASE_TYPE(source.impl_buffer, source.impl_dimensions) { \
+		IRect cut = IRect::cut(IRect(0, 0, source.impl_dimensions.getWidth(), source.impl_dimensions.getHeight()), region); \
+		if (cut.hasArea()) { \
+			this->impl_dimensions.setWidthHeightStartSubImage(cut.width(), cut.height(), source.impl_dimensions.getPixelStartOffset() + cut.left() + cut.top() * source.impl_dimensions.getPixelStride()); \
+		} else { \
+			this->impl_buffer = Buffer(); \
+			this->impl_dimensions = ImageDimensions(); \
+		} \
+	}
+
+#define IMPL_IMAGE_HIGHER_CONSTRUCTORS(NEW_TYPE, BASE_TYPE) /* Same constructors as IMPL_IMAGE_CONSTRUCTORS, plus one taking the raw layout arguments and forwarding them unchanged to BASE_TYPE. */ \
+	IMPL_IMAGE_CONSTRUCTORS(NEW_TYPE, BASE_TYPE) \
+	NEW_TYPE(const Buffer &buffer, uint32_t pixelStartOffset, uint32_t width, uint32_t height, uint32_t pixelStride, const PackOrderIndex &packOrderIndex) \
+	: BASE_TYPE(buffer, pixelStartOffset, width, height, pixelStride, packOrderIndex) {}
+
+// Use imageAPI.h to access the content of images!
+//   The content may change between library versions but is public to simplify access for inlined getters.
+struct Image {
+// PRIVATE:
+// To maintain encapsulation from version specific details, do not touch things starting with IMPL_ or impl_.
+// Use the inlined getters in imageAPI.h instead.
+
+	// Reference counted pointer to the pixel data.
+	Buffer impl_buffer;
+	// Dimensions, stride, start offset, pixel format and pack order of the image, packed into ImageDimensions.
+	ImageDimensions impl_dimensions;
+	// New image referring to the given buffer, using dimensions that the caller has already packed.
+	Image(const Buffer &buffer, ImageDimensions dimensions)
+	: impl_buffer(buffer), impl_dimensions(dimensions) {}
+	// Generic cut: refers to the same buffer as source and clips region against the bounds of source. When nothing of region is left inside of source, the result is reset to an empty image.
+	Image(const Image &source, const IRect &region)
+	: impl_buffer(source.impl_buffer), impl_dimensions(source.impl_dimensions) {
+		IRect cut = IRect::cut(IRect(0, 0, source.impl_dimensions.getWidth(), source.impl_dimensions.getHeight()), region);
+		if (cut.hasArea()) {
+			this->impl_dimensions.setWidthHeightStartSubImage(cut.width(), cut.height(), source.impl_dimensions.getPixelStartOffset() + cut.left() + cut.top() * source.impl_dimensions.getPixelStride());
+		} else {
+			this->impl_buffer = Buffer();
+			this->impl_dimensions = ImageDimensions();
+		}
+	}
+	// Empty image with a default constructed buffer and zeroed dimensions.
+	Image() {}
+};
+
+// Can be unaligned.
+//   Is not allowed to overwrite padding bytes, because it does not know the difference between padding and pixels belonging to a larger image sharing the same pixel buffer.
+struct ImageU8
+: public Image {
+	static const int impl_pixelSize = 1; // Size of one pixel in bytes, the same value that ImageDimensions::getPixelSize returns for PixelFormat::MonoU8.
+	ImageU8(const Buffer &buffer, uint32_t pixelStartOffset, uint32_t width, uint32_t height, uint32_t pixelStride, const PackOrderIndex &packOrderIndex) // Start offset and stride are given in pixels, not bytes.
+	: Image(buffer, ImageDimensions(width, height, pixelStride, packOrderIndex, PixelFormat::MonoU8, pixelStartOffset)) {}
+	IMPL_IMAGE_CONSTRUCTORS(ImageU8, Image)
 };
 
-#define IMAGE_DECLARATION(IMAGE_TYPE,CHANNELS,COLOR_TYPE,ELEMENT_TYPE) \
-	static void writePixel(IMAGE_TYPE &image, int32_t x, int32_t y, COLOR_TYPE color); \
-	static void writePixel_unsafe(IMAGE_TYPE &image, int32_t x, int32_t y, COLOR_TYPE color); \
-	static COLOR_TYPE readPixel_clamp(const IMAGE_TYPE &image, int32_t x, int32_t y); \
-	static COLOR_TYPE readPixel_unsafe(const IMAGE_TYPE &image, int32_t x, int32_t y);
+// The start of each row is aligned to DSR_MAXIMUM_ALIGNMENT for SIMD vectorization and thread safety.
+//   Owns the padding bytes and may overwrite them during SIMD vectorization.
+struct AlignedImageU8
+: public ImageU8 {
+	IMPL_IMAGE_HIGHER_CONSTRUCTORS(AlignedImageU8, ImageU8)
+};
+
+// Can be unaligned.
+//   Is not allowed to overwrite padding bytes, because it does not know the difference between padding and pixels belonging to a larger image sharing the same pixel buffer.
+struct ImageU16
+: public Image {
+	static const int impl_pixelSize = 2; // Size of one pixel in bytes, the same value that ImageDimensions::getPixelSize returns for PixelFormat::MonoU16.
+	ImageU16(const Buffer &buffer, uint32_t pixelStartOffset, uint32_t width, uint32_t height, uint32_t pixelStride, const PackOrderIndex &packOrderIndex) // Start offset and stride are given in pixels, not bytes.
+	: Image(buffer, ImageDimensions(width, height, pixelStride, packOrderIndex, PixelFormat::MonoU16, pixelStartOffset)) {}
+	IMPL_IMAGE_CONSTRUCTORS(ImageU16, Image)
+};
+
+// The start of each row is aligned to DSR_MAXIMUM_ALIGNMENT for SIMD vectorization and thread safety.
+//   Owns the padding bytes and may overwrite them during SIMD vectorization.
+struct AlignedImageU16
+: public ImageU16 {
+	IMPL_IMAGE_HIGHER_CONSTRUCTORS(AlignedImageU16, ImageU16)
+};
+
+// Can be unaligned.
+//   Is not allowed to overwrite padding bytes, because it does not know the difference between padding and pixels belonging to a larger image sharing the same pixel buffer.
+struct ImageF32
+: public Image {
+	static const int impl_pixelSize = 4; // Size of one pixel in bytes, the same value that ImageDimensions::getPixelSize returns for PixelFormat::MonoF32.
+	ImageF32(const Buffer &buffer, uint32_t pixelStartOffset, uint32_t width, uint32_t height, uint32_t pixelStride, const PackOrderIndex &packOrderIndex) // Start offset and stride are given in pixels, not bytes.
+	: Image(buffer, ImageDimensions(width, height, pixelStride, packOrderIndex, PixelFormat::MonoF32, pixelStartOffset)) {}
+	IMPL_IMAGE_CONSTRUCTORS(ImageF32, Image)
+};
+
+// The start of each row is aligned to DSR_MAXIMUM_ALIGNMENT for SIMD vectorization and thread safety.
+//   Owns the padding bytes and may overwrite them during SIMD vectorization.
+struct AlignedImageF32
+: public ImageF32 {
+	IMPL_IMAGE_HIGHER_CONSTRUCTORS(AlignedImageF32, ImageF32)
+};
+
+// Can be unaligned.
+//   Is not allowed to overwrite padding bytes, because it does not know the difference between padding and pixels belonging to a larger image sharing the same pixel buffer.
+// Can have any pack order.
+struct ImageRgbaU8
+: public Image {
+	static const int impl_pixelSize = 4; // Size of one pixel in bytes, the same value that ImageDimensions::getPixelSize returns for PixelFormat::RgbaU8.
+	ImageRgbaU8(const Buffer &buffer, uint32_t pixelStartOffset, uint32_t width, uint32_t height, uint32_t pixelStride, const PackOrderIndex &packOrderIndex) // Start offset and stride are given in pixels, not bytes.
+	: Image(buffer, ImageDimensions(width, height, pixelStride, packOrderIndex, PixelFormat::RgbaU8, pixelStartOffset)) {}
+	IMPL_IMAGE_CONSTRUCTORS(ImageRgbaU8, Image)
+};
+
+// The start of each row is aligned to DSR_MAXIMUM_ALIGNMENT for SIMD vectorization and thread safety.
+//   Owns the padding bytes and may overwrite them during SIMD vectorization.
+// Can have any pack order.
+struct AlignedImageRgbaU8
+: public ImageRgbaU8 {
+	IMPL_IMAGE_HIGHER_CONSTRUCTORS(AlignedImageRgbaU8, ImageRgbaU8)
+};
+
+// The start of each row is aligned to DSR_MAXIMUM_ALIGNMENT for SIMD vectorization and thread safety.
+//   Owns the padding bytes and may overwrite them during SIMD vectorization.
+// Always in RGBA order.
+struct OrderedImageRgbaU8
+: public AlignedImageRgbaU8 {
+	IMPL_IMAGE_HIGHER_CONSTRUCTORS(OrderedImageRgbaU8, AlignedImageRgbaU8)
+};
+
+#undef IMPL_IMAGE_CONSTRUCTORS
+#undef IMPL_IMAGE_HIGHER_CONSTRUCTORS
 
 }
 
 #endif
-

+ 0 - 39
Source/DFPSR/image/ImageF32.cpp

@@ -1,39 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#include "ImageF32.h"
-#include "internal/imageInternal.h"
-#include "internal/imageTemplate.h"
-
-using namespace dsr;
-
-ImageF32Impl::ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset) :
-  ImageImpl(newWidth, newHeight, newStride, sizeof(float), buffer, startOffset) {
-	assert(buffer_getSize(buffer) - startOffset >= imageInternal::getUsedBytes(this));
-}
-
-ImageF32Impl::ImageF32Impl(int32_t newWidth, int32_t newHeight) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(float), DSR_MAXIMUM_ALIGNMENT), sizeof(float)) {
-}
-
-IMAGE_DEFINITION(ImageF32Impl, 1, float, float);

+ 0 - 44
Source/DFPSR/image/ImageF32.h

@@ -1,44 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#ifndef DFPSR_IMAGE_F32
-#define DFPSR_IMAGE_F32
-
-#include "Image.h"
-
-namespace dsr {
-
-class ImageF32Impl : public ImageImpl {
-public:
-	static const int32_t channelCount = 1;
-	// Inherit constructors
-	using ImageImpl::ImageImpl;
-	ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset);
-	ImageF32Impl(int32_t newWidth, int32_t newHeight);
-	// Macro defined functions
-	IMAGE_DECLARATION(ImageF32Impl, 1, float, float);
-};
-
-}
-
-#endif

+ 3 - 3
Source/DFPSR/image/ImageLoader.h

@@ -24,7 +24,7 @@
 #ifndef DFPSR_IMAGE_LOADER
 #define DFPSR_IMAGE_LOADER
 
-#include "ImageRgbaU8.h"
+#include "Image.h"
 #include "../base/text.h"
 #include <cstdio>
 
@@ -36,10 +36,10 @@ namespace dsr {
 class ImageLoader {
 public:
 	// Load an image from a file. PNG support is a minimum requirement.
-	virtual ImageRgbaU8Impl loadAsRgba(const String& filename) const = 0;
+	virtual ImageRgbaU8 loadAsRgba(const String& filename) const = 0;
 	// Save an image in the PNG format with the given filename.
 	// Returns true on success and false on failure.
-	virtual bool saveAsPng(const ImageRgbaU8Impl &image, const String& filename) const {
+	virtual bool saveAsPng(const ImageRgbaU8 &image, const String& filename) const {
 		printText("saveAsPng is not yet implemented in the image loader!");
 		return false;
 	}

+ 0 - 378
Source/DFPSR/image/ImageRgbaU8.cpp

@@ -1,378 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2017 to 2023 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#include "ImageRgbaU8.h"
-#include "internal/imageInternal.h"
-#include "internal/imageTemplate.h"
-#include "draw.h"
-#include <algorithm>
-#include "../base/simd.h"
-
-using namespace dsr;
-
-static const int pixelSize = 4;
-
-IMAGE_DEFINITION(ImageRgbaU8Impl, pixelSize, Color4xU8, uint8_t);
-
-ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset, const PackOrder &packOrder) :
-  ImageImpl(newWidth, newHeight, newStride, sizeof(Color4xU8), buffer, startOffset), packOrder(packOrder) {
-	assert(buffer_getSize(buffer) - startOffset >= imageInternal::getUsedBytes(this));
-	this->initializeRgbaImage();
-}
-
-ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), DSR_MAXIMUM_ALIGNMENT), sizeof(Color4xU8)) {
-	this->initializeRgbaImage();
-}
-
-// Native canvas constructor
-ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, PackOrderIndex packOrderIndex) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), DSR_MAXIMUM_ALIGNMENT), sizeof(Color4xU8)) {
-	this->packOrder = PackOrder::getPackOrder(packOrderIndex);
-	this->initializeRgbaImage();
-}
-
-bool ImageRgbaU8Impl::isTexture() const {
-	return this->texture.exists();
-}
-
-bool ImageRgbaU8Impl::isTexture(const ImageRgbaU8Impl* image) {
-	return image ? image->texture.exists() : false;
-}
-
-ImageRgbaU8Impl ImageRgbaU8Impl::getWithoutPadding() const {
-	if (this->stride == this->width * pixelSize) {
-		// No padding
-		return *this;
-	} else {
-		// Copy each row without padding
-		ImageRgbaU8Impl result = ImageRgbaU8Impl(this->width, this->height, this->packOrder.packOrderIndex);
-		const SafePointer<uint8_t> sourceRow = imageInternal::getSafeData<uint8_t>(*this);
-		int32_t sourceStride = this->stride;
-		SafePointer<uint8_t> targetRow = imageInternal::getSafeData<uint8_t>(result);
-		int32_t targetStride = result.stride;
-		for (int32_t y = 0; y < this->height; y++) {
-			safeMemoryCopy(targetRow, sourceRow, targetStride);
-			sourceRow += sourceStride;
-			targetRow += targetStride;
-		}
-		return result;
-	}
-}
-
-static void extractChannel(SafePointer<uint8_t> targetData, int targetStride, const SafePointer<uint8_t> sourceData, int sourceStride, int sourceChannels, int channelIndex, int width, int height) {
-	const SafePointer<uint8_t> sourceRow = sourceData + channelIndex;
-	SafePointer<uint8_t> targetRow = targetData;
-	for (int y = 0; y < height; y++) {
-		const SafePointer<uint8_t> sourceElement = sourceRow;
-		SafePointer<uint8_t> targetElement = targetRow;
-		for (int x = 0; x < width; x++) {
-			*targetElement = *sourceElement; // Copy one channel from the soruce
-			sourceElement += sourceChannels; // Jump to the same channel in the next source pixel
-			targetElement += 1; // Jump to the next monochrome target pixel
-		}
-		sourceRow.increaseBytes(sourceStride);
-		targetRow.increaseBytes(targetStride);
-	}
-}
-
-ImageU8Impl ImageRgbaU8Impl::getChannel(int32_t channelIndex) const {
-	// Warning for debug mode
-	assert(channelIndex >= 0 && channelIndex < channelCount);
-	// Safety for release mode
-	if (channelIndex < 0) { channelIndex = 0; }
-	if (channelIndex > channelCount) { channelIndex = channelCount; }
-	ImageU8Impl result(this->width, this->height);
-	extractChannel(imageInternal::getSafeData<uint8_t>(result), result.stride, imageInternal::getSafeData<uint8_t>(*this), this->stride, channelCount, channelIndex, this->width, this->height);
-	return result;
-}
-
-static const int32_t smallestSizeGroup = 5;
-static const int32_t largestSizeGroup = 14;
-static int32_t getSizeGroup(int32_t size) {
-	int32_t group = -1;
-	if (size == 1) {
-		group = 0; // Too small for 16-byte alignment!
-	} else if (size == 2) {
-		group = 1; // Too small for 16-byte alignment! (SSE2)
-	} else if (size == 4) {
-		group = 2; // Too small for 32-byte alignment! (AVX2)
-	} else if (size == 8) {
-		group = 3; // Too small for 64-byte alignment! (AVX3)
-	} else if (size == 16) {
-		group = 4; // Too small for 128-byte alignment!
-	} else if (size == 32) {
-		group = 5; // Smallest allowed texture dimension, allowing 1024-bit SIMD.
-	} else if (size == 64) {
-		group = 6;
-	} else if (size == 128) {
-		group = 7;
-	} else if (size == 256) {
-		group = 8;
-	} else if (size == 512) {
-		group = 9;
-	} else if (size == 1024) {
-		group = 10;
-	} else if (size == 2048) {
-		group = 11;
-	} else if (size == 4096) {
-		group = 12;
-	} else if (size == 8192) {
-		group = 13;
-	} else if (size == 16384) {
-		group = 14; // Largest allowed texture dimension
-	} // Higher dimensions should return -1, so that initializeRgbaImage avoids initializing the image as a texture and isTexture returns false
-	return group;
-}
-
-inline int32_t sizeFromGroup(int32_t group) {
-	return 1 << group;
-}
-
-// Round the size down, unless it is already too small.
-static int32_t roundSize(int32_t size) {
-	for (int groupIndex = smallestSizeGroup; groupIndex < largestSizeGroup; groupIndex++) {
-		int currentSize = sizeFromGroup(groupIndex);
-		if (size < currentSize) {
-			return currentSize;
-		}
-	}
-	return sizeFromGroup(largestSizeGroup);
-}
-
-static int32_t getPyramidSize(int32_t width, int32_t height, int32_t levels) {
-	uint32_t result = 0;
-	uint32_t byteCount = width * height * pixelSize;
-	for (int32_t l = 0; l < levels; l++) {
-		result += byteCount; // Add image size to pyramid size
-		byteCount = byteCount >> 2; // Divide size by 4
-	}
-	return (int32_t)result;
-}
-
-inline U32xX averageColor(const U32xX &colorA, const U32xX &colorB) {
-	// TODO: Expand to 16 bits or use built in average intrinsics for full bit depth.
-	// 7-bit precision for speed.
-	return reinterpret_U32FromU8(reinterpret_U8FromU32((colorA >> 1) & U32xX(0b01111111011111110111111101111111)) + reinterpret_U8FromU32((colorB >> 1) & U32xX(0b01111111011111110111111101111111)));
-}
-
-inline U32xX pairwiseAverageColor(const U32xX &colorA, const U32xX &colorB) {
-	// TODO: Vectorize with 32-bit unzipping of pixels and 8-bit average of channels.
-	// Reference implementation
-	ALIGN_BYTES(DSR_DEFAULT_ALIGNMENT) uint8_t elementsA[laneCountX_8Bit];
-	ALIGN_BYTES(DSR_DEFAULT_ALIGNMENT) uint8_t elementsB[laneCountX_8Bit];
-	ALIGN_BYTES(DSR_DEFAULT_ALIGNMENT) uint8_t elementsR[laneCountX_8Bit];
-	colorA.writeAlignedUnsafe((uint32_t*)elementsA);
-	colorB.writeAlignedUnsafe((uint32_t*)elementsB);
-	int32_t halfPixels = laneCountX_32Bit / 2;
-	for (int p = 0; p < halfPixels; p++) {
-		for (int c = 0; c < 4; c++) {
-			elementsR[p * 4 + c] = uint8_t((uint16_t(elementsA[p * 8 + c]) + uint16_t(elementsA[p * 8 + 4 + c])) >> 1);
-			elementsR[(p + halfPixels) * 4 + c] = uint8_t((uint16_t(elementsB[p * 8 + c]) + uint16_t(elementsB[p * 8 + 4 + c])) >> 1);
-		}
-	}
-	return U32xX::readAlignedUnsafe((uint32_t*)elementsR);
-}
-
-static void downScaleByTwo(SafePointer<uint32_t> targetData, const SafePointer<uint32_t> sourceData, int32_t targetWidth, int32_t targetHeight, int32_t targetStride) {
-	int32_t sourceStride = targetStride * 2;
-	int32_t doubleSourceStride = sourceStride * 2;
-	SafePointer<uint32_t> targetRow = targetData;
-	const SafePointer<uint32_t> sourceRow = sourceData;
-	for (int32_t y = 0; y < targetHeight; y++) {
-		const SafePointer<uint32_t> upperSourcePixel = sourceRow;
-		const SafePointer<uint32_t> lowerSourcePixel = sourceRow;
-		lowerSourcePixel.increaseBytes(sourceStride);
-		SafePointer<uint32_t> targetPixel = targetRow;
-		for (int32_t x = 0; x < targetWidth; x += laneCountX_32Bit) {
-			U32xX upperLeft = U32xX::readAligned(upperSourcePixel, "upperLeftSource in downScaleByTwo");
-			U32xX upperRight = U32xX::readAligned(lowerSourcePixel + laneCountX_32Bit, "upperLeftSource in downScaleByTwo");
-			U32xX lowerLeft = U32xX::readAligned(lowerSourcePixel, "upperLeftSource in downScaleByTwo");
-			U32xX lowerRight = U32xX::readAligned(lowerSourcePixel + laneCountX_32Bit, "upperLeftSource in downScaleByTwo");
-			U32xX upperAverage = pairwiseAverageColor(upperLeft, upperRight);
-			U32xX lowerAverage = pairwiseAverageColor(lowerLeft, lowerRight);
-			U32xX finalAverage = averageColor(upperAverage, lowerAverage);
-			finalAverage.writeAligned(targetPixel, "average result in downScaleByTwo");
-			targetPixel += laneCountX_32Bit;
-			upperSourcePixel += laneCountX_32Bit * 2;
-			lowerSourcePixel += laneCountX_32Bit * 2;
-		}
-		targetRow.increaseBytes(targetStride);
-		sourceRow.increaseBytes(doubleSourceStride);
-	}
-}
-
-static void updatePyramid(TextureRgba &texture, int32_t layerCount) {
-	// Downscale each following layer from the previous.
-	for (int32_t targetIndex = 1; targetIndex < layerCount; targetIndex++) {
-		int32_t sourceIndex = targetIndex - 1;
-		int32_t targetWidth = texture.mips[targetIndex].width;
-		int32_t targetHeight = texture.mips[targetIndex].height;
-		downScaleByTwo(texture.data + texture.mips[targetIndex].startOffset, texture.data + texture.mips[sourceIndex].startOffset, targetWidth, targetHeight, targetWidth * pixelSize);
-	}
-	texture.layerCount = layerCount;
-}
-
-TextureRgbaLayer::TextureRgbaLayer() {}
-
-TextureRgbaLayer::TextureRgbaLayer(uint32_t startOffset, int32_t width, int32_t height) :
-  startOffset(startOffset),
-  widthShift(getSizeGroup(width)),
-  widthMask(width - 1),
-  heightMask(height - 1),
-  width(width),
-  height(height),
-  subWidth(width * 256),
-  subHeight(height * 256) {}
-
-void ImageRgbaU8Impl::generatePyramidStructure(int32_t layerCount) {
-	int32_t currentWidth = this->width;
-	int32_t currentHeight = this->height;
-	// Allocate smaller pyramid images within the buffer
-	uint32_t currentStart = 0;
-	for (int32_t m = 0; m < layerCount; m++) {
-		this->texture.mips[m] = TextureRgbaLayer(currentStart, currentWidth, currentHeight);
-		currentStart += currentWidth * currentHeight;
-		currentWidth /= 2;
-		currentHeight /= 2;
-	}
-	// Fill unused mip levels with duplicates of the last mip level
-	for (int32_t m = layerCount; m < MIP_BIN_COUNT; m++) {
-		// m - 1 is never negative, because layerCount is clamped to at least 1 and nobody would choose zero for MIP_BIN_COUNT.
-		this->texture.mips[m] = this->texture.mips[m - 1];
-	}
-	this->texture.layerCount = layerCount;
-	this->texture.data = imageInternal::getSafeData<uint32_t>(*this);
-}
-
-void ImageRgbaU8Impl::removePyramidStructure() {
-	// The mip layers have offsets relative to the texture's data pointer, which is already compensating for any offset from any parent image.
-	for (int32_t m = 0; m < MIP_BIN_COUNT; m++) {
-		this->texture.mips[m] = TextureRgbaLayer(0, this->width, this->height);
-	}
-	// Declare the old pyramid invalid so that it will not be displayed while rendering, but keep the extra memory for next time it is generated.
-	this->texture.layerCount = 1;
-	this->texture.data = imageInternal::getSafeData<uint32_t>(*this);
-}
-
-void ImageRgbaU8Impl::makeIntoTexture() {
-	// Check if the image is a valid texture.
-	if (!this->isTexture()) {
-		// Get valid dimensions.
-		int newWidth = roundSize(this->width);
-		int newHeight = roundSize(this->height);
-		// Create a new image with the correct dimensions.
-		ImageRgbaU8Impl result = ImageRgbaU8Impl(newWidth, newHeight);
-		// Resize the image content with bi-linear interpolation.
-		imageImpl_resizeToTarget(result, *this, true);
-		// Take over the new image's content.
-		this->buffer = result.buffer;
-		this->width = result.width;
-		this->height = result.height;
-		this->stride = result.stride;
-		this->startOffset = 0; // Starts from the beginning.
-		this->isSubImage = false; // No longer sharing buffer with any parent image.
-	}
-}
-
-void ImageRgbaU8Impl::generatePyramid() {
-	int32_t fullSizeGroup = getSizeGroup(std::min(this->width, this->height));
-	int32_t layerCount = std::min(std::max(fullSizeGroup - smallestSizeGroup, 1), MIP_BIN_COUNT);
-	if (this->texture.layerCount > 1) {
-		// Regenerate smaller images without wasting time with any redundant checks,
-		//   because the image has already been approved the first time it had the pyramid allocated.
-		updatePyramid(this->texture, layerCount);
-	} else {
-		// In the event of having to correct a bad image into a valid texture, there will be two reallocations.
-		this->makeIntoTexture();
-		Buffer oldBuffer = this->buffer;
-		SafePointer<uint32_t> oldData = buffer_getSafeData<uint32_t>(oldBuffer, "Pyramid generation source") + this->startOffset;
-		this->buffer = buffer_create(getPyramidSize(this->width, this->height, layerCount));
-		this->generatePyramidStructure(layerCount);
-		// Copy the image's old content while assuming that there is no padding.
-		safeMemoryCopy(this->texture.data + this->texture.mips[0].startOffset, oldData, this->width * this->height * pixelSize);
-		// Generate smaller images.
-		updatePyramid(this->texture, layerCount);
-		// Once an image had a pyramid generated, the new buffer will remain for as long as the image exists.
-		this->texture.layerCount = layerCount;
-		// Remove start offset because the old data has been cloned to create the new pyramid image.
-		this->startOffset = 0;
-	}
-}
-
-void ImageRgbaU8Impl::removePyramid() {
-	// Duplicate the original image when no longer showing the pyramid.
-	this->removePyramidStructure();
-}
-
-void ImageRgbaU8Impl::initializeRgbaImage() {
-	// If the image fulfills the criteria of a texture
-	if (getSizeGroup(this->width) >= smallestSizeGroup
-	 && getSizeGroup(this->height) >= smallestSizeGroup
-	 && this->stride == this->width * pixelSize) {
-		// Initialize each mip bin to show the original image
-		this->removePyramidStructure();
-	}
-};
-
-Color4xU8 ImageRgbaU8Impl::packRgba(uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha) const {
-	return Color4xU8(this->packOrder.packRgba(red, green, blue, alpha));
-}
-
-Color4xU8 ImageRgbaU8Impl::packRgba(ColorRgbaI32 color) const {
-	return Color4xU8(this->packOrder.packRgba(color.red, color.green, color.blue, color.alpha));
-}
-
-ColorRgbaI32 ImageRgbaU8Impl::unpackRgba(Color4xU8 rgba, const PackOrder& order) {
-	return ColorRgbaI32(
-	  getRed(rgba.packed, order),
-	  getGreen(rgba.packed, order),
-	  getBlue(rgba.packed, order),
-	  getAlpha(rgba.packed, order)
-	);
-}
-
-ColorRgbaI32 ImageRgbaU8Impl::unpackRgba(Color4xU8 rgba) const {
-	return unpackRgba(rgba, this->packOrder);
-}
-
-Color4xU8 ImageRgbaU8Impl::packRgb(uint8_t red, uint8_t green, uint8_t blue) const {
-	return Color4xU8(this->packOrder.packRgba(red, green, blue, 255));
-}
-
-Color4xU8 ImageRgbaU8Impl::packRgb(ColorRgbI32 color) const {
-	return Color4xU8(this->packOrder.packRgba(color.red, color.green, color.blue, 255));
-}
-
-ColorRgbI32 ImageRgbaU8Impl::unpackRgb(Color4xU8 rgb, const PackOrder& order) {
-	return ColorRgbI32(
-	  getRed(rgb.packed, order),
-	  getGreen(rgb.packed, order),
-	  getBlue(rgb.packed, order)
-	);
-}
-
-ColorRgbI32 ImageRgbaU8Impl::unpackRgb(Color4xU8 rgb) const {
-	return unpackRgb(rgb, this->packOrder);
-}
-

+ 0 - 137
Source/DFPSR/image/ImageRgbaU8.h

@@ -1,137 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2017 to 2023 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#ifndef DFPSR_IMAGE_RGBA_U8
-#define DFPSR_IMAGE_RGBA_U8
-
-#include "Color.h"
-#include "Image.h"
-#include "ImageU8.h"
-
-namespace dsr {
-
-// TODO: Check that the start offsets in mip layers are based on the image's own start offset.
-// TODO: Replace the lookup table for pyramid layers with template inline functions, so that it can be vectorized per pixel or 2x2 group using bitwise operations.
-// TODO: Calculate start offset dynamically for textures.
-//       Keep the same order of mip layers, but mask out offset bits from the right side.
-//       When the most significant bit is masked out, it jumps to the full resolution image at offset zero.
-//       Offsets
-//         00000000000000000000000000000000 Full resolution of 64x64
-//         00000000000000000000010000000000 Half resolution of 32x32
-//         00000000000000000000010100000000 Quarter resolution of 16x16
-//         00000000000000000000010101000000 Low resolution of 8x8
-//         00000000000000000000010101010000 Lowest resolution of 4x4
-//       Power of 4 offset masks
-//         11111111111111111100000000000000 Show at most 16384 pixels (clamped to full resolution because no more bits are masked out)
-//         11111111111111111111000000000000 Show at most 4096 pixels (full resolution for the image)
-//         11111111111111111111110000000000 Show at most 1024 pixels
-//         11111111111111111111111100000000 Show at most 256 pixels
-//         11111111111111111111111111000000 Show at most 64 pixels
-//         11111111111111111111111111110000 Show at most 16 pixels
-// PROBLEMS:
-//   * How can stride be calculated in the same way as the start offset?
-//     - Consistently in base two, not using the base 4 mask.
-//     - Limited to the range of available resolutions, not going to stride 512 when the full resolution stride is 256.
-//   * What about the width and height masks, can they reuse the same bit masking to avoid looking up data with scalar operations?
-//   * What should be done about very small textures?
-//     Automatically scale them up to the minimum resolution and leave the original image in the middle of the buffer?
-//     Change minimum size requirements?
-//       This would be the simplest approach and nobody would want their textures up-scaled anyway if one can easily redraw images in a higher resolution.
-
-// Pointing to the parent image using raw pointers for fast rendering. May not exceed the lifetime of the parent image!
-struct TextureRgbaLayer {
-	// Offset from the main texture's data pointer in whole texels.
-	uint32_t startOffset = 0;                                           // Generate by and-masking the smallest image's start offset with a double bit shift
-	// How much should we shift one to the left to get the stride in whole texels.
-	int32_t widthShift = 0;                                             // Subtract one per layer 
-	uint32_t widthMask = 0, heightMask = 0;                             // Shift one bit right per layer
-	// TODO: These dimensions are integers added against floats, which is very expensive.
-	//       Try to apply their multiplication against UV coordinates in an integer scale after getting enough bits for both high resolution and many laps around the texture.
-	int32_t width = 0, height = 0;                                      // Shift one bit right per layer
-	float subWidth = 0.0f, subHeight = 0.0f;                            // Try to use integers, so that these can be shifted
-	TextureRgbaLayer();
-	TextureRgbaLayer(uint32_t startOffset, int32_t width, int32_t height);
-};
-
-// TODO: Try to replace with generated bit masks from inline functions.
-#define MIP_BIN_COUNT 5
-
-// Pointing to the parent image using raw pointers for fast rendering. Do not separate from the image!
-struct TextureRgba {
-	SafePointer<uint32_t> data; // Direct access to the shared buffer's content for faster sampling.
-	// TODO: Remove the array, so that any number of layers can be contained by calculating the masks and offsets.
-	// TODO: Store bit masks and offsets needed to quickly generate the memory offsets for a pixel coordinate at a specified mip layer.
-	TextureRgbaLayer mips[MIP_BIN_COUNT]; // Pointing to all mip levels including the original image
-	int32_t layerCount = 0; // 0 Means that there are no pointers, 1 means that you have a pyramid but only one layer.
-	// Can it be sampled as a texture
-	bool exists() const { return this->layerCount > 0; }
-	// Does it have a mip pyramid generated for smoother sampling
-	// TODO: Rename.
-	bool hasMipBuffer() const { return this->layerCount > 1; }
-};
-
-class ImageRgbaU8Impl : public ImageImpl {
-public:
-	static const int32_t channelCount = 4;
-	PackOrder packOrder;
-	// Macro defined functions
-	IMAGE_DECLARATION(ImageRgbaU8Impl, 4, Color4xU8, uint8_t);
-	// Constructors
-	ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset, const PackOrder &packOrder);
-	ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight);
-	// Native canvas constructor
-	ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, PackOrderIndex packOrderIndex);
-	// The texture view for fast reading
-	TextureRgba texture;
-	// Points to level 0 from all bins to allow rendering
-	void initializeRgbaImage();
-	// Resizes the image to valid texture dimensions
-	void makeIntoTexture();
-	void generatePyramid(); // Fills the following bins with smaller images
-	void removePyramid();
-	bool isTexture() const;
-	static bool isTexture(const ImageRgbaU8Impl* image); // Null cannot be sampled as a texture
-private:
-	void generatePyramidStructure(int32_t layerCount);
-	void removePyramidStructure();
-public:
-	// Conversion to monochrome by extracting a channel
-	ImageU8Impl getChannel(int32_t channelIndex) const;
-	// Clone the image without padding or return the same instance if there is no padding
-	// TODO: Return the unaligned image type, which is incompatible with SIMD operations
-	ImageRgbaU8Impl getWithoutPadding() const;
-	// Packs/unpacks the channels of an RGBA color in an unsigned 32-bit integer
-	Color4xU8 packRgba(uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha) const;
-	Color4xU8 packRgba(ColorRgbaI32 rgba) const;
-	static ColorRgbaI32 unpackRgba(Color4xU8 rgba, const PackOrder& order);
-	ColorRgbaI32 unpackRgba(Color4xU8 rgba) const;
-	// Packs/unpacks the channels of an RGB color in an unsigned 32-bit integer
-	Color4xU8 packRgb(uint8_t red, uint8_t green, uint8_t blue) const;
-	Color4xU8 packRgb(ColorRgbI32 rgb) const;
-	static ColorRgbI32 unpackRgb(Color4xU8 rgb, const PackOrder& order);
-	ColorRgbI32 unpackRgb(Color4xU8 rgb) const;
-};
-
-}
-
-#endif

+ 0 - 39
Source/DFPSR/image/ImageU16.cpp

@@ -1,39 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#include "ImageU16.h"
-#include "internal/imageInternal.h"
-#include "internal/imageTemplate.h"
-
-using namespace dsr;
-
-ImageU16Impl::ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset) :
-  ImageImpl(newWidth, newHeight, newStride, sizeof(uint16_t), buffer, startOffset) {
-	assert(buffer_getSize(buffer) - startOffset >= imageInternal::getUsedBytes(this));
-}
-
-ImageU16Impl::ImageU16Impl(int32_t newWidth, int32_t newHeight) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint16_t), DSR_MAXIMUM_ALIGNMENT), sizeof(uint16_t)) {
-}
-
-IMAGE_DEFINITION(ImageU16Impl, 1, uint16_t, uint16_t);

+ 0 - 45
Source/DFPSR/image/ImageU16.h

@@ -1,45 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#ifndef DFPSR_IMAGE_U16
-#define DFPSR_IMAGE_U16
-
-#include "Image.h"
-
-namespace dsr {
-
-// Warning! Reading or writing a 16-bit integer as a sequence of 8-bit integers may depend on endianness.
-class ImageU16Impl : public ImageImpl {
-public:
-	static const int32_t channelCount = 1;
-	// Inherit constructors
-	using ImageImpl::ImageImpl;
-	ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset);
-	ImageU16Impl(int32_t newWidth, int32_t newHeight);
-	// Macro defined functions
-	IMAGE_DECLARATION(ImageU16Impl, 1, uint16_t, uint16_t);
-};
-
-}
-
-#endif

+ 0 - 39
Source/DFPSR/image/ImageU8.cpp

@@ -1,39 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#include "ImageU8.h"
-#include "internal/imageInternal.h"
-#include "internal/imageTemplate.h"
-
-using namespace dsr;
-
-ImageU8Impl::ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset) :
-  ImageImpl(newWidth, newHeight, newStride, sizeof(uint8_t), buffer, startOffset) {
-	assert(buffer_getSize(buffer) - startOffset >= imageInternal::getUsedBytes(this));
-}
-
-ImageU8Impl::ImageU8Impl(int32_t newWidth, int32_t newHeight) :
-  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint8_t), DSR_MAXIMUM_ALIGNMENT), sizeof(uint8_t)) {
-}
-
-IMAGE_DEFINITION(ImageU8Impl, 1, uint8_t, uint8_t);

+ 0 - 45
Source/DFPSR/image/ImageU8.h

@@ -1,45 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#ifndef DFPSR_IMAGE_U8
-#define DFPSR_IMAGE_U8
-
-#include "Image.h"
-
-namespace dsr {
-
-class ImageU8Impl : public ImageImpl {
-public:
-	static const int32_t channelCount = 1;
-	// Inherit constructors
-	using ImageImpl::ImageImpl;
-	// Constructors
-	ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, Buffer buffer, intptr_t startOffset);
-	ImageU8Impl(int32_t newWidth, int32_t newHeight);
-	// Macro defined functions
-	IMAGE_DECLARATION(ImageU8Impl, 1, uint8_t, uint8_t);
-};
-
-}
-
-#endif

+ 120 - 119
Source/DFPSR/image/PackOrder.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2023 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -25,33 +25,54 @@
 #define DFPSR_IMAGE_PACK_ORDER
 
 #include <cstdint>
-#include "../api/types.h"
-#include "../base/simd.h"
+#include "Color.h"
 #include "../base/endian.h"
+#include "../base/DsrTraits.h"
 #include "../api/stringAPI.h"
+#include "../math/scalar.h"
 
 namespace dsr {
 
-// See types.h for the definition of PackOrderIndex
+// The pack order defines where each color channel should be when uint32_t is interpreted as an array of four bytes using local endianness.
+// Packed into 2 bits in ImageDimensions, because one can assume that future pack orders at least have visible colors sorted by wavelength.
+enum class PackOrderIndex : uint32_t {
+	RGBA, // Red   Green Blue  Alpha
+	BGRA, // Blue  Green Red   Alpha
+	ARGB, // Alpha Red   Green Blue
+	ABGR  // Alpha Blue  Green Red
+};
+
+inline String& string_toStreamIndented(String& target, const PackOrderIndex& index, const ReadableString& indentation) {
+	ReadableString name;
+	if (index == PackOrderIndex::RGBA) {
+		name = U"RGBA";
+	} else if (index == PackOrderIndex::BGRA) {
+		name = U"BGRA";
+	} else if (index == PackOrderIndex::ARGB) {
+		name = U"ARGB";
+	} else if (index == PackOrderIndex::ABGR) {
+		name = U"ABGR";
+	} else {
+		name = U"?";
+	}
+	string_append(target, indentation, name);
+	return target;
+}
 
 struct PackOrder {
 public:
-	// The index that it was constructed from
-	PackOrderIndex packOrderIndex;
 	// Byte array indices for each channel
 	// Indices are the locations of each color, not which color holds each location
 	//   Example:
-	//     The indices for ARGB are (1, 2, 3, 0)
-	//     Because red is second at byte[1], green is third byte[2], blue is last in byte[3] and alpha is first in byte[0]
-	int redIndex, greenIndex, blueIndex, alphaIndex;
+	//     The indices for ARGB are (1, 2, 3, 0), because RGB are placed at byte indices 1..3 and A is placed first at byte index 0.
+	int32_t redIndex, greenIndex, blueIndex, alphaIndex;
 	// Pre-multiplied bit offsets
-	int redOffset, greenOffset, blueOffset, alphaOffset;
+	int32_t redOffset, greenOffset, blueOffset, alphaOffset;
 	uint32_t redMask, greenMask, blueMask, alphaMask;
 private:
-	PackOrder(PackOrderIndex packOrderIndex, int redIndex, int greenIndex, int blueIndex, int alphaIndex) :
-	  packOrderIndex(packOrderIndex),
+	constexpr PackOrder(int32_t redIndex, int32_t greenIndex, int32_t blueIndex, int32_t alphaIndex) :
 	  redIndex(redIndex), greenIndex(greenIndex), blueIndex(blueIndex), alphaIndex(alphaIndex),
-	  redOffset(redIndex * 8), greenOffset(greenIndex * 8), blueOffset(blueIndex * 8), alphaOffset(alphaIndex * 8),
+	  redOffset(redIndex << 3), greenOffset(greenIndex << 3), blueOffset(blueIndex << 3), alphaOffset(alphaIndex << 3),
 	  redMask(ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, this->redOffset)),
 	  greenMask(ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, this->greenOffset)),
 	  blueMask(ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, this->blueOffset)),
@@ -59,24 +80,22 @@ private:
 public:
 	// Constructors
 	PackOrder() :
-	  packOrderIndex(PackOrderIndex::RGBA),
 	  redIndex(0), greenIndex(1), blueIndex(2), alphaIndex(3),
 	  redOffset(0), greenOffset(8), blueOffset(16), alphaOffset(24),
 	  redMask(ENDIAN32_BYTE_0), greenMask(ENDIAN32_BYTE_1), blueMask(ENDIAN32_BYTE_2), alphaMask(ENDIAN32_BYTE_3) {}
 	static PackOrder getPackOrder(PackOrderIndex index) {
-		if (index == PackOrderIndex::RGBA) {
-			return PackOrder(index, 0, 1, 2, 3);
-		} else if (index == PackOrderIndex::BGRA) {
-			return PackOrder(index, 2, 1, 0, 3);
+		// Because the PackOrder constructor is constexpr and all arguments are constant, these pack orders should be generated at compile time.
+		if (index == PackOrderIndex::BGRA) {
+			return PackOrder(2, 1, 0, 3); // PackOrderIndex::BGRA
 		} else if (index == PackOrderIndex::ARGB) {
-			return PackOrder(index, 1, 2, 3, 0);
+			return PackOrder(1, 2, 3, 0); // PackOrderIndex::ARGB
 		} else if (index == PackOrderIndex::ABGR) {
-			return PackOrder(index, 3, 2, 1, 0);
+			return PackOrder(3, 2, 1, 0); // PackOrderIndex::ABGR
 		} else {
-			printText("Warning! Unknown packing order index ", index, ". Falling back on RGBA.");
-			return PackOrder(index, 0, 1, 2, 3);
+			return PackOrder(0, 1, 2, 3); // PackOrderIndex::RGBA
 		}
 	}
+	// Pack the channels into a pixel color.
 	uint32_t packRgba(uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha) const {
 		uint32_t result;
 		uint8_t *channels = (uint8_t*)(&result);
@@ -86,68 +105,106 @@ public:
 		channels[this->alphaIndex] = alpha;
 		return result;
 	}
+	// Limit color to a 0..255 range and pack the channels into a pixel color.
+	uint32_t saturateAndPackRgba(const ColorRgbaI32& color) {
+		return this->packRgba(clamp(0, color.red, 255), clamp(0, color.green, 255), clamp(0, color.blue, 255), clamp(0, color.alpha, 255));
+	}
+	// A faster way of limiting input when you are sure that it won't overflow.
+	uint32_t truncateAndPackRgba(const ColorRgbaI32& color) {
+		return this->packRgba((uint8_t)color.red, (uint8_t)color.green, (uint8_t)color.blue, (uint8_t)color.alpha);
+	}
+	// The inverse of packRgba putting the channels back in order.
+	ColorRgbaI32 unpackRgba(uint32_t packedColor) {
+		uint8_t *channels = (uint8_t*)(&packedColor);
+		return ColorRgbaI32(channels[this->redIndex], channels[this->greenIndex], channels[this->blueIndex], channels[this->alphaIndex]);
+	}
 };
 
-inline bool operator==(const PackOrder &left, const PackOrder &right) {
-	return left.packOrderIndex == right.packOrderIndex;
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+inline U packOrder_getRed(U color) {
+	return color & ENDIAN32_BYTE_0;
+}
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+inline U packOrder_getRed(U color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.redMask, U(order.redOffset));
+}
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+inline U packOrder_getGreen(U color) {
+	return ENDIAN_NEG_ADDR_IMM(color & ENDIAN32_BYTE_1, 8);
+}
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+inline U packOrder_getGreen(U color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.greenMask, U(order.greenOffset));
+}
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+inline U packOrder_getBlue(U color) {
+	return ENDIAN_NEG_ADDR_IMM(color & ENDIAN32_BYTE_2, 16);
+}
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+inline U packOrder_getBlue(U color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.blueMask, U(order.blueOffset));
+}
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+inline U packOrder_getAlpha(U color) {
+	return ENDIAN_NEG_ADDR_IMM(color & ENDIAN32_BYTE_3, 24);
+}
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+inline U packOrder_getAlpha(U color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.alphaMask, U(order.alphaOffset));
 }
 
 // Each input 32-bit element is from 0 to 255. Otherwise, the remainder will leak to other elements.
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-T packBytes(const T &s0, const T &s1, const T &s2) {
-	return s0 | ENDIAN_POS_ADDR(s1, 8) | ENDIAN_POS_ADDR(s2, 16);
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+U packOrder_packBytes(const U &s0, const U &s1, const U &s2) {
+	return s0 | ENDIAN_POS_ADDR_IMM(s1, 8) | ENDIAN_POS_ADDR_IMM(s2, 16);
 }
 // Using a specified packing order
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-T packBytes(const T &s0, const T &s1, const T &s2, const PackOrder &order) {
-	return ENDIAN_POS_ADDR(s0, order.redOffset)
-	     | ENDIAN_POS_ADDR(s1, order.greenOffset)
-	     | ENDIAN_POS_ADDR(s2, order.blueOffset);
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+U packOrder_packBytes(const U &s0, const U &s1, const U &s2, const PackOrder &order) {
+	return ENDIAN_POS_ADDR(s0, U(order.redOffset))
+		 | ENDIAN_POS_ADDR(s1, U(order.greenOffset))
+		 | ENDIAN_POS_ADDR(s2, U(order.blueOffset));
 }
 
 // Each input 32-bit element is from 0 to 255. Otherwise, the remainder will leak to other elements.
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-T packBytes(const T &s0, const T &s1, const T &s2, const T &s3) {
-	return s0 | ENDIAN_POS_ADDR(s1, 8) | ENDIAN_POS_ADDR(s2, 16) | ENDIAN_POS_ADDR(s3, 24);
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+U packOrder_packBytes(const U &s0, const U &s1, const U &s2, const U &s3) {
+	return s0 | ENDIAN_POS_ADDR_IMM(s1, 8) | ENDIAN_POS_ADDR_IMM(s2, 16) | ENDIAN_POS_ADDR_IMM(s3, 24);
 }
 // Using a specified packing order
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-T packBytes(const T &s0, const T &s1, const T &s2, const T &s3, const PackOrder &order) {
-	return ENDIAN_POS_ADDR(s0, order.redOffset)
-	     | ENDIAN_POS_ADDR(s1, order.greenOffset)
-	     | ENDIAN_POS_ADDR(s2, order.blueOffset)
-	     | ENDIAN_POS_ADDR(s3, order.alphaOffset);
+template<typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U))> // Accepting uint32_t, U32x4, U32x8 ... U32xX
+U packOrder_packBytes(const U &s0, const U &s1, const U &s2, const U &s3, const PackOrder &order) {
+	return ENDIAN_POS_ADDR(s0, U(order.redOffset))
+		 | ENDIAN_POS_ADDR(s1, U(order.greenOffset))
+		 | ENDIAN_POS_ADDR(s2, U(order.blueOffset))
+		 | ENDIAN_POS_ADDR(s3, U(order.alphaOffset));
 }
 
 // Pack separate floats into saturated bytes
-inline U32x4 floatToSaturatedByte(const F32x4 &s0, const F32x4 &s1, const F32x4 &s2, const F32x4 &s3) {
-	return packBytes(
-	  truncateToU32(s0.clamp(0.1f, 255.1f)),
-	  truncateToU32(s1.clamp(0.1f, 255.1f)),
-	  truncateToU32(s2.clamp(0.1f, 255.1f)),
-	  truncateToU32(s3.clamp(0.1f, 255.1f))
-	);
-}
-inline U32x8 floatToSaturatedByte(const F32x8 &s0, const F32x8 &s1, const F32x8 &s2, const F32x8 &s3) {
-	return packBytes(
+//   From float to uint32_t
+//   From F32x4 to U32x4
+//   From F32x8 to U32x8
+//   From F32xX to U32xX
+//   From F32xF to U32xF
+template<typename U, typename F, DSR_ENABLE_IF(
+	 DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U)
+  && DSR_CHECK_PROPERTY(DsrTrait_Any_F32, F)
+)>
+inline U packOrder_floatToSaturatedByte(const F &s0, const F &s1, const F &s2, const F &s3) {
+	return packOrder_packBytes(
 	  truncateToU32(s0.clamp(0.1f, 255.1f)),
 	  truncateToU32(s1.clamp(0.1f, 255.1f)),
 	  truncateToU32(s2.clamp(0.1f, 255.1f)),
 	  truncateToU32(s3.clamp(0.1f, 255.1f))
 	);
 }
-// Using a specified packing order
-inline U32x4 floatToSaturatedByte(const F32x4 &s0, const F32x4 &s1, const F32x4 &s2, const F32x4 &s3, const PackOrder &order) {
-	return packBytes(
-	  truncateToU32(s0.clamp(0.1f, 255.1f)),
-	  truncateToU32(s1.clamp(0.1f, 255.1f)),
-	  truncateToU32(s2.clamp(0.1f, 255.1f)),
-	  truncateToU32(s3.clamp(0.1f, 255.1f)),
-	  order
-	);
-}
-inline U32x8 floatToSaturatedByte(const F32x8 &s0, const F32x8 &s1, const F32x8 &s2, const F32x8 &s3, const PackOrder &order) {
-	return packBytes(
+// Using a specified pack order
+template<typename U, typename F, DSR_ENABLE_IF(
+	 DSR_CHECK_PROPERTY(DsrTrait_Any_U32, U)
+  && DSR_CHECK_PROPERTY(DsrTrait_Any_F32, F)
+)>
+inline U packOrder_floatToSaturatedByte(const F &s0, const F &s1, const F &s2, const F &s3, const PackOrder &order) {
+	return packOrder_packBytes(
 	  truncateToU32(s0.clamp(0.1f, 255.1f)),
 	  truncateToU32(s1.clamp(0.1f, 255.1f)),
 	  truncateToU32(s2.clamp(0.1f, 255.1f)),
@@ -156,62 +213,6 @@ inline U32x8 floatToSaturatedByte(const F32x8 &s0, const F32x8 &s1, const F32x8
 	);
 }
 
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-inline T getRed(T color) {
-	return color & ENDIAN32_BYTE_0;
-}
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-inline T getRed(T color, const PackOrder &order) {
-	return ENDIAN_NEG_ADDR(color & order.redMask, order.redOffset);
-}
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-inline T getGreen(T color) {
-	return ENDIAN_NEG_ADDR(color & ENDIAN32_BYTE_1, 8);
-}
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-inline T getGreen(T color, const PackOrder &order) {
-	return ENDIAN_NEG_ADDR(color & order.greenMask, order.greenOffset);
-}
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-inline T getBlue(T color) {
-	return ENDIAN_NEG_ADDR(color & ENDIAN32_BYTE_2, 16);
-}
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-inline T getBlue(T color, const PackOrder &order) {
-	return ENDIAN_NEG_ADDR(color & order.blueMask, order.blueOffset);
-}
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-inline T getAlpha(T color) {
-	return ENDIAN_NEG_ADDR(color & ENDIAN32_BYTE_3, 24);
-}
-template<typename T> // Accepting uint32_t, U32x4, U32x8...
-inline T getAlpha(T color, const PackOrder &order) {
-	return ENDIAN_NEG_ADDR(color & order.alphaMask, order.alphaOffset);
-}
-
-inline String getName(PackOrderIndex index) {
-	if (index == PackOrderIndex::RGBA) {
-		return U"RGBA";
-	} else if (index == PackOrderIndex::BGRA) {
-		return U"BGRA";
-	} else if (index == PackOrderIndex::ARGB) {
-		return U"ARGB";
-	} else if (index == PackOrderIndex::ABGR) {
-		return U"ABGR";
-	} else {
-		return U"?";
-	}
-}
-inline String& string_toStreamIndented(String& target, const PackOrderIndex& source, const ReadableString& indentation) {
-	string_append(target, indentation, getName(source));
-	return target;
-}
-inline String& string_toStreamIndented(String& target, const PackOrder& source, const ReadableString& indentation) {
-	string_append(target, indentation, getName(source.packOrderIndex));
-	return target;
-}
-
 }
 
 #endif
-

+ 106 - 0
Source/DFPSR/image/Texture.h

@@ -0,0 +1,106 @@
+
+// zlib open source license
+//
+// Copyright (c) 2025 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_TEXTURE_TYPES
+#define DFPSR_TEXTURE_TYPES
+
+#include "Image.h"
+#include "../base/noSimd.h" // Scalar versions of the SIMD functions for creating template functions both with and without SIMD.
+#include "../base/DsrTraits.h" // Type traits for constraining the argument types of template functions.
+#include "../math/scalar.h"
+
+namespace dsr {
+
+// MIP is an acronym of the Latin phrase "multum in parvo", meaning much in little.
+static const uint32_t DSR_MIP_LEVEL_COUNT = 16;
+
+// Mip index 0 is full resolution.
+// Mip index 1 is half resolution.
+// Mip index 2 is quarter resolution.
+// ...
+struct Texture {
+	// Pixel data for all mip levels, allocated by the constructor. Left default constructed on failure.
+	Buffer impl_buffer;
+	// Base-two logarithms of the highest resolution.
+	uint32_t impl_log2width = 0;
+	uint32_t impl_log2height = 0;
+	// Mip level indices from 0 to impl_maxMipLevel.
+	uint32_t impl_maxMipLevel = 0;
+	// Number of pixels before the largest mip level.
+	uint32_t impl_startOffset = 0;
+	// Pixel count of the largest mip level minus one.
+	uint32_t impl_maxLevelMask = 0;
+	// Tiling of unsigned pixel coordinates using bit masks.
+	//   The min masks are the dimensions of the smallest mip level minus one.
+	//   The max masks are the dimensions of the largest mip level minus one.
+	uint32_t impl_minWidthOrMask = 0;
+	uint32_t impl_minHeightOrMask = 0;
+	uint32_t impl_maxWidthAndMask = 0;
+	uint32_t impl_maxHeightAndMask = 0;
+	// Maximum dimensions for calculating mip level.
+	float impl_floatMaxWidth = 0.0f;
+	float impl_floatMaxHeight = 0.0f;
+	// What each pixel contains.
+	uint8_t impl_pixelFormat = 0;
+	Texture() {}
+	// TODO: Allow creating a single layer from an existing pixel buffer, which must be free from padding.
+	//       If not using multi-threading to write to an image, one can use less than a cache line for alignment.
+	//       Store a bit in image saying if the image is a thread-safe write target with cache aligned rows.
+	// Allocates a pyramid of mip levels from index 0 (full resolution) to maxMipLevel.
+	//   log2width and log2height are the base-two logarithms of the full resolution's dimensions.
+	//   maxMipLevel is clamped to DSR_MIP_LEVEL_COUNT - 1 and may not exceed log2width or log2height.
+	//   format is stored in impl_pixelFormat and pixelSize is the number of bytes per pixel.
+	// On failure, impl_pixelFormat is set to zero and no buffer is allocated.
+	Texture(uint32_t log2width, uint32_t log2height, uint32_t maxMipLevel, PixelFormat format, uint32_t pixelSize)
+	: impl_log2width(log2width), impl_log2height(log2height), impl_pixelFormat(uint8_t(format)) {
+		// The arguments are unsigned, so only the upper bound needs clamping.
+		if (maxMipLevel >= DSR_MIP_LEVEL_COUNT) maxMipLevel = DSR_MIP_LEVEL_COUNT - 1;
+		// Store the clamped value, so that the member never refers to levels that were not allocated.
+		this->impl_maxMipLevel = maxMipLevel;
+		if (maxMipLevel > log2width || maxMipLevel > log2height) {
+			// Compared without subtraction, because unsigned differences wrap around instead of becoming negative.
+			// TODO: Indicate failure.
+			this->impl_pixelFormat = 0;
+		} else if (log2width > 31 || log2height > 31 - log2width) {
+			// Shifting a 32-bit integer by 32 or more bits is undefined, and the pixels could not be indexed using 32 bits.
+			// TODO: Indicate failure to index pixels using 32-bit gather.
+			this->impl_pixelFormat = 0;
+		} else {
+			uint32_t highestLayerPixelCount = uint32_t(1) << (log2width + log2height);
+			// Each level has a quarter of the previous level's pixels, so the counts occupy different bits
+			//   and a bitwise or gives the same result as adding them together.
+			//   The sum of all levels is always less than 2^32 when the largest level is at most 2^31 pixels.
+			uint64_t pixelCount = 0;
+			uint32_t levelPixelCount = highestLayerPixelCount;
+			for (uint32_t level = 0; level <= maxMipLevel; level++) {
+				pixelCount = pixelCount | levelPixelCount;
+				levelPixelCount = levelPixelCount >> 2;
+			}
+			// Calculate the allocation size using 64 bits to detect sizes that would wrap around in 32 bits.
+			uint64_t byteCount = pixelCount * uint64_t(pixelSize);
+			if (byteCount > uint64_t(0xFFFFFFFF)) {
+				// TODO: Indicate failure to allocate the buffer.
+				this->impl_pixelFormat = 0;
+			} else {
+				// Smaller mip levels are stored before the largest level.
+				this->impl_startOffset = (uint32_t)pixelCount & ~highestLayerPixelCount;
+				this->impl_maxLevelMask = highestLayerPixelCount - 1;
+				this->impl_minWidthOrMask = (uint32_t(1) << (log2width - maxMipLevel)) - 1;
+				this->impl_minHeightOrMask = (uint32_t(1) << (log2height - maxMipLevel)) - 1;
+				this->impl_maxWidthAndMask = (uint32_t(1) << log2width) - 1;
+				this->impl_maxHeightAndMask = (uint32_t(1) << log2height) - 1;
+				this->impl_floatMaxWidth = float((uint32_t(1) << log2width));
+				this->impl_floatMaxHeight = float((uint32_t(1) << log2height));
+				this->impl_buffer = buffer_create((uint32_t)byteCount);
+			}
+		}
+	}
+};
+
+// A texture created with PixelFormat::RgbaU8, where each pixel takes sizeof(uint32_t) bytes.
+struct TextureRgbaU8 : public Texture {
+	// Default construction delegates to Texture's default constructor, which allocates nothing.
+	TextureRgbaU8() {}
+	// log2width and log2height are the base-two logarithms of the full resolution's dimensions.
+	// maxMipLevel is limited to the smallest of log2width, log2height and the requested value before being passed to Texture.
+	TextureRgbaU8(uint32_t log2width, uint32_t log2height, uint32_t maxMipLevel = DSR_MIP_LEVEL_COUNT - 1)
+	: Texture(log2width, log2height, min(log2width, log2height, maxMipLevel), PixelFormat::RgbaU8, sizeof(uint32_t)) {}
+};
+
+}
+
+#endif

+ 0 - 1576
Source/DFPSR/image/draw.cpp

@@ -1,1576 +0,0 @@
-// zlib open source license
-// zlib open source license
-//
-// Copyright (c) 2018 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#include "../base/simd.h"
-#include "draw.h"
-#include "internal/imageInternal.h"
-#include "../math/scalar.h"
-#include <limits>
-
-using namespace dsr;
-
-// Preconditions:
-//   0 <= a <= 255
-//   0 <= b <= 255
-// Postconditions:
-//   Returns the normalized multiplication of a and b, where the 0..255 range represents decimal values from 0.0 to 1.0.
-//   The result may not be less than zero or larger than any of the inputs.
-// Examples:
-//   normalizedByteMultiplication(0, 0) = 0
-//   normalizedByteMultiplication(x, 0) = 0
-//   normalizedByteMultiplication(0, x) = 0
-//   normalizedByteMultiplication(x, 255) = x
-//   normalizedByteMultiplication(255, x) = x
-//   normalizedByteMultiplication(255, 255) = 255
-static inline uint32_t normalizedByteMultiplication(uint32_t a, uint32_t b) {
-	// Approximate the reciprocal of an unsigned byte's maximum value 255 for normalization
-	//   256³ / 255 ≈ 65793
-	// Truncation goes down, so add half a unit before rounding to get the closest value
-	//   2^24 / 2 = 8388608
-	// No overflow for unsigned 32-bit integers
-	//   255² * 65793 + 8388608 = 4286578433 < 2^32
-	return (a * b * 65793 + 8388608) >> 24;
-}
-
-// True iff high and low bytes are equal
-//   Equivalent to value % 257 == 0 because A + B * 256 = A * 257 when A = B.
-inline bool isUniformByteU16(uint16_t value) {
-	return (value & 0x00FF) == ((value & 0xFF00) >> 8);
-}
-
-// -------------------------------- Drawing shapes --------------------------------
-
-template <typename COLOR_TYPE>
-static inline void drawSolidRectangleAssign(ImageImpl &target, int left, int top, int right, int bottom, COLOR_TYPE color) {
-	int leftBound = std::max(0, left);
-	int topBound = std::max(0, top);
-	int rightBound = std::min(right, target.width);
-	int bottomBound = std::min(bottom, target.height);
-	int stride = target.stride;
-	SafePointer<COLOR_TYPE> rowData = imageInternal::getSafeData<COLOR_TYPE>(target, topBound);
-	rowData += leftBound;
-	for (int y = topBound; y < bottomBound; y++) {
-		SafePointer<COLOR_TYPE> pixelData = rowData;
-		for (int x = leftBound; x < rightBound; x++) {
-			pixelData.get() = color;
-			pixelData += 1;
-		}
-		rowData.increaseBytes(stride);
-	}
-}
-
-template <typename COLOR_TYPE>
-static inline void drawSolidRectangleMemset(ImageImpl &target, int left, int top, int right, int bottom, uint8_t uniformByte) {
-	int leftBound = std::max(0, left);
-	int topBound = std::max(0, top);
-	int rightBound = std::min(right, target.width);
-	int bottomBound = std::min(bottom, target.height);
-	if (rightBound > leftBound && bottomBound > topBound) {
-		int stride = target.stride;
-		SafePointer<COLOR_TYPE> rowData = imageInternal::getSafeData<COLOR_TYPE>(target, topBound);
-		rowData += leftBound;
-		int filledWidth = rightBound - leftBound;
-		int rowSize = filledWidth * sizeof(COLOR_TYPE);
-		int rowCount = bottomBound - topBound;
-		if (!target.isSubImage && filledWidth == target.width) {
-			// Write over any padding for parent images owning the whole buffer.
-			// Including parent images with sub-images using the same data
-			//   because no child image may display the parent-image's padding bytes.
-			safeMemorySet(rowData, uniformByte, (stride * (rowCount - 1)) + rowSize);
-		} else if (rowSize == stride) {
-			// When the filled row stretches all the way from left to right in the main allocation
-			//   there's no unseen pixels being overwritten in other images sharing the buffer.
-			// This case handles sub-images that uses the full width of
-			//   the parent image which doesn't have any padding.
-			safeMemorySet(rowData, uniformByte, rowSize * rowCount);
-		} else {
-			// Fall back on using one memset operation per row.
-			// This case is for sub-images that must preserve interleaved pixel rows belonging
-			//   to other images that aren't visible and therefore not owned by this image.
-			for (int y = topBound; y < bottomBound; y++) {
-				safeMemorySet(rowData, uniformByte, rowSize);
-				rowData.increaseBytes(stride);
-			}
-		}
-	}
-}
-
-void dsr::imageImpl_draw_solidRectangle(ImageU8Impl& image, const IRect& bound, int color) {
-	if (color < 0) { color = 0; }
-	if (color > 255) { color = 255; }
-	drawSolidRectangleMemset<uint8_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), color);
-}
-
-void dsr::imageImpl_draw_solidRectangle(ImageU16Impl& image, const IRect& bound, int color) {
-	if (color < 0) { color = 0; }
-	if (color > 65535) { color = 65535; }
-	uint16_t uColor = color;
-	if (isUniformByteU16(uColor)) {
-		drawSolidRectangleMemset<uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), 0);
-	} else {
-		drawSolidRectangleAssign<uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), uColor);
-	}
-}
-
-void dsr::imageImpl_draw_solidRectangle(ImageF32Impl& image, const IRect& bound, float color) {
-	if (color == 0.0f) {
-		drawSolidRectangleMemset<float>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), 0);
-	} else {
-		drawSolidRectangleAssign<float>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), color);
-	}
-}
-
-void dsr::imageImpl_draw_solidRectangle(ImageRgbaU8Impl& image, const IRect& bound, const ColorRgbaI32& color) {
-	Color4xU8 packedColor = image.packRgba(color.saturate());
-	if (packedColor.isUniformByte()) {
-		drawSolidRectangleMemset<Color4xU8>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), packedColor.channels[0]);
-	} else {
-		drawSolidRectangleAssign<Color4xU8>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), packedColor);
-	}
-}
-
-template <typename IMAGE_TYPE, typename COLOR_TYPE>
-inline void drawLineSuper(IMAGE_TYPE &target, int x1, int y1, int x2, int y2, COLOR_TYPE color) {
-	if (y1 == y2) {
-		// Sideways
-		int left = std::min(x1, x2);
-		int right = std::max(x1, x2);
-		for (int x = left; x <= right; x++) {
-			IMAGE_TYPE::writePixel(target, x, y1, color);
-		}
-	} else if (x1 == x2) {
-		// Down
-		int top = std::min(y1, y2);
-		int bottom = std::max(y1, y2);
-		for (int y = top; y <= bottom; y++) {
-			IMAGE_TYPE::writePixel(target, x1, y, color);
-		}
-	} else {
-		if (std::abs(y2 - y1) >= std::abs(x2 - x1)) {
-			if (y2 < y1) {
-				swap(x1, x2);
-				swap(y1, y2);
-			}
-			assert(y2 > y1);
-			if (x2 > x1) {
-				// Down right
-				int x = x1;
-				int y = y1;
-				int tilt = (x2 - x1) * 2;
-				int maxError = y2 - y1;
-				int error = 0;
-				while (y <= y2) {
-					IMAGE_TYPE::writePixel(target, x, y, color);
-					error += tilt;
-					if (error >= maxError) {
-						x++;
-						error -= maxError * 2;
-					}
-					y++;
-				}
-			} else {
-				// Down left
-				int x = x1;
-				int y = y1;
-				int tilt = (x1 - x2) * 2;
-				int maxError = y2 - y1;
-				int error = 0;
-				while (y <= y2) {
-					IMAGE_TYPE::writePixel(target, x, y, color);
-					error += tilt;
-					if (error >= maxError) {
-						x--;
-						error -= maxError * 2;
-					}
-					y++;
-				}
-			}
-		} else {
-			if (x2 < x1) {
-				swap(x1, x2);
-				swap(y1, y2);
-			}
-			assert(x2 > x1);
-			if (y2 > y1) {
-				// Down right
-				int x = x1;
-				int y = y1;
-				int tilt = (y2 - y1) * 2;
-				int maxError = x2 - x1;
-				int error = 0;
-				while (x <= x2) {
-					IMAGE_TYPE::writePixel(target, x, y, color);
-					error += tilt;
-					if (error >= maxError) {
-						y++;
-						error -= maxError * 2;
-					}
-					x++;
-				}
-			} else {
-				// Up right
-				int x = x1;
-				int y = y1;
-				int tilt = (y1 - y2) * 2;
-				int maxError = x2 - x1;
-				int error = 0;
-				while (x <= x2) {
-					IMAGE_TYPE::writePixel(target, x, y, color);
-					error += tilt;
-					if (error >= maxError) {
-						y--;
-						error -= maxError * 2;
-					}
-					x++;
-				}
-			}
-		}
-	}
-}
-
-void dsr::imageImpl_draw_line(ImageU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
-	if (color < 0) { color = 0; }
-	if (color > 255) { color = 255; }
-	drawLineSuper<ImageU8Impl, uint8_t>(image, x1, y1, x2, y2, color);
-}
-
-void dsr::imageImpl_draw_line(ImageU16Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
-	if (color < 0) { color = 0; }
-	if (color > 65535) { color = 65535; }
-	drawLineSuper<ImageU16Impl, uint16_t>(image, x1, y1, x2, y2, color);
-}
-
-void dsr::imageImpl_draw_line(ImageF32Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color) {
-	drawLineSuper<ImageF32Impl, float>(image, x1, y1, x2, y2, color);
-}
-
-void dsr::imageImpl_draw_line(ImageRgbaU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color) {
-	drawLineSuper<ImageRgbaU8Impl, Color4xU8>(image, x1, y1, x2, y2, image.packRgba(color.saturate()));
-}
-
-// -------------------------------- Drawing images --------------------------------
-
-// A packet with the dimensions of an image
-struct ImageDimensions {
-	// width is the number of used pixels on each row.
-	// height is the number of rows.
-	// stride is the byte offset from one row to another including any padding.
-	// pixelSize is the byte offset from one pixel to another from left to right.
-	int32_t width, height, stride, pixelSize;
-	ImageDimensions() : width(0), height(0), stride(0), pixelSize(0) {}
-	ImageDimensions(const ImageImpl& image) :
-	  width(image.width), height(image.height), stride(image.stride), pixelSize(image.pixelSize) {}
-};
-
-struct ImageWriter : public ImageDimensions {
-	uint8_t *data;
-	ImageWriter(const ImageDimensions &dimensions, uint8_t *data) :
-	  ImageDimensions(dimensions), data(data) {}
-};
-
-struct ImageReader : public ImageDimensions {
-	const uint8_t *data;
-	ImageReader(const ImageDimensions &dimensions, const uint8_t *data) :
-	  ImageDimensions(dimensions), data(data) {}
-};
-
-static ImageWriter getWriter(ImageImpl &image) {
-	return ImageWriter(ImageDimensions(image), buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset);
-}
-
-static ImageReader getReader(const ImageImpl &image) {
-	return ImageReader(ImageDimensions(image), buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset);
-}
-
-static ImageImpl getGenericSubImage(const ImageImpl &image, int32_t left, int32_t top, int32_t width, int32_t height) {
-	assert(left >= 0 && top >= 0 && width >= 1 && height >= 1 && left + width <= image.width && top + height <= image.height);
-	intptr_t newOffset = image.startOffset + (left * image.pixelSize) + (top * image.stride);
-	return ImageImpl(width, height, image.stride, image.pixelSize, image.buffer, newOffset);
-}
-
-struct ImageIntersection {
-	ImageWriter subTarget;
-	ImageReader subSource;
-	ImageIntersection(const ImageWriter &subTarget, const ImageReader &subSource) :
-	  subTarget(subTarget), subSource(subSource) {}
-	static bool canCreate(ImageImpl &target, const ImageImpl &source, int32_t left, int32_t top) {
-		int32_t targetRegionRight = left + source.width;
-		int32_t targetRegionBottom = top + source.height;
-		return left < target.width && top < target.height && targetRegionRight > 0 && targetRegionBottom > 0;
-	}
-	// Only call if canCreate passed with the same arguments
-	static ImageIntersection create(ImageImpl &target, const ImageImpl &source, int32_t left, int32_t top) {
-		int32_t targetRegionRight = left + source.width;
-		int32_t targetRegionBottom = top + source.height;
-		assert(ImageIntersection::canCreate(target, source, left, top));
-		// Check if the source has to be clipped
-		if (left < 0 || top < 0 || targetRegionRight > target.width || targetRegionBottom > target.height) {
-			int32_t clipLeft = std::max(0, -left);
-			int32_t clipTop = std::max(0, -top);
-			int32_t clipRight = std::max(0, targetRegionRight - target.width);
-			int32_t clipBottom = std::max(0, targetRegionBottom - target.height);
-			int32_t newWidth = source.width - (clipLeft + clipRight);
-			int32_t newHeight = source.height - (clipTop + clipBottom);
-			assert(newWidth > 0 && newHeight > 0);
-			// Partial drawing
-			ImageImpl subTarget = getGenericSubImage(target, left + clipLeft, top + clipTop, newWidth, newHeight);
-			ImageImpl subSource = getGenericSubImage(source, clipLeft, clipTop, newWidth, newHeight);
-			return ImageIntersection(getWriter(subTarget), getReader(subSource));
-		} else {
-			// Full drawing
-			ImageImpl subTarget = getGenericSubImage(target, left, top, source.width, source.height);
-			return ImageIntersection(getWriter(subTarget), getReader(source));
-		}
-	}
-};
-
-#define ITERATE_ROWS(WRITER, READER, OPERATION) \
-{ \
-	uint8_t *targetRow = WRITER.data; \
-	const uint8_t *sourceRow = READER.data; \
-	for (int32_t y = 0; y < READER.height; y++) { \
-		OPERATION; \
-		targetRow += WRITER.stride; \
-		sourceRow += READER.stride; \
-	} \
-}
-
-#define ITERATE_PIXELS(WRITER, READER, OPERATION) \
-{ \
-	uint8_t *targetRow = WRITER.data; \
-	const uint8_t *sourceRow = READER.data; \
-	for (int32_t y = 0; y < READER.height; y++) { \
-		uint8_t *targetPixel = targetRow; \
-		const uint8_t *sourcePixel = sourceRow; \
-		for (int32_t x = 0; x < READER.width; x++) { \
-			{OPERATION;} \
-			targetPixel += WRITER.pixelSize; \
-			sourcePixel += READER.pixelSize; \
-		} \
-		targetRow += WRITER.stride; \
-		sourceRow += READER.stride; \
-	} \
-}
-
-#define ITERATE_PIXELS_2(WRITER1, READER1, WRITER2, READER2, OPERATION) \
-{ \
-	uint8_t *targetRow1 = WRITER1.data; \
-	uint8_t *targetRow2 = WRITER2.data; \
-	const uint8_t *sourceRow1 = READER1.data; \
-	const uint8_t *sourceRow2 = READER2.data; \
-	int minWidth = std::min(READER1.width, READER2.width); \
-	int minHeight = std::min(READER1.height, READER2.height); \
-	for (int32_t y = 0; y < minHeight; y++) { \
-		uint8_t *targetPixel1 = targetRow1; \
-		uint8_t *targetPixel2 = targetRow2; \
-		const uint8_t *sourcePixel1 = sourceRow1; \
-		const uint8_t *sourcePixel2 = sourceRow2; \
-		for (int32_t x = 0; x < minWidth; x++) { \
-			{OPERATION;} \
-			targetPixel1 += WRITER1.pixelSize; \
-			targetPixel2 += WRITER2.pixelSize; \
-			sourcePixel1 += READER1.pixelSize; \
-			sourcePixel2 += READER2.pixelSize; \
-		} \
-		targetRow1 += WRITER1.stride; \
-		targetRow2 += WRITER2.stride; \
-		sourceRow1 += READER1.stride; \
-		sourceRow2 += READER2.stride; \
-	} \
-}
-
-#define ITERATE_PIXELS_3(WRITER1, READER1, WRITER2, READER2, WRITER3, READER3, OPERATION) \
-{ \
-	uint8_t *targetRow1 = WRITER1.data; \
-	uint8_t *targetRow2 = WRITER2.data; \
-	uint8_t *targetRow3 = WRITER3.data; \
-	const uint8_t *sourceRow1 = READER1.data; \
-	const uint8_t *sourceRow2 = READER2.data; \
-	const uint8_t *sourceRow3 = READER3.data; \
-	int minWidth = std::min(std::min(READER1.width, READER2.width), READER3.width); \
-	int minHeight = std::min(std::min(READER1.height, READER2.height), READER3.height); \
-	for (int32_t y = 0; y < minHeight; y++) { \
-		uint8_t *targetPixel1 = targetRow1; \
-		uint8_t *targetPixel2 = targetRow2; \
-		uint8_t *targetPixel3 = targetRow3; \
-		const uint8_t *sourcePixel1 = sourceRow1; \
-		const uint8_t *sourcePixel2 = sourceRow2; \
-		const uint8_t *sourcePixel3 = sourceRow3; \
-		for (int32_t x = 0; x < minWidth; x++) { \
-			{OPERATION;} \
-			targetPixel1 += WRITER1.pixelSize; \
-			targetPixel2 += WRITER2.pixelSize; \
-			targetPixel3 += WRITER3.pixelSize; \
-			sourcePixel1 += READER1.pixelSize; \
-			sourcePixel2 += READER2.pixelSize; \
-			sourcePixel3 += READER3.pixelSize; \
-		} \
-		targetRow1 += WRITER1.stride; \
-		targetRow2 += WRITER2.stride; \
-		targetRow3 += WRITER3.stride; \
-		sourceRow1 += READER1.stride; \
-		sourceRow2 += READER2.stride; \
-		sourceRow3 += READER3.stride; \
-	} \
-}
-
-static inline int saturateFloat(float value) {
-	if (!(value >= 0.0f)) {
-		// NaN or negative
-		return 0;
-	} else if (value > 255.0f) {
-		// Too large
-		return 255;
-	} else {
-		// Round to closest
-		return (int)(value + 0.5f);
-	}
-}
-
-// Copy data from one image region to another of the same size.
-//   Packing order is reinterpreted without conversion.
-static void copyImageData(ImageWriter writer, ImageReader reader) {
-	assert(writer.width == reader.width && writer.height == reader.height && writer.pixelSize == reader.pixelSize);
-	ITERATE_ROWS(writer, reader, std::memcpy(targetRow, sourceRow, reader.width * reader.pixelSize));
-}
-
-void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		if (target.packOrder == source.packOrder) {
-			// No conversion needed
-			copyImageData(intersection.subTarget, intersection.subSource);
-		} else {
-			// Read and repack to convert between different color formats
-			ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-				targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
-				targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
-				targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
-				targetPixel[target.packOrder.alphaIndex] = sourcePixel[source.packOrder.alphaIndex];
-			);
-		}
-	}
-}
-void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		copyImageData(intersection.subTarget, intersection.subSource);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		copyImageData(intersection.subTarget, intersection.subSource);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		copyImageData(intersection.subTarget, intersection.subSource);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			uint8_t luma = *sourcePixel;
-			targetPixel[target.packOrder.redIndex]   = luma;
-			targetPixel[target.packOrder.greenIndex] = luma;
-			targetPixel[target.packOrder.blueIndex]  = luma;
-			targetPixel[target.packOrder.alphaIndex] = 255;
-		);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			int luma = *((const uint16_t*)sourcePixel);
-			if (luma > 255) { luma = 255; }
-			targetPixel[target.packOrder.redIndex]   = luma;
-			targetPixel[target.packOrder.greenIndex] = luma;
-			targetPixel[target.packOrder.blueIndex]  = luma;
-			targetPixel[target.packOrder.alphaIndex] = 255;
-		);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			int luma = saturateFloat(*((const float*)sourcePixel));
-			targetPixel[target.packOrder.redIndex]   = luma;
-			targetPixel[target.packOrder.greenIndex] = luma;
-			targetPixel[target.packOrder.blueIndex]  = luma;
-			targetPixel[target.packOrder.alphaIndex] = 255;
-		);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			*targetPixel = saturateFloat(*((const float*)sourcePixel));
-		);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			int luma = *((const uint16_t*)sourcePixel);
-			if (luma > 255) { luma = 255; }
-			*targetPixel = luma;
-		);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			*((uint16_t*)targetPixel) = *sourcePixel;
-		);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			int luma = *((const float*)sourcePixel);
-			if (luma < 0) { luma = 0; }
-			if (luma > 65535) { luma = 65535; }
-			*((uint16_t*)targetPixel) = *sourcePixel;
-		);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			*((float*)targetPixel) = (float)(*sourcePixel);
-		);
-	}
-}
-void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			int luma = *((const uint16_t*)sourcePixel);
-			if (luma > 255) { luma = 255; }
-			*((float*)targetPixel) = (float)luma;
-		);
-	}
-}
-
-// Blends the overlapping part of source into target, weighted per pixel by the source's alpha channel.
-//   * Source alpha 0 leaves the target pixel untouched.
-//   * Source alpha 255 replaces red, green and blue, and sets the target's alpha to 255.
-//   * Other values mix each color channel using (255 - alpha) as the target's weight and alpha as the source's weight.
-//     The target's alpha is scaled by the target's weight and then increased by the source's alpha.
-// Channels are addressed through each image's own packOrder, so the two images may store their channels in different byte orders.
-// normalizedByteMultiplication is defined elsewhere; presumably it multiplies two bytes as fractions of 255 — TODO confirm.
-void dsr::imageImpl_drawAlphaFilter(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		// Read and repack to convert between different color formats
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			// Optimized for anti-aliasing, where most alpha values are 0 or 255
-			uint32_t sourceRatio = sourcePixel[source.packOrder.alphaIndex];
-			if (sourceRatio > 0) {
-				if (sourceRatio == 255) {
-					targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
-					targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
-					targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
-					targetPixel[target.packOrder.alphaIndex] = 255;
-				} else {
-					uint32_t targetRatio = 255 - sourceRatio;
-					targetPixel[target.packOrder.redIndex]   = normalizedByteMultiplication(targetPixel[target.packOrder.redIndex], targetRatio) + normalizedByteMultiplication(sourcePixel[source.packOrder.redIndex], sourceRatio);
-					targetPixel[target.packOrder.greenIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.greenIndex], targetRatio) + normalizedByteMultiplication(sourcePixel[source.packOrder.greenIndex], sourceRatio);
-					targetPixel[target.packOrder.blueIndex]  = normalizedByteMultiplication(targetPixel[target.packOrder.blueIndex], targetRatio) + normalizedByteMultiplication(sourcePixel[source.packOrder.blueIndex], sourceRatio);
-					targetPixel[target.packOrder.alphaIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.alphaIndex], targetRatio) + sourceRatio;
-				}
-			}
-		);
-	}
-}
-
-// Replaces target pixels with source pixels wherever the source's alpha is greater than the target's alpha.
-// sourceAlphaOffset == 0:
-//   The source's alpha is compared and stored as it is.
-// sourceAlphaOffset != 0:
-//   Source pixels with zero alpha are skipped.
-//   The offset is added before comparing against the target's alpha, and the stored alpha is clamped to 0..255.
-void dsr::imageImpl_drawMaxAlpha(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top, int32_t sourceAlphaOffset) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		// Read and repack to convert between different color formats
-		if (sourceAlphaOffset == 0) {
-			ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-				int sourceAlpha = sourcePixel[source.packOrder.alphaIndex];
-				if (sourceAlpha > targetPixel[target.packOrder.alphaIndex]) {
-					targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
-					targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
-					targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
-					targetPixel[target.packOrder.alphaIndex] = sourceAlpha;
-				}
-			);
-		} else {
-			ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-				int sourceAlpha = sourcePixel[source.packOrder.alphaIndex];
-				if (sourceAlpha > 0) {
-					sourceAlpha += sourceAlphaOffset;
-					if (sourceAlpha > targetPixel[target.packOrder.alphaIndex]) {
-						targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
-						targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
-						targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
-						if (sourceAlpha < 0) { sourceAlpha = 0; }
-						if (sourceAlpha > 255) { sourceAlpha = 255; }
-						targetPixel[target.packOrder.alphaIndex] = sourceAlpha;
-					}
-				}
-			);
-		}
-	}
-}
-
-// Copies red, green and blue from each source pixel whose alpha is greater than threshold,
-//   and makes the written target pixel fully opaque by storing 255 as its alpha.
-// Target pixels under source pixels at or below the threshold are left unchanged.
-void dsr::imageImpl_drawAlphaClip(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top, int32_t threshold) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		// Read and repack to convert between different color formats
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			if (sourcePixel[source.packOrder.alphaIndex] > threshold) {
-				targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
-				targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
-				targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
-				targetPixel[target.packOrder.alphaIndex] = 255;
-			}
-		);
-	}
-}
-
-// Blends one uniform color into target, using the 8-bit source image as the blend weight of each pixel.
-// FULL_ALPHA:
-//   When true, the source value is used directly as the weight.
-//   When false, the source value is first combined with color.alpha using normalizedByteMultiplication.
-// A weight of 0 leaves the target pixel untouched and a weight of 255 overwrites it with the color and alpha 255.
-// The color channels are stored without being clamped in this function.
-template <bool FULL_ALPHA>
-static void drawSilhouette_template(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
-	if (ImageIntersection::canCreate(target, source, left, top)) {
-		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
-		// Read and repack to convert between different color formats
-		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
-			uint32_t sourceRatio;
-			if (FULL_ALPHA) {
-				sourceRatio = *sourcePixel;
-			} else {
-				sourceRatio = normalizedByteMultiplication(*sourcePixel, color.alpha);
-			}
-			if (sourceRatio > 0) {
-				if (sourceRatio == 255) {
-					targetPixel[target.packOrder.redIndex]   = color.red;
-					targetPixel[target.packOrder.greenIndex] = color.green;
-					targetPixel[target.packOrder.blueIndex]  = color.blue;
-					targetPixel[target.packOrder.alphaIndex] = 255;
-				} else {
-					uint32_t targetRatio = 255 - sourceRatio;
-					targetPixel[target.packOrder.redIndex]   = normalizedByteMultiplication(targetPixel[target.packOrder.redIndex], targetRatio) + normalizedByteMultiplication(color.red, sourceRatio);
-					targetPixel[target.packOrder.greenIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.greenIndex], targetRatio) + normalizedByteMultiplication(color.green, sourceRatio);
-					targetPixel[target.packOrder.blueIndex]  = normalizedByteMultiplication(targetPixel[target.packOrder.blueIndex], targetRatio) + normalizedByteMultiplication(color.blue, sourceRatio);
-					targetPixel[target.packOrder.alphaIndex] = normalizedByteMultiplication(targetPixel[target.packOrder.alphaIndex], targetRatio) + sourceRatio;
-				}
-			}
-		);
-	}
-}
-// Draws color onto target through the 8-bit mask in source.
-// Nothing is drawn unless color.alpha is greater than zero.
-// The color is saturated before drawing, while the choice between the opaque and
-//   the translucent template is made from the alpha that the caller gave.
-void dsr::imageImpl_drawSilhouette(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
-	if (color.alpha <= 0) {
-		// Invisible colors have nothing to draw
-		return;
-	}
-	const ColorRgbaI32 clampedColor = color.saturate();
-	if (color.alpha >= 255) {
-		// The mask can be used directly as the blend weight
-		drawSilhouette_template<true>(target, source, clampedColor, left, top);
-	} else {
-		// The mask has to be multiplied with the color's alpha
-		drawSilhouette_template<false>(target, source, clampedColor, left, top);
-	}
-}
-
-// Writes 16-bit source heights into targetHeight where they are higher than the heights already stored.
-// Source heights of zero are skipped.
-// For other pixels, sourceHeightOffset is added and the sum is clamped to 0..65535 before being compared against the target's height.
-void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, int32_t left, int32_t top, int32_t sourceHeightOffset) {
-	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
-		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
-		ITERATE_PIXELS(intersectionH.subTarget, intersectionH.subSource,
-			int32_t newHeight = *((const uint16_t*)sourcePixel);
-			if (newHeight > 0) {
-				newHeight += sourceHeightOffset;
-				if (newHeight < 0) { newHeight = 0; }
-				if (newHeight > 65535) { newHeight = 65535; }
-				if (newHeight > 0 && newHeight > *((uint16_t*)targetPixel)) {
-					*((uint16_t*)targetPixel) = newHeight;
-				}
-			}
-		);
-	}
-}
-// Writes 16-bit source heights into targetHeight where they are higher than the stored heights,
-//   and copies the matching pixel from sourceA to targetA for every height that is written.
-// Source heights of zero are skipped. sourceHeightOffset is added to the others and the sum is clamped to 0..65535 before comparing.
-// The color is copied channel by channel through each image's own packOrder.
-// Asserts that sourceA has the same dimensions as sourceHeight.
-// NOTE(review): Only the height images are tested with canCreate and the dimensions of targetA are not checked here.
-//   Presumably targetA is expected to have the same dimensions as targetHeight — confirm against the callers.
-void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
-  int32_t left, int32_t top, int32_t sourceHeightOffset) {
-	assert(sourceA.width == sourceHeight.width);
-	assert(sourceA.height == sourceHeight.height);
-	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
-		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
-		ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
-		ITERATE_PIXELS_2(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource,
-			int32_t newHeight = *((const uint16_t*)sourcePixel1);
-			if (newHeight > 0) {
-				newHeight += sourceHeightOffset;
-				if (newHeight < 0) { newHeight = 0; }
-				if (newHeight > 65535) { newHeight = 65535; }
-				if (newHeight > *((uint16_t*)targetPixel1)) {
-					*((uint16_t*)targetPixel1) = newHeight;
-					targetPixel2[targetA.packOrder.redIndex]   = sourcePixel2[sourceA.packOrder.redIndex];
-					targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
-					targetPixel2[targetA.packOrder.blueIndex]  = sourcePixel2[sourceA.packOrder.blueIndex];
-					targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
-				}
-			}
-		);
-	}
-}
-// Writes 16-bit source heights into targetHeight where they are higher than the stored heights,
-//   and copies the matching pixels from sourceA to targetA and from sourceB to targetB for every height that is written.
-// Source heights of zero are skipped. sourceHeightOffset is added to the others and the sum is clamped to 0..65535 before comparing.
-// The colors are copied channel by channel through each image's own packOrder.
-// Asserts that sourceA and sourceB have the same dimensions as sourceHeight.
-// NOTE(review): Only the height images are tested with canCreate and the dimensions of targetA and targetB are not checked here.
-//   Presumably they are expected to have the same dimensions as targetHeight — confirm against the callers.
-void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
-  ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left, int32_t top, int32_t sourceHeightOffset) {
-	assert(sourceA.width == sourceHeight.width);
-	assert(sourceA.height == sourceHeight.height);
-	assert(sourceB.width == sourceHeight.width);
-	assert(sourceB.height == sourceHeight.height);
-	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
-		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
-		ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
-		ImageIntersection intersectionB = ImageIntersection::create(targetB, sourceB, left, top);
-		ITERATE_PIXELS_3(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource, intersectionB.subTarget, intersectionB.subSource,
-			int32_t newHeight = *((const uint16_t*)sourcePixel1);
-			if (newHeight > 0) {
-				newHeight += sourceHeightOffset;
-				if (newHeight < 0) { newHeight = 0; }
-				if (newHeight > 65535) { newHeight = 65535; }
-				if (newHeight > *((uint16_t*)targetPixel1)) {
-					*((uint16_t*)targetPixel1) = newHeight;
-					targetPixel2[targetA.packOrder.redIndex]   = sourcePixel2[sourceA.packOrder.redIndex];
-					targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
-					targetPixel2[targetA.packOrder.blueIndex]  = sourcePixel2[sourceA.packOrder.blueIndex];
-					targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
-					targetPixel3[targetB.packOrder.redIndex]   = sourcePixel3[sourceB.packOrder.redIndex];
-					targetPixel3[targetB.packOrder.greenIndex] = sourcePixel3[sourceB.packOrder.greenIndex];
-					targetPixel3[targetB.packOrder.blueIndex]  = sourcePixel3[sourceB.packOrder.blueIndex];
-					targetPixel3[targetB.packOrder.alphaIndex] = sourcePixel3[sourceB.packOrder.alphaIndex];
-				}
-			}
-		);
-	}
-}
-
-// Writes floating-point source heights into targetHeight where they are higher than the heights already stored.
-// Only source heights greater than negative infinity are drawn.
-// sourceHeightOffset is added to the source height before it is compared against the target's height.
-void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, int32_t left, int32_t top, float sourceHeightOffset) {
-	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
-		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
-		ITERATE_PIXELS(intersectionH.subTarget, intersectionH.subSource,
-			float newHeight = *((const float*)sourcePixel);
-			if (newHeight > -std::numeric_limits<float>::infinity()) {
-				newHeight += sourceHeightOffset;
-				if (newHeight > *((float*)targetPixel)) {
-					*((float*)targetPixel) = newHeight;
-				}
-			}
-		);
-	}
-}
-// Writes floating-point source heights into targetHeight where they are higher than the stored heights,
-//   and copies the matching pixel from sourceA to targetA for every height that is written.
-// Only source heights greater than negative infinity are drawn, and sourceHeightOffset is added before comparing.
-// The color is copied channel by channel through each image's own packOrder.
-// Asserts that sourceA has the same dimensions as sourceHeight.
-// NOTE(review): Only the height images are tested with canCreate and the dimensions of targetA are not checked here.
-//   Presumably targetA is expected to have the same dimensions as targetHeight — confirm against the callers.
-void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
-  int32_t left, int32_t top, float sourceHeightOffset) {
-	assert(sourceA.width == sourceHeight.width);
-	assert(sourceA.height == sourceHeight.height);
-	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
-		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
-		ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
-		ITERATE_PIXELS_2(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource,
-			float newHeight = *((const float*)sourcePixel1);
-			if (newHeight > -std::numeric_limits<float>::infinity()) {
-				newHeight += sourceHeightOffset;
-				if (newHeight > *((float*)targetPixel1)) {
-					*((float*)targetPixel1) = newHeight;
-					targetPixel2[targetA.packOrder.redIndex]   = sourcePixel2[sourceA.packOrder.redIndex];
-					targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
-					targetPixel2[targetA.packOrder.blueIndex]  = sourcePixel2[sourceA.packOrder.blueIndex];
-					targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
-				}
-			}
-		);
-	}
-}
-// Writes floating-point source heights into targetHeight where they are higher than the stored heights,
-//   and copies the matching pixels from sourceA to targetA and from sourceB to targetB for every height that is written.
-// Only source heights greater than negative infinity are drawn, and sourceHeightOffset is added before comparing.
-// The colors are copied channel by channel through each image's own packOrder.
-// Asserts that sourceA and sourceB have the same dimensions as sourceHeight.
-// NOTE(review): Only the height images are tested with canCreate and the dimensions of targetA and targetB are not checked here.
-//   Presumably they are expected to have the same dimensions as targetHeight — confirm against the callers.
-void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
-  ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left, int32_t top, float sourceHeightOffset) {
-	assert(sourceA.width == sourceHeight.width);
-	assert(sourceA.height == sourceHeight.height);
-	assert(sourceB.width == sourceHeight.width);
-	assert(sourceB.height == sourceHeight.height);
-	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
-		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
-		ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
-		ImageIntersection intersectionB = ImageIntersection::create(targetB, sourceB, left, top);
-		ITERATE_PIXELS_3(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource, intersectionB.subTarget, intersectionB.subSource,
-			float newHeight = *((const float*)sourcePixel1);
-			if (newHeight > -std::numeric_limits<float>::infinity()) {
-				newHeight += sourceHeightOffset;
-				if (newHeight > *((float*)targetPixel1)) {
-					*((float*)targetPixel1) = newHeight;
-					targetPixel2[targetA.packOrder.redIndex]   = sourcePixel2[sourceA.packOrder.redIndex];
-					targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
-					targetPixel2[targetA.packOrder.blueIndex]  = sourcePixel2[sourceA.packOrder.blueIndex];
-					targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
-					targetPixel3[targetB.packOrder.redIndex]   = sourcePixel3[sourceB.packOrder.redIndex];
-					targetPixel3[targetB.packOrder.greenIndex] = sourcePixel3[sourceB.packOrder.greenIndex];
-					targetPixel3[targetB.packOrder.blueIndex]  = sourcePixel3[sourceB.packOrder.blueIndex];
-					targetPixel3[targetB.packOrder.alphaIndex] = sourcePixel3[sourceB.packOrder.alphaIndex];
-				}
-			}
-		);
-	}
-}
-
-
-// -------------------------------- Resize --------------------------------
-
-
-// Places red, green, blue and alpha from color in the four elements of a U32x4, in that order.
-static inline U32x4 ColorRgbaI32_to_U32x4(const ColorRgbaI32& color) {
-	U32x4 channels(color.red, color.green, color.blue, color.alpha);
-	return channels;
-}
-
-// Reads the four elements of a U32x4 as red, green, blue and alpha, in that order.
-static inline ColorRgbaI32 U32x4_to_ColorRgbaI32(const U32x4& color) {
-	const UVector4D channels = color.get();
-	ColorRgbaI32 unpacked(channels.x, channels.y, channels.z, channels.w);
-	return unpacked;
-}
-
-// Linear interpolation between two vectors of packed colors, using one 16-bit sub-pixel weight for all of them
-// Pre-condition: 0 <= fineRatio <= 65536
-// Post-condition: Returns colorA * (1 - (fineRatio / 65536)) + colorB * (fineRatio / 65536)
-//   The weight is reduced to the range 0..256 before multiplying, so the lowest 8 bits of fineRatio do not affect the result.
-static inline U32x4 mixColorsUniform(const U32x4 &colorA, const U32x4 &colorB, uint32_t fineRatio) {
-	// A byte multiplied with a weight of at most 256 fits inside of 16 bits
-	uint16_t weightB = fineRatio >> 8;
-	uint16_t weightA = 256 - weightB;
-	U16x8 vWeightA = U16x8(weightA);
-	U16x8 vWeightB = U16x8(weightB);
-	// Masks selecting every second byte
-	U32x4 evenBytes(0x00FF00FFu);
-	U32x4 oddBytes(0xFF00FF00u);
-	// Bytes 0 and 2 are kept in place and bytes 1 and 3 are shifted down to the same locations
-	U16x8 evenA = U16x8(colorA & evenBytes);
-	U16x8 oddA = U16x8((colorA & oddBytes) >> 8);
-	U16x8 evenB = U16x8(colorB & evenBytes);
-	U16x8 oddB = U16x8((colorB & oddBytes) >> 8);
-	// Weighted sums
-	U32x4 evenSum = (((evenA * vWeightA) + (evenB * vWeightB))).get_U32();
-	U32x4 oddSum = (((oddA * vWeightA) + (oddB * vWeightB))).get_U32();
-	// The even sums are shifted back down to bytes, while the upper halves of the odd sums are already in place
-	return (((evenSum >> 8) & evenBytes) | (oddSum & oddBytes));
-}
-
-// Reads the pixel at (X, Y) using readPixel_clamp and unpacks it into separate channels using the image's pack order.
-//   The image is always the variable named source in the scope where the macro is used.
-//   readPixel_clamp is defined elsewhere; presumably it clamps coordinates outside of the image — TODO confirm.
-#define READ_RGBAU8_CLAMP(X,Y) ImageRgbaU8Impl::unpackRgba(ImageRgbaU8Impl::readPixel_clamp(source, X, Y), source.packOrder)
-// The same read, converted into a U32x4 using ColorRgbaI32_to_U32x4
-#define READ_RGBAU8_CLAMP_SIMD(X,Y) ColorRgbaI32_to_U32x4(READ_RGBAU8_CLAMP(X,Y))
-
-// Fixed-precision decimal system with 16-bit indices and 16-bit sub-pixel weights
-//   One whole pixel equals 65536 units
-static const uint32_t interpolationFullPixel = 65536;
-//   Half a pixel equals 32768 units
-static const uint32_t interpolationHalfPixel = interpolationFullPixel / 2;
-// Modulo mask for values greater than or equal to 0 and lesser than interpolationFullPixel
-static const uint32_t interpolationWeightMask = interpolationFullPixel - 1;
-
-// Samples one color from source and packs it using the pack order of target.
-// BILINEAR:
-//   When false, the pixel at (leftX, upperY) is read and repacked.
-//   When true, the four pixels from (leftX, upperY) to (leftX + 1, upperY + 1) are mixed,
-//     using rightRatio and lowerRatio as the 16-bit weights of the right column and the lower row.
-template <bool BILINEAR>
-static uint32_t samplePixel(const ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
-	if (!BILINEAR) {
-		// Nearest neighbor
-		return (target.packRgba(READ_RGBAU8_CLAMP(leftX, upperY))).packed;
-	}
-	// The weights on each axis sum up to one whole pixel
-	uint32_t upperRatio = 65536 - lowerRatio;
-	uint32_t leftRatio = 65536 - rightRatio;
-	// Read the four neighbors
-	U32x4 colorUpperLeft = READ_RGBAU8_CLAMP_SIMD(leftX, upperY);
-	U32x4 colorUpperRight = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY);
-	U32x4 colorLowerLeft = READ_RGBAU8_CLAMP_SIMD(leftX, upperY + 1);
-	U32x4 colorLowerRight = READ_RGBAU8_CLAMP_SIMD(leftX + 1, upperY + 1);
-	// Mix horizontally along the upper and lower rows
-	U32x4 weightLeft = U32x4(leftRatio);
-	U32x4 weightRight = U32x4(rightRatio);
-	U32x4 colorUpper = ((colorUpperLeft * weightLeft) + (colorUpperRight * weightRight)) >> 16;
-	U32x4 colorLower = ((colorLowerLeft * weightLeft) + (colorLowerRight * weightRight)) >> 16;
-	// Mix vertically between the two rows
-	U32x4 colorCenter = ((colorUpper * upperRatio) + (colorLower * lowerRatio)) >> 16;
-	return (target.packRgba(U32x4_to_ColorRgbaI32(colorCenter))).packed;
-}
-
-// Samples one 8-bit value from source.
-// BILINEAR:
-//   When false, the pixel at (leftX, upperY) is returned.
-//   When true, the four pixels from (leftX, upperY) to (leftX + 1, upperY + 1) are mixed,
-//     using rightRatio and lowerRatio as the 16-bit weights of the right column and the lower row.
-template <bool BILINEAR>
-static uint8_t samplePixel(const ImageU8Impl& target, const ImageU8Impl& source, uint32_t leftX, uint32_t upperY, uint32_t rightRatio, uint32_t lowerRatio) {
-	if (!BILINEAR) {
-		// Nearest neighbor
-		return ImageU8Impl::readPixel_clamp(source, leftX, upperY);
-	}
-	// The weights on each axis sum up to one whole pixel
-	uint32_t upperRatio = 65536 - lowerRatio;
-	uint32_t leftRatio = 65536 - rightRatio;
-	// Read the four neighbors
-	uint32_t lumaUpperLeft = ImageU8Impl::readPixel_clamp(source, leftX, upperY);
-	uint32_t lumaUpperRight = ImageU8Impl::readPixel_clamp(source, leftX + 1, upperY);
-	uint32_t lumaLowerLeft = ImageU8Impl::readPixel_clamp(source, leftX, upperY + 1);
-	uint32_t lumaLowerRight = ImageU8Impl::readPixel_clamp(source, leftX + 1, upperY + 1);
-	// Mix horizontally along the upper and lower rows
-	uint32_t lumaUpper = ((lumaUpperLeft * leftRatio) + (lumaUpperRight * rightRatio)) >> 16;
-	uint32_t lumaLower = ((lumaLowerLeft * leftRatio) + (lumaLowerRight * rightRatio)) >> 16;
-	// Mix vertically between the two rows
-	uint32_t lumaCenter = ((lumaUpper * upperRatio) + (lumaLower * lowerRatio)) >> 16;
-	return lumaCenter;
-}
-
-// Reference implementation of resizing, sampling source once for every pixel in target.
-// BILINEAR: Selects linear interpolation instead of reading a single pixel
-// scaleRegion:
-//     Where the stretched source image is located within the target image
-//     A region smaller than the target image fills the outside with stretched pixels
-//     This lets the caller crop away uninteresting parts of the source image
-//     The region can be rounded to a multiple of the input size to get a fixed pixel size
-template <bool BILINEAR, typename IMAGE_TYPE, typename PIXEL_TYPE>
-static void resize_reference(IMAGE_TYPE& target, const IMAGE_TYPE& source, const IRect& scaleRegion) {
-	// How far to move in the source for each target pixel, in units of 1/65536 pixels
-	int32_t stepX = interpolationFullPixel * source.width / scaleRegion.width();
-	int32_t stepY = interpolationFullPixel * source.height / scaleRegion.height();
-	// Source coordinate of the first target pixel
-	int32_t firstX = interpolationFullPixel * scaleRegion.left() + stepX / 2;
-	int32_t firstY = interpolationFullPixel * scaleRegion.top() + stepY / 2;
-	if (BILINEAR) {
-		// Interpolation begins half a pixel earlier
-		firstX -= interpolationHalfPixel;
-		firstY -= interpolationHalfPixel;
-	}
-	SafePointer<PIXEL_TYPE> rowStart = imageInternal::getSafeData<PIXEL_TYPE>(target);
-	int32_t fixedY = firstY;
-	for (int32_t row = 0; row < target.height; row++) {
-		// Negative coordinates are replaced with zero before splitting into whole pixels and weights
-		uint32_t clampedY = (uint32_t)((fixedY < 0) ? 0 : fixedY);
-		uint32_t upperY = clampedY >> 16;
-		uint32_t lowerRatio = clampedY & interpolationWeightMask;
-		SafePointer<PIXEL_TYPE> writer = rowStart;
-		int32_t fixedX = firstX;
-		for (int32_t column = 0; column < target.width; column++) {
-			uint32_t clampedX = (uint32_t)((fixedX < 0) ? 0 : fixedX);
-			uint32_t leftX = clampedX >> 16;
-			uint32_t rightRatio = clampedX & interpolationWeightMask;
-			*writer = samplePixel<BILINEAR>(target, source, leftX, upperY, rightRatio, lowerRatio);
-			writer += 1;
-			fixedX += stepX;
-		}
-		// Continue on the next row
-		rowStart.increaseBytes(target.stride);
-		fixedY += stepY;
-	}
-}
-
-// Resizes source into target, choosing a specialized loop when only one dimension changes.
-//   * Same width and height as scaleRegion without offset: Calls imageImpl_drawCopy.
-//   * Same width: Only interpolates or copies along the vertical axis.
-//   * Same height: Only interpolates or samples along the horizontal axis.
-//   * Otherwise: Calls resize_reference.
-// BILINEAR: Enables linear interpolation
-// SIMD_ALIGNED: Each line starts 16-byte aligned, has a stride divisible with 16-bytes and is allowed to overwrite padding.
-// NOTE(review): The SIMD_ALIGNED loop for vertical interpolation blends packed pixels without unpacking or repacking them,
-//   while the branch can be entered with different pack orders when BILINEAR is true.
-//   Confirm that callers only select SIMD_ALIGNED for images sharing the same pack order.
-template <bool BILINEAR, bool SIMD_ALIGNED>
-static void resize_optimized(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, const IRect& scaleRegion) {
-	// Get source information
-	// Compare dimensions
-	const bool sameWidth = source.width == scaleRegion.width() && scaleRegion.left() == 0;
-	const bool sameHeight = source.height == scaleRegion.height() && scaleRegion.top() == 0;
-	const bool samePackOrder = target.packOrder.packOrderIndex == source.packOrder.packOrderIndex;
-	if (sameWidth && sameHeight) {
-		// No need to resize, just make a copy to save time
-		imageImpl_drawCopy(target, source);
-	} else if (sameWidth && (samePackOrder || BILINEAR)) {
-		// Only vertical interpolation
-
-		// Offset in source pixels per target pixel
-		int32_t offsetY = interpolationFullPixel * source.height / scaleRegion.height();
-		int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
-		if (BILINEAR) {
-			startY -= interpolationHalfPixel;
-		}
-		SafePointer<uint32_t> targetRow = imageInternal::getSafeData<uint32_t>(target);
-		int32_t readY = startY;
-		for (int32_t y = 0; y < target.height; y++) {
-			int32_t naturalY = readY;
-			if (naturalY < 0) { naturalY = 0; }
-			uint32_t sampleY = (uint32_t)naturalY;
-			uint32_t upperY = sampleY >> 16;
-			uint32_t lowerY = upperY + 1;
-			// Keep both source rows inside of the image
-			if (upperY >= (uint32_t)source.height) upperY = source.height - 1;
-			if (lowerY >= (uint32_t)source.height) lowerY = source.height - 1;
-			if (BILINEAR) {
-				uint32_t lowerRatio = sampleY & interpolationWeightMask;
-				uint32_t upperRatio = 65536 - lowerRatio;
-				SafePointer<uint32_t> targetPixel = targetRow;
-				if (SIMD_ALIGNED) {
-					// Mixing four packed pixels at a time from the two source rows
-					const SafePointer<uint32_t> sourceRowUpper = imageInternal::getSafeData<uint32_t>(source, upperY);
-					const SafePointer<uint32_t> sourceRowLower = imageInternal::getSafeData<uint32_t>(source, lowerY);
-					for (int32_t x = 0; x < target.width; x += 4) {
-						ALIGN16 U32x4 vUpperPackedColor = U32x4::readAligned(sourceRowUpper, "resize_optimized @ read vUpperPackedColor");
-						ALIGN16 U32x4 vLowerPackedColor = U32x4::readAligned(sourceRowLower, "resize_optimized @ read vLowerPackedColor");
-						ALIGN16 U32x4 vCenterColor = mixColorsUniform(vUpperPackedColor, vLowerPackedColor, lowerRatio);
-						vCenterColor.writeAligned(targetPixel, "resize_optimized @ write vCenterColor");
-						sourceRowUpper += 4;
-						sourceRowLower += 4;
-						targetPixel += 4;
-					}
-				} else {
-					// Mixing one unpacked pixel at a time and packing the result for the target
-					for (int32_t x = 0; x < target.width; x++) {
-						ALIGN16 U32x4 vUpperColor = READ_RGBAU8_CLAMP_SIMD(x, upperY);
-						ALIGN16 U32x4 vLowerColor = READ_RGBAU8_CLAMP_SIMD(x, lowerY);
-						ALIGN16 U32x4 vCenterColor = ((vUpperColor * upperRatio) + (vLowerColor * lowerRatio)) >> 16;
-						ColorRgbaI32 finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
-						*targetPixel = target.packRgba(finalColor).packed;
-						targetPixel += 1;
-					}
-				}
-			} else {
-				const SafePointer<uint32_t> sourceRowUpper = imageInternal::getSafeData<uint32_t>(source, upperY);
-				// Nearest neighbor sampling from a same width can be done using one copy per row
-				safeMemoryCopy(targetRow, sourceRowUpper, source.width * 4);
-			}
-			targetRow.increaseBytes(target.stride);
-			readY += offsetY;
-		}
-	} else if (sameHeight) {
-		// Only horizontal interpolation
-
-		// Offset in source pixels per target pixel
-		int32_t offsetX = interpolationFullPixel * source.width / scaleRegion.width();
-		int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
-		if (BILINEAR) {
-			startX -= interpolationHalfPixel;
-		}
-		SafePointer<uint32_t> targetRow = imageInternal::getSafeData<uint32_t>(target);
-		for (int32_t y = 0; y < target.height; y++) {
-			SafePointer<uint32_t> targetPixel = targetRow;
-			int32_t readX = startX;
-			for (int32_t x = 0; x < target.width; x++) {
-				int32_t naturalX = readX;
-				if (naturalX < 0) { naturalX = 0; }
-				uint32_t sampleX = (uint32_t)naturalX;
-				uint32_t leftX = sampleX >> 16;
-				uint32_t rightX = leftX + 1;
-				uint32_t rightRatio = sampleX & interpolationWeightMask;
-				uint32_t leftRatio = 65536 - rightRatio;
-				ColorRgbaI32 finalColor;
-				if (BILINEAR) {
-					ALIGN16 U32x4 vLeftColor = READ_RGBAU8_CLAMP_SIMD(leftX, y);
-					ALIGN16 U32x4 vRightColor = READ_RGBAU8_CLAMP_SIMD(rightX, y);
-					ALIGN16 U32x4 vCenterColor = ((vLeftColor * leftRatio) + (vRightColor * rightRatio)) >> 16;
-					finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
-				} else {
-					finalColor = READ_RGBAU8_CLAMP(leftX, y);
-				}
-				// Packing with the target's pack order converts between color formats
-				*targetPixel = target.packRgba(finalColor).packed;
-				targetPixel += 1;
-				readX += offsetX;
-			}
-			targetRow.increaseBytes(target.stride);
-		}
-	} else {
-		// Call the reference implementation
-		resize_reference<BILINEAR, ImageRgbaU8Impl, uint32_t>(target, source, scaleRegion);
-	}
-}
-
-// Returns true iff every row in image begins at an address divisible by 16
-//   This requires both the stride and the address of the first row to be multiples of 16 bytes
-//   Sub-images often fail the test, even when the parent image is aligned
-static bool imageIs16ByteAligned(const ImageImpl& image) {
-	if ((image.stride & 15) != 0) {
-		// Later rows would drift out of alignment
-		return false;
-	}
-	const uintptr_t firstAddress = (uintptr_t)(imageInternal::getSafeData<uint8_t>(image).getUnsafe());
-	return (firstAddress & 15) == 0;
-}
-
-// Converting run-time flags into compile-time constants
-//   interpolate selects the BILINEAR template argument of resize_optimized.
-//   paddWrite tells if writing to the padding of target is allowed, which the SIMD_ALIGNED template argument requires.
-static void resize_aux(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate, bool paddWrite, const IRect& scaleRegion) {
-	// The aligned loop in resize_optimized blends packed pixels without unpacking the channels,
-	//   so selecting it for images with different pack orders would store the channels in the source's order.
-	const bool samePackOrder = target.packOrder.packOrderIndex == source.packOrder.packOrderIndex;
-	// If writing to padding is allowed and both images are 16-byte aligned with the same pack order
-	if (paddWrite && samePackOrder && imageIs16ByteAligned(source) && imageIs16ByteAligned(target)) {
-		// SIMD resize allowed
-		if (interpolate) {
-			resize_optimized<true, true>(target, source, scaleRegion);
-		} else {
-			resize_optimized<false, true>(target, source, scaleRegion);
-		}
-	} else {
-		// Non-SIMD resize
-		if (interpolate) {
-			resize_optimized<true, false>(target, source, scaleRegion);
-		} else {
-			resize_optimized<false, false>(target, source, scaleRegion);
-		}
-	}
-}
-
-// TODO: Optimize monochrome resizing.
-// Resizes 8-bit monochrome images using the reference implementation, with interpolate selecting the BILINEAR template argument.
-//   paddWrite is not used by this overload.
-static void resize_aux(ImageU8Impl& target, const ImageU8Impl& source, bool interpolate, bool paddWrite, const IRect& scaleRegion) {
-	if (!interpolate) {
-		resize_reference<false, ImageU8Impl, uint8_t>(target, source, scaleRegion);
-		return;
-	}
-	resize_reference<true, ImageU8Impl, uint8_t>(target, source, scaleRegion);
-}
-
-// Creates a new image of width times height pixels, using the same pack order as originalImage.
-static ImageRgbaU8Impl createWithSamePackOrder(const ImageRgbaU8Impl& originalImage, int32_t width, int32_t height) {
-	const auto inheritedPackOrder = originalImage.packOrder.packOrderIndex;
-	return ImageRgbaU8Impl(width, height, inheritedPackOrder);
-}
-// Creates a new monochrome image of width times height pixels.
-//   originalImage only selects the overload, because no pack order is passed to the monochrome image.
-static ImageU8Impl createWithSamePackOrder(const ImageU8Impl& originalImage, int32_t width, int32_t height) {
-	(void)originalImage;
-	return ImageU8Impl(width, height);
-}
-
-// Stretches source over the whole bound of target.
-//   When the width changes and the height grows, the resize is done in two steps through a temporary image.
-//   Otherwise the resize is done in one step.
-template <typename IMAGE_TYPE>
-void resizeToTarget(IMAGE_TYPE& target, const IMAGE_TYPE& source, bool interpolate) {
-	const IRect fullRegion = imageInternal::getBound(target);
-	const bool twoSteps = (target.width != source.width) && (target.height > source.height);
-	if (!twoSteps) {
-		// Downscaling or only changing one dimension is faster in one step.
-		resize_aux(target, source, interpolate, true, fullRegion);
-		return;
-	}
-	// Upscaling is faster in two steps by both reusing the horizontal interpolation and vectorizing the vertical interpolation.
-	//   The temporary image has the width of the target and the height of the source.
-	const IRect horizontalRegion = IRect(fullRegion.left(), 0, fullRegion.width(), source.height);
-	IMAGE_TYPE intermediate = createWithSamePackOrder(target, target.width, source.height);
-	// Horizontal resize from source to the temporary image
-	resize_aux(intermediate, source, interpolate, true, horizontalRegion);
-	// Vertical resize from the temporary image to target
-	resize_aux(target, intermediate, interpolate, true, fullRegion);
-}
-
-// Resizes a color image by stretching source over the whole target, with optional interpolation.
-void dsr::imageImpl_resizeToTarget(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate) {
-	// The image type is deduced from the arguments
-	resizeToTarget(target, source, interpolate);
-}
-
-// Resizes a monochrome image by stretching source over the whole target, with optional interpolation.
-void dsr::imageImpl_resizeToTarget(ImageU8Impl& target, const ImageU8Impl& source, bool interpolate) {
-	// The image type is deduced from the arguments
-	resizeToTarget(target, source, interpolate);
-}
-
-// Reads the packed color at (x, y) in source using readPixel_clamp.
-// CONVERT_COLOR:
-//   When false, the color is returned as it was stored in source.
-//   When true, the color is unpacked with the source's pack order and packed again for target.
-template <bool CONVERT_COLOR>
-static inline Color4xU8 convertRead(const ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int x, int y) {
-	const Color4xU8 storedColor = ImageRgbaU8Impl::readPixel_clamp(source, x, y);
-	if (!CONVERT_COLOR) {
-		return storedColor;
-	}
-	return target.packRgba(ImageRgbaU8Impl::unpackRgba(storedColor, source.packOrder));
-}
-
-// Used for drawing large pixels
-//   Writes packedColor to the target pixels from pixelLeft to pixelRight - 1 on the rows from pixelTop to pixelBottom - 1
-//   The rectangle is not clipped by this function
-static inline void fillRectangle(ImageRgbaU8Impl& target, int pixelLeft, int pixelRight, int pixelTop, int pixelBottom, const Color4xU8& packedColor) {
-	SafePointer<Color4xU8> rowStart = imageInternal::getSafeData<Color4xU8>(target, pixelTop) + pixelLeft;
-	int row = pixelTop;
-	while (row < pixelBottom) {
-		SafePointer<Color4xU8> writer = rowStart;
-		int column = pixelLeft;
-		while (column < pixelRight) {
-			*writer = packedColor;
-			writer += 1;
-			column++;
-		}
-		// Continue on the next row
-		rowStart.increaseBytes(target.stride);
-		row++;
-	}
-}
-
-// Draws each source pixel as a block of pixelWidth times pixelHeight target pixels, beginning in the upper left corner.
-//   clipWidth and clipHeight limit where new blocks may begin, and are themselves limited to the target's dimensions.
-//   Source coordinates stop at the last column and row when the blocks continue past the end of the source.
-// CONVERT_COLOR: Repacks each color for the target's pack order when true
-template <bool CONVERT_COLOR>
-static void blockMagnify_reference(
-  ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source,
-  int pixelWidth, int pixelHeight, int clipWidth, int clipHeight) {
-	const int lastSourceX = source.width - 1;
-	const int lastSourceY = source.height - 1;
-	if (clipWidth > target.width) { clipWidth = target.width; }
-	if (clipHeight > target.height) { clipHeight = target.height; }
-	int readY = 0;
-	for (int32_t blockTop = 0; blockTop < clipHeight; blockTop += pixelHeight) {
-		int readX = 0;
-		for (int32_t blockLeft = 0; blockLeft < clipWidth; blockLeft += pixelWidth) {
-			// One read per block
-			const Color4xU8 blockColor = convertRead<CONVERT_COLOR>(target, source, readX, readY);
-			// The whole block is written without any conditions
-			fillRectangle(target, blockLeft, blockLeft + pixelWidth, blockTop, blockTop + pixelHeight, blockColor);
-			// Step to the next source column without passing the last one
-			readX = (readX + 1 > lastSourceX) ? lastSourceX : (readX + 1);
-		}
-		// Step to the next source row without passing the last one
-		readY = (readY + 1 > lastSourceY) ? lastSourceY : (readY + 1);
-	}
-}
-
-// Magnifies source by drawing each source pixel as a block of 2 x 2 target pixels, without any color conversion.
-// Pre-condition:
-//   * The source and target images have the same pack order
-//   * Both source and target are 16-byte aligned, but does not have to own their padding
-//   * clipWidth % 2 == 0
-//   * clipHeight % 2 == 0
-static void blockMagnify_2x2(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
-	const SafePointer<uint32_t> readRow = imageInternal::getSafeData<uint32_t>(source);
-	SafePointer<uint32_t> writeRowA = imageInternal::getSafeData<uint32_t>(target, 0);
-	SafePointer<uint32_t> writeRowB = imageInternal::getSafeData<uint32_t>(target, 1);
-	// Each row of blocks covers two rows of target pixels
-	const int strideOfTwoRows = target.stride * 2;
-	for (int blockTop = 0; blockTop + 2 <= clipHeight; blockTop += 2) {
-		// Begin from the left side
-		const SafePointer<uint32_t> reader = readRow;
-		SafePointer<uint32_t> writerA = writeRowA;
-		SafePointer<uint32_t> writerB = writeRowB;
-		// Only whole blocks of two pixels are written
-		for (int blockLeft = 0; blockLeft + 2 <= clipWidth; blockLeft += 2) {
-			// One source pixel is read for each block
-			const uint32_t packedColor = *reader;
-			reader += 1;
-			// The same packed color is written to all four pixels in the block
-			writerA[0] = packedColor; writerA[1] = packedColor;
-			writerB[0] = packedColor; writerB[1] = packedColor;
-			writerA += 2;
-			writerB += 2;
-		}
-		// Continue on the next source row and the next pair of target rows
-		readRow.increaseBytes(source.stride);
-		writeRowA.increaseBytes(strideOfTwoRows);
-		writeRowB.increaseBytes(strideOfTwoRows);
-	}
-}
-
-// Magnifies source by drawing each source pixel as a block of 3 x 3 target pixels, without any color conversion.
-// Pre-condition:
-//   * The source and target images have the same pack order
-//   * Both source and target are 16-byte aligned, but does not have to own their padding
-//   * clipWidth % 3 == 0
-//   * clipHeight % 3 == 0
-static void blockMagnify_3x3(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
-	const SafePointer<uint32_t> readRow = imageInternal::getSafeData<uint32_t>(source);
-	SafePointer<uint32_t> writeRowA = imageInternal::getSafeData<uint32_t>(target, 0);
-	SafePointer<uint32_t> writeRowB = imageInternal::getSafeData<uint32_t>(target, 1);
-	SafePointer<uint32_t> writeRowC = imageInternal::getSafeData<uint32_t>(target, 2);
-	// Each row of blocks covers three rows of target pixels
-	const int strideOfThreeRows = target.stride * 3;
-	for (int blockTop = 0; blockTop + 3 <= clipHeight; blockTop += 3) {
-		// Begin from the left side
-		const SafePointer<uint32_t> reader = readRow;
-		SafePointer<uint32_t> writerA = writeRowA;
-		SafePointer<uint32_t> writerB = writeRowB;
-		SafePointer<uint32_t> writerC = writeRowC;
-		// Only whole blocks of three pixels are written
-		for (int blockLeft = 0; blockLeft + 3 <= clipWidth; blockLeft += 3) {
-			// One source pixel is read for each block
-			const uint32_t packedColor = *reader;
-			reader += 1;
-			// The same packed color is written to all nine pixels in the block
-			writerA[0] = packedColor; writerA[1] = packedColor; writerA[2] = packedColor;
-			writerB[0] = packedColor; writerB[1] = packedColor; writerB[2] = packedColor;
-			writerC[0] = packedColor; writerC[1] = packedColor; writerC[2] = packedColor;
-			writerA += 3;
-			writerB += 3;
-			writerC += 3;
-		}
-		// Continue on the next source row and the next three target rows
-		readRow.increaseBytes(source.stride);
-		writeRowA.increaseBytes(strideOfThreeRows);
-		writeRowB.increaseBytes(strideOfThreeRows);
-		writeRowC.increaseBytes(strideOfThreeRows);
-	}
-}
-
-// Pre-condition:
-//   * The source and target images have the same pack order (pixels are copied as whole 32-bit values without reordering)
-//   * Both source and target are 16-byte aligned, but do not have to own their padding
-//   * clipWidth % 4 == 0 (only whole 4-pixel blocks are written, so a remainder would be left untouched)
-//   * clipHeight % 4 == 0 (only whole 4-row blocks are written, so a remainder would be left untouched)
-static void blockMagnify_4x4(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
-	const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
-	SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
-	SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
-	SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
-	SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
-	int quadTargetStride = target.stride * 4; // Byte offset from one block of four target rows to the next
-	for (int upperTargetY = 0; upperTargetY + 4 <= clipHeight; upperTargetY+=4) {
-		// Carriage return: restart the pixel pointers at the beginning of the current rows
-		const SafePointer<uint32_t> sourcePixel = sourceRow;
-		SafePointer<uint32_t> targetPixelA = targetRowA;
-		SafePointer<uint32_t> targetPixelB = targetRowB;
-		SafePointer<uint32_t> targetPixelC = targetRowC;
-		SafePointer<uint32_t> targetPixelD = targetRowD;
-		int writeLeftX = 0;
-		while (writeLeftX + 4 <= clipWidth) {
-			// Read one pixel at a time
-			uint32_t scalarValue = *sourcePixel;
-			sourcePixel += 1;
-			// Convert scalar to SIMD vector of 4 repeated pixels
-			ALIGN16 U32x4 sourcePixels = U32x4(scalarValue);
-			// Write to 4x4 pixels using 4 SIMD writes, one aligned write of 4 pixels per target row
-			sourcePixels.writeAligned(targetPixelA, "blockMagnify_4x4 @ write A");
-			sourcePixels.writeAligned(targetPixelB, "blockMagnify_4x4 @ write B");
-			sourcePixels.writeAligned(targetPixelC, "blockMagnify_4x4 @ write C");
-			sourcePixels.writeAligned(targetPixelD, "blockMagnify_4x4 @ write D");
-			targetPixelA += 4;
-			targetPixelB += 4;
-			targetPixelC += 4;
-			targetPixelD += 4;
-			// Count the target pixels written along the row
-			writeLeftX += 4;
-		}
-		// Line feed: one source row down and one block of four target rows down
-		sourceRow.increaseBytes(source.stride);
-		targetRowA.increaseBytes(quadTargetStride);
-		targetRowB.increaseBytes(quadTargetStride);
-		targetRowC.increaseBytes(quadTargetStride);
-		targetRowD.increaseBytes(quadTargetStride);
-	}
-}
-
-// Pre-condition:
-//   * The source and target images have the same pack order (pixels are copied as whole 32-bit values without reordering)
-//   * Both source and target are 16-byte aligned, but do not have to own their padding
-//   * clipWidth % 5 == 0 (only whole 5-pixel blocks are written, so a remainder would be left untouched)
-//   * clipHeight % 5 == 0 (only whole 5-row blocks are written, so a remainder would be left untouched)
-static void blockMagnify_5x5(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
-	const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
-	SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
-	SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
-	SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
-	SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
-	SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
-	int blockTargetStride = target.stride * 5; // Byte offset from one block of five target rows to the next
-	for (int upperTargetY = 0; upperTargetY + 5 <= clipHeight; upperTargetY+=5) {
-		// Carriage return: restart the pixel pointers at the beginning of the current rows
-		const SafePointer<uint32_t> sourcePixel = sourceRow;
-		SafePointer<uint32_t> targetPixelA = targetRowA;
-		SafePointer<uint32_t> targetPixelB = targetRowB;
-		SafePointer<uint32_t> targetPixelC = targetRowC;
-		SafePointer<uint32_t> targetPixelD = targetRowD;
-		SafePointer<uint32_t> targetPixelE = targetRowE;
-		int writeLeftX = 0;
-		while (writeLeftX + 5 <= clipWidth) {
-			// Read one pixel at a time
-			uint32_t scalarValue = *sourcePixel;
-			sourcePixel += 1;
-			// Write to a whole block of pixels (5 columns in each of the 5 rows)
-			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue;
-			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue;
-			targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue;
-			targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue;
-			targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue;
-			targetPixelA += 5;
-			targetPixelB += 5;
-			targetPixelC += 5;
-			targetPixelD += 5;
-			targetPixelE += 5;
-			// Count the target pixels written along the row
-			writeLeftX += 5;
-		}
-		// Line feed: one source row down and one block of five target rows down
-		sourceRow.increaseBytes(source.stride);
-		targetRowA.increaseBytes(blockTargetStride);
-		targetRowB.increaseBytes(blockTargetStride);
-		targetRowC.increaseBytes(blockTargetStride);
-		targetRowD.increaseBytes(blockTargetStride);
-		targetRowE.increaseBytes(blockTargetStride);
-	}
-}
-
-// Pre-condition:
-//   * The source and target images have the same pack order (pixels are copied as whole 32-bit values without reordering)
-//   * Both source and target are 16-byte aligned, but do not have to own their padding
-//   * clipWidth % 6 == 0 (only whole 6-pixel blocks are written, so a remainder would be left untouched)
-//   * clipHeight % 6 == 0 (only whole 6-row blocks are written, so a remainder would be left untouched)
-static void blockMagnify_6x6(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
-	const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
-	SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
-	SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
-	SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
-	SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
-	SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
-	SafePointer<uint32_t> targetRowF = imageInternal::getSafeData<uint32_t>(target, 5);
-	int blockTargetStride = target.stride * 6; // Byte offset from one block of six target rows to the next
-	for (int upperTargetY = 0; upperTargetY + 6 <= clipHeight; upperTargetY+=6) {
-		// Carriage return: restart the pixel pointers at the beginning of the current rows
-		const SafePointer<uint32_t> sourcePixel = sourceRow;
-		SafePointer<uint32_t> targetPixelA = targetRowA;
-		SafePointer<uint32_t> targetPixelB = targetRowB;
-		SafePointer<uint32_t> targetPixelC = targetRowC;
-		SafePointer<uint32_t> targetPixelD = targetRowD;
-		SafePointer<uint32_t> targetPixelE = targetRowE;
-		SafePointer<uint32_t> targetPixelF = targetRowF;
-		int writeLeftX = 0;
-		while (writeLeftX + 6 <= clipWidth) {
-			// Read one pixel at a time
-			uint32_t scalarValue = *sourcePixel;
-			sourcePixel += 1;
-			// Write to a whole block of pixels (6 columns in each of the 6 rows)
-			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue;
-			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue;
-			targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue;
-			targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue;
-			targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue;
-			targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue;
-			targetPixelA += 6;
-			targetPixelB += 6;
-			targetPixelC += 6;
-			targetPixelD += 6;
-			targetPixelE += 6;
-			targetPixelF += 6;
-			// Count the target pixels written along the row
-			writeLeftX += 6;
-		}
-		// Line feed: one source row down and one block of six target rows down
-		sourceRow.increaseBytes(source.stride);
-		targetRowA.increaseBytes(blockTargetStride);
-		targetRowB.increaseBytes(blockTargetStride);
-		targetRowC.increaseBytes(blockTargetStride);
-		targetRowD.increaseBytes(blockTargetStride);
-		targetRowE.increaseBytes(blockTargetStride);
-		targetRowF.increaseBytes(blockTargetStride);
-	}
-}
-
-// Pre-condition:
-//   * The source and target images have the same pack order (pixels are copied as whole 32-bit values without reordering)
-//   * Both source and target are 16-byte aligned, but do not have to own their padding
-//   * clipWidth % 7 == 0 (only whole 7-pixel blocks are written, so a remainder would be left untouched)
-//   * clipHeight % 7 == 0 (only whole 7-row blocks are written, so a remainder would be left untouched)
-static void blockMagnify_7x7(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
-	const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
-	SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
-	SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
-	SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
-	SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
-	SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
-	SafePointer<uint32_t> targetRowF = imageInternal::getSafeData<uint32_t>(target, 5);
-	SafePointer<uint32_t> targetRowG = imageInternal::getSafeData<uint32_t>(target, 6);
-	int blockTargetStride = target.stride * 7; // Byte offset from one block of seven target rows to the next
-	for (int upperTargetY = 0; upperTargetY + 7 <= clipHeight; upperTargetY+=7) {
-		// Carriage return: restart the pixel pointers at the beginning of the current rows
-		const SafePointer<uint32_t> sourcePixel = sourceRow;
-		SafePointer<uint32_t> targetPixelA = targetRowA;
-		SafePointer<uint32_t> targetPixelB = targetRowB;
-		SafePointer<uint32_t> targetPixelC = targetRowC;
-		SafePointer<uint32_t> targetPixelD = targetRowD;
-		SafePointer<uint32_t> targetPixelE = targetRowE;
-		SafePointer<uint32_t> targetPixelF = targetRowF;
-		SafePointer<uint32_t> targetPixelG = targetRowG;
-		int writeLeftX = 0;
-		while (writeLeftX + 7 <= clipWidth) {
-			// Read one pixel at a time
-			uint32_t scalarValue = *sourcePixel;
-			sourcePixel += 1;
-			// Write to a whole block of pixels (7 columns in each of the 7 rows)
-			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue;
-			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue;
-			targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue;
-			targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue;
-			targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue;
-			targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue;
-			targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue;
-			targetPixelA += 7;
-			targetPixelB += 7;
-			targetPixelC += 7;
-			targetPixelD += 7;
-			targetPixelE += 7;
-			targetPixelF += 7;
-			targetPixelG += 7;
-			// Count the target pixels written along the row
-			writeLeftX += 7;
-		}
-		// Line feed: one source row down and one block of seven target rows down
-		sourceRow.increaseBytes(source.stride);
-		targetRowA.increaseBytes(blockTargetStride);
-		targetRowB.increaseBytes(blockTargetStride);
-		targetRowC.increaseBytes(blockTargetStride);
-		targetRowD.increaseBytes(blockTargetStride);
-		targetRowE.increaseBytes(blockTargetStride);
-		targetRowF.increaseBytes(blockTargetStride);
-		targetRowG.increaseBytes(blockTargetStride);
-	}
-}
-
-// Pre-condition:
-//   * The source and target images have the same pack order (pixels are copied as whole 32-bit values without reordering)
-//   * Both source and target are 16-byte aligned, but do not have to own their padding
-//   * clipWidth % 8 == 0 (only whole 8-pixel blocks are written, so a remainder would be left untouched)
-//   * clipHeight % 8 == 0 (only whole 8-row blocks are written, so a remainder would be left untouched)
-static void blockMagnify_8x8(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
-	const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
-	SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
-	SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
-	SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
-	SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
-	SafePointer<uint32_t> targetRowE = imageInternal::getSafeData<uint32_t>(target, 4);
-	SafePointer<uint32_t> targetRowF = imageInternal::getSafeData<uint32_t>(target, 5);
-	SafePointer<uint32_t> targetRowG = imageInternal::getSafeData<uint32_t>(target, 6);
-	SafePointer<uint32_t> targetRowH = imageInternal::getSafeData<uint32_t>(target, 7);
-	int blockTargetStride = target.stride * 8; // Byte offset from one block of eight target rows to the next
-	for (int upperTargetY = 0; upperTargetY + 8 <= clipHeight; upperTargetY+=8) {
-		// Carriage return: restart the pixel pointers at the beginning of the current rows
-		const SafePointer<uint32_t> sourcePixel = sourceRow;
-		SafePointer<uint32_t> targetPixelA = targetRowA;
-		SafePointer<uint32_t> targetPixelB = targetRowB;
-		SafePointer<uint32_t> targetPixelC = targetRowC;
-		SafePointer<uint32_t> targetPixelD = targetRowD;
-		SafePointer<uint32_t> targetPixelE = targetRowE;
-		SafePointer<uint32_t> targetPixelF = targetRowF;
-		SafePointer<uint32_t> targetPixelG = targetRowG;
-		SafePointer<uint32_t> targetPixelH = targetRowH;
-		int writeLeftX = 0;
-		while (writeLeftX + 8 <= clipWidth) {
-			// Read one pixel at a time
-			uint32_t scalarValue = *sourcePixel;
-			sourcePixel += 1;
-			// Write to a whole block of pixels (8 columns in each of the 8 rows)
-			targetPixelA[0] = scalarValue; targetPixelA[1] = scalarValue; targetPixelA[2] = scalarValue; targetPixelA[3] = scalarValue; targetPixelA[4] = scalarValue; targetPixelA[5] = scalarValue; targetPixelA[6] = scalarValue; targetPixelA[7] = scalarValue;
-			targetPixelB[0] = scalarValue; targetPixelB[1] = scalarValue; targetPixelB[2] = scalarValue; targetPixelB[3] = scalarValue; targetPixelB[4] = scalarValue; targetPixelB[5] = scalarValue; targetPixelB[6] = scalarValue; targetPixelB[7] = scalarValue;
-			targetPixelC[0] = scalarValue; targetPixelC[1] = scalarValue; targetPixelC[2] = scalarValue; targetPixelC[3] = scalarValue; targetPixelC[4] = scalarValue; targetPixelC[5] = scalarValue; targetPixelC[6] = scalarValue; targetPixelC[7] = scalarValue;
-			targetPixelD[0] = scalarValue; targetPixelD[1] = scalarValue; targetPixelD[2] = scalarValue; targetPixelD[3] = scalarValue; targetPixelD[4] = scalarValue; targetPixelD[5] = scalarValue; targetPixelD[6] = scalarValue; targetPixelD[7] = scalarValue;
-			targetPixelE[0] = scalarValue; targetPixelE[1] = scalarValue; targetPixelE[2] = scalarValue; targetPixelE[3] = scalarValue; targetPixelE[4] = scalarValue; targetPixelE[5] = scalarValue; targetPixelE[6] = scalarValue; targetPixelE[7] = scalarValue;
-			targetPixelF[0] = scalarValue; targetPixelF[1] = scalarValue; targetPixelF[2] = scalarValue; targetPixelF[3] = scalarValue; targetPixelF[4] = scalarValue; targetPixelF[5] = scalarValue; targetPixelF[6] = scalarValue; targetPixelF[7] = scalarValue;
-			targetPixelG[0] = scalarValue; targetPixelG[1] = scalarValue; targetPixelG[2] = scalarValue; targetPixelG[3] = scalarValue; targetPixelG[4] = scalarValue; targetPixelG[5] = scalarValue; targetPixelG[6] = scalarValue; targetPixelG[7] = scalarValue;
-			targetPixelH[0] = scalarValue; targetPixelH[1] = scalarValue; targetPixelH[2] = scalarValue; targetPixelH[3] = scalarValue; targetPixelH[4] = scalarValue; targetPixelH[5] = scalarValue; targetPixelH[6] = scalarValue; targetPixelH[7] = scalarValue;
-			targetPixelA += 8;
-			targetPixelB += 8;
-			targetPixelC += 8;
-			targetPixelD += 8;
-			targetPixelE += 8;
-			targetPixelF += 8;
-			targetPixelG += 8;
-			targetPixelH += 8;
-			// Count the target pixels written along the row
-			writeLeftX += 8;
-		}
-		// Line feed: one source row down and one block of eight target rows down
-		sourceRow.increaseBytes(source.stride);
-		targetRowA.increaseBytes(blockTargetStride);
-		targetRowB.increaseBytes(blockTargetStride);
-		targetRowC.increaseBytes(blockTargetStride);
-		targetRowD.increaseBytes(blockTargetStride);
-		targetRowE.increaseBytes(blockTargetStride);
-		targetRowF.increaseBytes(blockTargetStride);
-		targetRowG.increaseBytes(blockTargetStride);
-		targetRowH.increaseBytes(blockTargetStride);
-	}
-}
-
-static void blackEdges(ImageRgbaU8Impl& target, int excludedWidth, int excludedHeight) { // Fills the target with zero outside of the excludedWidth x excludedHeight region at the upper left corner; assumes the rectangle arguments are (left, top, right, bottom) — TODO confirm against drawSolidRectangleMemset
-	// Right side: from excludedWidth to target.width, above excludedHeight
-	drawSolidRectangleMemset<Color4xU8>(target, excludedWidth, 0, target.width, excludedHeight, 0);
-	// Bottom and corner: the full target width, from excludedHeight to target.height
-	drawSolidRectangleMemset<Color4xU8>(target, 0, excludedHeight, target.width, target.height, 0);
-}
-
-void dsr::imageImpl_blockMagnify(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int pixelWidth, int pixelHeight) { // Magnifies source into target with pixelWidth x pixelHeight target pixels per source pixel, then clears the rest of target
-	if (pixelWidth < 1) { pixelWidth = 1; } // Block dimensions below one are clamped to one
-	if (pixelHeight < 1) { pixelHeight = 1; }
-	bool sameOrder = target.packOrder.packOrderIndex == source.packOrder.packOrderIndex; // The specialized loops copy whole 32-bit pixels, so they are only selected for identical pack orders
-	// Find the part of source which fits into target with whole pixels
-	int clipWidth = roundDown(std::min(target.width, source.width * pixelWidth), pixelWidth);
-	int clipHeight = roundDown(std::min(target.height, source.height * pixelHeight), pixelHeight);
-	if (sameOrder) {
-		if (imageIs16ByteAligned(source) && imageIs16ByteAligned(target)) { // The specialized loops list 16-byte alignment as a pre-condition
-			if (pixelWidth == 2 && pixelHeight == 2) { // Square block sizes from 2x2 to 8x8 have hand-unrolled versions
-				blockMagnify_2x2(target, source, clipWidth, clipHeight);
-			} else if (pixelWidth == 3 && pixelHeight == 3) {
-				blockMagnify_3x3(target, source, clipWidth, clipHeight);
-			} else if (pixelWidth == 4 && pixelHeight == 4) {
-				blockMagnify_4x4(target, source, clipWidth, clipHeight);
-			} else if (pixelWidth == 5 && pixelHeight == 5) {
-				blockMagnify_5x5(target, source, clipWidth, clipHeight);
-			} else if (pixelWidth == 6 && pixelHeight == 6) {
-				blockMagnify_6x6(target, source, clipWidth, clipHeight);
-			} else if (pixelWidth == 7 && pixelHeight == 7) {
-				blockMagnify_7x7(target, source, clipWidth, clipHeight);
-			} else if (pixelWidth == 8 && pixelHeight == 8) {
-				blockMagnify_8x8(target, source, clipWidth, clipHeight);
-			} else { // Any other block size, including non-square blocks, uses the generic implementation
-				blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
-			}
-		} else { // Unaligned images with the same pack order
-			blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
-		}
-	} else { // Different pack orders; presumably the true template argument enables channel conversion — TODO confirm against blockMagnify_reference
-		blockMagnify_reference<true>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
-	}
-	blackEdges(target, clipWidth, clipHeight); // Clear the target pixels to the right of and below the magnified region
-}

+ 0 - 94
Source/DFPSR/image/draw.h

@@ -1,94 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2018 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#ifndef DFPSR_IMAGE_DRAW
-#define DFPSR_IMAGE_DRAW
-
-#include "Image.h"
-#include "ImageU8.h"
-#include "ImageU16.h"
-#include "ImageF32.h"
-#include "ImageRgbaU8.h"
-
-namespace dsr {
-
-// An internal draw API to allow having multiple external APIs without code duplication
-
-void imageImpl_draw_solidRectangle(ImageU8Impl& image, const IRect& bound, int color);
-void imageImpl_draw_solidRectangle(ImageU16Impl& image, const IRect& bound, int color);
-void imageImpl_draw_solidRectangle(ImageF32Impl& image, const IRect& bound, float color);
-void imageImpl_draw_solidRectangle(ImageRgbaU8Impl& image, const IRect& bound, const ColorRgbaI32& color);
-
-void imageImpl_draw_line(ImageU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color);
-void imageImpl_draw_line(ImageU16Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color);
-void imageImpl_draw_line(ImageF32Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color);
-void imageImpl_draw_line(ImageRgbaU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color);
-
-// Integer formats of different size are treated as having the same scale but different ranges
-// The left and top arguments default to zero in every overload
-void imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageU8Impl& target, const ImageU8Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageU16Impl& target, const ImageU16Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageF32Impl& target, const ImageF32Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU8Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU16Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageF32Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageU8Impl& target, const ImageF32Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageU8Impl& target, const ImageU16Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageU16Impl& target, const ImageU8Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageU16Impl& target, const ImageF32Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageF32Impl& target, const ImageU8Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawCopy(ImageF32Impl& target, const ImageU16Impl& source, int32_t left = 0, int32_t top = 0);
-
-void imageImpl_drawAlphaFilter(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left = 0, int32_t top = 0);
-void imageImpl_drawMaxAlpha(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left = 0, int32_t top = 0, int32_t sourceAlphaOffset = 0);
-void imageImpl_drawAlphaClip(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left = 0, int32_t top = 0, int32_t threshold = 0);
-void imageImpl_drawSilhouette(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left = 0, int32_t top = 0);
-
-void imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0);
-void imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
-  int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0);
-void imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
-  ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0);
-void imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0);
-void imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
-  int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0);
-void imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
-  ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0);
-
-// Pre-conditions:
-//     * target must own its padding
-//       This is automatically true for aligned images
-//       If the target does not own its padding, any pixels being treated as padding at the end of each line may become visible artifacts in another image sharing the buffer.
-// Side-effects:
-//     * Writes a resized version of source to target, including padding
-//     * May also write to any pixels in wideTempImage, including padding (NOTE(review): none of the declarations below has a wideTempImage parameter, so this may be outdated — confirm)
-//     * May also change the pack order of wideTempImage
-void imageImpl_resizeToTarget(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate);
-void imageImpl_resizeToTarget(ImageU8Impl& target, const ImageU8Impl& source, bool interpolate);
-void imageImpl_blockMagnify(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int pixelWidth, int pixelHeight);
-void imageImpl_blockMagnify_aligned(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int pixelWidth, int pixelHeight); // NOTE(review): presumably requires aligned images, as the name suggests — confirm against the definition
-
-}
-
-#endif
-

+ 0 - 82
Source/DFPSR/image/internal/imageInternal.h

@@ -1,82 +0,0 @@
-
-// zlib open source license
-//
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#ifndef DFPSR_IMAGE_INTERNAL
-#define DFPSR_IMAGE_INTERNAL
-
-#include "../Image.h"
-#include "../ImageRgbaU8.h"
-
-namespace dsr {
-namespace imageInternal {
-
-//inline int32_t getWidth(const ImageImpl &image) { return image.width; }
-inline int32_t getWidth(const ImageImpl *image) { return image ? image->width : 0; } // The pointer overloads accept null and then return zero or a default constructed value
-//inline int32_t getHeight(const ImageImpl &image) { return image.height; }
-inline int32_t getHeight(const ImageImpl *image) { return image ? image->height : 0; }
-//inline int32_t getStride(const ImageImpl &image) { return image.stride; }
-inline int32_t getStride(const ImageImpl *image) { return image ? image->stride : 0; }
-inline int32_t getRowSize(const ImageImpl &image) { return image.width * image.pixelSize; } // Width multiplied by pixel size, without any padding
-inline int32_t getRowSize(const ImageImpl *image) { return image ? getRowSize(*image) : 0; }
-inline int32_t getUsedBytes(const ImageImpl &image) { return (image.stride * (image.height - 1)) + (image.width * image.pixelSize); } // Full strides for all rows except the last, which only counts its pixels
-inline int32_t getUsedBytes(const ImageImpl *image) { return image ? getUsedBytes(*image) : 0; }
-//inline int32_t getPixelSize(const ImageImpl &image) { return image.pixelSize; }
-inline int32_t getPixelSize(const ImageImpl *image) { return image ? image->pixelSize : 0; }
-//inline int32_t getStartOffset(const ImageImpl &image) { return image.startOffset; }
-inline int32_t getStartOffset(const ImageImpl *image) { return image ? image->startOffset : 0; }
-inline Buffer getBuffer(const ImageImpl &image) { return image.buffer; }
-inline Buffer getBuffer(const ImageImpl *image) { return image ? getBuffer(*image) : Buffer(); }
-inline IRect getBound(const ImageImpl &image) { return IRect(0, 0, image.width, image.height); } // Rectangle from the origin with the image's dimensions
-inline IRect getBound(const ImageImpl *image) { return image ? getBound(*image) : IRect(); }
-inline PackOrder getPackOrder(const ImageRgbaU8Impl *image) { return image ? image->packOrder : PackOrder(); }
-
-// Get data
-//   The pointer has access to the whole parent buffer,
-//   to allow aligning SIMD vectors outside of the used region.
-//   The returned pointer is moved startOffset + stride * rowIndex bytes into the buffer, which is the start of row rowIndex.
-template <typename T>
-static inline const SafePointer<T> getSafeData(const ImageImpl &image, int rowIndex = 0) {
-	auto result = buffer_getSafeData<T>(image.buffer, "Image buffer");
-	result.increaseBytes(image.startOffset + image.stride * rowIndex);
-	return result;
-}
-template <typename T>
-inline const SafePointer<T> getSafeData(const ImageImpl *image, int rowIndex = 0) {
-	return image ? getSafeData<T>(*image, rowIndex) : SafePointer<T>("Null image buffer"); // A null image gives a SafePointer constructed from only the name "Null image buffer"
-}
-template <typename T>
-static inline SafePointer<T> getSafeData(ImageImpl &image, int rowIndex = 0) {
-	auto result = buffer_getSafeData<T>(image.buffer, "Image buffer");
-	result.increaseBytes(image.startOffset + image.stride * rowIndex);
-	return result;
-}
-template <typename T>
-inline SafePointer<T> getSafeData(ImageImpl *image, int rowIndex = 0) {
-	return image ? getSafeData<T>(*image, rowIndex) : SafePointer<T>("Null image buffer"); // A null image gives a SafePointer constructed from only the name "Null image buffer"
-}
-
-}
-}
-
-#endif
-

+ 0 - 65
Source/DFPSR/image/internal/imageTemplate.h

@@ -1,65 +0,0 @@
-// zlib open source license
-//
-// Copyright (c) 2018 to 2019 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
-
-#ifndef DFPSR_IMAGE_TEMPLATE
-#define DFPSR_IMAGE_TEMPLATE
-
-#include "imageInternal.h"
-#include "../../math/scalar.h"
-#include "../Image.h"
-#include <limits>
-
-namespace dsr {
-
-// TODO: Remove clamped pixel operation
-// Each image type must define initializeImage instead of a constructor;
-// These macros are used to compile instances of template functions because it's much safer than exposing header defined template classes.
-#define IMAGE_DEFINITION(IMAGE_TYPE,CHANNELS,COLOR_TYPE,ELEMENT_TYPE) \
-	void IMAGE_TYPE::writePixel(IMAGE_TYPE &image, int32_t x, int32_t y, COLOR_TYPE color) { \
-		if (x >= 0 && x < image.width && y >= 0 && y < image.height) { \
-			*(COLOR_TYPE*)(buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset + (x * sizeof(COLOR_TYPE)) + (y * image.stride)) = color; \
-		} \
-	} \
-	void IMAGE_TYPE::writePixel_unsafe(IMAGE_TYPE &image, int32_t x, int32_t y, COLOR_TYPE color) { \
-		*(COLOR_TYPE*)(buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset + (x * sizeof(COLOR_TYPE)) + (y * image.stride)) = color; \
-	} \
-	COLOR_TYPE IMAGE_TYPE::readPixel_clamp(const IMAGE_TYPE &image, int32_t x, int32_t y) { \
-		if (image.width > 0 && image.height > 0) { \
-			if (x < 0) { x = 0; } \
-			if (y < 0) { y = 0; } \
-			if (x >= image.width) { x = image.width - 1; } \
-			if (y >= image.height) { y = image.height - 1; } \
-			return *(COLOR_TYPE*)(buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset + (x * sizeof(COLOR_TYPE)) + (y * image.stride)); \
-		} else { \
-			return COLOR_TYPE(); \
-		} \
-	} \
-	COLOR_TYPE IMAGE_TYPE::readPixel_unsafe(const IMAGE_TYPE &image, int32_t x, int32_t y) { \
-		assert(x >= 0 && x < image.width && y >= 0 && y < image.height); \
-		return *(COLOR_TYPE*)(buffer_dangerous_getUnsafeData(image.buffer) + image.startOffset + (x * sizeof(COLOR_TYPE)) + (y * image.stride)); \
-	}
-
-}
-
-#endif
-

+ 3 - 1
Source/DFPSR/image/stbImage/stbImageWrapper.cpp

@@ -7,9 +7,11 @@
 
 #include "stbImageWrapper.h"
 
+#include "../../api/imageAPI.h"
+
 namespace dsr {
 
-OrderedImageRgbaU8 image_stb_decode_RgbaU8(const SafePointer<uint8_t> data, int size) {
+OrderedImageRgbaU8 image_stb_decode_RgbaU8(SafePointer<const uint8_t> data, int size) {
 	#ifdef SAFE_POINTER_CHECKS
 		// If the safe pointer has debug information, use it to assert that size is within bound.
 		data.assertInside("image_stb_decode_RgbaU8 (data)", data.getUnsafe(), (size_t)size);

+ 2 - 3
Source/DFPSR/image/stbImage/stbImageWrapper.h

@@ -2,13 +2,12 @@
 #ifndef DFPSR_API_IMAGE_STB_WRAPPER
 #define DFPSR_API_IMAGE_STB_WRAPPER
 
-#include "../../api/imageAPI.h"
+#include "../../image/Image.h"
 #include "../../api/stringAPI.h"
-#include "../../api/types.h"
 
 namespace dsr {
 
-OrderedImageRgbaU8 image_stb_decode_RgbaU8(const SafePointer<uint8_t> data, int size);
+OrderedImageRgbaU8 image_stb_decode_RgbaU8(SafePointer<const uint8_t> data, int size);
 
 // Pre-conditions:
 // * The image must be packed in RGBA order at runtime, but can't be in the OrderedImageRgbaU8 format because Ordered inherits from Aligned.

+ 1 - 0
Source/DFPSR/includeFramework.h

@@ -12,6 +12,7 @@
 
 	// 2D API
 	#include "api/imageAPI.h" // Creating images and modifying pixels
+	#include "api/textureAPI.h" // Creating textures and sampling pixels
 	#include "api/drawAPI.h" // Efficient drawing on images
 	#include "api/filterAPI.h" // Efficient image generation, resizing and filtering
 	// 3D API

+ 3 - 3
Source/DFPSR/machine/VirtualMachine.cpp

@@ -26,7 +26,7 @@
 
 using namespace dsr;
 
-VirtualMachine::VirtualMachine(const ReadableString& code, const std::shared_ptr<PlanarMemory>& memory,
+VirtualMachine::VirtualMachine(const ReadableString& code, const Handle<PlanarMemory>& memory,
   const InsSig* machineInstructions, int32_t machineInstructionCount,
   const VMTypeDef* machineTypes, int32_t machineTypeCount)
 : memory(memory), machineInstructions(machineInstructions), machineInstructionCount(machineInstructionCount),
@@ -466,7 +466,7 @@ void VirtualMachine::executeMethod(int methodIndex) {
 				}
 				printText(U")");
 			}
-			word->operation(*this, *(this->memory.get()), word->args);
+			word->operation(*this, this->memory.getReference(), word->args);
 			if (signature) {
 				if (signature->targetCount > 0) {
 					printText(U" -> ");
@@ -480,7 +480,7 @@ void VirtualMachine::executeMethod(int methodIndex) {
 			}
 			printText(U"\n");
 		#else
-			word->operation(*this, *(this->memory.get()), word->args);
+			word->operation(*this, this->memory.getReference(), word->args);
 		#endif
 	}
 	#ifdef VIRTUAL_MACHINE_PROFILE

+ 4 - 4
Source/DFPSR/machine/VirtualMachine.h

@@ -264,7 +264,7 @@ struct VirtualMachine {
 	// Methods
 	List<Method> methods;
 	// Memory
-	std::shared_ptr<PlanarMemory> memory;
+	Handle<PlanarMemory> memory;
 	// Instruction types
 	const InsSig* machineInstructions; int32_t machineInstructionCount;
 	const InsSig* getMachineInstructionFromFunction(MachineOperation functionPointer) {
@@ -296,7 +296,7 @@ struct VirtualMachine {
 		return nullptr;
 	}
 	// Constructor
-	VirtualMachine(const ReadableString& code, const std::shared_ptr<PlanarMemory>& memory,
+	VirtualMachine(const ReadableString& code, const Handle<PlanarMemory>& memory,
 	  const InsSig* machineInstructions, int32_t machineInstructionCount,
 	  const VMTypeDef* machineTypes, int32_t machineTypeCount);
 
@@ -357,7 +357,7 @@ struct VirtualMachine {
 					fullContent = false;
 				#endif
 				if (typeDefinition) {
-					typeDefinition->debugPrinter(*(this->memory.get()), *variable, globalIndex, framePointer, fullContent);
+					typeDefinition->debugPrinter(*(this->memory.getUnsafe()), *variable, globalIndex, framePointer, fullContent);
 					if (globalIndex < 0) {
 						printText(U" @gi(", globalIndex, U")");
 					} else {
@@ -375,7 +375,7 @@ struct VirtualMachine {
 				printText(indentation, U"* ", getName(variable->access), U" ");
 				const VMTypeDef* typeDefinition = getMachineType(variable->typeDescription->dataType);
 				if (typeDefinition) {
-					typeDefinition->debugPrinter(*(this->memory.get()), *variable, variable->getGlobalIndex(), framePointer, false);
+					typeDefinition->debugPrinter(*(this->memory.getUnsafe()), *variable, variable->getGlobalIndex(), framePointer, false);
 				} else {
 					printText(U"?");
 				}

+ 7 - 8
Source/DFPSR/machine/mediaFilters.cpp

@@ -23,7 +23,6 @@
 
 #include "mediaFilters.h"
 #include "../base/simd.h"
-#include "../api/imageAPI.h"
 #include "../api/drawAPI.h"
 
 using namespace dsr;
@@ -145,10 +144,10 @@ void dsr::media_filter_sub(AlignedImageU8& targetImage, AlignedImageU8 imageA, A
 		SafePointer<uint8_t> sourcePixelA = sourceRowA;
 		SafePointer<uint8_t> sourcePixelB = sourceRowB;
 		for (int32_t x = 0; x < image_getWidth(targetImage); x += 16) {
-			U8x16 colorA = U8x16::readAligned(sourcePixelA, "media_filter_add (sourcePixelA)");
-			U8x16 colorB = U8x16::readAligned(sourcePixelB, "media_filter_add (sourcePixelB)");
+			U8x16 colorA = U8x16::readAligned(sourcePixelA, "media_filter_sub (sourcePixelA)");
+			U8x16 colorB = U8x16::readAligned(sourcePixelB, "media_filter_sub (sourcePixelB)");
 			U8x16 result = saturatedSubtraction(colorA, colorB);
-			result.writeAligned(targetPixel, "media_filter_add (targetPixel)");
+			result.writeAligned(targetPixel, "media_filter_sub (targetPixel)");
 			targetPixel += 16;
 			sourcePixelA += 16;
 			sourcePixelB += 16;
@@ -174,9 +173,9 @@ void dsr::media_filter_sub(AlignedImageU8& targetImage, AlignedImageU8 image, in
 		SafePointer<uint8_t> targetPixel = targetRow;
 		SafePointer<uint8_t> sourcePixel = sourceRowA;
 		for (int32_t x = 0; x < image_getWidth(targetImage); x += 16) {
-			U8x16 colorA = U8x16::readAligned(sourcePixel, "media_filter_add (sourcePixel)");
+			U8x16 colorA = U8x16::readAligned(sourcePixel, "media_filter_sub (sourcePixel)");
 			U8x16 result = saturatedSubtraction(colorA, repeatedLuma);
-			result.writeAligned(targetPixel, "media_filter_add (targetPixel)");
+			result.writeAligned(targetPixel, "media_filter_sub (targetPixel)");
 			targetPixel += 16;
 			sourcePixel += 16;
 		}
@@ -200,9 +199,9 @@ void dsr::media_filter_sub(AlignedImageU8& targetImage, int32_t luma, AlignedIma
 		SafePointer<uint8_t> targetPixel = targetRow;
 		SafePointer<uint8_t> sourcePixel = sourceRowA;
 		for (int32_t x = 0; x < image_getWidth(targetImage); x += 16) {
-			U8x16 colorA = U8x16::readAligned(sourcePixel, "media_filter_add (sourcePixel)");
+			U8x16 colorA = U8x16::readAligned(sourcePixel, "media_filter_sub (sourcePixel)");
 			U8x16 result = saturatedSubtraction(repeatedLuma, colorA);
-			result.writeAligned(targetPixel, "media_filter_add (targetPixel)");
+			result.writeAligned(targetPixel, "media_filter_sub (targetPixel)");
 			targetPixel += 16;
 			sourcePixel += 16;
 		}

+ 2 - 1
Source/DFPSR/machine/mediaFilters.h

@@ -24,7 +24,8 @@
 #ifndef DFPSR_MEDIA_FILTERS
 #define DFPSR_MEDIA_FILTERS
 
-#include "../../DFPSR/includeFramework.h" // TODO: Replace with specific modules
+#include "../api/imageAPI.h"
+#include "../math/FixedPoint.h"
 
 namespace dsr {
 

+ 31 - 23
Source/DFPSR/math/scalar.h

@@ -25,53 +25,63 @@
 #define DFPSR_MATH_SCALAR
 
 #include <cmath>
+#include "../base/DsrTraits.h"
 
 namespace dsr {
 
 // A minimum function that can take more than two arguments.
 // Post-condition: Returns the smallest of all given values, which must be comparable using the < operator and have the same type.
-template <typename T>
+template <typename T, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar, T))>
 inline T min(const T &a, const T &b) {
 	return (a < b) ? a : b;
 }
-template <typename T, typename... TAIL>
+template <typename T, typename... TAIL, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar, T))>
 inline T min(const T &a, const T &b, TAIL... tail) {
 	return min(min(a, b), tail...);
 }
 
 // A maximum function that can take more than two arguments.
 // Post-condition: Returns the largest of all given values, which must be comparable using the > operator and have the same type.
-template <typename T>
+template <typename T, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar, T))>
 inline T max(const T &a, const T &b) {
 	return (a > b) ? a : b;
 }
-template <typename T, typename... TAIL>
+template <typename T, typename... TAIL, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar, T))>
 inline T max(const T &a, const T &b, TAIL... tail) {
 	return max(max(a, b), tail...);
 }
 
+// Limits value to the inclusive range from minValue to maxValue.
+// Pre-condition: minValue <= maxValue
+// Post-condition: Returns maxValue if value is above it, minValue if value is below it, otherwise value.
+template <typename T, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar, T))>
+T clamp(const T &minValue, T value, const T &maxValue) {
+	// The upper limit is applied first and the lower limit last, so minValue wins if the pre-condition is violated.
+	const T cappedAbove = (value > maxValue) ? maxValue : value;
+	return (cappedAbove < minValue) ? minValue : cappedAbove;
+}
+
 // Returns a modulo b where 0 <= a < b
-inline int signedModulo(int a, int b) {
-	int result = 0;
-	if (b > 0) {
-		if (a >= 0) {
-			result = a % b; // Simple modulo
-		} else {
-			result = (b - (-a % b)) % b; // Negative modulo
-		}
+template <typename I, typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar_SignedInteger, I) && DSR_CHECK_PROPERTY(DsrTrait_Scalar_Integer, U))>
+inline int32_t signedModulo(I a, U b) {
+	if (a >= 0) {
+		return a % b; // Simple modulo
+	} else {
+		return (b - (-a % b)) % b; // Negative modulo
 	}
-	return result;
 }
 
-inline int roundUp(int size, int alignment) {
+// Returns size rounded up to the nearest multiple of alignment, which is size itself when it is already a multiple.
+// Pre-condition: alignment > 0, because signedModulo divides by it.
+// Note: The remainder comes back from signedModulo as int32_t, so alignment is assumed to fit within int32_t.
+//   When U is unsigned, the sum is evaluated using the usual arithmetic conversions before converting back to I.
+//   NOTE(review): Negative sizes then rely on wrap-around conversion, confirm that callers only mix types for non-negative sizes.
+template <typename I, typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar_SignedInteger, I) && DSR_CHECK_PROPERTY(DsrTrait_Scalar_Integer, U))>
+inline I roundUp(I size, U alignment) {
 	return size + (alignment - 1) - signedModulo(size - 1, alignment);
 }
 
-inline int roundDown(int size, int alignment) {
+// Returns size rounded down to the nearest multiple of alignment, which is size itself when it is already a multiple.
+//   Negative sizes are rounded towards negative infinity, because signedModulo returns a non-negative remainder.
+// Pre-condition: alignment > 0, because signedModulo divides by it.
+// Note: The remainder comes back from signedModulo as int32_t, so alignment is assumed to fit within int32_t.
+template <typename I, typename U, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar_SignedInteger, I) && DSR_CHECK_PROPERTY(DsrTrait_Scalar_Integer, U))>
+inline I roundDown(I size, U alignment) {
 	return size - signedModulo(size, alignment);
 }
 
-inline float absDiff(float a, float b) {
+// Absolute difference between two floating-point scalars of the same type T.
+//   The difference a - b is negated when it is below zero.
+template <typename T, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar_Floating, T))>
+inline T absDiff(T a, T b) {
-	float result = a - b;
+	// Keep the intermediate in T, because a float intermediate would truncate the difference when T is wider than float.
+	T result = a - b;
 	if (result < 0.0f) {
 		result = -result;
@@ -80,7 +90,7 @@ inline float absDiff(float a, float b) {
 }
 
 inline uint8_t absDiff(uint8_t a, uint8_t b) {
-	int result = (int)a - (int)b;
+	int32_t result = (int32_t)a - (int32_t)b;
 	if (result < 0) {
 		result = -result;
 	}
@@ -88,15 +98,14 @@ inline uint8_t absDiff(uint8_t a, uint8_t b) {
 }
 
 inline uint16_t absDiff(uint16_t a, uint16_t b) {
-	int result = (int)a - (int)b;
+	int32_t result = (int32_t)a - (int32_t)b;
 	if (result < 0) {
 		result = -result;
 	}
 	return (uint16_t)result;
 }
 
-// Allowing compilation on older C++ versions
-// Only use for trivial types if you want to avoid cloning and destruction
+// Only use this for trivial types, use std::swap for objects with non-trivial construction.
 template <typename T>
 inline void swap(T &a, T &b) {
 	T temp = a;
@@ -105,7 +114,7 @@ inline void swap(T &a, T &b) {
 }
 
 // More compact than min(a, b) when reading from the target
-template <typename T>
+template <typename T, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar, T))>
 inline void replaceWithSmaller(T &target, const T &source) {
 	if (source < target) {
 		target = source;
@@ -113,7 +122,7 @@ inline void replaceWithSmaller(T &target, const T &source) {
 }
 
 // More compact than max(a, b) when reading from the target
-template <typename T>
+template <typename T, DSR_ENABLE_IF(DSR_CHECK_PROPERTY(DsrTrait_Scalar, T))>
 inline void replaceWithLarger(T &target, const T &source) {
 	if (source > target) {
 		target = source;
@@ -123,4 +132,3 @@ inline void replaceWithLarger(T &target, const T &source) {
 }
 
 #endif
-

+ 13 - 13
Source/DFPSR/persistent/ClassFactory.cpp

@@ -35,8 +35,8 @@ public:
 };
 static List<ConstructorInfo> persistentClasses;
 
-std::shared_ptr<StructureDefinition> Persistent::getStructure() const {
-	return std::shared_ptr<StructureDefinition>();
+Handle<StructureDefinition> Persistent::getStructure() const {
+	return Handle<StructureDefinition>();
 }
 
 static int findPersistentClass(const String &type) {
@@ -61,7 +61,7 @@ void Persistent::registerPersistentClass() {
 	}
 }
 
-bool Persistent::addChild(std::shared_ptr<Persistent> child) {
+bool Persistent::addChild(Handle<Persistent> child) {
 	return false;
 }
 
@@ -69,8 +69,8 @@ int Persistent::getChildCount() const {
 	return 0;
 }
 
-std::shared_ptr<Persistent> Persistent::getChild(int index) const {
-	return std::shared_ptr<Persistent>();
+Handle<Persistent> Persistent::getChild(int index) const {
+	return Handle<Persistent>();
 }
 
 void Persistent::setProperty(const ReadableString &key, const ReadableString &value, const ReadableString &fromPath) {
@@ -96,8 +96,8 @@ bool Persistent::assignValue(const ReadableString &content, const ReadableString
 }
 
 String& Persistent::toStreamIndented(String& out, const ReadableString& indentation) const {
-	std::shared_ptr<StructureDefinition> structure = this->getStructure();
-	if (structure.get() == nullptr) {
+	Handle<StructureDefinition> structure = this->getStructure();
+	if (structure.isNull()) {
 		throwError(U"Failed to get the structure of a class being serialized.\n");
 	}
 	string_append(out, indentation, U"Begin : ", structure->name, U"\n");
@@ -122,7 +122,7 @@ String& Persistent::toStreamIndented(String& out, const ReadableString& indentat
 	return out;
 }
 
-std::shared_ptr<Persistent> dsr::createPersistentClass(const String &type, bool mustExist) {
+Handle<Persistent> dsr::createPersistentClass(const String &type, bool mustExist) {
 	// Look for the component
 	int existingIndex = findPersistentClass(type);
 	if (existingIndex > -1) {
@@ -132,12 +132,12 @@ std::shared_ptr<Persistent> dsr::createPersistentClass(const String &type, bool
 		throwError(U"Failed to default create a class named ", type, U". Call registerPersistentClass on a temporary instance of the class to register the name.\n");
 	}
 	// Failed to load by name
-	return std::shared_ptr<Persistent>(); // Null
+	return Handle<Persistent>(); // Null
 }
 
-std::shared_ptr<Persistent> dsr::createPersistentClassFromText(const ReadableString &text, const ReadableString &fromPath) {
-	std::shared_ptr<Persistent> rootObject, newObject;
-	List<std::shared_ptr<Persistent>> stack;
+Handle<Persistent> dsr::createPersistentClassFromText(const ReadableString &text, const ReadableString &fromPath) {
+	Handle<Persistent> rootObject, newObject;
+	List<Handle<Persistent>> stack;
 	string_split_callback([&rootObject, &newObject, &stack, &fromPath](ReadableString line) {
 		int equalityIndex = string_findFirst(line, '=');
 		if (equalityIndex > -1) {
@@ -153,7 +153,7 @@ std::shared_ptr<Persistent> dsr::createPersistentClassFromText(const ReadableStr
 				if (string_caseInsensitiveMatch(keyword, U"Begin")) {
 					String type = string_removeOuterWhiteSpace(string_after(line, colonIndex));
 					newObject = createPersistentClass(type);
-					if (rootObject.get() == nullptr) {
+					if (rootObject.isNull()) {
 						rootObject = newObject;
 					} else {
 						if (!(stack.last()->addChild(newObject))) {

+ 15 - 19
Source/DFPSR/persistent/ClassFactory.h

@@ -34,31 +34,31 @@ namespace dsr {
 class Persistent;
 
 // Reference method for creating a persistent class
-inline std::shared_ptr<Persistent> classConstructor() {
-	return std::shared_ptr<Persistent>(); // Null
+inline dsr::Handle<Persistent> classConstructor() {
+	return dsr::Handle<Persistent>(); // Null
 }
 
 // Must be used in each class inheriting from Persistent (both directly and indirectly)
 #define PERSISTENT_DECLARATION(CLASS) \
-	std::shared_ptr<dsr::StructureDefinition> getStructure() const override; \
+	dsr::Handle<dsr::StructureDefinition> getStructure() const override; \
 	decltype(&dsr::classConstructor) getConstructor() const override; \
 	explicit CLASS(const dsr::ReadableString &content, const dsr::ReadableString &fromPath);
 
 // Must be used in the implementation of each class inheriting from Persistent
 #define PERSISTENT_DEFINITION(CLASS) \
-	std::shared_ptr<dsr::StructureDefinition> CLASS##Type; \
-	std::shared_ptr<dsr::StructureDefinition> CLASS::getStructure() const { \
-		if (CLASS##Type.get() == nullptr) { \
-			CLASS##Type = std::make_shared<dsr::StructureDefinition>(U ## #CLASS); \
-			this->declareAttributes(*(CLASS##Type)); \
+	dsr::Handle<dsr::StructureDefinition> CLASS##Type; \
+	dsr::Handle<dsr::StructureDefinition> CLASS::getStructure() const { \
+		if (CLASS##Type.isNull()) { \
+			CLASS##Type = dsr::handle_create<dsr::StructureDefinition>(U ## #CLASS).setName("Persistent " #CLASS " StructureDefinition"); \
+			this->declareAttributes((CLASS##Type).getReference()); \
 		} \
 		return CLASS##Type; \
 	} \
 	CLASS::CLASS(const dsr::ReadableString &content, const dsr::ReadableString &fromPath) { \
 		this->assignValue(content, fromPath); \
 	} \
-	std::shared_ptr<dsr::Persistent> CLASS##Constructor() { \
-		return std::dynamic_pointer_cast<dsr::Persistent>(std::make_shared<CLASS>()); \
+	dsr::Handle<dsr::Persistent> CLASS##Constructor() { \
+		return dsr::handle_dynamicCast<dsr::Persistent>(dsr::handle_create<CLASS>().setName("Persistent " #CLASS)); \
 	} \
 	decltype(&dsr::classConstructor) CLASS::getConstructor() const { \
 		return &CLASS##Constructor; \
@@ -94,7 +94,7 @@ class Persistent : public Printable {
 public:
 	// Persistent attributes may not be write protected
 	virtual Persistent* findAttribute(const ReadableString &name);
-	virtual std::shared_ptr<StructureDefinition> getStructure() const;
+	virtual Handle<StructureDefinition> getStructure() const;
 	virtual decltype(&classConstructor) getConstructor() const = 0;
 	// Call from the start of main, to allow constructing the class by name
 	void registerPersistentClass();
@@ -108,9 +108,9 @@ public:
 	// Attempt to add another persistent object
 	//   Return false if the child object was rejected
 	//   Make sure that connections that would create an infinite loop are rejected
-	virtual bool addChild(std::shared_ptr<Persistent> child);
+	virtual bool addChild(Handle<Persistent> child);
 	virtual int getChildCount() const;
-	virtual std::shared_ptr<Persistent> getChild(int index) const;
+	virtual Handle<Persistent> getChild(int index) const;
 public:
 	// Override for new compound types
 
@@ -128,10 +128,6 @@ public:
 	// Save to a stream using any indentation
 	virtual String& toStreamIndented(String& out, const ReadableString& indentation) const override;
 };
-// Save to a stream without indentation
-inline std::ostream& operator<< (std::ostream& out, const Persistent& p) {
-	return p.toStream(out);
-}
 
 // Macro to be placed at the start of the global main function
 //   The dsr namespace must be used to access registerPersistentClass
@@ -139,10 +135,10 @@ inline std::ostream& operator<< (std::ostream& out, const Persistent& p) {
 (CLASS().registerPersistentClass());
 
 // Create a single class instance without any content
-std::shared_ptr<Persistent> createPersistentClass(const String &type, bool mustExist = true);
+Handle<Persistent> createPersistentClass(const String &type, bool mustExist = true);
 
 // Create a class instance from text
-std::shared_ptr<Persistent> createPersistentClassFromText(const ReadableString &text, const ReadableString &fromPath);
+Handle<Persistent> createPersistentClassFromText(const ReadableString &text, const ReadableString &fromPath);
 
 }
 

+ 45 - 9
Source/DFPSR/render/ResourcePool.cpp

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -24,10 +24,12 @@
 #include "ResourcePool.h"
 #include "../image/stbImage/stbImageWrapper.h"
 #include "../api/fileAPI.h"
+#include "../api/imageAPI.h"
+#include "../api/textureAPI.h"
 
 using namespace dsr;
 
-int BasicResourcePool::findImageRgba(const String& name) const {
+int BasicResourcePool::findImageRgba(const ReadableString& name) const {
 	for (int i = 0; i < this->imageRgbaList.length(); i++) {
 		// Warning!
 		// This may cover up bugs with case sensitive matching in the Linux file system.
@@ -39,13 +41,22 @@ int BasicResourcePool::findImageRgba(const String& name) const {
 	return -1;
 }
 
-const ImageRgbaU8 BasicResourcePool::fetchImageRgba(const String& name) {
+// Searches the stored textures for an entry whose name matches without case sensitivity.
+// Post-condition: Returns the index of the first matching entry in textureRgbaList, or -1 if no entry matches.
+int BasicResourcePool::findTextureRgba(const ReadableString& name) const {
+	int entryIndex = 0;
+	while (entryIndex < this->textureRgbaList.length()) {
+		const bool sameName = string_caseInsensitiveMatch(name, this->textureRgbaList[entryIndex].name);
+		if (sameName) {
+			return entryIndex;
+		}
+		entryIndex++;
+	}
+	// No stored texture uses the requested name.
+	return -1;
+}
+
+const ImageRgbaU8 BasicResourcePool::fetchImageRgba(const ReadableString& name) {
 	ImageRgbaU8 result;
 	// Using "" will return an empty reference to allow removing textures
 	if (string_length(name) > 0) {
 		int existingIndex = this->findImageRgba(name);
 		if (existingIndex > -1) {
-			result = imageRgbaList[existingIndex].ref;
+			result = this->imageRgbaList[existingIndex].resource;
 		} else if (string_findFirst(name, U'.') > -1) {
 			throwError("The image \"", name, "\" had a forbidden dot in the name. Images in resource pools are fetched without the extension to allow changing image format without changing what it's called in other resources.\n");
 		} else if (string_findFirst(name, U'/') > -1 && string_findFirst(name, U'\\') > -1) {
@@ -63,11 +74,7 @@ const ImageRgbaU8 BasicResourcePool::fetchImageRgba(const String& name) {
 				result = image_load_RgbaU8(extensionless + ".jpg", false);
 			}
 			if (image_exists(result)) {
-				// If possible, generate a texture pyramid of smaller images
-				if (image_isTexture(result)) {
-					image_generatePyramid(result);
-				}
-				this->imageRgbaList.push(imageRgbaEntry(name, result));
+				this->imageRgbaList.push(namedEntry<ImageRgbaU8>(name, result));
 			} else {
 				printText("The image ", extensionless, ".* couldn't be loaded as either png, gif nor jpg!\n");
 			}
@@ -75,3 +82,32 @@ const ImageRgbaU8 BasicResourcePool::fetchImageRgba(const String& name) {
 	}
 	return result;
 }
+
+// Fetches a texture by name, creating it from the image with the same name when it has not been stored yet.
+//   name: The extensionless name of the resource, or "" to get an empty texture.
+//   resolutions: Passed on to texture_create_RgbaU8 when a new texture is created.
+// Post-condition: Returns the stored or newly created texture, or an empty texture when name is empty.
+//   Newly created textures that exist are stored, so that the next fetch of the same name reuses them.
+// NOTE(review): The lookup only compares names, so a stored texture is returned as is even if it was
+//   created using another resolutions argument. Confirm that this is intended.
+const TextureRgbaU8 BasicResourcePool::fetchTextureRgba(const ReadableString& name, int32_t resolutions) {
+	TextureRgbaU8 result;
+	// Using "" will return an empty reference to allow removing textures
+	if (string_length(name) > 0) {
+		int existingTextureIndex = this->findTextureRgba(name);
+		if (existingTextureIndex > -1) {
+			// Already stored, so reuse it without pushing a duplicate entry to the list.
+			result = this->textureRgbaList[existingTextureIndex].resource;
+		} else {
+			// Only search the images when the texture is missing.
+			int existingImageIndex = this->findImageRgba(name);
+			if (existingImageIndex > -1) {
+				result = texture_create_RgbaU8(this->imageRgbaList[existingImageIndex].resource, resolutions);
+			} else {
+				// TODO: Save memory by loading a temporary image for generating the texture
+				//         and letting the image point to the highest layer in the texture using texture_getMipLevelImage(result, 0).
+				result = texture_create_RgbaU8(this->fetchImageRgba(name), resolutions);
+				/* Enable this to save each texture layer as a file for debugging.
+				if (texture_exists(result)) {
+					for (int32_t mipLevel = 0; mipLevel < texture_getSmallestMipLevel(result); mipLevel++) {
+						image_save(texture_getMipLevelImage(result, mipLevel), string_combine(U"Mip_", name, U"_", texture_getWidth(result, mipLevel), U"x", texture_getHeight(result, mipLevel), U".png"));
+					}
+				}
+				*/
+			}
+			// Store the new texture once, so that the list does not grow on repeated fetches.
+			if (texture_exists(result)) {
+				this->textureRgbaList.push(namedEntry<TextureRgbaU8>(name, result));
+			}
+		}
+	}
+	return result;
+}

+ 17 - 11
Source/DFPSR/render/ResourcePool.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -24,7 +24,7 @@
 #ifndef DFPSR_RENDER_RESOURCE_POOL
 #define DFPSR_RENDER_RESOURCE_POOL
 
-#include "../image/ImageRgbaU8.h"
+#include "../image/Texture.h"
 #include "../collection/List.h"
 #include "../api/stringAPI.h"
 
@@ -33,24 +33,30 @@ namespace dsr {
 // A resource pool is responsible for storing things that might be reused in order to avoid loading the same file multiple times
 class ResourcePool {
 public:
+	// Virtual destructor, so that a pool deleted through a ResourcePool pointer also runs the destructor of the derived class.
+	virtual ~ResourcePool() {}
-	virtual const ImageRgbaU8 fetchImageRgba(const String& name) = 0;
+	// Returns the image identified by name.
+	virtual const ImageRgbaU8 fetchImageRgba(const ReadableString& name) = 0;
+	// Returns the texture identified by name.
+	//   The default for resolutions only applies when calling through the ResourcePool type,
+	//   because default arguments are selected from the static type of the object.
+	virtual const TextureRgbaU8 fetchTextureRgba(const ReadableString& name, int32_t resolutions = 4) = 0;
 };
 
-// TODO: Store names in images?
-struct imageRgbaEntry {
+// TODO: Keep track of reference count to resources and have a clean-up method for removing unused resources.
+template <typename T>
+struct namedEntry {
 	String name;
-	const ImageRgbaU8 ref;
-	imageRgbaEntry(const String& name, const ImageRgbaU8& ref) : name(name), ref(ref) {}
+	const T resource;
+	namedEntry(const String& name, const T& resource) : name(name), resource(resource) {}
 };
 
 class BasicResourcePool : public ResourcePool {
 private:
-	List<imageRgbaEntry> imageRgbaList;
-	int findImageRgba(const String& name) const;
+	List<namedEntry<ImageRgbaU8>> imageRgbaList;
+	List<namedEntry<TextureRgbaU8>> textureRgbaList;
+	int findImageRgba(const ReadableString& name) const;
+	int findTextureRgba(const ReadableString& name) const;
 public:
 	String path;
-	explicit BasicResourcePool(const String& path) : path(path) {}
-	const ImageRgbaU8 fetchImageRgba(const String& name) override;
+	explicit BasicResourcePool(const ReadableString& path) : path(path) {}
+	const ImageRgbaU8 fetchImageRgba(const ReadableString& name) override;
+	// The resolutions argument can be used to limit the number of mip levels for a specific rendering engine.
+	const TextureRgbaU8 fetchTextureRgba(const ReadableString& name, int32_t resolutions) override;
 };
 
 }

+ 31 - 40
Source/DFPSR/render/model/Model.cpp

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -21,13 +21,12 @@
 //    3. This notice may not be removed or altered from any source
 //    distribution.
 
-#define DFPSR_INTERNAL_ACCESS
+#define DSR_INTERNAL_ACCESS
 
 #include "Model.h"
 #include "../constants.h"
 #include "../../api/imageAPI.h"
-#include "../../image/ImageRgbaU8.h"
-#include "../../image/ImageF32.h"
+#include "../../api/textureAPI.h"
 #include "../../base/virtualStack.h"
 
 using namespace dsr;
@@ -117,7 +116,7 @@ int Polygon::getVertexCount() const {
 }
 
 Part::Part(String name) : name(name) {}
-Part::Part(const ImageRgbaU8 &diffuseMap, const ImageRgbaU8 &lightMap, const List<Polygon> &polygonBuffer, const String &name) :
+Part::Part(const TextureRgbaU8 &diffuseMap, const TextureRgbaU8 &lightMap, const List<Polygon> &polygonBuffer, const String &name) :
   diffuseMap(diffuseMap), lightMap(lightMap), polygonBuffer(polygonBuffer), name(name) {}
 Part Part::clone() const { return Part(this->diffuseMap, this->lightMap, this->polygonBuffer, this->name); }
 int Part::getPolygonCount() const {
@@ -133,7 +132,7 @@ int Part::getPolygonVertexCount(int polygonIndex) const {
 //         Only decreasing the length of the point buffer, changing a position index or adding new polygons should set it to false
 //         Only running validation before rendering should set it from false to true
 //   point indices may not go outside of projected's array range
-static void renderTriangleFromPolygon(CommandQueue *commandQueue, ImageRgbaU8Impl *targetImage, ImageF32Impl *depthBuffer, const Camera &camera, const Polygon &polygon, int triangleIndex, const ProjectedPoint *projected, Filter filter, const ImageRgbaU8Impl *diffuse, const ImageRgbaU8Impl *light) {
+static void renderTriangleFromPolygon(CommandQueue *commandQueue, ImageRgbaU8 *targetImage, ImageF32 *depthBuffer, const Camera &camera, const Polygon &polygon, int triangleIndex, const ProjectedPoint *projected, Filter filter, const TextureRgbaU8 *diffuse, const TextureRgbaU8 *light) {
 	// Triangle fan starting from the first vertex of the polygon
 	int indexA = 0;
 	int indexB = 1 + triangleIndex;
@@ -148,24 +147,24 @@ static void renderTriangleFromPolygon(CommandQueue *commandQueue, ImageRgbaU8Imp
 	renderTriangleFromData(commandQueue, targetImage, depthBuffer, camera, posA, posB, posC, filter, diffuse, light, texCoords, colors);
 }
 
-void Part::render(CommandQueue *commandQueue, ImageRgbaU8& targetImage, ImageF32& depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera, Filter filter, const ProjectedPoint* projected) const {
+void Part::render(CommandQueue *commandQueue, ImageRgbaU8* targetImage, ImageF32* depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera, Filter filter, const ProjectedPoint* projected) const {
 	// Get textures
-	const ImageRgbaU8Impl *diffuse = this->diffuseMap.get();
-	const ImageRgbaU8Impl *light = this->lightMap.get();
+	const TextureRgbaU8 *diffuse = &(this->diffuseMap);
+	const TextureRgbaU8 *light = &(this->lightMap);
 	for (int p = 0; p < this->polygonBuffer.length(); p++) {
 		Polygon polygon = this->polygonBuffer[p];
 		if (polygon.pointIndices[3] == -1) {
 			// Render triangle
-			renderTriangleFromPolygon(commandQueue, targetImage.get(), depthBuffer.get(), camera, polygon, 0, projected, filter, diffuse, light);
+			renderTriangleFromPolygon(commandQueue, targetImage, depthBuffer, camera, polygon, 0, projected, filter, diffuse, light);
 		} else {
 			// Render quad
-			renderTriangleFromPolygon(commandQueue, targetImage.get(), depthBuffer.get(), camera, polygon, 0, projected, filter, diffuse, light);
-			renderTriangleFromPolygon(commandQueue, targetImage.get(), depthBuffer.get(), camera, polygon, 1, projected, filter, diffuse, light);
+			renderTriangleFromPolygon(commandQueue, targetImage, depthBuffer, camera, polygon, 0, projected, filter, diffuse, light);
+			renderTriangleFromPolygon(commandQueue, targetImage, depthBuffer, camera, polygon, 1, projected, filter, diffuse, light);
 		}
 	}
 }
 
-void Part::renderDepth(ImageF32& depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera, const ProjectedPoint* projected) const {
+void Part::renderDepth(ImageF32* depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera, const ProjectedPoint* projected) const {
 	for (int p = 0; p < this->polygonBuffer.length(); p++) {
 		Polygon polygon = this->polygonBuffer[p];
 		if (polygon.pointIndices[3] == -1) {
@@ -173,24 +172,24 @@ void Part::renderDepth(ImageF32& depthBuffer, const Transform3D &modelToWorldTra
 			ProjectedPoint posA = projected[polygon.pointIndices[0]];
 			ProjectedPoint posB = projected[polygon.pointIndices[1]];
 			ProjectedPoint posC = projected[polygon.pointIndices[2]];
-			renderTriangleFromDataDepth(depthBuffer.get(), camera, posA, posB, posC);
+			renderTriangleFromDataDepth(depthBuffer, camera, posA, posB, posC);
 		} else {
 			// Render quad
 			ProjectedPoint posA = projected[polygon.pointIndices[0]];
 			ProjectedPoint posB = projected[polygon.pointIndices[1]];
 			ProjectedPoint posC = projected[polygon.pointIndices[2]];
 			ProjectedPoint posD = projected[polygon.pointIndices[3]];
-			renderTriangleFromDataDepth(depthBuffer.get(), camera, posA, posB, posC);
-			renderTriangleFromDataDepth(depthBuffer.get(), camera, posA, posC, posD);
+			renderTriangleFromDataDepth(depthBuffer, camera, posA, posB, posC);
+			renderTriangleFromDataDepth(depthBuffer, camera, posA, posC, posD);
 		}
 	}
 }
 
-void ModelImpl::render(CommandQueue *commandQueue, ImageRgbaU8& targetImage, ImageF32& depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera) const {
+void ModelImpl::render(CommandQueue *commandQueue, ImageRgbaU8* targetImage, ImageF32* depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera) const {
 	if (camera.isBoxSeen(this->minBound, this->maxBound, modelToWorldTransform)) {
 		// Transform and project all vertices
 		int positionCount = positionBuffer.length();
-		VirtualStackAllocation<ProjectedPoint> projected(positionCount);
+		VirtualStackAllocation<ProjectedPoint> projected(positionCount, "Projected points in ModelImpl::render");
 		for (int vert = 0; vert < positionCount; vert++) {
 			projected[vert] = camera.worldToScreen(modelToWorldTransform.transformPoint(positionBuffer[vert]));
 		}
@@ -200,11 +199,11 @@ void ModelImpl::render(CommandQueue *commandQueue, ImageRgbaU8& targetImage, Ima
 	}
 }
 
-void ModelImpl::renderDepth(ImageF32& depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera) const {
+void ModelImpl::renderDepth(ImageF32* depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera) const {
 	if (camera.isBoxSeen(this->minBound, this->maxBound, modelToWorldTransform)) {
 		// Transform and project all vertices
 		int positionCount = positionBuffer.length();
-		VirtualStackAllocation<ProjectedPoint> projected(positionCount);
+		VirtualStackAllocation<ProjectedPoint> projected(positionCount, "Projected points in ModelImpl::renderDepth");
 		for (int vert = 0; vert < positionCount; vert++) {
 			projected[vert] = camera.worldToScreen(modelToWorldTransform.transformPoint(positionBuffer[vert]));
 		}
@@ -237,41 +236,33 @@ String ModelImpl::getPartName(int partIndex) const {
 	CHECK_PART_INDEX(partIndex, return "");
 	return this->partBuffer[partIndex].name;
 }
-ImageRgbaU8 ModelImpl::getDiffuseMap(int partIndex) const {
-	CHECK_PART_INDEX(partIndex, return ImageRgbaU8());
+TextureRgbaU8 ModelImpl::getDiffuseMap(int partIndex) const {
+	CHECK_PART_INDEX(partIndex, return TextureRgbaU8());
 	return this->partBuffer[partIndex].diffuseMap;
 }
-void ModelImpl::setDiffuseMap(const ImageRgbaU8 &diffuseMap, int partIndex) {
+void ModelImpl::setDiffuseMap(const TextureRgbaU8 &diffuseMap, int partIndex) {
 	CHECK_PART_INDEX(partIndex, return);
-	if (image_exists(diffuseMap) && !image_isTexture(diffuseMap)) {
-		printText("Cannot assign a non-texture image as a diffuse map!\n");
-	} else {
-		this->partBuffer[partIndex].diffuseMap = diffuseMap;
-	}
+	this->partBuffer[partIndex].diffuseMap = diffuseMap;
 }
 void ModelImpl::setDiffuseMapByName(ResourcePool &pool, const String &filename, int partIndex) {
 	CHECK_PART_INDEX(partIndex, return);
-	const ImageRgbaU8 texture = pool.fetchImageRgba(filename);
-	if (image_exists(texture)) {
+	const TextureRgbaU8 texture = pool.fetchTextureRgba(filename, 5); // NOTE(review): 5 is presumably the requested number of mip levels (see the TODO in setLightMapByName) - confirm against ResourcePool.
+	if (texture_exists(texture)) {
 		this->setDiffuseMap(texture, partIndex);
 	}
 }
-ImageRgbaU8 ModelImpl::getLightMap(int partIndex) const {
-	CHECK_PART_INDEX(partIndex, return ImageRgbaU8());
+TextureRgbaU8 ModelImpl::getLightMap(int partIndex) const {
+	CHECK_PART_INDEX(partIndex, return TextureRgbaU8());
 	return this->partBuffer[partIndex].lightMap;
 }
-void ModelImpl::setLightMap(const ImageRgbaU8 &lightMap, int partIndex) {
+void ModelImpl::setLightMap(const TextureRgbaU8 &lightMap, int partIndex) {
 	CHECK_PART_INDEX(partIndex, return);
-	if (image_exists(lightMap) && !image_isTexture(lightMap)) {
-		printText("Cannot assign a non-texture image as a light map!\n");
-	} else {
-		this->partBuffer[partIndex].lightMap = lightMap;
-	}
+	this->partBuffer[partIndex].lightMap = lightMap;
 }
 void ModelImpl::setLightMapByName(ResourcePool &pool, const String &filename, int partIndex) {
 	CHECK_PART_INDEX(partIndex, return);
-	const ImageRgbaU8 texture = pool.fetchImageRgba(filename);
-	if (image_exists(texture)) {
+	const TextureRgbaU8 texture = pool.fetchTextureRgba(filename, 1); // TODO: Allow configuring the number of mip levels and selecting a sampler somehow.
+	if (texture_exists(texture)) {
 		this->setLightMap(texture, partIndex);
 	}
 }

+ 12 - 14
Source/DFPSR/render/model/Model.h

@@ -1,6 +1,6 @@
 // zlib open source license
 //
-// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// Copyright (c) 2017 to 2025 David Forsgren Piuva
 // 
 // This software is provided 'as-is', without any express or implied
 // warranty. In no event will the authors be held liable for any damages
@@ -25,15 +25,13 @@
 #define DFPSR_RENDER_MODEL_POLYGONMODEL
 
 #include <cstdint>
-#include "../../api/types.h"
-#include "../../collection/List.h"
 #include "../../api/stringAPI.h"
+#include "../../image/Texture.h"
 #include "../shader/Shader.h"
 #include "../Camera.h"
 #include "../ResourcePool.h"
 #include "../renderCore.h"
 #include "../../math/FVector.h"
-#include "../../collection/List.h"
 
 namespace dsr {
 
@@ -66,14 +64,14 @@ struct Polygon {
 };
 
 struct Part {
-	ImageRgbaU8 diffuseMap, lightMap;
+	TextureRgbaU8 diffuseMap, lightMap;
 	List<Polygon> polygonBuffer;
 	String name;
 	explicit Part(String name);
-	Part(const ImageRgbaU8 &diffuseMap, const ImageRgbaU8 &lightMap, const List<Polygon> &polygonBuffer, const String &name);
+	Part(const TextureRgbaU8 &diffuseMap, const TextureRgbaU8 &lightMap, const List<Polygon> &polygonBuffer, const String &name);
 	Part clone() const;
-	void render(CommandQueue *commandQueue, ImageRgbaU8& targetImage, ImageF32& depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera, Filter filter, const ProjectedPoint* projected) const;
-	void renderDepth(ImageF32& depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera, const ProjectedPoint* projected) const;
+	void render(CommandQueue *commandQueue, ImageRgbaU8* targetImage, ImageF32* depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera, Filter filter, const ProjectedPoint* projected) const;
+	void renderDepth(ImageF32* depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera, const ProjectedPoint* projected) const;
 	int getPolygonCount() const;
 	int getPolygonVertexCount(int polygonIndex) const;
 };
@@ -102,12 +100,12 @@ public:
 	String getPartName(int partIndex) const;
 
 	// TODO: Make an array of texture slots using a class enum for index
-	ImageRgbaU8 getDiffuseMap(int partIndex) const;
-	void setDiffuseMap(const ImageRgbaU8 &diffuseMap, int partIndex);
+	TextureRgbaU8 getDiffuseMap(int partIndex) const;
+	void setDiffuseMap(const TextureRgbaU8 &diffuseMap, int partIndex);
 	void setDiffuseMapByName(ResourcePool &pool, const String &filename, int partIndex);
 
-	ImageRgbaU8 getLightMap(int partIndex) const;
-	void setLightMap(const ImageRgbaU8 &lightMap, int partIndex);
+	TextureRgbaU8 getLightMap(int partIndex) const;
+	void setLightMap(const TextureRgbaU8 &lightMap, int partIndex);
 	void setLightMapByName(ResourcePool &pool, const String &filename, int partIndex);
 
 	// Polygon interface
@@ -130,8 +128,8 @@ public:
 	FVector4D getTexCoord(int partIndex, int polygonIndex, int vertexIndex) const;
 	void setTexCoord(int partIndex, int polygonIndex, int vertexIndex, const FVector4D& texCoord);
 	// Rendering
-	void render(CommandQueue *commandQueue, ImageRgbaU8& targetImage, ImageF32& depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera) const;
-	void renderDepth(ImageF32& depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera) const;
+	void render(CommandQueue *commandQueue, ImageRgbaU8* targetImage, ImageF32* depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera) const;
+	void renderDepth(ImageF32* depthBuffer, const Transform3D &modelToWorldTransform, const Camera &camera) const;
 };
 
 }

+ 18 - 20
Source/DFPSR/render/renderCore.cpp

@@ -23,9 +23,7 @@
 
 #include <cassert>
 #include "renderCore.h"
-#include "../image/internal/imageInternal.h"
 #include "../base/virtualStack.h"
-#include "shader/Shader.h"
 #include "shader/RgbaMultiply.h"
 #include "constants.h"
 
@@ -173,7 +171,7 @@ public:
 Visibility dsr::getTriangleVisibility(const ITriangle2D &triangle, const Camera &camera, bool clipFrustum) {
 	static const int cornerCount = 3;
 	int planeCount = camera.getFrustumPlaneCount(clipFrustum);
-	VirtualStackAllocation<bool> outside(cornerCount * planeCount);
+	VirtualStackAllocation<bool> outside(cornerCount * planeCount, "Corner outside buffer in getTriangleVisibility");
 	// Check which corners are outside of the different planes
 	int offset = 0;
 	for (int c = 0; c < cornerCount; c++) {
@@ -207,7 +205,7 @@ void dsr::executeTriangleDrawing(const TriangleDrawCommand &command, const IRect
 	if (rowCount > 0) {
 		int startRow;
 		// TODO: Use SafePointer in shape functions.
-		VirtualStackAllocation<RowInterval> rows(rowCount);
+		VirtualStackAllocation<RowInterval> rows(rowCount, "Row intervals in executeTriangleDrawing");
 		command.triangle.getShape(startRow, rows.getUnsafe(), finalClipBound, alignX, alignY);
 		Projection projection = command.triangle.getProjection(command.subB, command.subC, command.perspective);
 		command.processTriangle(command.triangleInput, command.targetImage, command.depthBuffer, command.triangle, projection, RowShape(startRow, rowCount, rows.getUnsafe()), command.filter);
@@ -259,7 +257,7 @@ static void drawClippedTriangle(CommandQueue *commandQueue, const TriangleDrawDa
 }
 
 // Clipping is applied automatically if needed
-void dsr::renderTriangleWithShader(CommandQueue *commandQueue, const TriangleDrawData &triangleDrawData, const Camera &camera, const ITriangle2D &triangle, const IRect &clipBound) {
+static void renderTriangleWithShader(CommandQueue *commandQueue, const TriangleDrawData &triangleDrawData, const Camera &camera, const ITriangle2D &triangle, const IRect &clipBound) {
 	// Allow small triangles to be a bit outside of the view frustum without being clipped by increasing the width and height slopes in a second test
 	// This reduces redundant clipping to improve both speed and quality
 	Visibility paddedVisibility = getTriangleVisibility(triangle, camera, true);
@@ -283,15 +281,15 @@ void dsr::renderTriangleWithShader(CommandQueue *commandQueue, const TriangleDra
 
 // TODO: Move shader selection to Shader_RgbaMultiply and let models default to its shader factory function pointer as shader selection
 void dsr::renderTriangleFromData(
-  CommandQueue *commandQueue, ImageRgbaU8Impl *targetImage, ImageF32Impl *depthBuffer,
+  CommandQueue *commandQueue, ImageRgbaU8 *targetImage, ImageF32 *depthBuffer,
   const Camera &camera, const ProjectedPoint &posA, const ProjectedPoint &posB, const ProjectedPoint &posC,
-  Filter filter, const ImageRgbaU8Impl *diffuse, const ImageRgbaU8Impl *light,
+  Filter filter, const TextureRgbaU8 *diffuse, const TextureRgbaU8 *light,
   TriangleTexCoords texCoords, TriangleColors colors) {
 	// Get dimensions from both buffers
-	int colorWidth = imageInternal::getWidth(targetImage);
-	int colorHeight = imageInternal::getHeight(targetImage);
-	int depthWidth = imageInternal::getWidth(depthBuffer);
-	int depthHeight = imageInternal::getHeight(depthBuffer);
+	int colorWidth = targetImage != nullptr ? image_getWidth(*targetImage) : 0;
+	int colorHeight = targetImage != nullptr ? image_getHeight(*targetImage) : 0;
+	int depthWidth = depthBuffer != nullptr ? image_getWidth(*depthBuffer) : 0;
+	int depthHeight = depthBuffer != nullptr ? image_getHeight(*depthBuffer) : 0;
 	// Combine dimensions
 	int targetWidth, targetHeight;
 	if (targetImage != nullptr) {
@@ -324,17 +322,17 @@ void dsr::renderTriangleFromData(
 }
 
 template<bool AFFINE>
-static void executeTriangleDrawingDepth(ImageF32Impl *depthBuffer, const ITriangle2D& triangle, const IRect &clipBound) {
+static void executeTriangleDrawingDepth(ImageF32 *depthBuffer, const ITriangle2D& triangle, const IRect &clipBound) {
 	int32_t rowCount = triangle.getBufferSize(clipBound, 1, 1);
 	if (rowCount > 0) {
 		int startRow;
-		VirtualStackAllocation<RowInterval> rows(rowCount);
+		VirtualStackAllocation<RowInterval> rows(rowCount, "Row intervals in executeTriangleDrawingDepth");
 		triangle.getShape(startRow, rows.getUnsafe(), clipBound, 1, 1);
 		Projection projection = triangle.getProjection(FVector3D(), FVector3D(), !AFFINE); // TODO: Create a weight using only depth to save time
 		RowShape shape = RowShape(startRow, rowCount, rows.getUnsafe());
 		// Draw the triangle
-		const int depthBufferStride = imageInternal::getStride(depthBuffer);
-		SafePointer<float> depthDataRow = imageInternal::getSafeData<float>(depthBuffer, shape.startRow);
+		const int depthBufferStride = image_getStride(*depthBuffer);
+		SafePointer<float> depthDataRow = image_getSafePointer<float>(*depthBuffer, shape.startRow);
 		for (int32_t y = shape.startRow; y < shape.startRow + shape.rowCount; y++) {
 			RowInterval row = shape.rows[y - shape.startRow];
 			SafePointer<float> depthData = depthDataRow + row.left;
@@ -369,7 +367,7 @@ static void executeTriangleDrawingDepth(ImageF32Impl *depthBuffer, const ITriang
 	}
 }
 
-static void drawTriangleDepth(ImageF32Impl *depthBuffer, const Camera &camera, const IRect &clipBound, const ITriangle2D& triangle) {
+static void drawTriangleDepth(ImageF32 *depthBuffer, const Camera &camera, const IRect &clipBound, const ITriangle2D& triangle) {
 	// Rounding sub-triangles to integer locations may reverse the direction of zero area triangles
 	if (triangle.isFrontfacing()) {
 		if (camera.perspective) {
@@ -380,18 +378,18 @@ static void drawTriangleDepth(ImageF32Impl *depthBuffer, const Camera &camera, c
 	}
 }
 
-static void drawSubTriangleDepth(ImageF32Impl *depthBuffer, const Camera &camera, const IRect &clipBound, const SubVertex &vertexA, const SubVertex &vertexB, const SubVertex &vertexC) {
+static void drawSubTriangleDepth(ImageF32 *depthBuffer, const Camera &camera, const IRect &clipBound, const SubVertex &vertexA, const SubVertex &vertexB, const SubVertex &vertexC) {
 	ProjectedPoint posA = camera.cameraToScreen(vertexA.cs);
 	ProjectedPoint posB = camera.cameraToScreen(vertexB.cs);
 	ProjectedPoint posC = camera.cameraToScreen(vertexC.cs);
 	drawTriangleDepth(depthBuffer, camera, clipBound, ITriangle2D(posA, posB, posC));
 }
 
-void dsr::renderTriangleFromDataDepth(ImageF32Impl *depthBuffer, const Camera &camera, const ProjectedPoint &posA, const ProjectedPoint &posB, const ProjectedPoint &posC) {
+void dsr::renderTriangleFromDataDepth(ImageF32 *depthBuffer, const Camera &camera, const ProjectedPoint &posA, const ProjectedPoint &posB, const ProjectedPoint &posC) {
 	// Skip rendering if there's no target buffer
 	if (depthBuffer == nullptr) { return; }
 	// Select a bound
-	IRect clipBound = IRect::FromSize(imageInternal::getWidth(depthBuffer), imageInternal::getHeight(depthBuffer));
+	IRect clipBound = IRect::FromSize(image_getWidth(*depthBuffer), image_getHeight(*depthBuffer));
 	// Create a triangle
 	ITriangle2D triangle(posA, posB, posC);
 	// Only draw visible triangles
@@ -438,7 +436,7 @@ void CommandQueue::execute(const IRect &clipBound, int jobCount) const {
 			}
 		}
 	} else {
-		VirtualStackAllocation<std::function<void()>> jobs(jobCount);
+		VirtualStackAllocation<std::function<void()>> jobs(jobCount, "Triangle draw jobs in CommandQueue::execute");
 		int y1 = clipBound.top();
 		for (int j = 0; j < jobCount; j++) {
 			int y2 = clipBound.top() + ((clipBound.bottom() * (j + 1)) / jobCount);

+ 6 - 14
Source/DFPSR/render/renderCore.h

@@ -27,8 +27,6 @@
 #include <cstdint>
 #include "Camera.h"
 #include "shader/Shader.h"
-#include "../image/ImageRgbaU8.h"
-#include "../image/ImageF32.h"
 #include "../base/threading.h"
 #include "../collection/List.h"
 
@@ -36,9 +34,9 @@ namespace dsr {
 
 struct TriangleDrawData {
 	// Color target
-	ImageRgbaU8Impl *targetImage;
+	ImageRgbaU8 *targetImage;
 	// Depth target
-	ImageF32Impl *depthBuffer;
+	ImageF32 *depthBuffer;
 	// When perspective is used, the depth buffer stores 1 / depth instead of linear depth.
 	bool perspective;
 	// The target blending method
@@ -47,7 +45,7 @@ struct TriangleDrawData {
 	TriangleInput triangleInput;
 	// Function pointer to the method that will process the command
 	DRAW_CALLBACK_TYPE processTriangle;
-	TriangleDrawData(ImageRgbaU8Impl *targetImage, ImageF32Impl *depthBuffer, bool perspective, Filter filter, const TriangleInput &triangleInput, DRAW_CALLBACK_TYPE processTriangle)
+	TriangleDrawData(ImageRgbaU8 *targetImage, ImageF32 *depthBuffer, bool perspective, Filter filter, const TriangleInput &triangleInput, DRAW_CALLBACK_TYPE processTriangle)
 	: targetImage(targetImage), depthBuffer(depthBuffer), perspective(perspective), filter(filter), triangleInput(triangleInput), processTriangle(processTriangle) {}
 };
 
@@ -97,25 +95,19 @@ public:
 //   * triangle should have passed the triangle visibility test for the actual image bound.
 //     Only construct the shader and make this call if "getTriangleVisibility(triangle, camera, false) != Visibility::Hidden" passed.
 //     Otherwise, it will waste a lot of time on rasterizing triangles that are not even visible.
-//   * targetImage must be a render target because it needs some padding for reading out of bound while rendering.
-//     ImageRgbaU8Impl::createRenderTarget will automatically padd any odd dimensions given.
-void renderTriangleWithShader(CommandQueue *commandQueue, const TriangleDrawData &triangleDrawData, const Camera &camera, const ITriangle2D &triangle, const IRect &clipBound);
 
 // Given a set of triangle data, this method can automatically draw it using the fastest default shader.
 // Triangle culling is handled automatically but you might want to apply culling per model or something before drawing many triangles.
 // commandQueue can be null to render directly using a single thread.
 // targetImage can be null to avoid using the pixel shader.
 // depthBuffer can be null to render without depth buffering.
-// Preconditions:
-//   * targetImage must be a render target because it needs some padding for reading out of bound while rendering.
-//     ImageRgbaU8Impl::createRenderTarget will automatically padd any odd dimensions given.
 void renderTriangleFromData(
-  CommandQueue *commandQueue, ImageRgbaU8Impl *targetImage, ImageF32Impl *depthBuffer,
+  CommandQueue *commandQueue, ImageRgbaU8 *targetImage, ImageF32 *depthBuffer,
   const Camera &camera, const ProjectedPoint &posA, const ProjectedPoint &posB, const ProjectedPoint &posC,
-  Filter filter, const ImageRgbaU8Impl *diffuse, const ImageRgbaU8Impl *light,
+  Filter filter, const TextureRgbaU8 *diffuse, const TextureRgbaU8 *light,
   TriangleTexCoords texCoords, TriangleColors colors
 );
-void renderTriangleFromDataDepth(ImageF32Impl *depthBuffer, const Camera &camera, const ProjectedPoint &posA, const ProjectedPoint &posB, const ProjectedPoint &posC);
+void renderTriangleFromDataDepth(ImageF32 *depthBuffer, const Camera &camera, const ProjectedPoint &posA, const ProjectedPoint &posB, const ProjectedPoint &posC);
 
 }
 

+ 60 - 52
Source/DFPSR/render/shader/RgbaMultiply.h

@@ -30,71 +30,76 @@
 #include <algorithm>
 #include "Shader.h"
 #include "fillerTemplates.h"
-#include "../../image/ImageRgbaU8.h"
+#include "../../image/Image.h"
 
 namespace dsr {
 
 struct RgbaMultiply_data {
-	const TextureRgba *diffuseMap; // Mip-mapping is allowed for diffuse textures.
-	const TextureRgba *lightMap; // Mip-mapping is not allowed for lightmaps, because it would increase the number of shaders to compile and still look worse.
+	const TextureRgbaU8 *diffuseMap; // Mip-mapping is allowed for diffuse textures.
+	const TextureRgbaU8 *lightMap; // Mip-mapping is not allowed for lightmaps, because it would increase the number of shaders to compile and still look worse.
 	// Planar format with each vector representing the three triangle corners
 	const TriangleTexCoords texCoords;
 	const TriangleColors colors;
 	// Normalize the color product by pre-multiplying the vertex colors
+	//   Called from the constructor's initializer list to scale the vertex colors.
+	//   This is safe because diffuseMap and lightMap are declared before colors, and members are initialized in declaration order.
+	//   A null texture pointer is treated like a pointer to a non-existing texture, which keeps the old behavior of accepting null for a missing map.
 	float getVertexScale() {
 		float result = 255.0f; // Scale from normalized to byte for the output
-		if (this->diffuseMap) {
+		if (this->diffuseMap != nullptr && texture_exists(*(this->diffuseMap))) {
 			result *= 1.0f / 255.0f; // Normalize the diffuse map from 0..255 to 0..1 by dividing the vertex color
 		}
-		if (this->lightMap) {
+		if (this->lightMap != nullptr && texture_exists(*(this->lightMap))) {
 			result *= 1.0f / 255.0f; // Normalize the light map from 0..255 to 0..1 by dividing the vertex color
 		}
 		return result;
 	}
 	explicit RgbaMultiply_data(const TriangleInput &triangleInput) :
-	  diffuseMap(triangleInput.diffuseImage ? &(triangleInput.diffuseImage->texture) : nullptr),
-	  lightMap(triangleInput.lightImage ? &(triangleInput.lightImage->texture) : nullptr),
-	  texCoords(triangleInput.texCoords), colors(triangleInput.colors.getScaled(getVertexScale())) {
-		// Texture coordinates must be on the positive side to allow using truncation as a floor function
-		if (this->diffuseMap) {
-			assert(this->diffuseMap != nullptr); // Cannot sample null
-			assert(this->diffuseMap->exists()); // Cannot sample regular images
-		}
-		if (this->lightMap) {
-			assert(this->lightMap != nullptr); // Cannot sample null
-			assert(this->lightMap->exists()); // Cannot sample regular images
-		}
-	}
+	  diffuseMap(triangleInput.diffuseMap),
+	  lightMap(triangleInput.lightMap),
+	  texCoords(triangleInput.texCoords), colors(triangleInput.colors.getScaled(getVertexScale())) {}
 };
 
-template <bool HAS_DIFFUSE_MAP, bool HAS_LIGHT_MAP, bool HAS_VERTEX_FADING, bool COLORLESS, bool DISABLE_MIPMAP>
-static Rgba_F32 getPixels_2x2(void *data, const F32x4x3 &vertexWeights) {
+// TODO: Simplify by merging boolean flags into named states.
+//       A texture can be:
+//         Non-existing
+//         Of a single layer
+//         Of multiple layers
+//         Of enough layers to avoid clamping the mip index
+//       A color can be:
+//         White
+//         Constant
+//         Faded
+// TODO: Skip converting to and from float colors when only sampling one texture or color.
+// TODO: Because colors are converted to 16-bit channels during multiplication, the shader's return value might as well use a 16-bit color format that is faster to multiply and shift.
+//       8 low bits for visible light, and 8 high bits for spill from multiplications before shifting results 8 bits to the right.
+//       High intensity vertex colors multiplied at the end can use the high range for 10-bit image formats.
+template <bool HAS_DIFFUSE_MAP, bool DIFFUSE_SINGLE_LAYER, bool HAS_LIGHT_MAP, bool HAS_VERTEX_FADING, bool COLORLESS>
+inline Rgba_F32<U32x4, F32x4> getPixels_2x2(void *data, const F32x4x3 &vertexWeights) {
+	RgbaMultiply_data *assets = (RgbaMultiply_data*)data;
 	if (HAS_DIFFUSE_MAP && !HAS_LIGHT_MAP && COLORLESS) {
 		// Optimized for diffuse only
-		F32x4 u1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u1, vertexWeights);
-		F32x4 v1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v1, vertexWeights);
-		return shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(((RgbaMultiply_data*)data)->diffuseMap, u1, v1);
+		F32x4 u1 = shaderMethods::interpolate(assets->texCoords.u1, vertexWeights);
+		F32x4 v1 = shaderMethods::interpolate(assets->texCoords.v1, vertexWeights);
+		return shaderMethods::sample_F32<Interpolation::BL, false, DIFFUSE_SINGLE_LAYER, false, false, false>(assets->diffuseMap, u1, v1);
 	} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {
 		// Optimized for light only
-		F32x4 u2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u2, vertexWeights);
-		F32x4 v2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v2, vertexWeights);
-		return shaderMethods::sample_F32<Interpolation::BL, true, false>(((RgbaMultiply_data*)data)->lightMap, u2, v2);
+		F32x4 u2 = shaderMethods::interpolate(assets->texCoords.u2, vertexWeights);
+		F32x4 v2 = shaderMethods::interpolate(assets->texCoords.v2, vertexWeights);
+		return shaderMethods::sample_F32<Interpolation::BL, false, false, false, false, true>(assets->lightMap, u2, v2);
 	} else {
 		// Interpolate the vertex color
-		Rgba_F32 color = HAS_VERTEX_FADING ?
-		  shaderMethods::interpolateVertexColor(((RgbaMultiply_data*)data)->colors.red, ((RgbaMultiply_data*)data)->colors.green, ((RgbaMultiply_data*)data)->colors.blue, ((RgbaMultiply_data*)data)->colors.alpha, vertexWeights) :
-		  Rgba_F32(F32x4(((RgbaMultiply_data*)data)->colors.red.x), F32x4(((RgbaMultiply_data*)data)->colors.green.x), F32x4(((RgbaMultiply_data*)data)->colors.blue.x), F32x4(((RgbaMultiply_data*)data)->colors.alpha.x));
+		Rgba_F32<U32x4, F32x4> color = HAS_VERTEX_FADING ?
+		  shaderMethods::interpolateVertexColor(assets->colors.red, assets->colors.green, assets->colors.blue, assets->colors.alpha, vertexWeights) :
+		  Rgba_F32<U32x4, F32x4>(F32x4(assets->colors.red.x), F32x4(assets->colors.green.x), F32x4(assets->colors.blue.x), F32x4(assets->colors.alpha.x));
 		// Sample diffuse
 		if (HAS_DIFFUSE_MAP) {
-			F32x4 u1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u1, vertexWeights);
-			F32x4 v1 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v1, vertexWeights);
-			color = color * shaderMethods::sample_F32<Interpolation::BL, DISABLE_MIPMAP, false>(((RgbaMultiply_data*)data)->diffuseMap, u1, v1);
+			F32x4 u1 = shaderMethods::interpolate(assets->texCoords.u1, vertexWeights);
+			F32x4 v1 = shaderMethods::interpolate(assets->texCoords.v1, vertexWeights);
+			color = color * shaderMethods::sample_F32<Interpolation::BL, false, DIFFUSE_SINGLE_LAYER, false, false, false>(assets->diffuseMap, u1, v1);
 		}
 		// Sample lightmap
 		if (HAS_LIGHT_MAP) {
-			F32x4 u2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.u2, vertexWeights);
-			F32x4 v2 = shaderMethods::interpolate(((RgbaMultiply_data*)data)->texCoords.v2, vertexWeights);
-			color = color * shaderMethods::sample_F32<Interpolation::BL, true, false>(((RgbaMultiply_data*)data)->lightMap, u2, v2);
+			F32x4 u2 = shaderMethods::interpolate(assets->texCoords.u2, vertexWeights);
+			F32x4 v2 = shaderMethods::interpolate(assets->texCoords.v2, vertexWeights);
+			color = color * shaderMethods::sample_F32<Interpolation::BL, false, false, false, false, true>(assets->lightMap, u2, v2);
 		}
 		return color;
 	}
@@ -102,24 +107,27 @@ static Rgba_F32 getPixels_2x2(void *data, const F32x4x3 &vertexWeights) {
 
 // The process method to take a function pointer to.
 //    Must have the same signature as drawCallbackTemplate in Shader.h.
-static void processTriangle_RgbaMultiply(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {
+static void processTriangle_RgbaMultiply(const TriangleInput &triangleInput, ImageRgbaU8 *colorBuffer, ImageF32 *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {
+	// The texture pointers must not be null, but they may point to empty textures.
 	RgbaMultiply_data data = RgbaMultiply_data(triangleInput);
 	bool hasVertexFade = !(almostSame(data.colors.red) && almostSame(data.colors.green) && almostSame(data.colors.blue) && almostSame(data.colors.alpha));
 	bool colorless = almostOne(data.colors.red) && almostOne(data.colors.green) && almostOne(data.colors.blue) && almostOne(data.colors.alpha);
-	if (data.diffuseMap) {
-		bool hasDiffusePyramid = data.diffuseMap->hasMipBuffer();
-		if (data.lightMap) {
+	// TODO: Should non-existing textures use null pointers in the data, or pointers to empty textures?
+	if (texture_exists(*(data.diffuseMap))) {
+		bool hasDiffusePyramid = texture_hasPyramid(*(data.diffuseMap));
+		// TODO: Avoid generating mip levels for the lightmap texture instead of hard-coding it to no mip levels.
+		if (texture_exists(*(data.lightMap))) {
 			if (hasVertexFade) { // DiffuseLightVertex
 				if (hasDiffusePyramid) { // With mipmap
-					fillShape(&data, getPixels_2x2<true, true, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					fillShape(&data, getPixels_2x2<true, false, true, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 				} else { // Without mipmap
-					fillShape(&data, getPixels_2x2<true, true, true, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					fillShape(&data, getPixels_2x2<true, true, true, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 				}
 			} else { // DiffuseLight
 				if (hasDiffusePyramid) { // With mipmap
-					fillShape(&data, getPixels_2x2<true, true, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					fillShape(&data, getPixels_2x2<true, false, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 				} else { // Without mipmap
-					fillShape(&data, getPixels_2x2<true, true, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					fillShape(&data, getPixels_2x2<true, true, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 				}
 			}
 		} else {
@@ -127,38 +135,38 @@ static void processTriangle_RgbaMultiply(const TriangleInput &triangleInput, Ima
 				if (hasDiffusePyramid) { // With mipmap
 					fillShape(&data, getPixels_2x2<false, false, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 				} else { // Without mipmap
-					fillShape(&data, getPixels_2x2<true, false, true, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					fillShape(&data, getPixels_2x2<true, true, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 				}
 			} else {
 				if (colorless) { // Diffuse without normalization
 					if (hasDiffusePyramid) { // With mipmap
-						fillShape(&data, getPixels_2x2<true, false, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+						fillShape(&data, getPixels_2x2<true, false, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 					} else { // Without mipmap
-					fillShape(&data, getPixels_2x2<true, false, false, true, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					fillShape(&data, getPixels_2x2<true, true, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 					}
 				} else { // Diffuse
 					if (hasDiffusePyramid) { // With mipmap
 						fillShape(&data, getPixels_2x2<true, false, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 					} else { // Without mipmap
-						fillShape(&data, getPixels_2x2<true, false, false, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+						fillShape(&data, getPixels_2x2<true, true, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 					}
 				}
 			}
 		}
 	} else {
-		if (data.lightMap) {
+		if (texture_exists(*(data.lightMap))) {
 			if (hasVertexFade) { // LightVertex
-				fillShape(&data, getPixels_2x2<false, true, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				fillShape(&data, getPixels_2x2<false, false, true, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 			} else {
 				if (colorless) { // Light without normalization
-					fillShape(&data, getPixels_2x2<false, true, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					fillShape(&data, getPixels_2x2<false, false, true, false, true>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 				} else { // Light
-					fillShape(&data, getPixels_2x2<false, true, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+					fillShape(&data, getPixels_2x2<false, false, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 				}
 			}
 		} else {
 			if (hasVertexFade) { // Vertex
-				fillShape(&data, getPixels_2x2<false, false, true, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
+				fillShape(&data, getPixels_2x2<false, false, false, true, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 			} else { // Single color
 				fillShape(&data, getPixels_2x2<false, false, false, false, false>, colorBuffer, depthBuffer, triangle, projection, shape, filter);
 			}

+ 7 - 7
Source/DFPSR/render/shader/Shader.h

@@ -26,8 +26,8 @@
 
 #include <cstdint>
 #include "../../image/PackOrder.h"
-#include "../../image/ImageRgbaU8.h"
-#include "../../image/ImageF32.h"
+#include "../../image/Image.h"
+#include "../../image/Texture.h"
 #include "../ITriangle2D.h"
 #include "shaderMethods.h"
 #include "shaderTypes.h"
@@ -56,16 +56,16 @@ struct TriangleColors {
 };
 
 struct TriangleInput {
-	const ImageRgbaU8Impl *diffuseImage;
-	const ImageRgbaU8Impl *lightImage;
+	const TextureRgbaU8 *diffuseMap;
+	const TextureRgbaU8 *lightMap;
 	const TriangleTexCoords texCoords;
 	const TriangleColors colors;
-	TriangleInput(const ImageRgbaU8Impl *diffuseImage, const ImageRgbaU8Impl *lightImage, const TriangleTexCoords &texCoords, const TriangleColors &colors)
-	: diffuseImage(diffuseImage), lightImage(lightImage), texCoords(texCoords), colors(colors) {}
+	TriangleInput(const TextureRgbaU8 *diffuseMap, const TextureRgbaU8 *lightMap, const TriangleTexCoords &texCoords, const TriangleColors &colors)
+	: diffuseMap(diffuseMap), lightMap(lightMap), texCoords(texCoords), colors(colors) {}
 };
 
 // The template for function pointers doing the work
-inline void drawCallbackTemplate(const TriangleInput &triangleInput, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {}
+inline void drawCallbackTemplate(const TriangleInput &triangleInput, ImageRgbaU8 *colorBuffer, ImageF32 *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {}
 using DRAW_CALLBACK_TYPE = decltype(&drawCallbackTemplate);
 
 }

+ 19 - 37
Source/DFPSR/render/shader/fillerTemplates.h

@@ -25,16 +25,14 @@
 #define DFPSR_RENDER_FILLER_TEMPLATES
 
 #include <cstdint>
-#include "../../image/PackOrder.h"
-#include "../../image/ImageRgbaU8.h"
-#include "../../image/ImageF32.h"
+#include "../../api/imageAPI.h"
 #include "../ITriangle2D.h"
 #include "shaderTypes.h"
 
 namespace dsr {
 
 // Function for filling pixels
-using PixelShadingCallback = std::function<Rgba_F32(void *data, const F32x4x3 &vertexWeights)>;
+using PixelShadingCallback = std::function<Rgba_F32<U32x4, F32x4>(void *data, const F32x4x3 &vertexWeights)>;
 
 inline bool almostZero(float value) {
 	return value > -0.001f && value < 0.001f;
@@ -108,7 +106,7 @@ inline void clipPixels(int x, const RowInterval &upperRow, const RowInterval &lo
 }
 
 template<bool CLIP_SIDES, bool DEPTH_READ, bool AFFINE>
-inline void getVisibility(int x, const RowInterval &upperRow, const RowInterval &lowerRow, const FVector4D &depth, const SafePointer<float> depthDataUpper, const SafePointer<float> depthDataLower, bool &vis0, bool &vis1, bool &vis2, bool &vis3) {
+inline void getVisibility(int x, const RowInterval &upperRow, const RowInterval &lowerRow, const FVector4D &depth, SafePointer<const float> depthDataUpper, SafePointer<const float> depthDataLower, bool &vis0, bool &vis1, bool &vis2, bool &vis3) {
 	// Clip pixels
 	bool clip0, clip1, clip2, clip3;
 	clipPixels<CLIP_SIDES>(x, upperRow, lowerRow, clip0, clip1, clip2, clip3);
@@ -164,7 +162,7 @@ inline void fillQuadSuper(void *data, PixelShadingCallback pixelShaderFunction,
 			// Get the color
 			U32x4 packedColor(0u); // Allow uninitialized memory?
 			// Execute the shader
-			Rgba_F32 planarSourceColor = pixelShaderFunction(data, weights);
+			Rgba_F32<U32x4, F32x4> planarSourceColor = pixelShaderFunction(data, weights);
 			// Apply alpha filtering
 			if (FILTER == Filter::Alpha) {
 				// Get opacity from the source color
@@ -172,7 +170,7 @@ inline void fillQuadSuper(void *data, PixelShadingCallback pixelShaderFunction,
 				// Read the packed colors for alpha blending
 				U32x4 packedTargetColor = clippedRead<CLIP_SIDES>(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3);
 				// Unpack the target color into planar RGBA format so that it can be mixed with the source color
-				Rgba_F32 planarTargetColor(packedTargetColor, targetPackingOrder);
+				Rgba_F32<U32x4, F32x4> planarTargetColor(packedTargetColor, targetPackingOrder);
 				// Blend linearly using floats
 				planarSourceColor = (planarSourceColor * opacity) + (planarTargetColor * (1.0f - opacity));
 			}
@@ -246,43 +244,32 @@ inline void fillRowSuper(void *data, PixelShadingCallback pixelShaderFunction, S
 }
 
 template<bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, Filter FILTER, bool AFFINE>
-inline void fillShapeSuper(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape) {
+inline void fillShapeSuper(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8 *colorBuffer, ImageF32 *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape) {
 	// Prepare constants
-	const int targetStride = imageInternal::getStride(colorBuffer);
-	const int depthBufferStride = imageInternal::getStride(depthBuffer);
+	const int targetStride = colorBuffer ? image_getStride(*colorBuffer) : 0;
+	const int depthBufferStride = depthBuffer ? image_getStride(*depthBuffer) : 0;
 	const FVector3D doublePWeightDx = projection.pWeightDx * 2.0f;
-	const int colorRowSize = imageInternal::getRowSize(colorBuffer);
-	const int depthRowSize = imageInternal::getRowSize(depthBuffer);
-	const PackOrder& targetPackingOrder = imageInternal::getPackOrder(colorBuffer);
-	const int colorHeight = imageInternal::getHeight(colorBuffer);
-	const int depthHeight = imageInternal::getHeight(depthBuffer);
+	const int colorRowSize = colorBuffer ? image_getWidth(*colorBuffer) * sizeof(uint32_t) : 0;
+	const int depthRowSize = depthBuffer ? image_getWidth(*depthBuffer) * sizeof(float) : 0;
+	const PackOrder& targetPackingOrder = colorBuffer ? image_getPackOrder(*colorBuffer) : PackOrder::getPackOrder(PackOrderIndex::RGBA);
+	const int colorHeight = colorBuffer ? image_getHeight(*colorBuffer) : 0;
+	const int depthHeight = depthBuffer ? image_getHeight(*depthBuffer) : 0;
 	const int maxHeight = colorHeight > depthHeight ? colorHeight : depthHeight;
 
 	// Initialize row pointers for color buffer
 	SafePointer<uint32_t> pixelDataUpper, pixelDataLower, pixelDataUpperRow, pixelDataLowerRow;
 	if (COLOR_WRITE) {
-		SafePointer<uint32_t> targetData = imageInternal::getSafeData<uint32_t>(colorBuffer);
-		pixelDataUpperRow = targetData;
-		pixelDataUpperRow.increaseBytes(shape.startRow * targetStride);
-		pixelDataLowerRow = targetData;
-		pixelDataLowerRow.increaseBytes((shape.startRow + 1) * targetStride);
-	} else {
-		pixelDataUpperRow = SafePointer<uint32_t>();
-		pixelDataLowerRow = SafePointer<uint32_t>();
+		pixelDataUpperRow = image_getSafePointer<uint32_t>(*colorBuffer, shape.startRow);
+		pixelDataLowerRow = pixelDataUpperRow; pixelDataLowerRow.increaseBytes(targetStride);
 	}
 
 	// Initialize row pointers for depth buffer
 	SafePointer<float> depthDataUpper, depthDataLower, depthDataUpperRow, depthDataLowerRow;
 	if (DEPTH_READ || DEPTH_WRITE) {
-		SafePointer<float> depthBufferData = imageInternal::getSafeData<float>(depthBuffer);
-		depthDataUpperRow = depthBufferData;
-		depthDataUpperRow.increaseBytes(shape.startRow * depthBufferStride);
-		depthDataLowerRow = depthBufferData;
-		depthDataLowerRow.increaseBytes((shape.startRow + 1) * depthBufferStride);
-	} else {
-		depthDataUpperRow = SafePointer<float>();
-		depthDataLowerRow = SafePointer<float>();
+		depthDataUpperRow = image_getSafePointer<float>(*depthBuffer, shape.startRow);
+		depthDataLowerRow = depthDataUpperRow; depthDataLowerRow.increaseBytes(depthBufferStride);
 	}
+
 	for (int32_t y1 = shape.startRow; y1 < shape.startRow + shape.rowCount; y1 += 2) {
 		int y2 = y1 + 1;
 		RowInterval upperRow = shape.rows[y1 - shape.startRow];
@@ -337,9 +324,6 @@ inline void fillShapeSuper(void *data, PixelShadingCallback pixelShaderFunction,
 				}
 				depthDataUpper += outerBlockStart;
 				depthDataLower += outerBlockStart;
-			} else {
-				depthDataUpper = SafePointer<float>();
-				depthDataLower = SafePointer<float>();
 			}
 			// Initialize projection
 			FVector3D pWeightUpperRow;
@@ -400,7 +384,7 @@ inline void fillShapeSuper(void *data, PixelShadingCallback pixelShaderFunction,
 	}
 }
 
-inline void fillShape(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8Impl *colorBuffer, ImageF32Impl *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {
+inline void fillShape(void *data, PixelShadingCallback pixelShaderFunction, ImageRgbaU8 *colorBuffer, ImageF32 *depthBuffer, const ITriangle2D &triangle, const Projection &projection, const RowShape &shape, Filter filter) {
 	bool hasColorBuffer = colorBuffer != nullptr;
 	bool hasDepthBuffer = depthBuffer != nullptr;
 	if (projection.affine) {
@@ -415,7 +399,6 @@ inline void fillShape(void *data, PixelShadingCallback pixelShaderFunction, Imag
 				}
 			} else {
 				// Solid depth
-				// TODO: Use for orthogonal depth based shadows
 				fillShapeSuper<false, true, true, Filter::Solid, true>(data, pixelShaderFunction, nullptr, depthBuffer, triangle, projection, shape);
 			}
 		} else {
@@ -441,7 +424,6 @@ inline void fillShape(void *data, PixelShadingCallback pixelShaderFunction, Imag
 				}
 			} else {
 				// Solid depth
-				// TODO: Use for depth based shadows with perspective projection
 				fillShapeSuper<false, true, true, Filter::Solid, false>(data, pixelShaderFunction, nullptr, depthBuffer, triangle, projection, shape);
 			}
 		} else {

+ 38 - 164
Source/DFPSR/render/shader/shaderMethods.h

@@ -28,7 +28,7 @@
 #include "../../math/FVector.h"
 #include "../../math/scalar.h"
 #include "../../base/simd3D.h"
-#include "../../image/ImageRgbaU8.h"
+#include "../../api/textureAPI.h"
 #include "shaderTypes.h"
 #include "../constants.h"
 
@@ -43,8 +43,8 @@ namespace shaderMethods {
 		return vMA + vMB + vMC;
 	}
 
-	inline Rgba_F32 interpolateVertexColor(const FVector3D &red, const FVector3D &green, const FVector3D &blue, const FVector3D &alpha, const F32x4x3 &vertexWeights) {
-		return Rgba_F32(
+	inline Rgba_F32x4 interpolateVertexColor(const FVector3D &red, const FVector3D &green, const FVector3D &blue, const FVector3D &alpha, const F32x4x3 &vertexWeights) {
+		return Rgba_F32x4(
 		  interpolate(red,   vertexWeights),
 		  interpolate(green, vertexWeights),
 		  interpolate(blue,  vertexWeights),
@@ -52,175 +52,49 @@ namespace shaderMethods {
 		);
 	}
 
-	// Returns (colorA * weightA + colorB * weightB) / 256 as bytes
-	// weightA and weightB should contain pairs of the same 16-bit weights for each of the 4 pixels in the corresponding A and B colors
-	inline U32x4 weightColors(const U32x4 &colorA, const U16x8 &weightA, const U32x4 &colorB, const U16x8 &weightB) {
-		U32x4 lowMask(0x00FF00FFu);
-		U16x8 lowColorA = U16x8(colorA & lowMask);
-		U16x8 lowColorB = U16x8(colorB & lowMask);
-		U32x4 highMask(0xFF00FF00u);
-		U16x8 highColorA = U16x8((colorA & highMask) >> 8);
-		U16x8 highColorB = U16x8((colorB & highMask) >> 8);
-		U32x4 lowColor = (((lowColorA * weightA) + (lowColorB * weightB))).get_U32();
-		U32x4 highColor = (((highColorA * weightA) + (highColorB * weightB))).get_U32();
-		return (((lowColor >> 8) & lowMask) | (highColor & highMask));
-	}
-
-	// The more significant bits must be zero so that the lower bits can fill the space.
-	//   lowBits[x] < 2^16
-	inline U16x8 repeatAs16Bits(const U32x4 &lowBits) {
-		return U16x8(lowBits | (lowBits << 16));
-	}
-
-	// Returns 256 - weight
-	inline U16x8 invertWeight(const U16x8 &weight) {
-		return U16x8(0x01000100u) - weight;
-	}
-
-	inline U32x4 mix_L(const U32x4 &colorA, const U32x4 &colorB, const U32x4 &weight) {
-		// Get inverse weights
-		U16x8 weightB = repeatAs16Bits(weight);
-		U16x8 weightA = invertWeight(weightB);
-		// Multiply
-		return weightColors(colorA, weightA, colorB, weightB);
-	}
-
-	inline U32x4 mix_BL(const U32x4 &colorA, const U32x4 &colorB, const U32x4 &colorC, const U32x4 &colorD, const U32x4 &weightX, const U32x4 &weightY) {
-		// Get inverse weights
-		U16x8 weightXR = repeatAs16Bits(weightX);
-		U16x8 weightYB = repeatAs16Bits(weightY);
-		U16x8 weightXL = invertWeight(weightXR);
-		U16x8 weightYT = invertWeight(weightYB);
-		// Multiply
-		return weightColors(weightColors(colorA, weightXL, colorB, weightXR), weightYT, weightColors(colorC, weightXL, colorD, weightXR), weightYB);
-	}
-
-	// Single layer sampling methods
-	inline U32x4 sample_U32(SafePointer<uint32_t> data, const TextureRgbaLayer *source, const U32x4 &col, const U32x4 &row) {
-		U32x4 pixelOffset((source->startOffset + col + (row << source->widthShift))); // PixelOffset = Start + Column + Row * Width
-		return gather(data, pixelOffset);
-	}
-
-	// How many mip levels down from here should be sampled for the given texture coordinates
-	template<int maxOffset>
-	inline int getMipLevelOffset(const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {
-		FVector4D ua = u.get();
-		FVector4D va = v.get();
-		float offsetUX = fabs(ua.x - ua.y);
-		float offsetUY = fabs(ua.x - ua.z);
-		float offsetVX = fabs(va.x - va.y);
-		float offsetVY = fabs(va.x - va.z);
-		float offsetU = max(offsetUX, offsetUY) * source->width;
-		float offsetV = max(offsetVX, offsetVY) * source->height;
-		float offset = max(offsetU, offsetV);
-
-		// This log2 approximation has to be adapted if the number of mip levels changes.
-		static_assert(MIP_BIN_COUNT == 5, "Changing MIP_BIN_COUNT must also adapt shaderMethods::getMipLevelOffset");
-		int result = 0;
-		if (offset > 2.0f) { result = 1; }
-		if (offset > 4.0f) { result = 2; }
-		if (offset > 8.0f) { result = 3; }
-		if (offset > 16.0f) { result = 4; }
-		return result;
-	}
-
-	inline int getMipLevel(const TextureRgba *source, const F32x4 &u, const F32x4 &v) {
-		return getMipLevelOffset<MIP_BIN_COUNT - 1>(source->mips, u, v);
-	}
-
-	// Single layer sampling method
-	template<Interpolation INTERPOLATION>
-	inline U32x4 sample_U32(SafePointer<uint32_t> data, const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {
-		if (INTERPOLATION == Interpolation::BL) {
-			U32x4 subPixelOffset = U32x4(1073741952); // 2 to the power of 30 + 128, adjusting to a safe part of the unsigned integer and adding half a pixel for the bi-linear interpolation.
-			U32x4 subPixLowX(truncateToU32(u * source->subWidth) + subPixelOffset); // SubPixelLowX = u * (Width * 256) + 128
-			U32x4 subPixLowY(truncateToU32(v * source->subHeight) + subPixelOffset); // SubPixelLowY = v * (Height * 256) + 128
-			U32x4 weightX = subPixLowX & 255; // WeightX = SubPixelLowX % 256
-			U32x4 weightY = subPixLowY & 255; // WeightY = SubPixelLowY % 256
-			U32x4 pixLowX(subPixLowX >> 8); // PixelLowX = SubPixelLowX / 256
-			U32x4 pixLowY(subPixLowY >> 8); // PixelLowY = SubPixelLowY / 256
-			U32x4 wMask(source->widthMask);
-			U32x4 hMask(source->heightMask);
-			U32x4 colLow(pixLowX & wMask); // ColumnLow = PixelLowX % Width
-			U32x4 rowLow(pixLowY & hMask); // RowLow = PixelLowY % Height
-			U32x4 colHigh(((colLow + 1) & wMask)); // ColumnHigh = (ColumnLow + 1) % Width
-			U32x4 rowHigh(((rowLow + 1) & hMask)); // RowHigh = (RowLow + 1) % Height
-			// Sample colors in the 4 closest pixels
-			U32x4 colorA(sample_U32(data, source, colLow, rowLow));
-			U32x4 colorB(sample_U32(data, source, colHigh, rowLow));
-			U32x4 colorC(sample_U32(data, source, colLow, rowHigh));
-			U32x4 colorD(sample_U32(data, source, colHigh, rowHigh));
-			// Take a weighted average
-			return shaderMethods::mix_BL(colorA, colorB, colorC, colorD, weightX, weightY);
-		} else { // Interpolation::NN or unhandled
-			// TODO: Test nearest neighbor sampling.
-			F32x4 subPixelOffset = F32x4(1073741824.0f);
-			// TODO: Use multiply and add instructions.
-			U32x4 pixX(truncateToU32(u * source->width + subPixelOffset));  // PixelX = U * Width
-			U32x4 pixY(truncateToU32(v * source->height + subPixelOffset)); // PixelY = V * Height
-			U32x4 col(pixX & source->widthMask); // Column = PixelX % Width
-			U32x4 row(pixY & source->heightMask); // Row = PixelY % Height
-			return sample_U32(data, source, col, row);
-		}
-	}
-
-	template<Interpolation INTERPOLATION, bool HIGH_QUALITY>
-	inline Rgba_F32 sample_F32(SafePointer<uint32_t> data, const TextureRgbaLayer *source, const F32x4 &u, const F32x4 &v) {
-		if (INTERPOLATION == Interpolation::BL) {
-			if (HIGH_QUALITY) { // High quality interpolation
-				F32x4 subPixelOffset = F32x4(4194304.5f); // A large power of two and half a pixel's offset for bi-linear interpolation.
-				F32x4 pixX = u * source->width + subPixelOffset; // PixelX = ULow * Width
-				F32x4 pixY = v * source->height + subPixelOffset; // PixelY = VLow * Height
-				// Truncation can be used as floor for positive input
-				U32x4 pixLowX(truncateToU32(pixX)); // PixelLowX = floor(PixelX)
-				U32x4 pixLowY(truncateToU32(pixY)); // PixelLowY = floor(PixelY)
-				U32x4 wMask(source->widthMask);
-				U32x4 hMask(source->heightMask);
-				U32x4 colLow(pixLowX & wMask); // ColumnLow = PixelLowX % Width
-				U32x4 rowLow(pixLowY & hMask); // RowLow = PixelLowY % Height
-				U32x4 colHigh(((colLow + 1) & wMask)); // ColumnHigh = (ColumnLow + 1) % Width
-				U32x4 rowHigh(((rowLow + 1) & hMask)); // RowHigh = (RowLow + 1) % Height
-				// Sample colors in the 4 closest pixels
-				Rgba_F32 colorA(Rgba_F32(sample_U32(data, source, colLow, rowLow)));
-				Rgba_F32 colorB(Rgba_F32(sample_U32(data, source, colHigh, rowLow)));
-				Rgba_F32 colorC(Rgba_F32(sample_U32(data, source, colLow, rowHigh)));
-				Rgba_F32 colorD(Rgba_F32(sample_U32(data, source, colHigh, rowHigh)));
-				F32x4 weightX = pixX - floatFromU32(pixLowX);
-				F32x4 weightY = pixY - floatFromU32(pixLowY);
-				F32x4 invWeightX = 1.0f - weightX;
-				F32x4 invWeightY = 1.0f - weightY;
-				return (colorA * invWeightX + colorB * weightX) * invWeightY + (colorC * invWeightX + colorD * weightX) * weightY;
-			} else { // Fast interpolation
-				return Rgba_F32(sample_U32<Interpolation::BL>(data, source, u, v));
+	// TODO: Implement sparse computation of floating-point mip levels in a grid, which can increase the density when getting closer to a horizon.
+	// TODO: Let RgbaMultiply generate additional template instances, especially for SQUARE and MIP_INSIDE which are common.
+	//       If the texture has at least 5 mip levels, MIP_INSIDE can be true.
+	//       For the majority of textures that are square, SQUARE can be true.
+	template<
+	  Interpolation INTERPOLATION,
+	  bool SQUARE = false,
+	  bool SINGLE_LAYER = false,
+	  bool XY_INSIDE = false,
+	  bool MIP_INSIDE = false,
+	  bool HIGHEST_RESOLUTION = false
+	>
+	inline U32x4 sample_U32(const TextureRgbaU8 *source, const F32x4 &u, const F32x4 &v) {
+		if (INTERPOLATION == Interpolation::NN) {
+			if (HIGHEST_RESOLUTION) {
+				return texture_sample_nearest<SQUARE, SINGLE_LAYER, MIP_INSIDE, HIGHEST_RESOLUTION, U32x4, F32x4>(*source, u, v, U32x4(0u));
+			} else {
+				// TODO: Calculate MIP levels using a separate rendering stage with sparse resolution writing results into thread-local memory.
+				uint32_t mipLevel = texture_getMipLevelIndex<F32x4>(*source, u, v);
+				return texture_sample_nearest<SQUARE, SINGLE_LAYER, MIP_INSIDE, HIGHEST_RESOLUTION, U32x4, F32x4>(*source, u, v, U32x4(mipLevel));
 			}
-		} else { // Interpolation::NN or unhandled
-			return Rgba_F32(sample_U32<Interpolation::NN>(data, source, u, v));
-		}
-	}
-
-	// Multi layer sampling method
-	template<Interpolation INTERPOLATION, bool DISABLE_MIPMAP>
-	inline U32x4 sample_U32(const TextureRgba *source, const F32x4 &u, const F32x4 &v) {
-		if (DISABLE_MIPMAP) {
-			return sample_U32<INTERPOLATION>(source->data, &(source->mips[0]), u, v);
 		} else {
-			int mipLevel = getMipLevel(source, u, v);
-			return sample_U32<INTERPOLATION>(source->data, &(source->mips[mipLevel]), u, v);
+			if (HIGHEST_RESOLUTION) {
+				return texture_sample_bilinear<SQUARE, SINGLE_LAYER, MIP_INSIDE, HIGHEST_RESOLUTION, U32x4, U16x8, F32x4>(*source, u, v, U32x4(0u));
+			} else {
+				uint32_t mipLevel = texture_getMipLevelIndex<F32x4>(*source, u, v);
+				return texture_sample_bilinear<SQUARE, SINGLE_LAYER, MIP_INSIDE, HIGHEST_RESOLUTION, U32x4, U16x8, F32x4>(*source, u, v, U32x4(mipLevel));
+			}
 		}
 	}
 
-	template<Interpolation INTERPOLATION, bool DISABLE_MIPMAP, bool HIGH_QUALITY>
-	inline Rgba_F32 sample_F32(const TextureRgba *source, const F32x4 &u, const F32x4 &v) {
-		if (DISABLE_MIPMAP) {
-			return sample_F32<INTERPOLATION, HIGH_QUALITY>(source->data, &(source->mips[0]), u, v);
-		} else {
-			int mipLevel = getMipLevel(source, u, v);
-			return sample_F32<INTERPOLATION, HIGH_QUALITY>(source->data, &(source->mips[mipLevel]), u, v);
-		}
+	template<Interpolation INTERPOLATION,
+	  bool SQUARE = false,
+	  bool SINGLE_LAYER = false,
+	  bool XY_INSIDE = false,
+	  bool MIP_INSIDE = false,
+	  bool HIGHEST_RESOLUTION = false
+	>
+	inline Rgba_F32<U32x4, F32x4> sample_F32(const TextureRgbaU8 *source, const F32x4 &u, const F32x4 &v) {
+		return Rgba_F32<U32x4, F32x4>(sample_U32<INTERPOLATION, SQUARE, SINGLE_LAYER, XY_INSIDE, MIP_INSIDE, HIGHEST_RESOLUTION>(source, u, v));
 	}
 }
 
 }
 
 #endif
-

+ 40 - 31
Source/DFPSR/render/shader/shaderTypes.h

@@ -25,51 +25,60 @@
 #define DFPSR_RENDER_SHADER_TYPES
 
 #include <cstdint>
-#include <cstdio>
 #include "../../base/simd.h"
 #include "../../image/PackOrder.h"
 
 namespace dsr {
 
+template<typename U, typename F>
 struct Rgba_F32 {
-	F32x4 red;
-	F32x4 green;
-	F32x4 blue;
-	F32x4 alpha;
-	explicit Rgba_F32(const U32x4 &color) :
-	  red(  floatFromU32(getRed(  color))),
-	  green(floatFromU32(getGreen(color))),
-	  blue( floatFromU32(getBlue( color))),
-	  alpha(floatFromU32(getAlpha(color))) {}
-	Rgba_F32(const U32x4 &color, const PackOrder &order) :
-	  red(  floatFromU32(getRed(  color, order))),
-	  green(floatFromU32(getGreen(color, order))),
-	  blue( floatFromU32(getBlue( color, order))),
-	  alpha(floatFromU32(getAlpha(color, order))) {}
-	Rgba_F32(const F32x4 &red, const F32x4 &green, const F32x4 &blue, const F32x4 &alpha) : red(red), green(green), blue(blue), alpha(alpha) {}
-	// TODO: Use a template argument for deciding the packing order for external image formats
-	U32x4 toSaturatedByte() const {
-		return floatToSaturatedByte(this->red, this->green, this->blue, this->alpha);
+	F red;
+	F green;
+	F blue;
+	F alpha;
+	explicit Rgba_F32(const U &color) :
+	  red(  floatFromU32(packOrder_getRed(  color))),
+	  green(floatFromU32(packOrder_getGreen(color))),
+	  blue( floatFromU32(packOrder_getBlue( color))),
+	  alpha(floatFromU32(packOrder_getAlpha(color))) {}
+	Rgba_F32(const U &color, const PackOrder &order) :
+	  red(  floatFromU32(packOrder_getRed(  color, order))),
+	  green(floatFromU32(packOrder_getGreen(color, order))),
+	  blue( floatFromU32(packOrder_getBlue( color, order))),
+	  alpha(floatFromU32(packOrder_getAlpha(color, order))) {}
+	Rgba_F32(const F &red, const F &green, const F &blue, const F &alpha) : red(red), green(green), blue(blue), alpha(alpha) {}
+	U toSaturatedByte() const {
+		return packOrder_floatToSaturatedByte<U, F>(this->red, this->green, this->blue, this->alpha);
 	}
-	U32x4 toSaturatedByte(const PackOrder &order) const {
-		return floatToSaturatedByte(this->red, this->green, this->blue, this->alpha, order);
+	U toSaturatedByte(const PackOrder &order) const {
+		return packOrder_floatToSaturatedByte<U, F>(this->red, this->green, this->blue, this->alpha, order);
 	}
 };
-inline Rgba_F32 operator+(const Rgba_F32 &left, const Rgba_F32 &right) {
-	return Rgba_F32(left.red + right.red, left.green + right.green, left.blue + right.blue, left.alpha + right.alpha);
-}
-inline Rgba_F32 operator-(const Rgba_F32 &left, const Rgba_F32 &right) {
-	return Rgba_F32(left.red - right.red, left.green - right.green, left.blue - right.blue, left.alpha - right.alpha);
+
+template<typename U, typename F>
+inline Rgba_F32<U, F> operator+(const Rgba_F32<U, F> &left, const Rgba_F32<U, F> &right) {
+	return Rgba_F32<U, F>(left.red + right.red, left.green + right.green, left.blue + right.blue, left.alpha + right.alpha);
 }
-inline Rgba_F32 operator*(const Rgba_F32 &left, const Rgba_F32 &right) {
-	return Rgba_F32(left.red * right.red, left.green * right.green, left.blue * right.blue, left.alpha * right.alpha);
+
+template<typename U, typename F>
+inline Rgba_F32<U, F> operator-(const Rgba_F32<U, F> &left, const Rgba_F32<U, F> &right) {
+	return Rgba_F32<U, F>(left.red - right.red, left.green - right.green, left.blue - right.blue, left.alpha - right.alpha);
 }
-inline Rgba_F32 operator*(const Rgba_F32 &left, const F32x4 &right) {
-	return Rgba_F32(left.red * right, left.green * right, left.blue * right, left.alpha * right);
+
+template<typename U, typename F>
+inline Rgba_F32<U, F> operator*(const Rgba_F32<U, F> &left, const Rgba_F32<U, F> &right) {
+	return Rgba_F32<U, F>(left.red * right.red, left.green * right.green, left.blue * right.blue, left.alpha * right.alpha);
 }
 
+template<typename U, typename F>
+inline Rgba_F32<U, F> operator*(const Rgba_F32<U, F> &left, const F &right) {
+	return Rgba_F32<U, F>(left.red * right, left.green * right, left.blue * right, left.alpha * right);
 }
 
-#endif
+using Rgba_F32x4 = Rgba_F32<U32x4, F32x4>;
+using Rgba_F32x8 = Rgba_F32<U32x8, F32x8>;
+using Rgba_F32xX = Rgba_F32<U32xX, F32xX>;
 
+}
 
+#endif

+ 25 - 25
Source/DFPSR/settings.h

@@ -1,28 +1,23 @@
-// zlib open source license
-//
-// Copyright (c) 2024 David Forsgren Piuva
-// 
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-// 
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-// 
-//    1. The origin of this software must not be misrepresented; you must not
-//    claim that you wrote the original software. If you use this software
-//    in a product, an acknowledgment in the product documentation would be
-//    appreciated but is not required.
-// 
-//    2. Altered source versions must be plainly marked as such, and must not be
-//    misrepresented as being the original software.
-// 
-//    3. This notice may not be removed or altered from any source
-//    distribution.
+
+// This header collects hardcoded settings for the entire framework in one place.
+//   Either modify this header for all your projects, or define macros using compiler flags for a specific project.
 
 #ifndef DFPSR_SETTINGS
 #define DFPSR_SETTINGS
+	// If you are not using try-catch, you can let the default error handler call heap_hardExitCleaning and std::exit instead of throwing std::exception.
+	//   This may reduce some runtime overhead from stack unwinding.
+	#ifndef __EXCEPTIONS
+		// If compiling with -fno-exceptions, hard exit must be enabled.
+		#define DSR_HARD_EXIT_ON_ERROR
+	#endif
+
+	// If EXTRA_SAFE_POINTER_CHECKS is defined, debug mode will let SafePointer perform thread and allocation identity checks.
+	//     Makes sure that the accessed memory has not been freed, recycled or shared with the wrong thread.
+	//     This will make memory access super slow but catch more memory errors when basic bound checks are not enough.
+	// If EXTRA_SAFE_POINTER_CHECKS is not defined, debug mode will only let SafePointer perform its basic bound checks.
+	// Has no effect in release mode, because it is only active when SAFE_POINTER_CHECKS is also defined.
+	//#define EXTRA_SAFE_POINTER_CHECKS
+
 	// Determine which SIMD extensions to use in base/simd.h.
 	// Use the standard compiler flags for enabling SIMD extensions.
 	//   If your compiler uses a different macro name to indicate the presence of a SIMD extension, you can add them here to enable the USE_* macros.
@@ -47,6 +42,7 @@
 		#endif
 	#elif defined(__ARM_NEON)
 		#define USE_NEON // Comment out this line to test without NEON
+		// TODO: Check if SVE is enabled once implemented in simd.h.
 	#endif
 
 	// Enable the EMULATE_X_256BIT_SIMD macro to force use of 256-bit vectors even when there is no hardware instructions supporting it.
@@ -86,10 +82,11 @@
 		#define DSR_LARGEST_VECTOR_SIZE 16
 	#endif
 
-	// Endianness
-	//   Compile with C++ 2020 or later to detect endianness automatically.
-	//   Or define the DSR_BIG_ENDIAN macro externally when building for big-endian targets.
+	// If using C++ 2020 or later.
 	#if (__cplusplus >= 202002L)
+		// Endianness
+		//   Compile with C++ 2020 or later to detect endianness automatically.
+		//   Or define the DSR_BIG_ENDIAN macro externally when building for big-endian targets.
 		#include <bit>
 		#if (std::endian::native == std::endian::big)
 			// We detected a big endian target.
@@ -108,9 +105,12 @@
 	//   Must be a power of two, and no less than the largest cache line among all CPU cores that might run the program.
 	//   Can be assigned using the DSR_THREAD_SAFE_ALIGNMENT macro externally or changed here.
 	#ifndef DSR_THREAD_SAFE_ALIGNMENT
+		// 64 bytes is generally a good choice, because it is large enough to align with cache lines on most computers and large enough to store an allocation header.
+		// Note that Apple M1 has a cache line of 128 bytes, which exceeds this default value.
 		#define DSR_THREAD_SAFE_ALIGNMENT 64
 	#endif
 
+	// TODO: Allow having a dynamic largest vector size to support SVE vectors of 1024 or 2048 bits in the future.
 	// When allocating memory for being reused many times for different purposes, we need to know the maximum alignment that will be required ahead of time.
 	//   Here we define it as the maximum of the largest SIMD vector and the thread safe alignment.
 	#if (DSR_LARGEST_VECTOR_SIZE > DSR_THREAD_SAFE_ALIGNMENT)

+ 7 - 7
Source/SDK/SpriteEngine/lightAPI.cpp

@@ -7,7 +7,7 @@ namespace dsr {
 
 // Precondition: The packed color must be in the standard RGBA order, meaning no native packing
 inline F32xXx3 unpackRgb_U32xX_to_F32xXx3(const U32xX& color) {
-	return F32xXx3(floatFromU32(getRed(color)), floatFromU32(getGreen(color)), floatFromU32(getBlue(color)));
+	return F32xXx3(floatFromU32(packOrder_getRed(color)), floatFromU32(packOrder_getGreen(color)), floatFromU32(packOrder_getBlue(color)));
 }
 
 static inline void setLight(SafePointer<uint8_t> lightPixel, U8xX newlight) {
@@ -52,7 +52,7 @@ void directedLight(const FMatrix3x3& normalToWorldSpace, OrderedImageRgbaU8& lig
 				green = green.clampUpper(255.1f);
 				blue = blue.clampUpper(255.1f);
 				// TODO: Let color packing handle arbitrary vector lengths.
-				U8xX light = reinterpret_U8FromU32(packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue)));
+				U8xX light = reinterpret_U8FromU32(packOrder_packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue)));
 				if (ADD_LIGHT) {
 					addLight(lightPixel, light);
 				} else {
@@ -252,7 +252,7 @@ static void addPointLightSuper(const OrthoView& camera, const IVector2D& worldCe
 					green = green.clampUpper(255.1f);
 					blue = blue.clampUpper(255.1f);
 					// Add light to the image
-					U8xX morelight = reinterpret_U8FromU32(packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue)));
+					U8xX morelight = reinterpret_U8FromU32(packOrder_packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue)));
 					addLight(lightPixel, morelight);
 					// Go to the next four pixels in light-space
 					lightBasePixelxX += dxX;
@@ -303,13 +303,13 @@ void blendLight(AlignedImageRgbaU8& colorBuffer, const OrderedImageRgbaU8& diffu
 			for (int x = 0; x < width; x += laneCountX_32Bit) {
 				U32xX diffuse = U32xX::readAligned(diffusePixel, "blendLight: reading diffuse");
 				U32xX light = U32xX::readAligned(lightPixel, "blendLight: reading light");
-				F32xX red = (floatFromU32(getRed(diffuse)) * floatFromU32(getRed(light))) * scale;
-				F32xX green = (floatFromU32(getGreen(diffuse)) * floatFromU32(getGreen(light))) * scale;
-				F32xX blue = (floatFromU32(getBlue(diffuse)) * floatFromU32(getBlue(light))) * scale;
+				F32xX red = (floatFromU32(packOrder_getRed(diffuse)) * floatFromU32(packOrder_getRed(light))) * scale;
+				F32xX green = (floatFromU32(packOrder_getGreen(diffuse)) * floatFromU32(packOrder_getGreen(light))) * scale;
+				F32xX blue = (floatFromU32(packOrder_getBlue(diffuse)) * floatFromU32(packOrder_getBlue(light))) * scale;
 				red = red.clampUpper(255.1f);
 				green = green.clampUpper(255.1f);
 				blue = blue.clampUpper(255.1f);
-				U32xX color = packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue), targetOrder);
+				U32xX color = packOrder_packBytes(truncateToU32(red), truncateToU32(green), truncateToU32(blue), targetOrder);
 				color.writeAligned(targetPixel, "blendLight: writing color");
 				targetPixel += laneCountX_32Bit;
 				diffusePixel += laneCountX_32Bit;

+ 4 - 4
Source/SDK/SpriteEngine/spriteAPI.cpp

@@ -1349,16 +1349,16 @@ void sprite_generateFromModel(ImageRgbaU8& targetAtlas, String& targetConfigText
 
 		// Calculate initial image size
 		float worstCaseDiameter = (std::max(maxBound.x, -minBound.x) + std::max(maxBound.y, -minBound.y) + std::max(maxBound.z, -minBound.z)) * 2;
-		int maxRes = roundUp(worstCaseDiameter * ortho.pixelsPerTile, 2) + 4; // Round up to even pixels and add 4 padding pixels
+		int32_t maxRes = roundUp(int32_t(worstCaseDiameter) * ortho.pixelsPerTile, 2) + 4; // Round up to even pixels and add 4 padding pixels
 
 		// Allocate square images from the pessimistic size estimation
-		int width = maxRes;
-		int height = maxRes;
+		int32_t width = maxRes;
+		int32_t height = maxRes;
 		ImageF32 depthBuffer = image_create_F32(width, height);
 		ImageRgbaU8 colorImage[cameraAngles];
 		ImageRgbaU8 heightImage[cameraAngles];
 		ImageRgbaU8 normalImage[cameraAngles];
-		for (int a = 0; a < cameraAngles; a++) {
+		for (int32_t a = 0; a < cameraAngles; a++) {
 			colorImage[a] = image_create_RgbaU8(width, height);
 			heightImage[a] = image_create_RgbaU8(width, height);
 			normalImage[a] = image_create_RgbaU8(width, height);

+ 8 - 7
Source/SDK/terrain/main.cpp

@@ -170,7 +170,7 @@ int createGridPart(Model& targetModel, const ImageU8& heightMap) {
 	return part;
 }
 
-static Model createGrid(const ImageU8& heightMap, const ImageRgbaU8& colorMap) {
+static Model createGrid(const ImageU8& heightMap, const TextureRgbaU8& colorMap) {
 	Model model = model_create();
 	int part = createGridPart(model, heightMap);
 	model_setDiffuseMap(model, part, colorMap);
@@ -366,15 +366,16 @@ void dsrMain(List<String> args) {
 	ImageRgbaU8 diffuseMap = image_create_RgbaU8(colorMapWidth, colorMapHeight);
 	generateDiffuseMap(diffuseMap, bumpMap, heightRamp);
 
-	// Create a color map for the ground
-	ImageRgbaU8 colorMap = image_create_RgbaU8(colorMapWidth, colorMapHeight);
+	// Create a color texture with 5 resolutions.
+	TextureRgbaU8 colorTexture = texture_create_RgbaU8(colorMapWidth, colorMapHeight, 5);
+	// Get the highest texture resolution as an image for easy manipulation.
+	ImageRgbaU8 colorMap = texture_getMipLevelImage(colorTexture, 0);
+	// Update the color map and texture.
 	updateColorMap(colorMap, diffuseMap, lightMap);
-
-	// Generate pyramid
-	image_generatePyramid(colorMap);
+	texture_generatePyramid(colorTexture);
 
 	// Create a ground model
-	Model ground = createGrid(heightMap, colorMap);
+	Model ground = createGrid(heightMap, colorTexture);
 
 	// Create a renderer for multi-threading
 	Renderer worker = renderer_create();

+ 2 - 2
Source/templates/basic3D/main.cpp

@@ -30,7 +30,7 @@ AlignedImageU8 darkEdge = image_fromAscii(
 	"<x--------------x>"
 	"<xxxxxxxxxxxxxxxx>"
 );
-OrderedImageRgbaU8 myTexture = image_pack(darkEdge, darkEdge, 0, 255);
+TextureRgbaU8 myTexture = texture_create_RgbaU8(image_pack(darkEdge, darkEdge, 0, 255), 1);
 
 int createCubePart(Model model, const FVector3D &min, const FVector3D &max) {
 	// Add positions
@@ -89,7 +89,7 @@ void dsrMain(List<String> args) {
 	});
 
 	// Generate mip-maps for the texture
-	image_generatePyramid(myTexture);
+	texture_generatePyramid(myTexture);
 	// Create a cube model
 	Model cubeModel = createCubeModel(FVector3D(-0.5f), FVector3D(0.5f));
 	// Assign the texture to part 0

Some files were not shown because too many files changed in this diff