
Beginning of public version history.

David Piuva, 6 years ago
Commit
3c4e7bc569
100 changed files with 25039 additions and 0 deletions
  1. 31 0
      Doc/C++Guide.txt
  2. 47 0
      Doc/GettingStarted.txt
  3. 66 0
      Doc/StyleGuide.txt
  4. 27 0
      Source/DFPSR/License.txt
  5. 68 0
      Source/DFPSR/README.md
  6. 54 0
      Source/DFPSR/api/configAPI.cpp
  7. 63 0
      Source/DFPSR/api/configAPI.h
  8. 370 0
      Source/DFPSR/api/drawAPI.cpp
  9. 172 0
      Source/DFPSR/api/drawAPI.h
  10. 231 0
      Source/DFPSR/api/guiAPI.cpp
  11. 231 0
      Source/DFPSR/api/guiAPI.h
  12. 755 0
      Source/DFPSR/api/imageAPI.cpp
  13. 228 0
      Source/DFPSR/api/imageAPI.h
  14. 1016 0
      Source/DFPSR/api/mediaMachineAPI.cpp
  15. 137 0
      Source/DFPSR/api/mediaMachineAPI.h
  16. 274 0
      Source/DFPSR/api/modelAPI.cpp
  17. 118 0
      Source/DFPSR/api/modelAPI.h
  18. 54 0
      Source/DFPSR/api/timeAPI.cpp
  19. 37 0
      Source/DFPSR/api/timeAPI.h
  20. 55 0
      Source/DFPSR/api/types.cpp
  21. 201 0
      Source/DFPSR/api/types.h
  22. 88 0
      Source/DFPSR/base/Buffer.cpp
  23. 77 0
      Source/DFPSR/base/Buffer.h
  24. 78 0
      Source/DFPSR/base/SafePointer.cpp
  25. 243 0
      Source/DFPSR/base/SafePointer.h
  26. 56 0
      Source/DFPSR/base/endian.h
  27. 1624 0
      Source/DFPSR/base/simd.h
  28. 114 0
      Source/DFPSR/base/simd3D.h
  29. 84 0
      Source/DFPSR/base/simdExtra.h
  30. 772 0
      Source/DFPSR/base/text.cpp
  31. 304 0
      Source/DFPSR/base/text.h
  32. 167 0
      Source/DFPSR/base/threading.cpp
  33. 67 0
      Source/DFPSR/base/threading.h
  34. 79 0
      Source/DFPSR/collection/Array.h
  35. 39 0
      Source/DFPSR/collection/BoundChecks.cpp
  36. 106 0
      Source/DFPSR/collection/Field.h
  37. 114 0
      Source/DFPSR/collection/List.h
  38. 7 0
      Source/DFPSR/collection/includeCollection.h
  39. 70 0
      Source/DFPSR/gui/BackendWindow.cpp
  40. 96 0
      Source/DFPSR/gui/BackendWindow.h
  41. 259 0
      Source/DFPSR/gui/DsrWindow.cpp
  42. 156 0
      Source/DFPSR/gui/DsrWindow.h
  43. 68 0
      Source/DFPSR/gui/FlexRegion.cpp
  44. 107 0
      Source/DFPSR/gui/FlexRegion.h
  45. 157 0
      Source/DFPSR/gui/Font.cpp
  46. 93 0
      Source/DFPSR/gui/Font.h
  47. 169 0
      Source/DFPSR/gui/InputEvent.cpp
  48. 132 0
      Source/DFPSR/gui/InputEvent.h
  49. 352 0
      Source/DFPSR/gui/VisualComponent.cpp
  50. 209 0
      Source/DFPSR/gui/VisualComponent.h
  51. 142 0
      Source/DFPSR/gui/VisualTheme.cpp
  52. 40 0
      Source/DFPSR/gui/VisualTheme.h
  53. 121 0
      Source/DFPSR/gui/components/Button.cpp
  54. 70 0
      Source/DFPSR/gui/components/Button.h
  55. 83 0
      Source/DFPSR/gui/components/Panel.cpp
  56. 61 0
      Source/DFPSR/gui/components/Panel.h
  57. 266 0
      Source/DFPSR/gui/defaultFont.h
  58. 9 0
      Source/DFPSR/gui/includeGui.h
  59. 104 0
      Source/DFPSR/image/Color.cpp
  60. 120 0
      Source/DFPSR/image/Color.h
  61. 36 0
      Source/DFPSR/image/Image.cpp
  62. 70 0
      Source/DFPSR/image/Image.h
  63. 39 0
      Source/DFPSR/image/ImageF32.cpp
  64. 46 0
      Source/DFPSR/image/ImageF32.h
  65. 51 0
      Source/DFPSR/image/ImageLoader.h
  66. 293 0
      Source/DFPSR/image/ImageRgbaU8.cpp
  67. 98 0
      Source/DFPSR/image/ImageRgbaU8.h
  68. 39 0
      Source/DFPSR/image/ImageU16.cpp
  69. 47 0
      Source/DFPSR/image/ImageU16.h
  70. 39 0
      Source/DFPSR/image/ImageU8.cpp
  71. 47 0
      Source/DFPSR/image/ImageU8.h
  72. 213 0
      Source/DFPSR/image/PackOrder.h
  73. 1226 0
      Source/DFPSR/image/draw.cpp
  74. 99 0
      Source/DFPSR/image/draw.h
  75. 83 0
      Source/DFPSR/image/internal/imageInternal.h
  76. 65 0
      Source/DFPSR/image/internal/imageTemplate.h
  77. 45 0
      Source/DFPSR/image/stbImage/stbImageWrapper.cpp
  78. 15 0
      Source/DFPSR/image/stbImage/stbImageWrapper.h
  79. 6712 0
      Source/DFPSR/image/stbImage/stb_image.h
  80. 1458 0
      Source/DFPSR/image/stbImage/stb_image_write.h
  81. 31 0
      Source/DFPSR/includeFramework.h
  82. 481 0
      Source/DFPSR/machine/VirtualMachine.cpp
  83. 424 0
      Source/DFPSR/machine/VirtualMachine.h
  84. 270 0
      Source/DFPSR/machine/mediaFilters.cpp
  85. 59 0
      Source/DFPSR/machine/mediaFilters.h
  86. 81 0
      Source/DFPSR/math/FMatrix2x2.h
  87. 112 0
      Source/DFPSR/math/FMatrix3x3.h
  88. 56 0
      Source/DFPSR/math/FPlane3D.h
  89. 133 0
      Source/DFPSR/math/FVector.h
  90. 387 0
      Source/DFPSR/math/FixedPoint.cpp
  91. 184 0
      Source/DFPSR/math/FixedPoint.h
  92. 110 0
      Source/DFPSR/math/IRect.h
  93. 71 0
      Source/DFPSR/math/IVector.h
  94. 57 0
      Source/DFPSR/math/LVector.h
  95. 91 0
      Source/DFPSR/math/Transform3D.h
  96. 57 0
      Source/DFPSR/math/UVector.h
  97. 15 0
      Source/DFPSR/math/includeMath.h
  98. 123 0
      Source/DFPSR/math/scalar.h
  99. 255 0
      Source/DFPSR/math/vectorMethods.h
  100. 183 0
      Source/DFPSR/persistent/ClassFactory.cpp

+ 31 - 0
Doc/C++Guide.txt

@@ -0,0 +1,31 @@
+A general C++ debugging guide for anyone who might need it.
+
+Cannot find method in namespace:
+	* Check that the header is included from where it is called.
+	* Check that the correct namespace is being used.
+	* Check input arguments.
+		Having the wrong input is like using another method name because of overloading.
+
+Linker error:
+	* Check that the cpp file is compiled by looking at the cpp files being mentioned in the terminal.
+		If not compiled:
+			* Check that the file's name doesn't contain spaces by mistake.
+			* Check that it's in a folder being compiled.
+	* Check that the definition in the cpp file has the identifier declared explicitly in the namespace.
+		flags returnType namespace::methodName( ... ) flags { ... }
+
+Multiple definitions of method in header:
+	* Move the definition to a cpp file, which is only compiled once, or declare it as inline (see the sketch after this guide).
+
+Cannot move variable to base class without getting weird behaviour:
+	* Did the construction of the variable in the initializer list depend on a value given to the class that it was moved from?
+		The base constructor runs before any of the derived class's own members have been initialized.
+
+Calls to a class crash when made from another source file:
+	* Make sure that each method is implemented in the corresponding source file.
+		A class fully defined in the header may have contradicting implementations between the sources that include the header.
+
+General rules:
+	* If a class isn't supposed to be instantiated, make it abstract by letting at least one virtual method be pure.
+		This reveals if any polymorphic argument is passed as a template type to overloading, which ignores the child type and its v-table.
+
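
A minimal sketch of the linker-error and inline advice above. The names foo.h, foo.cpp, addOne and addTwo are made up purely for illustration:

	// foo.h
	#ifndef EXAMPLE_FOO
	#define EXAMPLE_FOO
	namespace dsr {
		int addOne(int x);          // Declared in the header, defined once in foo.cpp.
		inline int addTwo(int x) {  // Defined in the header, so it must be inline.
			return x + 2;
		}
	}
	#endif

	// foo.cpp
	#include "foo.h"
	int dsr::addOne(int x) {        // The identifier is declared explicitly in the namespace.
		return x + 1;
	}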

+ 47 - 0
Doc/GettingStarted.txt

@@ -0,0 +1,47 @@
+LINUX
+
+Place the library's root folder inside of a new folder.
+This allows keeping your own projects and the "temporary" folder that's
+used for compiling quickly outside of the library's version history.
+
+If using Linux with an X11 server, you might need to install the X11 headers.
+If it doesn't work, your Linux distro might not have an X11 server.
+	sudo apt-get install libx11-dev
+
+If using Raspbian, you can create and share your own window module for Raspbian
+or simply choose Ubuntu Mate with the following performance tweaks:
+	Control panel - Sound - Activate sound for windows and buttons: Off
+	Firefox browser - Preferences - General - Use smooth scrolling: Off
+	Firefox browser - Preferences - Home - Homepage and new windows: Blank page
+
+Build and run an example program:
+	* Select an SDK example and open its folder in a terminal.
+		chmod +x build.sh
+		./build.sh
+	Some examples might have additional dependencies.
+
+Run regression tests:
+	* Open the source folder in a terminal and run the test script:
+		chmod +x test.sh
+		./test.sh
+
+Create your own Linux project:
+	* Copy one of the SDK examples.
+	* Change ROOT_PATH and TEMP_DIR in your build script
+	  to refer to the same locations from a new source path.
+	* Add your own external dependencies to LINKER_FLAGS within a quote
+	  with -l in front of each library name.
+
+Creating a terminal application:
+	If your application doesn't create any window, you can make it more portable
+	by replacing "WINDOW_MANAGER=X11" with "WINDOW_MANAGER=NONE".
+	This will compile with NoWindow.cpp instead of X11Window.cpp in windowManagers.
+
+Create your own cross-platform CodeBlocks project:
+	* Select C++ 14 with G++ from the GCC toolchain.
+	* Link with "-lm -pthread" to get standard C++ math and threading.
+	* Include all source files in the DFPSR folder or just the ones you need.
+	* Include the window wrapper and its dependencies for each target build.
+	  Most platforms will be targeted using NoWindow.cpp for text only.
+	This should work with most other IDEs that support the GCC toolchain.
+

+ 66 - 0
Doc/StyleGuide.txt

@@ -0,0 +1,66 @@
+Code convention:
+1. Use common sense!
+	If it looks wrong to humans then it's wrong.
+	Don't defeat the purpose of the rule by taking it too far.
+2. Don't use iterators when there is any other way to accomplish the task.
+	You can't write efficient algorithms without knowing the data structures.
+3. Tabs for indentation then spaces for alignment.
+	It's the best of both worlds by both having variable length tabs
+	and correct alignment that works between lines of the same indentation.
+	3.1. Do not use multiple spaces as a replacement for tabs in indentation.
+	     I don't care if you set your editor to 2, 4 or 8 columns per tab
+	     and neither should you care about my preferences.
+	3.2. Do not use a tab as a replacement for multiple spaces in alignment.
+	     The number of leading tabs should be equal to the indentation depth.
+	     This way we can prove mathematically that blocks of code in the same
+	     indentation will always keep the same relative alignment.
+	3.3. Do not use tabs after spaces.
+	     First tabs for indentation, then spaces for alignment.
+	3.4. Do not try to align between different indentation depths.
+	     Only align within the same depth so that it works for all tab lengths.
+	Example using "--->" for tabs and "." for spaces:
+		int foo(int x, int y) {
+		--->if (superLongExpression(x) &&
+		--->....superLongExpression(y)) {
+		--->--->bar(x + y);
+		--->}
+		}
+	If a reader uses 8 spaces per tab then superLongExpression is still aligned:
+		int foo(int x, int y) {
+		------->if (superLongExpression(x) &&
+		------->....superLongExpression(y)) {
+		------->------->bar(x + y);
+		------->}
+		}
+4. No dangling else, use explicit {} for safety.
+	Otherwise someone might add an extra statement and get random crashes.
+5. No hpp extensions, use h for all headers.
+	Use macros if you want to use the same API for both C and C++,
+	so that it selects the correct version for the language automatically.
+6. C-style casting for raw data manipulation and C++-style for polymorphism.
+	C++-style casting makes no sense when using assembly intrinsics.
+	High-level behaviour is undefined behaviour in bare metal programming
+	and only adds more confusion when trying to optimize code.
+7. Don't call member methods with "this" set to nullptr.
+	This would be undefined behaviour and may randomly crash.
+	Use global functions instead. They allow checking pointers for null
+	because they are explicit arguments declared by the programmer (see the sketch after this guide).
+8. Avoid using STD/STL directly in SDK examples.
+	Use the safer and faster wrapper types in the dsr namespace.
+	One shouldn't have to remember which namespace a collection was declared in.
+9. Don't abuse the auto keyword everywhere just to make it look more "modern".
+	Auto can decrease readability if used too often.
+	Don't force the reader to look into multiple modules to figure out the type.
+10. No new line for opening brackets.
+	Makes the code more compact and decreases the risk of copy-paste errors.
+11. Don't fix the style of someone else's code if you can easily read it.
+	Being pedantic can become an addiction consuming all your time.
+	Fixing actual bugs and porting to new systems is much more appreciated than
+	causing version conflicts with others.
+12. Don't change things that you don't know how to test manually.
+
+Regression tests can only catch a percentage of new defects and
+cannot guarantee any level of quality on their own for a large test space.
+The principle of random sampling to estimate quality is only valid
+if the samples are actually random to cover the entire test space.
+
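
A small sketch of rules 4 and 7 above, using the hypothetical names Enemy and tryDamage for illustration:

	struct Enemy { int health; };
	// Rule 7: use a global function with an explicit pointer argument that can be checked for null,
	//         instead of calling a member method with "this" set to nullptr.
	void tryDamage(Enemy* enemy, int amount) {
		// Rule 4: explicit {} even around a single statement, so no dangling else can sneak in.
		if (enemy != nullptr) {
			enemy->health -= amount;
		}
	}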

+ 27 - 0
Source/DFPSR/License.txt

@@ -0,0 +1,27 @@
+Main license for David Piuva's software renderer:
+	zlib open source license
+
+	Copyright (c) 2017 to 2019 David Forsgren Piuva
+
+	This software is provided 'as-is', without any express or implied
+	warranty. In no event will the authors be held liable for any damages
+	arising from the use of this software.
+
+	Permission is granted to anyone to use this software for any purpose,
+	including commercial applications, and to alter it and redistribute it
+	freely, subject to the following restrictions:
+
+	   1. The origin of this software must not be misrepresented; you must not
+	   claim that you wrote the original software. If you use this software
+	   in a product, an acknowledgment in the product documentation would be
+	   appreciated but is not required.
+
+	   2. Altered source versions must be plainly marked as such, and must not be
+	   misrepresented as being the original software.
+
+	   3. This notice may not be removed or altered from any source
+	   distribution.
+
+External licenses:
+	* See image/stbImage for STB's license and copyright notice.
+

+ 68 - 0
Source/DFPSR/README.md

@@ -0,0 +1,68 @@
+# David Piuva's software renderer
+
+Since August 12, 2017
+
+## Why use this software renderer
+
+* No core dependencies, just a static library defining all rendering mathematically in C++. You can render without any GPU and save directly to files without depending on a window manager. You can show grayscale images as ASCII art in the terminal for debugging. If you want a window manager, you can inject one from the outside by creating a backend class and injecting it into a portable DsrWindow.
+
+* No device lost exceptions randomly erasing all your buffers. This may happen on GPUs because of poor hardware design that never considered general purpose computations.
+
+* No shader compilation failure at end users. It's all compiled with the application.
+
+* No visible shader source code exposing secret algorithms to competitors. It's just more code in your executable where identifiers can be obfuscated.
+
+* No missing GPU extensions. You don't even need a GPU if you save the result to a file or send it over a network.
+
+### Classic games
+
+Making a classic game without defining how rendering is done in pure high-level math makes as little sense as saving the last copy of your family history book for generations to come in a platform-specific encrypted DRM format. If someone's going to restore your game to a playable condition 500 years from now by porting the code, they shouldn't have to guess what OpenGL is nor which month's driver patch it requires to run.
+
+### Long lifetime projects
+
+If you plan to make something that takes 30 years to develop and should be operational far into the future, you need a statically linked framework that defines everything clearly and precisely in one place without leaving room for alternative interpretation by external libraries.
+
+### Generate textures for the GPU
+
+To save space and get more randomness in your textures, you can use this framework to generate images with higher determinism and lower maintenance cost.
+
+### Determinism
+
+By having a single implementation on top of a SIMD hardware abstraction layer, operations follow the same algorithms on every platform with fewer surprises. If you can avoid using floating-point operations, you'll get 100% determinism.
+
+### Always direct memory access
+
+While it's not advisable to break multi-threading conventions, you can always access your data directly without having to worry about the long delay of reading back from the GPU to the CPU.
+
+### Fast and precise 2D graphics
+
+Using OpenGL for 2D graphics without GPU acceleration on Linux technically lets the CPU emulate a GPU, which then does what the CPU does best to begin with. Using the CPU directly for 2D is both faster and more precise.
+
+* No need to fake GPU memory uploads nor downloads. You have direct access to the data and can get pointers to your image buffers.
+
+* Pixel-exact 2D image drawing using direct integer coordinates. No trial and error with different graphics drivers; it just works correctly at speeds comparable to the GPU.
+
+* If you're making a low-resolution 2D retro game with many small sprites, the CPU will probably reach far higher framerates than your screen is capable of displaying, which gives room for more game logic and visual effects.
+
+* Filters on aligned images can read, write and process using SIMD vectors, which is very fast even in high resolutions. Even better if you combine multiple filters into compound functions that read and write once.
+
+### Possible to modify down to the core
+
+If you miss something in the rendering pipeline or just want to learn all the math behind fundamental 3D graphics, you can modify the source code directly and have it statically linked to your application. Maybe you want a custom 12 channel image format for generating 2D sprites with depth and normal maps. Maybe you want to try a new triangle occlusion system or threading algorithm for improved rendering speed.
+
+There's no need to look at cryptic assembly code if something won't compile, just high-level math operations taking multiple pixels at once. The whole renderer is built on top of a simplistic SIMD vector abstraction layer in the simd.h module, which is well tested on both Intel and ARM processors.
+
+### Write your SIMD filter once, run on Intel/ARM
+
+The SIMD vectors work on SSE, NEON and scalar operations, so you aren't forced to write another version in case that SIMD doesn't exist. It is however good practice to validate your ideas and create regression tests with a safe algorithm first.
+
+Even when running without a supported SIMD instruction set, the emulated scalar version can be better at utilizing hyper-threading than a naive non-vectorized pixel loop, because the CPU's instruction window can process multiple pixels at once on different ALUs within the same block.
+
+## Requirements
+
+* Just like when building your own desktop computer, ARM based mini-computers also need cooling if you plan to do something resource intensive non-stop for hours.
+
+* Big-endian mode is untested and gives a warning if compiling using the DSR_BIG_ENDIAN macro. The file endian.h only exists in case that big-endian ever comes back to personal computers in the future.
+
+* Requires the VLA compiler extension (Variable length array) from C. It does make the machine code look like crap, but it's still a lot better than fragmenting the heap every time you draw a triangle.
+

+ 54 - 0
Source/DFPSR/api/configAPI.cpp

@@ -0,0 +1,54 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "configAPI.h"
+
+using namespace dsr;
+
+void dsr::config_parse_ini(const ReadableString& content, ConfigIniCallback receiverLambda) {
+	List<ReadableString> lines = content.split(U'\n');
+	String block = U"";
+	for (int l = 0; l < lines.length(); l++) {
+		// Get the current line
+		ReadableString command = lines[l];
+		// Skip comments
+		int commentIndex = command.findFirst(U';');
+		if (commentIndex > -1) {
+			command = command.before(commentIndex);
+		}
+		// Find assignments
+		int assignmentIndex = command.findFirst(U'=');
+		if (assignmentIndex > -1) {
+			ReadableString key = string_removeOuterWhiteSpace(command.before(assignmentIndex));
+			ReadableString value = string_removeOuterWhiteSpace(command.after(assignmentIndex));
+			receiverLambda(block, key, value);
+		} else {
+			int blockStartIndex = command.findFirst(U'[');
+			int blockEndIndex = command.findFirst(U']');
+			if (blockStartIndex > -1 && blockEndIndex > -1) {
+				block = string_removeOuterWhiteSpace(command.inclusiveRange(blockStartIndex + 1, blockEndIndex - 1));
+			}
+		}
+	}
+}
+
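
A minimal usage sketch for the parser above. The ini content and key names are made up for illustration, and printText is borrowed from the example in configAPI.h:

	using namespace dsr;
	String content = U"[Window]\n Width = 800 ; comments after ';' are skipped\n Height = 600\n";
	config_parse_ini(content, [](const ReadableString& block, const ReadableString& key, const ReadableString& value) {
		// Prints "Window: Width = 800" and "Window: Height = 600" with outer white space removed.
		printText(block, ": ", key, " = ", value, "\n");
	});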

+ 63 - 0
Source/DFPSR/api/configAPI.h

@@ -0,0 +1,63 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_API_CONFIG
+#define DFPSR_API_CONFIG
+
+#include "../base/text.h"
+#include <functional>
+
+namespace dsr {
+	// A type of function sending (Block, Key, Value) to the caller.
+	//   One can have hard-coded options, lookup-tables, dictionaries, et cetera for looking up the given key names.
+	using ConfigIniCallback = std::function<void(const ReadableString&, const ReadableString&, const ReadableString&)>;
+	/*
+		Parsing the given content of a *.ini configuration file.
+		Sending callbacks to receiverLambda for each key being assigned a value.
+		  * If there's any preceding [] block, the content of the last preceding block will be given as the first argument.
+		  * The key will be sent as the second argument.
+		  * The value will be sent as the third argument.
+		Example:
+			config_parse_ini(content, [this](const ReadableString& block, const ReadableString& key, const ReadableString& value) {
+				if (block.length() == 0) {
+					if (string_caseInsensitiveMatch(key, U"A")) {
+						this->valueA = string_parseInteger(value);
+					} else if (string_caseInsensitiveMatch(key, U"B")) {
+						this->valueB = string_parseInteger(value);
+					} else {
+						printText("Unrecognized key \"", key, "\" in A&B value configuration file.\n");
+					}
+				} else {
+					printText("Unrecognized block \"", block, "\" in A&B value configuration file.\n");
+				}
+			});
+	*/
+	void config_parse_ini(const ReadableString& content, ConfigIniCallback receiverLambda);
+
+	// Adding an ini generator might be convenient for complying with the *.ini file standard
+	// but it would also take away some artistic freedom with how lines are indented
+	// and it's not really difficult to generate a few assignments manually.
+}
+
+#endif
+
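
A hypothetical *.ini file that the example callback above would accept, with case insensitive keys and no [] block:

	; A&B value configuration file
	a = 1
	B = 25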

+ 370 - 0
Source/DFPSR/api/drawAPI.cpp

@@ -0,0 +1,370 @@
+
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#define DFPSR_INTERNAL_ACCESS
+
+#include <cassert>
+#include "imageAPI.h"
+#include "drawAPI.h"
+#include "../image/draw.h"
+#include "../image/ImageRgbaU8.h"
+#include "../image/PackOrder.h"
+#include "../image/internal/imageTemplate.h"
+#include "../image/internal/imageInternal.h"
+
+using namespace dsr;
+
+
+// -------------------------------- Image generation and filtering --------------------------------
+
+
+static void mapRgbaU8(ImageRgbaU8Impl& target, const ImageGenRgbaU8& lambda, int startX, int startY) {
+	const int targetWidth = target.width;
+	const int targetHeight = target.height;
+	const int targetStride = target.stride;
+	// Use the output directly
+	SafePointer<Color4xU8> targetRow = imageInternal::getSafeData<Color4xU8>(target);
+	for (int y = startY; y < targetHeight + startY; y++) {
+		SafePointer<Color4xU8> targetPixel = targetRow;
+		for (int x = startX; x < targetWidth + startX; x++) {
+			*targetPixel = target.packRgba(lambda(x, y).saturate());
+			targetPixel += 1;
+		}
+		targetRow.increaseBytes(targetStride);
+	}
+}
+void dsr::filter_mapRgbaU8(ImageRgbaU8& target, const ImageGenRgbaU8& lambda, int startX, int startY) {
+	if (target) {
+		mapRgbaU8(*target, lambda, startX, startY);
+	}
+}
+ImageRgbaU8 dsr::filter_generateRgbaU8(int width, int height, const ImageGenRgbaU8& lambda, int startX, int startY) {
+	ImageRgbaU8 result = image_create_RgbaU8(width, height);
+	filter_mapRgbaU8(result, lambda, startX, startY);
+	return result;
+}
+
+static void mapF32(ImageF32Impl& target, const ImageGenF32& lambda, int startX, int startY) {
+	const int targetWidth = target.width;
+	const int targetHeight = target.height;
+	const int targetStride = target.stride;
+	// Use the output directly
+	SafePointer<float> targetRow = imageInternal::getSafeData<float>(target);
+	for (int y = startY; y < targetHeight + startY; y++) {
+		SafePointer<float> targetPixel = targetRow;
+		for (int x = startX; x < targetWidth + startX; x++) {
+			*targetPixel = lambda(x, y);
+			targetPixel += 1;
+		}
+		targetRow.increaseBytes(targetStride);
+	}
+}
+void dsr::filter_mapF32(ImageF32& target, const ImageGenF32& lambda, int startX, int startY) {
+	if (target) {
+		mapF32(*target, lambda, startX, startY);
+	}
+}
+ImageF32 dsr::filter_generateF32(int width, int height, const ImageGenF32& lambda, int startX, int startY) {
+	ImageF32 result = image_create_F32(width, height);
+	filter_mapF32(result, lambda, startX, startY);
+	return result;
+}
+
+// Basic immutable image operations
+// TODO: Create optimized in-place versions for aligned images based on the reference implementations
+static const uint32_t imageMultiply_shift = 10;
+static const float imageMultiply_scale = (1u << imageMultiply_shift) / 255.0f;
+ImageRgbaU8 dsr::filter_mulColorRgb(const ImageRgbaU8& inputImage, const ColorRgbI32& color) {
+	if (inputImage) {
+		const int iR = (float)color.red * imageMultiply_scale;
+		const int iG = (float)color.green * imageMultiply_scale;
+		const int iB = (float)color.blue * imageMultiply_scale;
+		return filter_generateRgbaU8(inputImage->width, inputImage->height, [inputImage, iR, iG, iB](int x, int y)->ColorRgbaI32 {
+			ColorRgbaI32 source = image_readPixel_clamp(inputImage, x, y);
+			return ColorRgbaI32((source.red * iR) >> imageMultiply_shift, (source.green * iG) >> imageMultiply_shift, (source.blue * iB) >> imageMultiply_shift, source.alpha);
+		});
+	} else {
+		return ImageRgbaU8();
+	}
+}
+ImageRgbaU8 dsr::filter_mulColorRgba(const ImageRgbaU8& inputImage, const ColorRgbaI32& color) {
+	if (inputImage) {
+		const int iR = (float)color.red * imageMultiply_scale;
+		const int iG = (float)color.green * imageMultiply_scale;
+		const int iB = (float)color.blue * imageMultiply_scale;
+		const int iA = (float)color.alpha * imageMultiply_scale;
+		return filter_generateRgbaU8(inputImage->width, inputImage->height, [inputImage, iR, iG, iB, iA](int x, int y)->ColorRgbaI32 {
+			ColorRgbaI32 source = image_readPixel_clamp(inputImage, x, y);
+			return ColorRgbaI32((source.red * iR) >> imageMultiply_shift, (source.green * iG) >> imageMultiply_shift, (source.blue * iB) >> imageMultiply_shift, (source.alpha * iA) >> imageMultiply_shift);
+		});
+	} else {
+		return ImageRgbaU8();
+	}
+}
+
+
+// -------------------------------- Drawing shapes --------------------------------
+
+
+void dsr::draw_rectangle(ImageU8& image, const IRect& bound, int color) {
+	if (image) {
+		imageImpl_draw_solidRectangle(*image, bound, color);
+	}
+}
+void dsr::draw_rectangle(ImageF32& image, const IRect& bound, float color) {
+	if (image) {
+		imageImpl_draw_solidRectangle(*image, bound, color);
+	}
+}
+void dsr::draw_rectangle(ImageRgbaU8& image, const IRect& bound, const ColorRgbaI32& color) {
+	if (image) {
+		imageImpl_draw_solidRectangle(*image, bound, color);
+	}
+}
+
+void dsr::draw_line(ImageU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
+	if (image) {
+		imageImpl_draw_line(*image, x1, y1, x2, y2, color);
+	}
+}
+void dsr::draw_line(ImageF32& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color) {
+	if (image) {
+		imageImpl_draw_line(*image, x1, y1, x2, y2, color);
+	}
+}
+void dsr::draw_line(ImageRgbaU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color) {
+	if (image) {
+		imageImpl_draw_line(*image, x1, y1, x2, y2, color);
+	}
+}
+
+
+// -------------------------------- Drawing images --------------------------------
+
+
+#define DRAW_COPY_WRAPPER(TARGET_TYPE, SOURCE_TYPE) \
+	void dsr::draw_copy(TARGET_TYPE& target, const SOURCE_TYPE& source, int32_t left, int32_t top) { \
+		if (target && source) { \
+			imageImpl_drawCopy(*target, *source, left, top); \
+		} \
+	}
+DRAW_COPY_WRAPPER(ImageRgbaU8, ImageRgbaU8);
+DRAW_COPY_WRAPPER(ImageU8, ImageU8);
+DRAW_COPY_WRAPPER(ImageU16, ImageU16);
+DRAW_COPY_WRAPPER(ImageF32, ImageF32);
+DRAW_COPY_WRAPPER(ImageRgbaU8, ImageU8);
+DRAW_COPY_WRAPPER(ImageRgbaU8, ImageU16);
+DRAW_COPY_WRAPPER(ImageRgbaU8, ImageF32);
+DRAW_COPY_WRAPPER(ImageU8, ImageU16);
+DRAW_COPY_WRAPPER(ImageU8, ImageF32);
+DRAW_COPY_WRAPPER(ImageU16, ImageU8);
+DRAW_COPY_WRAPPER(ImageU16, ImageF32);
+DRAW_COPY_WRAPPER(ImageF32, ImageU8);
+DRAW_COPY_WRAPPER(ImageF32, ImageU16);
+
+void dsr::draw_alphaFilter(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top) {
+	if (target && source) {
+		imageImpl_drawAlphaFilter(*target, *source, left, top);
+	}
+}
+void dsr::draw_maxAlpha(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top, int32_t sourceAlphaOffset) {
+	if (target && source) {
+		imageImpl_drawMaxAlpha(*target, *source, left, top, sourceAlphaOffset);
+	}
+}
+void dsr::draw_alphaClip(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left, int32_t top, int32_t treshold) {
+	if (target && source) {
+		imageImpl_drawAlphaClip(*target, *source, left, top, treshold);
+	}
+}
+void dsr::draw_silhouette(ImageRgbaU8& target, const ImageU8& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
+	if (target && source) {
+		imageImpl_drawSilhouette(*target, *source, color, left, top);
+	}
+}
+void dsr::draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight, int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	if (targetHeight && sourceHeight) {
+		imageImpl_drawHigher(*targetHeight, *sourceHeight, left, top, sourceHeightOffset);
+	}
+}
+void dsr::draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+  int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	if (targetHeight && sourceHeight && targetA && sourceA) {
+		imageImpl_drawHigher(*targetHeight, *sourceHeight, *targetA, *sourceA, left, top, sourceHeightOffset);
+	}
+}
+void dsr::draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+  ImageRgbaU8& targetB, const ImageRgbaU8& sourceB, int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	if (targetHeight && sourceHeight && targetA && sourceA && targetB && sourceB) {
+		imageImpl_drawHigher(*targetHeight, *sourceHeight, *targetA, *sourceA, *targetB, *sourceB, left, top, sourceHeightOffset);
+	}
+}
+void dsr::draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight, int32_t left, int32_t top, float sourceHeightOffset) {
+	if (targetHeight && sourceHeight) {
+		imageImpl_drawHigher(*targetHeight, *sourceHeight, left, top, sourceHeightOffset);
+	}
+}
+void dsr::draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+  int32_t left, int32_t top, float sourceHeightOffset) {
+	if (targetHeight && sourceHeight && targetA && sourceA) {
+		imageImpl_drawHigher(*targetHeight, *sourceHeight, *targetA, *sourceA, left, top, sourceHeightOffset);
+	}
+}
+void dsr::draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight, ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+  ImageRgbaU8& targetB, const ImageRgbaU8& sourceB, int32_t left, int32_t top, float sourceHeightOffset) {
+	if (targetHeight && sourceHeight && targetA && sourceA && targetB && sourceB) {
+		imageImpl_drawHigher(*targetHeight, *sourceHeight, *targetA, *sourceA, *targetB, *sourceB, left, top, sourceHeightOffset);
+	}
+}
+
+
+// -------------------------------- Resize --------------------------------
+
+
+static ImageRgbaU8Impl resizeToValue(const ImageRgbaU8Impl& image, Sampler interpolation, int32_t newWidth, int32_t newHeight) {
+	ImageRgbaU8Impl resultImage = ImageRgbaU8Impl(newWidth, newHeight);
+	imageImpl_resizeToTarget(resultImage, image, interpolation == Sampler::Linear); // TODO: Pass Sampler to internal API if more modes are created
+	return resultImage;
+}
+
+static OrderedImageRgbaU8 resizeToRef(const ImageRgbaU8Impl& image, Sampler interpolation, int32_t newWidth, int32_t newHeight) {
+	OrderedImageRgbaU8 resultImage = image_create_RgbaU8(newWidth, newHeight);
+	imageImpl_resizeToTarget(*resultImage, image, interpolation == Sampler::Linear); // TODO: Pass Sampler to internal API if more modes are created
+	return resultImage;
+}
+
+OrderedImageRgbaU8 dsr::filter_resize(const ImageRgbaU8& image, Sampler interpolation, int32_t newWidth, int32_t newHeight) {
+	if (image) {
+		return resizeToRef(*image, interpolation, newWidth, newHeight);
+	} else {
+		return OrderedImageRgbaU8(); // Null gives null
+	}
+}
+
+void dsr::filter_blockMagnify(ImageRgbaU8& target, const ImageRgbaU8& source, int pixelWidth, int pixelHeight) {
+	if (target && source) {
+		imageImpl_blockMagnify(*target, *source, pixelWidth, pixelHeight);
+	}
+}
+
+// Get RGBA sub-images without allocating heads on the heap
+static const ImageRgbaU8Impl getView(const ImageRgbaU8Impl& image, const IRect& region) {
+	assert(region.left() >= 0); assert(region.top() >= 0); assert(region.width() >= 1); assert(region.height() >= 1);
+	assert(region.right() <= image.width); assert(region.bottom() <= image.height);
+	intptr_t newOffset = image.startOffset + (region.left() * image.pixelSize) + (region.top() * image.stride);
+	return ImageRgbaU8Impl(region.width(), region.height(), image.stride, image.buffer, newOffset, image.packOrder);
+}
+
+OrderedImageRgbaU8 dsr::filter_resize3x3(const ImageRgbaU8& image, Sampler interpolation, int newWidth, int newHeight, int leftBorder, int topBorder, int rightBorder, int bottomBorder) {
+	if (image) {
+		// Get source dimensions
+		int sourceWidth = image->width;
+		int sourceHeight = image->height;
+
+		// Limit borders to a place near the center while leaving at least 2x2 pixels at the center for bilinear interpolation
+		int maxLeftBorder = std::min(sourceWidth, newWidth) / 2 - 1;
+		int maxTopBorder = std::min(sourceHeight, newHeight) / 2 - 1;
+		int maxRightBorder = maxLeftBorder;
+		int maxBottomBorder = maxTopBorder;
+		if (leftBorder > maxLeftBorder) leftBorder = maxLeftBorder;
+		if (topBorder > maxTopBorder) topBorder = maxTopBorder;
+		if (rightBorder > maxRightBorder) rightBorder = maxRightBorder;
+		if (bottomBorder > maxBottomBorder) bottomBorder = maxBottomBorder;
+		if (leftBorder < 0) leftBorder = 0;
+		if (topBorder < 0) topBorder = 0;
+		if (rightBorder < 0) rightBorder = 0;
+		if (bottomBorder < 0) bottomBorder = 0;
+
+		// Combine dimensions
+		// L_R T_B
+		int leftRightBorder = leftBorder + rightBorder;
+		int topBottomBorder = topBorder + bottomBorder;
+		// _C_
+		int targetCenterWidth = newWidth - leftRightBorder;
+		int targetCenterHeight = newHeight - topBottomBorder;
+		// LC_ RC_
+		int targetLeftAndCenter = newWidth - rightBorder;
+		int targetTopAndCenter = newHeight - bottomBorder;
+		// _C_
+		int sourceCenterWidth = sourceWidth - leftRightBorder;
+		int sourceCenterHeight = sourceHeight - topBottomBorder;
+		// LC_ RC_
+		int sourceLeftAndCenter = sourceWidth - rightBorder;
+		int sourceTopAndCenter = sourceHeight - bottomBorder;
+
+		// Allocate target image
+		OrderedImageRgbaU8 result = image_create_RgbaU8(newWidth, newHeight);
+		ImageRgbaU8Impl* target = result.get();
+
+		// Draw corners
+		if (leftBorder > 0 && topBorder > 0) {
+			imageImpl_drawCopy(*target, getView(*image, IRect(0, 0, leftBorder, topBorder)), 0, 0);
+		}
+		if (rightBorder > 0 && topBorder > 0) {
+			imageImpl_drawCopy(*target, getView(*image, IRect(sourceLeftAndCenter, 0, rightBorder, topBorder)), targetLeftAndCenter, 0);
+		}
+		if (leftBorder > 0 && bottomBorder > 0) {
+			imageImpl_drawCopy(*target, getView(*image, IRect(0, sourceTopAndCenter, leftBorder, bottomBorder)), 0, targetTopAndCenter);
+		}
+		if (rightBorder > 0 && bottomBorder > 0) {
+			imageImpl_drawCopy(*target, getView(*image, IRect(sourceLeftAndCenter, sourceTopAndCenter, rightBorder, bottomBorder)), targetLeftAndCenter, targetTopAndCenter);
+		}
+		// Resize and draw edges
+		if (targetCenterHeight > 0) {
+			if (leftBorder > 0) {
+				ImageRgbaU8Impl edgeSource = getView(*image, IRect(0, topBorder, leftBorder, sourceCenterHeight));
+				ImageRgbaU8Impl stretchedEdge = resizeToValue(edgeSource, interpolation, leftBorder, targetCenterHeight);
+				imageImpl_drawCopy(*target, stretchedEdge, 0, topBorder);
+			}
+			if (rightBorder > 0) {
+				ImageRgbaU8Impl edgeSource = getView(*image, IRect(sourceLeftAndCenter, topBorder, rightBorder, sourceCenterHeight));
+				ImageRgbaU8Impl stretchedEdge = resizeToValue(edgeSource, interpolation, rightBorder, targetCenterHeight);
+				imageImpl_drawCopy(*target, stretchedEdge, targetLeftAndCenter, topBorder);
+			}
+		}
+		if (targetCenterWidth > 0) {
+			if (topBorder > 0) {
+				ImageRgbaU8Impl edgeSource = getView(*image, IRect(leftBorder, 0, sourceCenterWidth, topBorder));
+				ImageRgbaU8Impl stretchedEdge = resizeToValue(edgeSource, interpolation, targetCenterWidth, topBorder);
+				imageImpl_drawCopy(*target, stretchedEdge, leftBorder, 0);
+			}
+			if (bottomBorder > 0) {
+				ImageRgbaU8Impl edgeSource = getView(*image, IRect(leftBorder, sourceTopAndCenter, sourceCenterWidth, bottomBorder));
+				ImageRgbaU8Impl stretchedEdge = resizeToValue(edgeSource, interpolation, targetCenterWidth, bottomBorder);
+				imageImpl_drawCopy(*target, stretchedEdge, leftBorder, targetTopAndCenter);
+			}
+		}
+		// Resize and draw center
+		if (targetCenterWidth > 0 && targetCenterHeight > 0) {
+			ImageRgbaU8Impl centerSource = getView(*image, IRect(leftBorder, topBorder, sourceCenterWidth, sourceCenterHeight));
+			ImageRgbaU8Impl stretchedCenter = resizeToValue(centerSource, interpolation, targetCenterWidth, targetCenterHeight);
+			imageImpl_drawCopy(*target, stretchedCenter, leftBorder, topBorder);
+		}
+		return result;
+	} else {
+		return OrderedImageRgbaU8(); // Null gives null
+	}
+
+}
+
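
A small usage sketch for the nine-patch resize above. The 32x32 source, the 200x120 target size and the 8 pixel borders are arbitrary values chosen for illustration:

	using namespace dsr;
	// Generate a gradient source image using the lambda-based generator from this API.
	ImageRgbaU8 panel = filter_generateRgbaU8(32, 32, [](int x, int y) -> ColorRgbaI32 {
		return ColorRgbaI32(x * 8, y * 8, 128, 255);
	});
	// Stretch the center and edges to 200x120 while keeping the 8 pixel corners unscaled.
	OrderedImageRgbaU8 scaledPanel = filter_resize3x3(panel, Sampler::Linear, 200, 120, 8, 8, 8, 8);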

+ 172 - 0
Source/DFPSR/api/drawAPI.h

@@ -0,0 +1,172 @@
+
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_API_DRAW
+#define DFPSR_API_DRAW
+
+#include "types.h"
+#include <functional>
+
+namespace dsr {
+
+
+
+// ------------------------ Below is untested! ------------------------ //
+
+
+
+// Drawing shapes
+	void draw_rectangle(ImageU8& image, const IRect& bound, int color);
+	void draw_rectangle(ImageF32& image, const IRect& bound, float color);
+	void draw_rectangle(ImageRgbaU8& image, const IRect& bound, const ColorRgbaI32& color);
+
+	void draw_line(ImageU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color);
+	void draw_line(ImageF32& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color);
+	void draw_line(ImageRgbaU8& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color);
+
+// Drawing images
+	// Draw an image to another image
+	//   All image types can draw to their own format
+	//   All image types can draw to RgbaU8
+	//   All monochrome types can draw to each other
+	//   The source and target images can be sub-images from the same atlas but only if the sub-regions are not overlapping
+	void draw_copy(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageU8& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageU16& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageF32& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageRgbaU8& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageRgbaU8& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageRgbaU8& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageU8& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageU8& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageU16& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageU16& target, const ImageF32& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageF32& target, const ImageU8& source, int32_t left = 0, int32_t top = 0);
+	void draw_copy(ImageF32& target, const ImageU16& source, int32_t left = 0, int32_t top = 0);
+	// Draw one RGBA image to another using alpha filtering
+	//   Target alpha does not affect RGB blending, in case it contains padding for opaque targets
+	//   If you really want to draw to a transparent layer, this method should not be used
+	void draw_alphaFilter(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0);
+	// Draw one RGBA image to another using the alpha channel as height
+	//   sourceAlphaOffset is added to non-zero heights from source alpha
+	//   Writes each source pixel whose alpha value is greater than the target's
+	//   Zero alpha can be used as a mask, because no source value can be below zero in unsigned color formats
+	void draw_maxAlpha(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0, int32_t sourceAlphaOffset = 0);
+
+	// Draw between multiple images using a height buffer
+	//   Each source pixel is drawn where the source height's pixel exceeds the target height's pixel
+	//   Including the source height pixel, so that the drawn object occludes the following objects below it
+	//   Can be used for isometric top-down and side-scroller games with heavy graphical effects
+	//   A usually contains color pixels
+	//   B usually contains surface normals for light effects
+	// 16-bit integer depth buffers:
+	//   Fully deterministic overlaps
+	//   Source height zero is treated as invisible even if sourceHeightOffset adds to the height
+	//   It's recommended to let the target height buffer use 32768 as height zero to allow placing things on negative locations
+	void draw_higher(
+		ImageU16& targetHeight, const ImageU16& sourceHeight,
+		int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0
+	);
+	void draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight,
+		ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+		int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0
+	);
+	void draw_higher(ImageU16& targetHeight, const ImageU16& sourceHeight,
+		ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+		ImageRgbaU8& targetB, const ImageRgbaU8& sourceB,
+		int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0
+	);
+	// 32-bit floating-point depth buffers
+	//   Source height negative infinity is used for invisible pixels
+	//     Negative infinity is expressed using -std::numeric_limits<float>::infinity() from <limits>
+	//   Same pixel size as in ImageRgbaU8 to make aligned reading easier when used together with colors
+	//   Floats allow doing light calculations directly without having to perform expensive conversions from integers
+	void draw_higher(
+		ImageF32& targetHeight, const ImageF32& sourceHeight,
+		int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0
+	);
+	void draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight,
+		ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+		int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0
+	);
+	void draw_higher(ImageF32& targetHeight, const ImageF32& sourceHeight,
+		ImageRgbaU8& targetA, const ImageRgbaU8& sourceA,
+		ImageRgbaU8& targetB, const ImageRgbaU8& sourceB,
+		int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0
+	);
+
+	// Draw one RGBA image to another using alpha clipping
+	//   Source is solid where alpha is greater than treshold, which can be used for animations
+	void draw_alphaClip(ImageRgbaU8& target, const ImageRgbaU8& source, int32_t left = 0, int32_t top = 0, int32_t treshold = 127);
+	// Draw a uniform color using a grayscale silhouette as the alpha channel
+	void draw_silhouette(ImageRgbaU8& target, const ImageU8& silhouette, const ColorRgbaI32& color, int32_t left = 0, int32_t top = 0);
+
+// TODO: Make a separate filter API
+
+// Image resizing
+	// The interpolation argument
+	//   Bi-linear interpolation is used with Sampler::Linear
+	//   Nearest neighbor sampling is used otherwise
+	// Create a stretched version of the source image with the given dimensions and default RGBA pack order
+	OrderedImageRgbaU8 filter_resize(const ImageRgbaU8& image, Sampler interpolation, int32_t newWidth, int32_t newHeight);
+	// Resize with borders of pixels that aren't stretched
+	//   Using a larger border than half the size will be clamped, so that the center keeps at least 2x2 pixels
+	OrderedImageRgbaU8 filter_resize3x3(const ImageRgbaU8& image, Sampler interpolation, int newWidth, int newHeight, int leftBorder, int topBorder, int rightBorder, int bottomBorder);
+	// The source image is scaled by pixelWidth and pixelHeight from the upper left corner
+	// If source is too small, transparent black pixels (0, 0, 0, 0) fill the outside
+	// If source is too large, partial pixels will be cropped away completely and replaced by the black border
+	// Letting the images have the same pack order and be aligned to 16-bytes will increase speed
+	void filter_blockMagnify(ImageRgbaU8& target, const ImageRgbaU8& source, int pixelWidth, int pixelHeight);
+
+// Image generation and filtering
+//   Create new images from Lambda expressions
+//   Useful for pre-generating images for reuse, reference implementations and fast prototyping
+	// Lambda expressions for generating integer images
+	using ImageGenRgbaU8 = std::function<ColorRgbaI32(int, int)>;
+	using ImageGenF32 = std::function<float(int, int)>;
+	// In-place image generation to an existing image
+	//   The pixel at the upper left corner gets (startX, startY) as x and y arguments to the function
+	void filter_mapRgbaU8(ImageRgbaU8& target, const ImageGenRgbaU8& lambda, int startX = 0, int startY = 0);
+	void filter_mapF32(ImageF32& target, const ImageGenF32& lambda, int startX = 0, int startY = 0);
+	// A simpler image generation that constructs the image as a result
+	// Example:
+	//     int width = 64;
+	//     int height = 64;
+	//     ImageRgbaU8 fadeImage = filter_generateRgbaU8(width, height, [](int x, int y)->ColorRgbaI32 {
+	//         return ColorRgbaI32(x * 4, y * 4, 0, 255);
+	//     });
+	//     ImageRgbaU8 brighterImage = filter_generateRgbaU8(width, height, [fadeImage](int x, int y)->ColorRgbaI32 {
+	//	       ColorRgbaI32 source = image_readPixel_clamp(fadeImage, x, y);
+	//	       return ColorRgbaI32(source.red * 2, source.green * 2, source.blue * 2, source.alpha);
+	//     });
+	ImageRgbaU8 filter_generateRgbaU8(int width, int height, const ImageGenRgbaU8& lambda, int startX = 0, int startY = 0);
+	ImageF32 filter_generateF32(int width, int height, const ImageGenF32& lambda, int startX = 0, int startY = 0);
+
+	// TODO: Document
+	ImageRgbaU8 filter_mulColorRgb(const ImageRgbaU8& inputImage, const ColorRgbI32& color);
+	ImageRgbaU8 filter_mulColorRgba(const ImageRgbaU8& inputImage, const ColorRgbaI32& color);
+}
+
+#endif
+
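
A sketch of the floating-point draw_higher variant documented above. Image sizes, heights and positions are arbitrary, <limits> is assumed to be included for the infinity constant, and image_create_RgbaU8 comes from the image API:

	using namespace dsr;
	// A scene where every pixel starts at negative infinity, meaning nothing has been drawn yet.
	ImageF32 sceneHeight = filter_generateF32(320, 200, [](int, int) -> float {
		return -std::numeric_limits<float>::infinity();
	});
	ImageRgbaU8 sceneColor = image_create_RgbaU8(320, 200);
	// A flat 32x32 white sprite standing at height 10.
	ImageF32 spriteHeight = filter_generateF32(32, 32, [](int, int) -> float { return 10.0f; });
	ImageRgbaU8 spriteColor = filter_generateRgbaU8(32, 32, [](int, int) -> ColorRgbaI32 {
		return ColorRgbaI32(255, 255, 255, 255);
	});
	// Only pixels where the sprite's height exceeds the scene's height are written, including the height itself.
	draw_higher(sceneHeight, spriteHeight, sceneColor, spriteColor, 100, 80, 0.0f);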

+ 231 - 0
Source/DFPSR/api/guiAPI.cpp

@@ -0,0 +1,231 @@
+
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#define DFPSR_INTERNAL_ACCESS
+
+#include "guiAPI.h"
+#include "timeAPI.h"
+#include "../gui/DsrWindow.h"
+
+using namespace dsr;
+
+// To be implemented outside of the core framework
+std::shared_ptr<dsr::BackendWindow> createBackendWindow(const dsr::String& title, int width, int height);
+
+#define MUST_EXIST(OBJECT, METHOD) if (OBJECT.get() == nullptr) { throwError("The " #OBJECT " handle was null in " #METHOD "\n"); }
+
+Window dsr::window_create(const String& title, int32_t width, int32_t height) {
+	if (width < 1) { width = 1; }
+	if (height < 1) { height = 1; }
+	return std::make_shared<DsrWindow>(createBackendWindow(title, width, height));
+}
+
+Window dsr::window_create_fullscreen(const String& title) {
+	return std::make_shared<DsrWindow>(createBackendWindow(title, 0, 0));
+}
+
+bool dsr::window_exists(const Window& window) {
+	return window.get() != nullptr;
+}
+
+bool dsr::component_exists(const Component& component) {
+	return component.get() != nullptr;
+}
+
+void dsr::window_loadInterfaceFromString(const Window& window, const String& content) {
+	MUST_EXIST(window, window_loadInterfaceFromString);
+	window->loadInterfaceFromString(content);
+}
+
+void dsr::window_loadInterfaceFromFile(const Window& window, const ReadableString& filename) {
+	MUST_EXIST(window, window_loadInterfaceFromFile);
+	window->loadInterfaceFromString(string_load(filename));
+}
+
+String dsr::window_saveInterfaceToString(const Window& window) {
+	MUST_EXIST(window, window_saveInterfaceToString);
+	return window->saveInterfaceToString();
+}
+
+Component dsr::window_getRoot(const Window& window) {
+	MUST_EXIST(window, window_getRoot);
+	return window->getRootComponent();
+}
+
+Component dsr::window_findComponentByName(const Window& window, const ReadableString& name, bool mustExist) {
+	MUST_EXIST(window, window_findComponentByName);
+	return window->findComponentByName(name);
+}
+
+Component dsr::window_findComponentByNameAndIndex(const Window& window, const ReadableString& name, int index, bool mustExist) {
+	MUST_EXIST(window, window_findComponentByNameAndIndex);
+	return window->findComponentByNameAndIndex(name, index);
+}
+
+bool dsr::window_executeEvents(const Window& window) {
+	MUST_EXIST(window, window_executeEvents);
+	return window->executeEvents();
+}
+void dsr::window_drawComponents(const Window& window) {
+	MUST_EXIST(window, window_drawComponents);
+	window->drawComponents();
+}
+void dsr::window_showCanvas(const Window& window) {
+	MUST_EXIST(window, window_showCanvas);
+	window->showCanvas();
+}
+
+int dsr::window_getPixelScale(const Window& window) {
+	MUST_EXIST(window, window_getPixelScale);
+	return window->getPixelScale();
+}
+void dsr::window_setPixelScale(const Window& window, int scale) {
+	MUST_EXIST(window, window_setPixelScale);
+	window->setPixelScale(scale);
+}
+
+void dsr::window_setFullScreen(const Window& window, bool enabled) {
+	MUST_EXIST(window, window_setFullScreen);
+	window->setFullScreen(enabled);
+}
+bool dsr::window_isFullScreen(const Window& window) {
+	MUST_EXIST(window, window_isFullScreen);
+	return window->isFullScreen();
+}
+
+AlignedImageRgbaU8 dsr::window_getCanvas(const Window& window) {
+	MUST_EXIST(window, window_getCanvas);
+	return window->getCanvas();
+}
+AlignedImageF32 dsr::window_getDepthBuffer(const Window& window) {
+	MUST_EXIST(window, window_getDepthBuffer);
+	return window->getDepthBuffer();
+}
+
+int dsr::window_getCanvasWidth(const Window& window) {
+	MUST_EXIST(window, window_getCanvasWidth);
+	return window->getCanvasWidth();
+}
+int dsr::window_getCanvasHeight(const Window& window) {
+	MUST_EXIST(window, window_getCanvasHeight);
+	return window->getCanvasHeight();
+}
+int dsr::window_getInnerWidth(const Window& window) {
+	MUST_EXIST(window, window_getInnerWidth);
+	return window->getInnerWidth();
+}
+int dsr::window_getInnerHeight(const Window& window) {
+	MUST_EXIST(window, window_getInnerHeight);
+	return window->getInnerHeight();
+}
+
+void dsr::window_setMouseEvent(const Window& window, const MouseCallback& mouseEvent) {
+	MUST_EXIST(window, window_setMouseEvent);
+	window->windowMouseEvent() = mouseEvent;
+}
+void dsr::window_setKeyboardEvent(const Window& window, const KeyboardCallback& keyboardEvent) {
+	MUST_EXIST(window, window_setKeyboardEvent);
+	window->windowKeyboardEvent() = keyboardEvent;
+}
+void dsr::window_setCloseEvent(const Window& window, const EmptyCallback& closeEvent) {
+	MUST_EXIST(window, window_setCloseEvent);
+	window->windowCloseEvent() = closeEvent;
+}
+
+void dsr::component_setPressedEvent(const Component& component, const EmptyCallback& event) {
+	MUST_EXIST(component, component_setPressedEvent);
+	component->pressedEvent() = event;
+}
+void dsr::component_setMouseDownEvent(const Component& component, const MouseCallback& mouseEvent) {
+	MUST_EXIST(component, component_setMouseDownEvent);
+	component->mouseDownEvent() = mouseEvent;
+}
+void dsr::component_setMouseUpEvent(const Component& component, const MouseCallback& mouseEvent) {
+	MUST_EXIST(component, component_setMouseUpEvent);
+	component->mouseUpEvent() = mouseEvent;
+}
+void dsr::component_setMouseMoveEvent(const Component& component, const MouseCallback& mouseEvent) {
+	MUST_EXIST(component, component_setMouseMoveEvent);
+	component->mouseMoveEvent() = mouseEvent;
+}
+void dsr::component_setMouseScrollEvent(const Component& component, const MouseCallback& mouseEvent) {
+	MUST_EXIST(component, component_setMouseScrollEvent);
+	component->mouseScrollEvent() = mouseEvent;
+}
+void dsr::component_setKeyDownEvent(const Component& component, const KeyboardCallback& keyboardEvent) {
+	MUST_EXIST(component, component_setKeyDownEvent);
+	component->keyDownEvent() = keyboardEvent;
+}
+void dsr::component_setKeyUpEvent(const Component& component, const KeyboardCallback& keyboardEvent) {
+	MUST_EXIST(component, component_setKeyUpEvent);
+	component->keyUpEvent() = keyboardEvent;
+}
+void dsr::component_setKeyTypeEvent(const Component& component, const KeyboardCallback& keyboardEvent) {
+	MUST_EXIST(component, component_setKeyTypeEvent);
+	component->keyTypeEvent() = keyboardEvent;
+}
+
+bool dsr::component_hasProperty(const Component& component, const ReadableString& propertyName) {
+	MUST_EXIST(component, component_hasProperty);
+	Persistent* target = component->findAttribute(propertyName);
+	return target != nullptr;
+}
+
+ReturnCode dsr::component_setProperty(const Component& component, const ReadableString& propertyName, const ReadableString& value, bool mustAssign) {
+	MUST_EXIST(component, component_setProperty_string);
+	Persistent* target = component->findAttribute(propertyName);
+	if (target == nullptr) {
+		if (mustAssign) {
+			throwError("component_setProperty_string: ", propertyName, " in ", component->getClassName(), " could not be found.\n");
+		}
+		return ReturnCode::KeyNotFound;
+	} else {
+		if (target->assignValue(value)) {
+			return ReturnCode::Good;
+		} else {
+			if (mustAssign) {
+				throwError("component_setProperty_string: The input ", value, " could not be assigned to property ", propertyName, " because of incorrect format.\n");
+			}
+			return ReturnCode::ParsingFailure;
+		}
+	}
+}
+String dsr::component_getProperty(const Component& component, const ReadableString& propertyName, bool mustExist) {
+	MUST_EXIST(component, component_getProperty_string);
+	Persistent* target = component->findAttribute(propertyName);
+	if (target == nullptr) {
+		if (mustExist) {
+			throwError("component_getProperty_string: ", propertyName, " in ", component->getClassName(), " could not be found.\n");
+		}
+		return U"";
+	} else {
+		// Serialize only the found attribute instead of the whole component (assuming Persistent provides toString for serialization).
+		return target->toString();
+	}
+}
+
+void dsr::window_applyTheme(const Window& window, const VisualTheme& theme) {
+	MUST_EXIST(window, window_applyTheme);
+	MUST_EXIST(theme, window_applyTheme);
+	window->applyTheme(theme);
+}

+ 231 - 0
Source/DFPSR/api/guiAPI.h

@@ -0,0 +1,231 @@
+
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_API_GUI
+#define DFPSR_API_GUI
+
+#include "types.h"
+#include "../base/text.h"
+#include "../gui/InputEvent.h"
+
+// createBackendWindow should be implemented outside of the core framework
+//   Choose one of the window backends in SDK/native to compile and link with your application.
+// std::shared_ptr<dsr::BackendWindow> createBackendWindow(const dsr::String& title, int width, int height);
+
+// Constness on handles doesn't propagate to any inner types.
+//   "const Component&" only means that the writable Component handle can be created from a sub-expression,
+//   because the location where the handle is stored cannot be overwritten.
+//   This allows getting a component by name and using it as an argument without storing it in a variable.
+
+namespace dsr {
+
+// Window Construction
+	// A portable window will be wrapped around a native window backend supplied from a call to createBackendWindow.
+	Window window_create(const dsr::String& title, int32_t width, int32_t height);
+	Window window_create_fullscreen(const dsr::String& title);
+	// Returns true iff the window exists
+	bool window_exists(const Window& window);
+
+// Layout files
+	// Loading an interface by parsing a layout file's content.
+	//   Raises an exception if window doesn't exist.
+	void window_loadInterfaceFromString(const Window& window, const dsr::String& content);
+	// Loading an interface by parsing a layout file loaded by filename.
+	//   Raises an exception if window doesn't exist.
+	void window_loadInterfaceFromFile(const Window& window, const dsr::ReadableString& filename);
+	// Serialize the interface back into a layout string, which can then be written to a file.
+	//   Raises an exception if window doesn't exist.
+	String window_saveInterfaceToString(const Window& window);
+
+// Find a component
+	// Get the component being stored directly in the window
+	//   Raises an exception if window doesn't exist.
+	//   There should always exist a root component where more components can be added recursively
+	Component window_getRoot(const Window& window);
+	// TODO: Document
+	//   Raises an exception if window doesn't exist.
+	Component window_findComponentByName(const Window& window, const ReadableString& name, bool mustExist = true);
+	// TODO: Document
+	//   Raises an exception if window doesn't exist.
+	Component window_findComponentByNameAndIndex(const Window& window, const ReadableString& name, int index, bool mustExist = true);
+
+// The three main events to run in a loop at the end of the main function
+	// If the window's event queue contained any resize of the window, the canvas and the depth buffer will be replaced during this call.
+	//   New calls to window_getCanvas and window_getDepthBuffer are required after this call, because the window could be given a new size.
+	//   Returns true iff any events were processed.
+	//   By calling window_executeEvents in a loop while it returns false, one can wait for input.
+	//     Sleeping for 10 milliseconds is quite responsive while saving lots of battery.
+	//     Only redrawing the regions that have changed (dirty rectangles et cetera) can save even more power.
+	//   Example:
+	//     while (!window_executeEvents(window)) {
+	//         time_sleepSeconds(0.01);
+	//     }
+	//     window_drawComponents(window);
+	//     window_showCanvas(window);
+	bool window_executeEvents(const Window& window);
+	// Draw the root component and its children to the canvas.
+	//   Raises an exception if window doesn't exist.
+	void window_drawComponents(const Window& window);
+	// Show the canvas.
+	//   Raises an exception if window doesn't exist.
+	void window_showCanvas(const Window& window);
+
+// Pixel upscaling
+//   The pixel-scale is the width and height of each canvas pixel when displayed on the window.
+//     The color and depth buffers from window_getCanvas and window_getDepthBuffer will shrink to fit each pixel within the window.
+//     Partial pixels at right and bottom sides are replaced with black padding,
+//     so that mouse coordinates can be divided and multiplied evenly during scale conversion.
+//   If using a higher value than the default 1, upscaling will be done during the call to window_showCanvas.
+//     The backend window will receive the upscaled image to display over the whole window.
+	// Gets the current pixel scale.
+	//   Raises an exception if window doesn't exist.
+	int window_getPixelScale(const Window& window);
+	// Assigns a new pixel scale.
+	//   Raises an exception if window doesn't exist.
+	//   Just like when handling a window resize, this will replace the canvas and depth buffer.
+	//     Any old handles to canvas and depth buffer will become useless, so fetch new image handles from the window to avoid black flickering.
+	void window_setPixelScale(const Window& window, int scale);
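+	//   Example (a minimal sketch; the scale of 2 is arbitrary):
+	//     window_setPixelScale(window, 2);
+	//     AlignedImageRgbaU8 canvas = window_getCanvas(window); // Fetch a new canvas handle after rescaling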
+
+// Full screen
+	void window_setFullScreen(const Window& window, bool enabled);
+	bool window_isFullScreen(const Window& window);
+
+// Fetch the window's surfaces
+//   Always get the canvas (and any depth buffer) after calls to window_executeEvents or window_setPixelScale,
+//   because these may replace the canvas with a new size.
+//   TODO: Prevent the backend window from freeing the memory while the canvas is still being used.
+	// Get the canvas/color-buffer.
+	//   Raises an exception if window doesn't exist.
+	//   The canvas size will be smaller when pixelScale is larger, because the canvas has to fit inside the window.
+	AlignedImageRgbaU8 window_getCanvas(const Window& window);
+	// Get the depth buffer allocated on demand.
+	//   Raises an exception if window doesn't exist.
+	//   If you never call this method, no depth buffer will be allocated.
+	//   If you call it at the same time as window_getCanvas, it will have the same size as the canvas.
+	AlignedImageF32 window_getDepthBuffer(const Window& window);
+
+// The low-resolution canvas and depth buffer dimensions match the coordinates of mouse events given to components,
+// because components are drawn to the canvas and therefore affected by upscaling.
+	// Returns the width of the canvas.
+	//   Raises an exception if window doesn't exist.
+	int window_getCanvasWidth(const Window& window);
+	// Returns the height of the canvas.
+	//   Raises an exception if window doesn't exist.
+	int window_getCanvasHeight(const Window& window);
+// The window's inner dimensions are relative to mouse events received directly from the window at full pixel resolution.
+	// Returns the inner width of the window.
+	//   Raises an exception if window doesn't exist.
+	int window_getInnerWidth(const Window& window);
+	// Returns the inner height of the window.
+	//   Raises an exception if window doesn't exist.
+	int window_getInnerHeight(const Window& window);
+
+// Direct window events
+	// Listen to window mouse events
+	//   Raises an exception if window doesn't exist.
+	//   event.mouseEventType gives the type of mouse event
+	//   event.key gives the key being used
+	void window_setMouseEvent(const Window& window, const MouseCallback& mouseEvent);
+	// Listen to window keyboard events
+	//   Raises an exception if window doesn't exist.
+	//   event.keyboardEventType gives the type of keyboard event
+	//   event.dsrKey gives the key being used
+	void window_setKeyboardEvent(const Window& window, const KeyboardCallback& keyboardEvent);
+	// Listen to the window close event
+	//   Raises an exception if window doesn't exist.
+	void window_setCloseEvent(const Window& window, const EmptyCallback& closeEvent);
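+	//   Example (a sketch assuming that EmptyCallback can be constructed from a capturing lambda):
+	//     bool running = true;
+	//     window_setCloseEvent(window, [&running]() {
+	//         running = false;
+	//     });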
+
+// Components
+	// Returns true iff the component exists
+	bool component_exists(const Component& component);
+
+	// Returns true iff propertyName exists in component
+	bool component_hasProperty(const Component& component, const ReadableString& propertyName);
+	// Sets a property found using propertyName in component to the value serialized in value.
+	//   Raises an exception if component doesn't exist.
+	//   Matching of propertyName is case insensitive.
+	//   Returns ReturnCode::Good if assigned.
+	//   Returns ReturnCode::KeyNotFound if propertyName wasn't found in component,
+	//     unless mustAssign forces an exception instead.
+	//   Returns ReturnCode::ParsingFailure if propertyName was found but value couldn't be converted to its type,
+	//     unless mustAssign forces an exception instead.
+	ReturnCode component_setProperty(const Component& component, const ReadableString& propertyName, const ReadableString& value, bool mustAssign = true);
+	// Returns a property found using propertyName in component.
+	//   Raises an exception if component doesn't exist.
+	//   Matching of propertyName is case insensitive.
+	//   If mustExist is true
+	//     Raises an exception when propertyName isn't found.
+	//   If mustExist is false
+	//     Returns an empty string when propertyName isn't found.
+	String component_getProperty(const Component& component, const ReadableString& propertyName, bool mustExist = true);
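+	//   Example (a sketch; the component name "mainPanel" and property name "Visible" are hypothetical):
+	//     Component panel = window_findComponentByName(window, U"mainPanel");
+	//     if (component_hasProperty(panel, U"Visible")) {
+	//         component_setProperty(panel, U"Visible", U"0", false);
+	//         String stored = component_getProperty(panel, U"Visible");
+	//     }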
+
+// Component events
+	// The main activation of clickable components.
+	//   The pressed callback doesn't take any arguments, because it should be possible to trigger it from multiple input methods.
+	void component_setPressedEvent(const Component& component, const EmptyCallback& event);
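+	//   Example (a sketch; the component name "okButton" is hypothetical and assumes that EmptyCallback accepts a lambda):
+	//     Component okButton = window_findComponentByName(window, U"okButton");
+	//     component_setPressedEvent(okButton, []() {
+	//         // React to the press here
+	//     });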
+	// Mouse-down activates when any mouse button is pressed down within the component
+	//   Raises an exception if component doesn't exist.
+	//   The component itself decides if the mouse is inside, which allows rounded components to act according to their true shape.
+	void component_setMouseDownEvent(const Component& component, const MouseCallback& mouseEvent);
+	// Mouse-up should eventually follow after a mouse-down event, to ensure basic transaction safety.
+	//   Raises an exception if component doesn't exist.
+	//   * Even if the mouse is dragged outside of the component or window before being lifted.
+	//   * Even if the component is removed from the window while the button is pressed,
+	//     the button press will keep it alive long enough to receive the mouse-up event before being freed.
+	void component_setMouseUpEvent(const Component& component, const MouseCallback& mouseEvent);
+	// Mouse-move is triggered when the mouse moves over the component.
+	//   Raises an exception if component doesn't exist.
+	//   * When pressed down inside of the component, dragging outside the component or even window will
+	//     continue to give mouse-move events to the callback.
+	//   * If dragging left of or above the window, event.position may contain negative coordinates.
+	void component_setMouseMoveEvent(const Component& component, const MouseCallback& mouseEvent);
+	// Mouse-scroll is triggered by scrolling in any direction.
+	//   Raises an exception if component doesn't exist.
+	//   Currently only supporting MouseKeyEnum::ScrollUp and MouseKeyEnum::ScrollDown as values in event.key.
+	void component_setMouseScrollEvent(const Component& component, const MouseCallback& mouseEvent);
+	// Key-down only comes when a button is pressed down. (No repeat)
+	//   Raises an exception if component doesn't exist.
+	//   The backend window is responsible for filtering away any false positives for down events caused by key repetition.
+	void component_setKeyDownEvent(const Component& component, const KeyboardCallback& keyboardEvent);
+	// Key-up only comes when a button is lifted after being pressed. (No repeat)
+	//   Raises an exception if component doesn't exist.
+	void component_setKeyUpEvent(const Component& component, const KeyboardCallback& keyboardEvent);
+	// Key-type comes both when a key is pressed, and then repeatedly without having to lift the key.
+	//   Raises an exception if component doesn't exist.
+	//   There's usually a second's delay before quickly repeating.
+	void component_setKeyTypeEvent(const Component& component, const KeyboardCallback& keyboardEvent);
+
+// Theme
+	// Apply the given theme recursively to all components in the window's interface.
+	//   Raises an exception if window or component doesn't exist.
+	//   Components will gather what they can from the theme and save it for later.
+	//   After changing a theme that is already used by an interface, or after adding new components,
+	//     apply the theme again to ensure that all changes take effect.
+	//     TODO: Automate this process by storing a reference to the theme in each component and checking for updates before drawing.
+	void window_applyTheme(const Window& window, const VisualTheme& theme);
+
+}
+
+#endif
+

+ 755 - 0
Source/DFPSR/api/imageAPI.cpp

@@ -0,0 +1,755 @@
+
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#define DFPSR_INTERNAL_ACCESS
+
+#include <limits>
+#include <cassert>
+#include "imageAPI.h"
+#include "drawAPI.h"
+#include "../image/draw.h"
+#include "../image/internal/imageInternal.h"
+#include "../image/stbImage/stbImageWrapper.h"
+#include "../math/scalar.h"
+
+using namespace dsr;
+
+// Constructors
+AlignedImageU8 dsr::image_create_U8(int32_t width, int32_t height) {
+	return AlignedImageU8(std::make_shared<ImageU8Impl>(width, height));
+}
+AlignedImageU16 dsr::image_create_U16(int32_t width, int32_t height) {
+	return AlignedImageU16(std::make_shared<ImageU16Impl>(width, height));
+}
+AlignedImageF32 dsr::image_create_F32(int32_t width, int32_t height) {
+	return AlignedImageF32(std::make_shared<ImageF32Impl>(width, height));
+}
+OrderedImageRgbaU8 dsr::image_create_RgbaU8(int32_t width, int32_t height) {
+	return OrderedImageRgbaU8(std::make_shared<ImageRgbaU8Impl>(width, height));
+}
+AlignedImageRgbaU8 dsr::image_create_RgbaU8_native(int32_t width, int32_t height, PackOrderIndex packOrderIndex) {
+	return AlignedImageRgbaU8(std::make_shared<ImageRgbaU8Impl>(width, height, packOrderIndex));
+}
+
+// Loading and saving
+OrderedImageRgbaU8 dsr::image_load_RgbaU8(const String& filename, bool mustExist) {
+	return image_stb_load_RgbaU8(filename, mustExist);
+}
+bool dsr::image_save(const ImageRgbaU8 &image, const String& filename) {
+	return image_stb_save(image, filename);
+}
+
+#define GET_OPTIONAL(SOURCE,DEFAULT) \
+	if (image) { \
+		return SOURCE; \
+	} else { \
+		return DEFAULT; \
+	}
+
+// Properties
+int32_t dsr::image_getWidth(const ImageU8& image)     { GET_OPTIONAL(image->width, 0); }
+int32_t dsr::image_getWidth(const ImageU16& image)    { GET_OPTIONAL(image->width, 0); }
+int32_t dsr::image_getWidth(const ImageF32& image)    { GET_OPTIONAL(image->width, 0); }
+int32_t dsr::image_getWidth(const ImageRgbaU8& image) { GET_OPTIONAL(image->width, 0); }
+
+int32_t dsr::image_getHeight(const ImageU8& image)     { GET_OPTIONAL(image->height, 0); }
+int32_t dsr::image_getHeight(const ImageU16& image)    { GET_OPTIONAL(image->height, 0); }
+int32_t dsr::image_getHeight(const ImageF32& image)    { GET_OPTIONAL(image->height, 0); }
+int32_t dsr::image_getHeight(const ImageRgbaU8& image) { GET_OPTIONAL(image->height, 0); }
+
+int32_t dsr::image_getStride(const ImageU8& image)     { GET_OPTIONAL(image->stride, 0); }
+int32_t dsr::image_getStride(const ImageU16& image)    { GET_OPTIONAL(image->stride, 0); }
+int32_t dsr::image_getStride(const ImageF32& image)    { GET_OPTIONAL(image->stride, 0); }
+int32_t dsr::image_getStride(const ImageRgbaU8& image) { GET_OPTIONAL(image->stride, 0); }
+
+IRect dsr::image_getBound(const ImageU8& image)     { GET_OPTIONAL(IRect(0, 0, image->width, image->height), IRect()); }
+IRect dsr::image_getBound(const ImageU16& image)    { GET_OPTIONAL(IRect(0, 0, image->width, image->height), IRect()); }
+IRect dsr::image_getBound(const ImageF32& image)    { GET_OPTIONAL(IRect(0, 0, image->width, image->height), IRect()); }
+IRect dsr::image_getBound(const ImageRgbaU8& image) { GET_OPTIONAL(IRect(0, 0, image->width, image->height), IRect()); }
+
+bool dsr::image_exists(const ImageU8& image)     { GET_OPTIONAL(true, false); }
+bool dsr::image_exists(const ImageU16& image)    { GET_OPTIONAL(true, false); }
+bool dsr::image_exists(const ImageF32& image)    { GET_OPTIONAL(true, false); }
+bool dsr::image_exists(const ImageRgbaU8& image) { GET_OPTIONAL(true, false); }
+
+int dsr::image_useCount(const ImageU8& image)     { return image.use_count(); }
+int dsr::image_useCount(const ImageU16& image)    { return image.use_count(); }
+int dsr::image_useCount(const ImageF32& image)    { return image.use_count(); }
+int dsr::image_useCount(const ImageRgbaU8& image) { return image.use_count(); }
+
+PackOrderIndex dsr::image_getPackOrderIndex(const ImageRgbaU8& image) {
+	GET_OPTIONAL(image->packOrder.packOrderIndex, PackOrderIndex::RGBA);
+}
+
+// Texture
+void dsr::image_generatePyramid(ImageRgbaU8& image) {
+	if (image) {
+		image->generatePyramid();
+	}
+}
+bool dsr::image_hasPyramid(const ImageRgbaU8& image) {
+	GET_OPTIONAL(image->texture.hasMipBuffer(), false);
+}
+bool dsr::image_isTexture(const ImageRgbaU8& image) {
+	GET_OPTIONAL(image->isTexture(), false);
+}
+
+// Pixel access
+#define INSIDE_XY (x >= 0 && x < image->width && y >= 0 && y < image->height)
+#define CLAMP_XY \
+	if (x < 0) { x = 0; } \
+	if (y < 0) { y = 0; } \
+	if (x >= image->width) { x = image->width - 1; } \
+	if (y >= image->height) { y = image->height - 1; }
+#define TILE_XY \
+	x = signedModulo(x, image->width); \
+	y = signedModulo(y, image->height);
+void dsr::image_writePixel(ImageU8& image, int32_t x, int32_t y, int32_t color) {
+	if (image) {
+		if (INSIDE_XY) {
+			if (color < 0) { color = 0; }
+			if (color > 255) { color = 255; }
+			ImageU8Impl::writePixel_unsafe(*image, x, y, color);
+		}
+	}
+}
+void dsr::image_writePixel(ImageU16& image, int32_t x, int32_t y, int32_t color) {
+	if (image) {
+		if (INSIDE_XY) {
+			if (color < 0) { color = 0; }
+			if (color > 65535) { color = 65535; }
+			ImageU16Impl::writePixel_unsafe(*image, x, y, color);
+		}
+	}
+}
+void dsr::image_writePixel(ImageF32& image, int32_t x, int32_t y, float color) {
+	if (image) {
+		if (INSIDE_XY) {
+			ImageF32Impl::writePixel_unsafe(*image, x, y, color);
+		}
+	}
+}
+void dsr::image_writePixel(ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& color) {
+	if (image) {
+		if (INSIDE_XY) {
+			ImageRgbaU8Impl::writePixel_unsafe(*image, x, y, image->packRgba(color.saturate()));
+		}
+	}
+}
+int32_t dsr::image_readPixel_border(const ImageU8& image, int32_t x, int32_t y, int32_t border) {
+	if (image) {
+		if (INSIDE_XY) {
+			return ImageU8Impl::readPixel_unsafe(*image, x, y);
+		} else {
+			return border;
+		}
+	} else {
+		return 0;
+	}
+}
+int32_t dsr::image_readPixel_border(const ImageU16& image, int32_t x, int32_t y, int32_t border) {
+	if (image) {
+		if (INSIDE_XY) {
+			return ImageU16Impl::readPixel_unsafe(*image, x, y);
+		} else {
+			return border;
+		}
+	} else {
+		return 0;
+	}
+}
+float dsr::image_readPixel_border(const ImageF32& image, int32_t x, int32_t y, float border) {
+	if (image) {
+		if (INSIDE_XY) {
+			return ImageF32Impl::readPixel_unsafe(*image, x, y);
+		} else {
+			return border;
+		}
+	} else {
+		return 0.0f;
+	}
+}
+ColorRgbaI32 dsr::image_readPixel_border(const ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& border) {
+	if (image) {
+		if (INSIDE_XY) {
+			return image->unpackRgba(ImageRgbaU8Impl::readPixel_unsafe(*image, x, y));
+		} else {
+			return border; // Can return unsaturated colors as error codes
+		}
+	} else {
+		return ColorRgbaI32();
+	}
+}
+uint8_t dsr::image_readPixel_clamp(const ImageU8& image, int32_t x, int32_t y) {
+	if (image) {
+		CLAMP_XY;
+		return ImageU8Impl::readPixel_unsafe(*image, x, y);
+	} else {
+		return 0;
+	}
+}
+uint16_t dsr::image_readPixel_clamp(const ImageU16& image, int32_t x, int32_t y) {
+	if (image) {
+		CLAMP_XY;
+		return ImageU16Impl::readPixel_unsafe(*image, x, y);
+	} else {
+		return 0;
+	}
+}
+float dsr::image_readPixel_clamp(const ImageF32& image, int32_t x, int32_t y) {
+	if (image) {
+		CLAMP_XY;
+		return ImageF32Impl::readPixel_unsafe(*image, x, y);
+	} else {
+		return 0.0f;
+	}
+}
+ColorRgbaI32 dsr::image_readPixel_clamp(const ImageRgbaU8& image, int32_t x, int32_t y) {
+	if (image) {
+		CLAMP_XY;
+		return image->unpackRgba(ImageRgbaU8Impl::readPixel_unsafe(*image, x, y));
+	} else {
+		return ColorRgbaI32();
+	}
+}
+uint8_t dsr::image_readPixel_tile(const ImageU8& image, int32_t x, int32_t y) {
+	if (image) {
+		TILE_XY;
+		return ImageU8Impl::readPixel_unsafe(*image, x, y);
+	} else {
+		return 0;
+	}
+}
+uint16_t dsr::image_readPixel_tile(const ImageU16& image, int32_t x, int32_t y) {
+	if (image) {
+		TILE_XY;
+		return ImageU16Impl::readPixel_unsafe(*image, x, y);
+	} else {
+		return 0;
+	}
+}
+float dsr::image_readPixel_tile(const ImageF32& image, int32_t x, int32_t y) {
+	if (image) {
+		TILE_XY;
+		return ImageF32Impl::readPixel_unsafe(*image, x, y);
+	} else {
+		return 0.0f;
+	}
+}
+ColorRgbaI32 dsr::image_readPixel_tile(const ImageRgbaU8& image, int32_t x, int32_t y) {
+	if (image) {
+		TILE_XY;
+		return image->unpackRgba(ImageRgbaU8Impl::readPixel_unsafe(*image, x, y));
+	} else {
+		return ColorRgbaI32();
+	}
+}
+
+void dsr::image_fill(ImageU8& image, int32_t color) {
+	if (image) {
+		imageImpl_draw_solidRectangle(*image, imageInternal::getBound(*image), color);
+	}
+}
+void dsr::image_fill(ImageU16& image, int32_t color) {
+	if (image) {
+		imageImpl_draw_solidRectangle(*image, imageInternal::getBound(*image), color);
+	}
+}
+void dsr::image_fill(ImageF32& image, float color) {
+	if (image) {
+		imageImpl_draw_solidRectangle(*image, imageInternal::getBound(*image), color);
+	}
+}
+void dsr::image_fill(ImageRgbaU8& image, const ColorRgbaI32& color) {
+	if (image) {
+		imageImpl_draw_solidRectangle(*image, imageInternal::getBound(*image), color);
+	}
+}
+
+AlignedImageU8 dsr::image_clone(const ImageU8& image) {
+	if (image) {
+		AlignedImageU8 result = image_create_U8(image->width, image->height);
+		draw_copy(result, image);
+		return result;
+	} else {
+		return AlignedImageU8(); // Null gives null
+	}
+}
+AlignedImageU16 dsr::image_clone(const ImageU16& image) {
+	if (image) {
+		AlignedImageU16 result = image_create_U16(image->width, image->height);
+		draw_copy(result, image);
+		return result;
+	} else {
+		return AlignedImageU16(); // Null gives null
+	}
+}
+AlignedImageF32 dsr::image_clone(const ImageF32& image) {
+	if (image) {
+		AlignedImageF32 result = image_create_F32(image->width, image->height);
+		draw_copy(result, image);
+		return result;
+	} else {
+		return AlignedImageF32(); // Null gives null
+	}
+}
+OrderedImageRgbaU8 dsr::image_clone(const ImageRgbaU8& image) {
+	if (image) {
+		OrderedImageRgbaU8 result = image_create_RgbaU8(image->width, image->height);
+		draw_copy(result, image);
+		return result;
+	} else {
+		return OrderedImageRgbaU8(); // Null gives null
+	}
+}
+ImageRgbaU8 dsr::image_removePadding(const ImageRgbaU8& image) {
+	if (image) {
+		// TODO: Copy the implementation of getWithoutPadding, to create ImageRgbaU8 directly
+		return ImageRgbaU8(image->getWithoutPadding());
+	} else {
+		return ImageRgbaU8(); // Null gives null
+	}
+}
+
+AlignedImageU8 dsr::image_get_red(const ImageRgbaU8& image) {
+	if (image) {
+		// TODO: Copy the implementation of getChannel, to create ImageU8 directly
+		return AlignedImageU8(image->getChannel(image->packOrder.redIndex));
+	} else {
+		return AlignedImageU8(); // Null gives null
+	}
+}
+AlignedImageU8 dsr::image_get_green(const ImageRgbaU8& image) {
+	if (image) {
+		// TODO: Copy the implementation of getChannel, to create ImageU8 directly
+		return AlignedImageU8(image->getChannel(image->packOrder.greenIndex));
+	} else {
+		return AlignedImageU8(); // Null gives null
+	}
+}
+AlignedImageU8 dsr::image_get_blue(const ImageRgbaU8& image) {
+	if (image) {
+		// TODO: Copy the implementation of getChannel, to create ImageU8 directly
+		return AlignedImageU8(image->getChannel(image->packOrder.blueIndex));
+	} else {
+		return AlignedImageU8(); // Null gives null
+	}
+}
+AlignedImageU8 dsr::image_get_alpha(const ImageRgbaU8& image) {
+	if (image) {
+		// TODO: Copy the implementation of getChannel, to create ImageU8 directly
+		return AlignedImageU8(image->getChannel(image->packOrder.alphaIndex));
+	} else {
+		return AlignedImageU8(); // Null gives null
+	}
+}
+
+static inline int32_t readColor(const ImageU8& channel, int x, int y) {
+	return ImageU8Impl::readPixel_unsafe(*channel, x, y);
+}
+static inline int32_t readColor(int32_t color, int x, int y) {
+	return color;
+}
+template <typename R, typename G, typename B, typename A>
+static OrderedImageRgbaU8 pack_template(int32_t width, int32_t height, R red, G green, B blue, A alpha) {
+	OrderedImageRgbaU8 result = image_create_RgbaU8(width, height);
+	for (int y = 0; y < height; y++) {
+		for (int x = 0; x < width; x++) {
+			ColorRgbaI32 color = ColorRgbaI32(readColor(red, x, y), readColor(green, x, y), readColor(blue, x, y), readColor(alpha, x, y));
+			image_writePixel(result, x, y, color);
+		}
+	}
+	return result;
+}
+
+#define PACK1(FIRST) \
+if (FIRST) { \
+	return pack_template(FIRST->width, FIRST->height, red, green, blue, alpha); \
+} else { \
+	return OrderedImageRgbaU8(); \
+}
+OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, int32_t green, int32_t blue, int32_t alpha) { PACK1(red); }
+OrderedImageRgbaU8 dsr::image_pack(int32_t red, const ImageU8& green, int32_t blue, int32_t alpha) { PACK1(green); }
+OrderedImageRgbaU8 dsr::image_pack(int32_t red, int32_t green, const ImageU8& blue, int32_t alpha) { PACK1(blue); }
+OrderedImageRgbaU8 dsr::image_pack(int32_t red, int32_t green, int32_t blue, const ImageU8& alpha) { PACK1(alpha); }
+
+#define PACK2(FIRST,SECOND) \
+if (FIRST && SECOND) { \
+	if (FIRST->width != SECOND->width || FIRST->height != SECOND->height) { \
+		throwError("Cannot pack two channels of different size!\n"); \
+	} \
+	return pack_template(FIRST->width, FIRST->height, red, green, blue, alpha); \
+} else { \
+	return OrderedImageRgbaU8(); \
+}
+OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, int32_t alpha) { PACK2(red,green) }
+OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, int32_t alpha) { PACK2(red,blue) }
+OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, int32_t green, int32_t blue, const ImageU8& alpha) { PACK2(red,alpha) }
+OrderedImageRgbaU8 dsr::image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, int32_t alpha) { PACK2(green,blue) }
+OrderedImageRgbaU8 dsr::image_pack(int32_t red, const ImageU8& green, int32_t blue, const ImageU8& alpha) { PACK2(green,alpha) }
+OrderedImageRgbaU8 dsr::image_pack(int32_t red, int32_t green, const ImageU8& blue, const ImageU8& alpha) { PACK2(blue,alpha) }
+
+#define PACK3(FIRST,SECOND,THIRD) \
+if (FIRST && SECOND && THIRD) { \
+	if (FIRST->width != SECOND->width || FIRST->height != SECOND->height \
+	 || FIRST->width != THIRD->width || FIRST->height != THIRD->height) { \
+		throwError("Cannot pack three channels of different size!\n"); \
+	} \
+	return pack_template(FIRST->width, FIRST->height, red, green, blue, alpha); \
+} else { \
+	return OrderedImageRgbaU8(); \
+}
+OrderedImageRgbaU8 dsr::image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha) { PACK3(green, blue, alpha) }
+OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, const ImageU8& alpha) { PACK3(red, blue, alpha) }
+OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, const ImageU8& alpha) { PACK3(red, green, alpha) }
+OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, int32_t alpha) { PACK3(red, green, blue) }
+
+// TODO: Optimize using zip instructions
+#define PACK4(FIRST,SECOND,THIRD,FOURTH) \
+if (FIRST && SECOND && THIRD && FOURTH) { \
+	if (FIRST->width != SECOND->width || FIRST->height != SECOND->height \
+	 || FIRST->width != THIRD->width || FIRST->height != THIRD->height \
+ 	 || FIRST->width != FOURTH->width || FIRST->height != FOURTH->height) { \
+		throwError("Cannot pack four channels of different size!\n"); \
+	} \
+	return pack_template(FIRST->width, FIRST->height, red, green, blue, alpha); \
+} else { \
+	return OrderedImageRgbaU8(); \
+}
+OrderedImageRgbaU8 dsr::image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha) { PACK4(red, green, blue, alpha) }
+
+// Convert a grayscale image into an ascii image using the given alphabet.
+//   Since all 256 characters cannot be in the alphabet, the encoding is lossy.
+// Each line is stored within <> to prevent text editors from removing meaningful white space.
+// The first line contains the given alphabet as a gradient from black to white.
+// Preconditions:
+//   alphabet may not contain extended ascii, non-printable characters, '\', '"', '>' or linebreaks
+//   width <= stride
+//   size of the image buffer = height * stride
+// Example alphabet: " .,-_':;!+~=^?*abcdefghijklmnopqrstuvwxyz()[]{}|&@#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+String dsr::image_toAscii(const ImageU8& image, const String& alphabet) {
+	if (!image_exists(image)) {
+		return U"null";
+	}
+	String result;
+	char alphabetMap[256];
+	int alphabetSize = alphabet.length();
+	int width = image_getWidth(image);
+	int height = image_getHeight(image);
+	result.reserve(((width + 4) * height) + alphabetSize + 5);
+	double scale = (double)(alphabetSize - 1) / 255.0;
+	double output = 0.49;
+	for (int rawValue = 0; rawValue < 256; rawValue++) {
+		int charIndex = (int)output;
+		if (charIndex < 0) charIndex = 0;
+		if (charIndex > alphabetSize - 1) charIndex = alphabetSize - 1;
+		alphabetMap[rawValue] = alphabet[charIndex];
+		output += scale;
+	}
+	result.appendChar(U'<');
+	for (int charIndex = 0; charIndex < alphabetSize; charIndex++) {
+		result.appendChar(alphabet[charIndex]);
+	}
+	result.append(U">\n");
+	for (int y = 0; y < height; y++) {
+		result.appendChar(U'<');
+		for (int x = 0; x < width; x++) {
+			result.appendChar(alphabetMap[image_readPixel_clamp(image, x, y)]);
+		}
+		result.append(U">\n");
+	}
+	return result;
+}
+
+String dsr::image_toAscii(const ImageU8& image) {
+	return image_toAscii(image, U" .,-_':;!+~=^?*abcdefghijklmnopqrstuvwxyz()[]{}|&@#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+}
+
+// Create a monochrome image from the ascii image in content.
+// String is used instead of ReadableString, so that the content can be decompressed from 8-bit strings in the binary.
+AlignedImageU8 dsr::image_fromAscii(const String& content) {
+	char alphabet[128];
+	uint8_t alphabetMap[128];
+	char current;
+	int x = 0;
+	int y = -1;
+	int width = 0;
+	int height = 0;
+	int alphabetSize = 0;
+	int contentSize = content.length();
+	bool quoted = false;
+	int i = 0;
+	while (i < contentSize && ((current = content[i]) != '\0')) {
+		if (quoted) {
+			if (y < 0) {
+				if (current == '>') {
+					quoted = false;
+					y = 0;
+				} else if (alphabetSize < 128) {
+					alphabet[alphabetSize] = current;
+					alphabetSize++;
+				}
+			} else {
+				if (current == '>') {
+					quoted = false;
+					if (width < x) width = x;
+					y++;
+					x = 0;
+				} else {
+					x++;
+				}
+			}
+		} else if (current == '<') {
+			quoted = true;
+		}
+		i++;
+	}
+	if (alphabetSize < 2) {
+		throwError(U"The alphabet needs at least two characters!");
+	}
+	height = y;
+	if (x > 0) {
+		throwError(U"All ascii images must end with a linebreak!");
+	}
+	for (i = 0; i < 128; i++) {
+		alphabetMap[i] = 0;
+	}
+	for (i = 0; i < alphabetSize; i++) {
+		int code = (int)(alphabet[i]);
+		if (code < 32 || code > 126) {
+			throwError(U"Ascii image contained non-printable standard ascii! Use codes 32 to 126.");
+		}
+		if (alphabetMap[code] > 0) {
+			throwError(U"A character in the alphabet was used more than once!");
+		}
+		int value = (int)(((double)i) * (255.0f / ((double)(alphabetSize - 1))));
+		if (value < 0) value = 0;
+		if (value > 255) value = 255;
+		alphabetMap[code] = value;
+	}
+	if (width <= 0 || height <= 0) {
+		throwError(U"An ascii image had zero dimensions!");
+	}
+	AlignedImageU8 result = image_create_U8(width, height);
+	x = 0; y = -1;
+	quoted = false;
+	i = 0;
+	while (i < contentSize && ((current = content[i]) != '\0')) {
+		if (quoted) {
+			if (current == '>') {
+				quoted = false;
+				if (y >= 0 && x != width) {
+					throwError(U"Lines in the ascii image do not have the same lengths.");
+				}
+				y++;
+				x = 0;
+			} else if (y >= 0) {
+				int code = (int)current;
+				if (code < 0) code = 0;
+				if (code > 127) code = 127;
+				image_writePixel(result, x, y, alphabetMap[code]);
+				x++;
+			}
+		} else if (current == '<') {
+			quoted = true;
+		}
+		i++;
+	}
+	return result;
+}
+
+// TODO: Try to recycle the memory to reduce overhead from heap allocating heads pointing to existing buffers
+template <typename IMAGE_TYPE, typename VALUE_TYPE>
+static inline IMAGE_TYPE subImage_template(const IMAGE_TYPE& image, const IRect& region) {
+	if (image) {
+		IRect cut = IRect::cut(imageInternal::getBound(*image), region);
+		if (cut.hasArea()) {
+			intptr_t newOffset = image->startOffset + (cut.left() * image->pixelSize) + (cut.top() * image->stride);
+			return IMAGE_TYPE(std::make_shared<VALUE_TYPE>(cut.width(), cut.height(), image->stride, image->buffer, newOffset));
+		}
+	}
+	return IMAGE_TYPE(); // Null if there are no overlapping pixels
+}
+
+template <typename IMAGE_TYPE, typename VALUE_TYPE>
+static inline IMAGE_TYPE subImage_template_withPackOrder(const IMAGE_TYPE& image, const IRect& region) {
+	if (image) {
+		IRect cut = IRect::cut(imageInternal::getBound(*image), region);
+		if (cut.hasArea()) {
+			intptr_t newOffset = image->startOffset + (cut.left() * image->pixelSize) + (cut.top() * image->stride);
+			return IMAGE_TYPE(std::make_shared<VALUE_TYPE>(cut.width(), cut.height(), image->stride, image->buffer, newOffset, image->packOrder));
+		}
+	}
+	return IMAGE_TYPE(); // Null if there are no overlapping pixels
+}
+
+ImageU8 dsr::image_getSubImage(const ImageU8& image, const IRect& region) {
+	return subImage_template<ImageU8, ImageU8Impl>(image, region);
+}
+
+ImageU16 dsr::image_getSubImage(const ImageU16& image, const IRect& region) {
+	return subImage_template<ImageU16, ImageU16Impl>(image, region);
+}
+
+ImageF32 dsr::image_getSubImage(const ImageF32& image, const IRect& region) {
+	return subImage_template<ImageF32, ImageF32Impl>(image, region);
+}
+
+ImageRgbaU8 dsr::image_getSubImage(const ImageRgbaU8& image, const IRect& region) {
+	return subImage_template_withPackOrder<ImageRgbaU8, ImageRgbaU8Impl>(image, region);
+}
+
+template <typename IMAGE_TYPE, int CHANNELS, typename ELEMENT_TYPE>
+ELEMENT_TYPE maxDifference_template(const IMAGE_TYPE& imageA, const IMAGE_TYPE& imageB) {
+	if (imageA.width != imageB.width || imageA.height != imageB.height) {
+		return std::numeric_limits<ELEMENT_TYPE>::max();
+	} else {
+		ELEMENT_TYPE maxDifference = 0;
+		const SafePointer<ELEMENT_TYPE> rowDataA = imageInternal::getSafeData<ELEMENT_TYPE>(imageA);
+		const SafePointer<ELEMENT_TYPE> rowDataB = imageInternal::getSafeData<ELEMENT_TYPE>(imageB);
+		for (int y = 0; y < imageA.height; y++) {
+			const SafePointer<ELEMENT_TYPE> pixelDataA = rowDataA;
+			const SafePointer<ELEMENT_TYPE> pixelDataB = rowDataB;
+			for (int x = 0; x < imageA.width; x++) {
+				for (int c = 0; c < CHANNELS; c++) {
+					ELEMENT_TYPE difference = absDiff(*pixelDataA, *pixelDataB);
+					if (difference > maxDifference) {
+						maxDifference = difference;
+					}
+					pixelDataA += 1;
+					pixelDataB += 1;
+				}
+			}
+			rowDataA.increaseBytes(imageA.stride);
+			rowDataB.increaseBytes(imageB.stride);
+		}
+		return maxDifference;
+	}
+}
+uint8_t dsr::image_maxDifference(const ImageU8& imageA, const ImageU8& imageB) {
+	if (imageA && imageB) {
+		return maxDifference_template<ImageU8Impl, 1, uint8_t>(*imageA, *imageB);
+	} else {
+		// infinity() is zero for integer types, so return the maximum possible difference on failure instead.
+		return std::numeric_limits<uint8_t>::max();
+	}
+}
+uint16_t dsr::image_maxDifference(const ImageU16& imageA, const ImageU16& imageB) {
+	if (imageA && imageB) {
+		return maxDifference_template<ImageU16Impl, 1, uint16_t>(*imageA, *imageB);
+	} else {
+		return std::numeric_limits<uint16_t>::max();
+	}
+}
+float dsr::image_maxDifference(const ImageF32& imageA, const ImageF32& imageB) {
+	if (imageA && imageB) {
+		return maxDifference_template<ImageF32Impl, 1, float>(*imageA, *imageB);
+	} else {
+		return std::numeric_limits<float>::infinity();
+	}
+}
+uint8_t dsr::image_maxDifference(const ImageRgbaU8& imageA, const ImageRgbaU8& imageB) {
+	if (imageA && imageB) {
+		return maxDifference_template<ImageRgbaU8Impl, 4, uint8_t>(*imageA, *imageB);
+	} else {
+		return std::numeric_limits<uint8_t>::max();
+	}
+}
+
+SafePointer<uint8_t> dsr::image_getSafePointer(const ImageU8& image, int rowIndex) {
+	if (image) {
+		return imageInternal::getSafeData<uint8_t>(image.get(), rowIndex);
+	} else {
+		return SafePointer<uint8_t>();
+	}
+}
+SafePointer<uint16_t> dsr::image_getSafePointer(const ImageU16& image, int rowIndex) {
+	if (image) {
+		return imageInternal::getSafeData<uint16_t>(image.get(), rowIndex);
+	} else {
+		return SafePointer<uint16_t>();
+	}
+}
+SafePointer<float> dsr::image_getSafePointer(const ImageF32& image, int rowIndex) {
+	if (image) {
+		return imageInternal::getSafeData<float>(image.get(), rowIndex);
+	} else {
+		return SafePointer<float>();
+	}
+}
+SafePointer<uint32_t> dsr::image_getSafePointer(const ImageRgbaU8& image, int rowIndex) {
+	if (image) {
+		return imageInternal::getSafeData<uint32_t>(image.get(), rowIndex);
+	} else {
+		return SafePointer<uint32_t>();
+	}
+}
+SafePointer<uint8_t> dsr::image_getSafePointer_channels(const ImageRgbaU8& image, int rowIndex) {
+	if (image) {
+		return imageInternal::getSafeData<uint8_t>(image.get(), rowIndex);
+	} else {
+		return SafePointer<uint8_t>();
+	}
+}
+
+void dsr::image_dangerous_replaceDestructor(ImageU8& image, const std::function<void(uint8_t *)>& newDestructor) {
+	if (image) { return image->buffer->replaceDestructor(newDestructor); }
+}
+void dsr::image_dangerous_replaceDestructor(ImageU16& image, const std::function<void(uint8_t *)>& newDestructor) {
+	if (image) { return image->buffer->replaceDestructor(newDestructor); }
+}
+void dsr::image_dangerous_replaceDestructor(ImageF32& image, const std::function<void(uint8_t *)>& newDestructor) {
+	if (image) { return image->buffer->replaceDestructor(newDestructor); }
+}
+void dsr::image_dangerous_replaceDestructor(ImageRgbaU8& image, const std::function<void(uint8_t *)>& newDestructor) {
+	if (image) { return image->buffer->replaceDestructor(newDestructor); }
+}
+
+uint8_t* dsr::image_dangerous_getData(ImageU8& image) {
+	if (image) {
+		return imageInternal::getSafeData<uint8_t>(*image).getUnsafe();
+	} else {
+		return nullptr;
+	}
+}
+uint8_t* dsr::image_dangerous_getData(ImageU16& image) {
+	if (image) {
+		return imageInternal::getSafeData<uint8_t>(*image).getUnsafe();
+	} else {
+		return nullptr;
+	}
+}
+uint8_t* dsr::image_dangerous_getData(ImageF32& image) {
+	if (image) {
+		return imageInternal::getSafeData<uint8_t>(*image).getUnsafe();
+	} else {
+		return nullptr;
+	}
+}
+uint8_t* dsr::image_dangerous_getData(ImageRgbaU8& image) {
+	if (image) {
+		return imageInternal::getSafeData<uint8_t>(*image).getUnsafe();
+	} else {
+		return nullptr;
+	}
+}

+ 228 - 0
Source/DFPSR/api/imageAPI.h

@@ -0,0 +1,228 @@
+
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_API_IMAGE
+#define DFPSR_API_IMAGE
+
+#include "types.h"
+#include "../base/SafePointer.h"
+
+namespace dsr {
+
+// Constructors
+	// Each row's start and stride are aligned to 16 bytes using padding at the end.
+	//   This allows in-place writing with aligned 16-byte SIMD vectors.
+	AlignedImageU8 image_create_U8(int32_t width, int32_t height);
+	AlignedImageU16 image_create_U16(int32_t width, int32_t height);
+	AlignedImageF32 image_create_F32(int32_t width, int32_t height);
+	OrderedImageRgbaU8 image_create_RgbaU8(int32_t width, int32_t height);
+	AlignedImageRgbaU8 image_create_RgbaU8_native(int32_t width, int32_t height, PackOrderIndex packOrderIndex);
+
+// Properties
+	// Returns image's width in pixels or 0 on null image
+	int32_t image_getWidth(const ImageU8& image);
+	int32_t image_getWidth(const ImageU16& image);
+	int32_t image_getWidth(const ImageF32& image);
+	int32_t image_getWidth(const ImageRgbaU8& image);
+	// Returns image's height in pixels or 0 on null image
+	int32_t image_getHeight(const ImageU8& image);
+	int32_t image_getHeight(const ImageU16& image);
+	int32_t image_getHeight(const ImageF32& image);
+	int32_t image_getHeight(const ImageRgbaU8& image);
+	// Returns image's stride in bytes or 0 on null image
+	//   Stride is the offset from the beginning of one row to another
+	//   May be larger than width times pixel size
+	//     * If padding is used to align with 16-bytes
+	//     * Or the buffer is shared with a larger image
+	int32_t image_getStride(const ImageU8& image);
+	int32_t image_getStride(const ImageU16& image);
+	int32_t image_getStride(const ImageF32& image);
+	int32_t image_getStride(const ImageRgbaU8& image);
+	// Get a rectangle from the image's dimensions with the top left corner set to (0, 0)
+	//   Useful for clipping to an image's bounds or subdividing space for a graphical user interface
+	IRect image_getBound(const ImageU8& image);
+	IRect image_getBound(const ImageU16& image);
+	IRect image_getBound(const ImageF32& image);
+	IRect image_getBound(const ImageRgbaU8& image);
+	// Returns false on null, true otherwise
+	bool image_exists(const ImageU8& image);
+	bool image_exists(const ImageU16& image);
+	bool image_exists(const ImageF32& image);
+	bool image_exists(const ImageRgbaU8& image);
+	// Returns the number of handles to the image
+	//   References to a handle don't count, only handles stored by value do
+	int image_useCount(const ImageU8& image);
+	int image_useCount(const ImageU16& image);
+	int image_useCount(const ImageF32& image);
+	int image_useCount(const ImageRgbaU8& image);
+	// Returns the image's pack order index
+	PackOrderIndex image_getPackOrderIndex(const ImageRgbaU8& image);
+
+// Texture
+	// TODO: A method for removing the pyramid
+	void image_generatePyramid(ImageRgbaU8& image);
+	bool image_hasPyramid(const ImageRgbaU8& image);
+	bool image_isTexture(const ImageRgbaU8& image);
+
+// Pixel access
+	// Write a pixel to an image.
+	//   Out of bound is ignored silently without writing.
+	//   Empty images will be ignored safely.
+	//   Packed is faster if the color can be packed in advance for multiple pixels or comes directly from an image of the same rgba order.
+	void image_writePixel(ImageU8& image, int32_t x, int32_t y, int32_t color); // Saturated to 0..255
+	void image_writePixel(ImageU16& image, int32_t x, int32_t y, int32_t color); // Saturated to 0..65535
+	void image_writePixel(ImageF32& image, int32_t x, int32_t y, float color);
+	void image_writePixel(ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& color); // Saturated to 0..255
+	// Read a pixel from an image.
+	//   Out of bound will return the border color.
+	//   Empty images will return zero.
+	int32_t image_readPixel_border(const ImageU8& image, int32_t x, int32_t y, int32_t border = 0); // Can have negative value as border
+	int32_t image_readPixel_border(const ImageU16& image, int32_t x, int32_t y, int32_t border = 0); // Can have negative value as border
+	float image_readPixel_border(const ImageF32& image, int32_t x, int32_t y, float border = 0.0f);
+	ColorRgbaI32 image_readPixel_border(const ImageRgbaU8& image, int32_t x, int32_t y, const ColorRgbaI32& border = ColorRgbaI32()); // Can have negative value as border
+	// Read a pixel from an image.
+	//   Out of bound will return the closest pixel.
+	//   Empty images will return zero.
+	uint8_t image_readPixel_clamp(const ImageU8& image, int32_t x, int32_t y);
+	uint16_t image_readPixel_clamp(const ImageU16& image, int32_t x, int32_t y);
+	float image_readPixel_clamp(const ImageF32& image, int32_t x, int32_t y);
+	ColorRgbaI32 image_readPixel_clamp(const ImageRgbaU8& image, int32_t x, int32_t y);
+	// Read a pixel from an image.
+	//   Out of bound will take the coordinates in modulo of the size.
+	//   Empty images will return zero.
+	uint8_t image_readPixel_tile(const ImageU8& image, int32_t x, int32_t y);
+	uint16_t image_readPixel_tile(const ImageU16& image, int32_t x, int32_t y);
+	float image_readPixel_tile(const ImageF32& image, int32_t x, int32_t y);
+	ColorRgbaI32 image_readPixel_tile(const ImageRgbaU8& image, int32_t x, int32_t y);
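+	//   Example (a minimal sketch of per-pixel access, drawing a horizontal gradient):
+	//     AlignedImageU8 gradient = image_create_U8(256, 64);
+	//     for (int32_t y = 0; y < image_getHeight(gradient); y++) {
+	//         for (int32_t x = 0; x < image_getWidth(gradient); x++) {
+	//             image_writePixel(gradient, x, y, x); // Saturated to 0..255 automatically
+	//         }
+	//     }
+	//     uint8_t center = image_readPixel_clamp(gradient, 128, 32);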
+
+
+
+// ------------------------ Below is untested! ------------------------ //
+
+
+
+// Loading and saving
+	OrderedImageRgbaU8 image_load_RgbaU8(const String& filename, bool mustExist = true);
+	bool image_save(const ImageRgbaU8 &image, const String& filename);
+
+// Fill all pixels with a uniform color
+	void image_fill(ImageU8& image, int32_t color);
+	void image_fill(ImageU16& image, int32_t color);
+	void image_fill(ImageF32& image, float color);
+	void image_fill(ImageRgbaU8& image, const ColorRgbaI32& color);
+
+// Clone
+	// Get a deep clone of an image's content while discarding any pack order, padding and texture pyramids
+	AlignedImageU8 image_clone(const ImageU8& image);
+	AlignedImageU16 image_clone(const ImageU16& image);
+	AlignedImageF32 image_clone(const ImageF32& image);
+	OrderedImageRgbaU8 image_clone(const ImageRgbaU8& image);
+	// Returns a copy of the image without any padding, which means that alignment cannot be guaranteed
+	//   Used when external image libraries don't accept padded rows
+	ImageRgbaU8 image_removePadding(const ImageRgbaU8& image);
+
+// Channel packing
+	// Extract one channel
+	AlignedImageU8 image_get_red(const ImageRgbaU8& image);
+	AlignedImageU8 image_get_green(const ImageRgbaU8& image);
+	AlignedImageU8 image_get_blue(const ImageRgbaU8& image);
+	AlignedImageU8 image_get_alpha(const ImageRgbaU8& image);
+	// Pack one channel
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, int32_t blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, int32_t blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, const ImageU8& blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, int32_t blue, const ImageU8& alpha);
+	// Pack two channels
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, int32_t blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, int32_t alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, int32_t blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(int32_t red, int32_t green, const ImageU8& blue, const ImageU8& alpha);
+	// Pack three channels
+	OrderedImageRgbaU8 image_pack(int32_t red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, int32_t green, const ImageU8& blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, int32_t blue, const ImageU8& alpha);
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, int32_t alpha);
+	// Pack four channels
+	OrderedImageRgbaU8 image_pack(const ImageU8& red, const ImageU8& green, const ImageU8& blue, const ImageU8& alpha);
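+	//   Example (a sketch that makes a hypothetical colorImage fully opaque by replacing its alpha channel):
+	//     AlignedImageU8 red = image_get_red(colorImage);
+	//     AlignedImageU8 green = image_get_green(colorImage);
+	//     AlignedImageU8 blue = image_get_blue(colorImage);
+	//     OrderedImageRgbaU8 opaque = image_pack(red, green, blue, 255);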
+
+// Ascii images
+	String image_toAscii(const ImageU8& image, const String &alphabet);
+	String image_toAscii(const ImageU8& image);
+	AlignedImageU8 image_fromAscii(const String &content);
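+	//   Example (a small sketch; the first quoted line "< x>" is the alphabet mapping ' ' to 0 and 'x' to 255,
+	//   assuming String can be constructed from an 8-bit literal):
+	//     AlignedImageU8 cross = image_fromAscii(
+	//         "< x>\n"
+	//         "<x x>\n"
+	//         "< x >\n"
+	//         "<x x>\n");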
+
+// Comparisons
+	// Get the maximum pixelwise difference between two images of the same format, or the highest possible value on failure
+	//   Useful for regression tests
+	uint8_t image_maxDifference(const ImageU8& imageA, const ImageU8& imageB);
+	uint16_t image_maxDifference(const ImageU16& imageA, const ImageU16& imageB);
+	float image_maxDifference(const ImageF32& imageA, const ImageF32& imageB);
+	uint8_t image_maxDifference(const ImageRgbaU8& imageA, const ImageRgbaU8& imageB);
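+	//   Example (a sketch of a regression check; "rendered" and "expected" are hypothetical images of the same format):
+	//     if (image_maxDifference(rendered, expected) > 2) {
+	//         // Report a regression here; the tolerance of 2 is arbitrary
+	//     }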
+
+// Sub-images are viewports to another image's data
+// TODO: Aligned sub-images that only take vertical sections using whole rows
+// TODO: Aligned sub-images that terminate with an error if the input rectangle isn't aligned
+//       Start must be 16-byte aligned, end must be same as the parent or also 16-byte aligned
+// TODO: Make an optional warning for not returning the desired dimensions when out of bound
+	// Get a sub-image sharing buffer and side-effects with the parent image
+	// Returns the overlapping region if out of bound
+	// Returns a null image if there are no overlapping pixels to return
+	ImageU8 image_getSubImage(const ImageU8& image, const IRect& region);
+	ImageU16 image_getSubImage(const ImageU16& image, const IRect& region);
+	ImageF32 image_getSubImage(const ImageF32& image, const IRect& region);
+	ImageRgbaU8 image_getSubImage(const ImageRgbaU8& image, const IRect& region);
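+	//   Example (a minimal sketch; "parentImage" is hypothetical and filling the view also modifies it):
+	//     ImageRgbaU8 view = image_getSubImage(parentImage, IRect(10, 10, 32, 32));
+	//     image_fill(view, ColorRgbaI32(255, 0, 0, 255)); // Assumes a red, green, blue, alpha constructor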
+
+// Bound-checked pointer access (relatively safe compared to a raw pointer)
+	// Returns a bound-checked pointer to the first byte at rowIndex
+	// Bound-checked safe-pointers are just as fast as raw pointers in release mode
+	// Warning! Bound-checked pointers are not reference counted, because that would be too slow for real-time graphics
+	SafePointer<uint8_t> image_getSafePointer(const ImageU8& image, int rowIndex = 0);
+	SafePointer<uint16_t> image_getSafePointer(const ImageU16& image, int rowIndex = 0);
+	SafePointer<float> image_getSafePointer(const ImageF32& image, int rowIndex = 0);
+	SafePointer<uint32_t> image_getSafePointer(const ImageRgbaU8& image, int rowIndex = 0);
+	// Get a pointer iterating over individual channels instead of whole pixels
+	SafePointer<uint8_t> image_getSafePointer_channels(const ImageRgbaU8& image, int rowIndex = 0);
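+	//   Example (a sketch that sums all pixels of a hypothetical grayImage row by row):
+	//     SafePointer<uint8_t> row = image_getSafePointer(grayImage);
+	//     int64_t sum = 0;
+	//     for (int32_t y = 0; y < image_getHeight(grayImage); y++) {
+	//         SafePointer<uint8_t> pixel = row;
+	//         for (int32_t x = 0; x < image_getWidth(grayImage); x++) {
+	//             sum += *pixel;
+	//             pixel += 1;
+	//         }
+	//         row.increaseBytes(image_getStride(grayImage));
+	//     }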
+
+// The dangerous image API
+// Use of these methods can be spotted using a search for "_dangerous_" in your code
+	// Replaces the destructor in image's buffer.
+	//   newDestructor is responsible for freeing the given data.
+	//   Use when the buffer's pointer is being sent to a function that promises to free the memory
+	//   For example: Creating buffers being wrapped as XLib images
+	void image_dangerous_replaceDestructor(ImageU8& image, const std::function<void(uint8_t *)>& newDestructor);
+	void image_dangerous_replaceDestructor(ImageU16& image, const std::function<void(uint8_t *)>& newDestructor);
+	void image_dangerous_replaceDestructor(ImageF32& image, const std::function<void(uint8_t *)>& newDestructor);
+	void image_dangerous_replaceDestructor(ImageRgbaU8& image, const std::function<void(uint8_t *)>& newDestructor);
+	// Returns a pointer to the image's pixels
+	// Warning! Reading elements larger than 8 bits will have lower and higher bytes stored based on local endianness
+	// Warning! Using bytes outside of the [0 .. stride * height - 1] range may cause crashes and undefined behaviour
+	// Warning! Using the pointer after the image's lifetime may cause crashes from trying to access freed memory
+	uint8_t* image_dangerous_getData(ImageU8& image);
+	uint8_t* image_dangerous_getData(ImageU16& image);
+	uint8_t* image_dangerous_getData(ImageF32& image);
+	uint8_t* image_dangerous_getData(ImageRgbaU8& image);
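+	// Example (illustrative sketch): handing the pixel buffer to an external API
+	//   uint8_t* pixels = image_dangerous_getData(image);
+	//   // 'pixels' stays valid only while 'image' keeps the buffer alive and spans stride * height bytes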
+}
+
+#endif

+ 1016 - 0
Source/DFPSR/api/mediaMachineAPI.cpp

@@ -0,0 +1,1016 @@
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#define DFPSR_INTERNAL_ACCESS
+
+#include "mediaMachineAPI.h"
+#include "../machine/VirtualMachine.h"
+#include "../machine/mediaFilters.h"
+#include "../api/imageAPI.h"
+
+namespace dsr {
+
+// Media Machine specification
+
+// Enumerating types
+static const DataType DataType_ImageU8 = 1;
+static const DataType DataType_ImageRgbaU8 = 2;
+static ReadableString getMediaTypeName(DataType type) {
+	switch(type) {
+		case DataType_FixedPoint:  return U"FixedPoint";
+		case DataType_ImageU8:     return U"ImageU8";
+		case DataType_ImageRgbaU8: return U"ImageRgbaU8";
+		default:                   return U"?";
+	}
+}
+
+class MediaMemory : public PlanarMemory {
+public:
+	MemoryPlane<FixedPoint> FixedPointMemory;
+	MemoryPlane<AlignedImageU8> AlignedImageU8Memory;
+	MemoryPlane<OrderedImageRgbaU8> OrderedImageRgbaU8Memory;
+	MediaMemory() : FixedPointMemory(1024), AlignedImageU8Memory(1024), OrderedImageRgbaU8Memory(512) {}
+	void store(int targetStackIndex, const VMA& sourceArg, int sourceFramePointer, DataType type) override {
+		switch(type) {
+			case DataType_FixedPoint:
+				if (sourceArg.argType == ArgumentType::Immediate) {
+					this->FixedPointMemory.accessByStackIndex(targetStackIndex) = sourceArg.value;
+				} else {
+					this->FixedPointMemory.accessByStackIndex(targetStackIndex) = this->FixedPointMemory.accessByGlobalIndex(sourceArg.value.getMantissa(), sourceFramePointer);
+				}
+			break;
+			case DataType_ImageU8:
+				this->AlignedImageU8Memory.accessByStackIndex(targetStackIndex) = this->AlignedImageU8Memory.accessByGlobalIndex(sourceArg.value.getMantissa(), sourceFramePointer);
+			break;
+			case DataType_ImageRgbaU8:
+				this->OrderedImageRgbaU8Memory.accessByStackIndex(targetStackIndex) = this->OrderedImageRgbaU8Memory.accessByGlobalIndex(sourceArg.value.getMantissa(), sourceFramePointer);
+			break;
+			default:
+				throwError("Storing element of unhandled type!\n");
+			break;
+		}
+	}
+	void load(int sourceStackIndex, const VMA& targetArg, int targetFramePointer, DataType type) override {
+		switch(type) {
+			case DataType_FixedPoint:
+				this->FixedPointMemory.accessByGlobalIndex(targetArg.value.getMantissa(), targetFramePointer) = this->FixedPointMemory.accessByStackIndex(sourceStackIndex);
+			break;
+			case DataType_ImageU8:
+				this->AlignedImageU8Memory.accessByGlobalIndex(targetArg.value.getMantissa(), targetFramePointer) = this->AlignedImageU8Memory.accessByStackIndex(sourceStackIndex);
+			break;
+			case DataType_ImageRgbaU8:
+				this->OrderedImageRgbaU8Memory.accessByGlobalIndex(targetArg.value.getMantissa(), targetFramePointer) = this->OrderedImageRgbaU8Memory.accessByStackIndex(sourceStackIndex);
+			break;
+			default:
+				throwError("Loading element of unhandled type!\n");
+			break;
+		}
+	}
+};
+
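+// Downcast the virtual machine's generic PlanarMemory reference to the media machine's typed memory planes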
+#define MEDIA_MEMORY ((MediaMemory&)memory)
+
+// Type definitions
+static const VMTypeDef mediaMachineTypes[] = {
+	VMTypeDef(U"FixedPoint", DataType_FixedPoint, true,
+	[](VirtualMachine& machine, int globalIndex, const ReadableString& defaultValueText){
+		FixedPoint defaultValue = defaultValueText.length() > 0 ? FixedPoint::fromText(defaultValueText) : FixedPoint();
+		List<VMA> args;
+		args.pushConstruct(DataType_FixedPoint, globalIndex);
+		args.pushConstruct(defaultValue);
+		machine.interpretCommand(U"Load", args);
+	},
+	[](PlanarMemory& memory, Variable& variable, int globalIndex, int32_t* framePointer, bool fullContent) {
+		FixedPoint value = MEDIA_MEMORY.FixedPointMemory.accessByGlobalIndex(globalIndex, framePointer[DataType_FixedPoint]);
+		printText(variable.name, "(", value, ")");
+	}),
+	VMTypeDef(U"ImageU8", DataType_ImageU8, false,
+	[](VirtualMachine& machine, int globalIndex, const ReadableString& defaultValueText){
+		List<VMA> args;
+		args.pushConstruct(DataType_ImageU8, globalIndex);
+		machine.interpretCommand(U"Reset", args);
+	},
+	[](PlanarMemory& memory, Variable& variable, int globalIndex, int32_t* framePointer, bool fullContent) {
+		AlignedImageU8 value = MEDIA_MEMORY.AlignedImageU8Memory.accessByGlobalIndex(globalIndex, framePointer[DataType_ImageU8]);
+		printText(variable.name, " ImageU8");
+		if (image_exists(value)) {
+			if (fullContent) {
+				printText(":\n", image_toAscii(value, U" .:*ixXM"));
+			} else {
+				printText("(", image_getWidth(value), "x", image_getHeight(value), ")");
+			}
+		} else {
+			printText("(nothing)");
+		}
+	}),
+	VMTypeDef(U"ImageRgbaU8", DataType_ImageRgbaU8, false,
+	[](VirtualMachine& machine, int globalIndex, const ReadableString& defaultValueText){
+		List<VMA> args;
+		args.pushConstruct(DataType_ImageRgbaU8, globalIndex);
+		machine.interpretCommand(U"Reset", args);
+	},
+	[](PlanarMemory& memory, Variable& variable, int globalIndex, int32_t* framePointer, bool fullContent) {
+		OrderedImageRgbaU8 value = MEDIA_MEMORY.OrderedImageRgbaU8Memory.accessByGlobalIndex(globalIndex, framePointer[DataType_ImageRgbaU8]);
+		printText(variable.name, " ImageRgbaU8");
+		if (image_exists(value)) {
+			// TODO: image_toAscii for multi-channel images
+			printText("(", image_getWidth(value), "x", image_getHeight(value), ")");
+		} else {
+			printText("(nothing)");
+		}
+	})
+};
+
+inline FixedPoint getFixedPointValue(MediaMemory& memory, const VMA& arg) {
+	if (arg.argType == ArgumentType::Immediate) {
+		return arg.value;
+	} else {
+		return memory.FixedPointMemory.getRef(arg, memory.current.framePointer[DataType_FixedPoint]);
+	}
+}
+#define SCALAR_VALUE(ARG_INDEX) getFixedPointValue(MEDIA_MEMORY, args[ARG_INDEX])
+#define INT_VALUE(ARG_INDEX) fixedPoint_round(SCALAR_VALUE(ARG_INDEX))
+#define SCALAR_REF(ARG_INDEX) (MEDIA_MEMORY.FixedPointMemory.getRef(args[ARG_INDEX], memory.current.framePointer[DataType_FixedPoint]))
+#define IMAGE_U8_REF(ARG_INDEX) (MEDIA_MEMORY.AlignedImageU8Memory.getRef(args[ARG_INDEX], memory.current.framePointer[DataType_ImageU8]))
+#define IMAGE_RGBAU8_REF(ARG_INDEX) (MEDIA_MEMORY.OrderedImageRgbaU8Memory.getRef(args[ARG_INDEX], memory.current.framePointer[DataType_ImageRgbaU8]))
+#define NEXT_INSTRUCTION memory.current.programCounter++;
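+// Instruction signatures of the media machine
+//   Each InsSig binds an upper-case instruction name to a lambda implementing it and a list of typed argument signatures
+//   Several entries may share the same name; the matching overload is then selected from the argument types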
+static const InsSig mediaMachineInstructions[] = {
+	InsSig::create(U"LOAD", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = SCALAR_VALUE(1);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"RESET", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_U8_REF(0) = AlignedImageU8();
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8)
+	),
+	InsSig::create(U"RESET", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = OrderedImageRgbaU8();
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8)
+	),
+	InsSig::create(U"ROUND", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(fixedPoint_round(SCALAR_VALUE(1)));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_FixedPoint), // Aliasing is accepted
+		ArgSig(U"Source", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"MIN", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = fixedPoint_min(SCALAR_VALUE(1), SCALAR_VALUE(2));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_FixedPoint), // Aliasing is accepted
+		ArgSig(U"LeftSource", true, DataType_FixedPoint),
+		ArgSig(U"RightSource", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"MAX", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = fixedPoint_max(SCALAR_VALUE(1), SCALAR_VALUE(2));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_FixedPoint), // Aliasing is accepted
+		ArgSig(U"LeftSource", true, DataType_FixedPoint),
+		ArgSig(U"RightSource", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"ADD", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = SCALAR_VALUE(1) + SCALAR_VALUE(2);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_FixedPoint), // Aliasing is accepted
+		ArgSig(U"LeftSource", true, DataType_FixedPoint),
+		ArgSig(U"RightSource", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"ADD", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_filter_add(IMAGE_U8_REF(0), IMAGE_U8_REF(1), IMAGE_U8_REF(2));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8), // Aliasing is accepted
+		ArgSig(U"LeftSource", true, DataType_ImageU8),
+		ArgSig(U"RightSource", true, DataType_ImageU8)
+	),
+	InsSig::create(U"ADD", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_filter_add(IMAGE_U8_REF(0), IMAGE_U8_REF(1), SCALAR_VALUE(2));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8), // Aliasing is accepted
+		ArgSig(U"LeftSource", true, DataType_ImageU8),
+		ArgSig(U"RightSource", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"ADD", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_filter_add(IMAGE_U8_REF(0), IMAGE_U8_REF(2), SCALAR_VALUE(1));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8), // Aliasing is accepted
+		ArgSig(U"LeftSource", true, DataType_FixedPoint),
+		ArgSig(U"RightSource", true, DataType_ImageU8)
+	),
+	InsSig::create(U"SUB", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = SCALAR_VALUE(1) - SCALAR_VALUE(2);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_FixedPoint), // Aliasing is accepted
+		ArgSig(U"PositiveSource", true, DataType_FixedPoint),
+		ArgSig(U"NegativeSource", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"SUB", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_filter_sub(IMAGE_U8_REF(0), IMAGE_U8_REF(1), IMAGE_U8_REF(2));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8), // Aliasing is accepted
+		ArgSig(U"PositiveSource", true, DataType_ImageU8),
+		ArgSig(U"NegativeSource", true, DataType_ImageU8)
+	),
+	InsSig::create(U"SUB", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_filter_sub(IMAGE_U8_REF(0), IMAGE_U8_REF(1), SCALAR_VALUE(2));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8), // Aliasing is accepted
+		ArgSig(U"PositiveSource", true, DataType_ImageU8),
+		ArgSig(U"NegativeSource", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"SUB", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_filter_sub(IMAGE_U8_REF(0), SCALAR_VALUE(2), IMAGE_U8_REF(1));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8), // Aliasing is accepted
+		ArgSig(U"PositiveSource", true, DataType_FixedPoint),
+		ArgSig(U"NegativeSource", true, DataType_ImageU8)
+	),
+	InsSig::create(U"MUL", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = SCALAR_VALUE(1) * SCALAR_VALUE(2);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_FixedPoint), // Aliasing is accepted
+		ArgSig(U"LeftSource", true, DataType_FixedPoint),
+		ArgSig(U"RightSource", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"MUL", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_filter_mul(IMAGE_U8_REF(0), IMAGE_U8_REF(1), SCALAR_VALUE(2));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8), // Aliasing is accepted
+		ArgSig(U"LeftSource", true, DataType_ImageU8),
+		ArgSig(U"RightSource", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"MUL", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_filter_mul(IMAGE_U8_REF(0), IMAGE_U8_REF(1), IMAGE_U8_REF(2), SCALAR_VALUE(3));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8), // Aliasing is accepted
+		ArgSig(U"FirstSource", true, DataType_ImageU8),
+		ArgSig(U"SecondSource", true, DataType_ImageU8),
+		ArgSig(U"Scalar", true, DataType_FixedPoint) // Use 1/255 for normalized multiplication
+	),
+	InsSig::create(U"CREATE", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			int width = INT_VALUE(1);
+			int height = INT_VALUE(2);
+			if (width < 1 || height < 1) {
+				throwError("Images must allocate at least one pixel to be created.");
+			}
+			IMAGE_U8_REF(0) = image_create_U8(width, height);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"Width", true, DataType_FixedPoint),
+		ArgSig(U"Height", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"CREATE", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			int width = INT_VALUE(1);
+			int height = INT_VALUE(2);
+			if (width < 1 || height < 1) {
+				throwError("Images must allocate at least one pixel to be created.");
+			}
+			IMAGE_RGBAU8_REF(0) = image_create_RgbaU8(width, height);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Width", true, DataType_FixedPoint),
+		ArgSig(U"Height", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"EXISTS", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(image_exists(IMAGE_U8_REF(1)) ? 1 : 0);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Truth", false, DataType_FixedPoint), // 1 for existing, 0 for null
+		ArgSig(U"Source", true, DataType_ImageU8)
+	),
+	InsSig::create(U"EXISTS", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(image_exists(IMAGE_RGBAU8_REF(1)) ? 1 : 0);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Truth", false, DataType_FixedPoint), // 1 for existing, 0 for null
+		ArgSig(U"Source", true, DataType_ImageRgbaU8)
+	),
+	InsSig::create(U"GET_WIDTH", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(image_getWidth(IMAGE_U8_REF(1)));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Width", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageU8)
+	),
+	InsSig::create(U"GET_WIDTH", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(image_getWidth(IMAGE_RGBAU8_REF(1)));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Width", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8)
+	),
+	InsSig::create(U"GET_HEIGHT", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(image_getHeight(IMAGE_U8_REF(1)));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Height", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageU8)
+	),
+	InsSig::create(U"GET_HEIGHT", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(image_getHeight(IMAGE_RGBAU8_REF(1)));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Height", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8)
+	),
+	InsSig::create(U"FILL", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			image_fill(IMAGE_U8_REF(0), INT_VALUE(1));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"Luma", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"FILL", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			image_fill(
+			  IMAGE_RGBAU8_REF(0),
+			  ColorRgbaI32(INT_VALUE(1), INT_VALUE(2), INT_VALUE(3), INT_VALUE(4))
+			);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"RECTANGLE", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			draw_rectangle(
+			  IMAGE_U8_REF(0),
+			  IRect(INT_VALUE(1), INT_VALUE(2), INT_VALUE(3), INT_VALUE(4)),
+			  INT_VALUE(5)
+			);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"Left", true, DataType_FixedPoint),
+		ArgSig(U"Top", true, DataType_FixedPoint),
+		ArgSig(U"Width", true, DataType_FixedPoint),
+		ArgSig(U"Height", true, DataType_FixedPoint),
+		ArgSig(U"Luma", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"RECTANGLE", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			draw_rectangle(
+			  IMAGE_RGBAU8_REF(0),
+			  IRect(INT_VALUE(1), INT_VALUE(2), INT_VALUE(3), INT_VALUE(4)),
+			  ColorRgbaI32(INT_VALUE(5), INT_VALUE(6), INT_VALUE(7), INT_VALUE(8))
+			);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Left", true, DataType_FixedPoint),
+		ArgSig(U"Top", true, DataType_FixedPoint),
+		ArgSig(U"Width", true, DataType_FixedPoint),
+		ArgSig(U"Height", true, DataType_FixedPoint),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"COPY", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			draw_copy(IMAGE_U8_REF(0), IMAGE_U8_REF(3), INT_VALUE(1), INT_VALUE(2));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"TargetLeft", true, DataType_FixedPoint),
+		ArgSig(U"TargetTop", true, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageU8)
+	),
+	InsSig::create(U"COPY", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			draw_copy(IMAGE_RGBAU8_REF(0), IMAGE_RGBAU8_REF(3), INT_VALUE(1), INT_VALUE(2));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"TargetLeft", true, DataType_FixedPoint),
+		ArgSig(U"TargetTop", true, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8)
+	),
+	InsSig::create(U"COPY", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			draw_copy(
+			  IMAGE_U8_REF(0),
+			  image_getSubImage(IMAGE_U8_REF(3), IRect(INT_VALUE(4), INT_VALUE(5), INT_VALUE(6), INT_VALUE(7))),
+			  INT_VALUE(1), INT_VALUE(2)
+			);
+			NEXT_INSTRUCTION
+		},
+		// TODO: Prevent aliasing between IMAGE_U8_REF(0) and IMAGE_U8_REF(3) in compile-time
+		//       This will be added as another lambda running safety checks on suggested inputs
+		//         The result will either accept, pass on to the next overload or abort compilation
+		//         Passing to another overload can be used to fall back on a run-time checked operation
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"TargetLeft", true, DataType_FixedPoint),
+		ArgSig(U"TargetTop", true, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageU8),
+		ArgSig(U"SourceLeft", true, DataType_FixedPoint),
+		ArgSig(U"SourceTop", true, DataType_FixedPoint),
+		ArgSig(U"SourceWidth", true, DataType_FixedPoint),
+		ArgSig(U"SourceHeight", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"COPY", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			draw_copy(
+			  IMAGE_RGBAU8_REF(0),
+			  image_getSubImage(IMAGE_RGBAU8_REF(3), IRect(INT_VALUE(4), INT_VALUE(5), INT_VALUE(6), INT_VALUE(7))),
+			  INT_VALUE(1), INT_VALUE(2)
+			);
+			NEXT_INSTRUCTION
+		},
+		// TODO: Prevent aliasing
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"TargetLeft", true, DataType_FixedPoint),
+		ArgSig(U"TargetTop", true, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8),
+		ArgSig(U"SourceLeft", true, DataType_FixedPoint),
+		ArgSig(U"SourceTop", true, DataType_FixedPoint),
+		ArgSig(U"SourceWidth", true, DataType_FixedPoint),
+		ArgSig(U"SourceHeight", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"GET_RED", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_U8_REF(0) = image_get_red(IMAGE_RGBAU8_REF(1));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8)
+	),
+	InsSig::create(U"GET_GREEN", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_U8_REF(0) = image_get_green(IMAGE_RGBAU8_REF(1));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8)
+	),
+	InsSig::create(U"GET_BLUE", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_U8_REF(0) = image_get_blue(IMAGE_RGBAU8_REF(1));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8)
+	),
+	InsSig::create(U"GET_ALPHA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_U8_REF(0) = image_get_alpha(IMAGE_RGBAU8_REF(1));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(IMAGE_U8_REF(1), INT_VALUE(2), INT_VALUE(3), INT_VALUE(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_ImageU8),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(INT_VALUE(1), IMAGE_U8_REF(2), INT_VALUE(3), INT_VALUE(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_ImageU8),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(INT_VALUE(1), INT_VALUE(2), IMAGE_U8_REF(3), INT_VALUE(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_ImageU8),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(INT_VALUE(1), INT_VALUE(2), INT_VALUE(3), IMAGE_U8_REF(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_ImageU8)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(IMAGE_U8_REF(1), IMAGE_U8_REF(2), INT_VALUE(3), INT_VALUE(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_ImageU8),
+		ArgSig(U"Green", true, DataType_ImageU8),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(IMAGE_U8_REF(1), INT_VALUE(2), IMAGE_U8_REF(3), INT_VALUE(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_ImageU8),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_ImageU8),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(IMAGE_U8_REF(1), INT_VALUE(2), INT_VALUE(3), IMAGE_U8_REF(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_ImageU8),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_ImageU8)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(INT_VALUE(1), IMAGE_U8_REF(2), IMAGE_U8_REF(3), INT_VALUE(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_ImageU8),
+		ArgSig(U"Blue", true, DataType_ImageU8),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(INT_VALUE(1), IMAGE_U8_REF(2), INT_VALUE(3), IMAGE_U8_REF(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_ImageU8),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_ImageU8)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(INT_VALUE(1), INT_VALUE(2), IMAGE_U8_REF(3), IMAGE_U8_REF(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_ImageU8),
+		ArgSig(U"Alpha", true, DataType_ImageU8)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(INT_VALUE(1), IMAGE_U8_REF(2), IMAGE_U8_REF(3), IMAGE_U8_REF(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_ImageU8),
+		ArgSig(U"Blue", true, DataType_ImageU8),
+		ArgSig(U"Alpha", true, DataType_ImageU8)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(IMAGE_U8_REF(1), INT_VALUE(2), IMAGE_U8_REF(3), IMAGE_U8_REF(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_ImageU8),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_ImageU8),
+		ArgSig(U"Alpha", true, DataType_ImageU8)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(IMAGE_U8_REF(1), IMAGE_U8_REF(2), INT_VALUE(3), IMAGE_U8_REF(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_ImageU8),
+		ArgSig(U"Green", true, DataType_ImageU8),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_ImageU8)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(IMAGE_U8_REF(1), IMAGE_U8_REF(2), IMAGE_U8_REF(3), INT_VALUE(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_ImageU8),
+		ArgSig(U"Green", true, DataType_ImageU8),
+		ArgSig(U"Blue", true, DataType_ImageU8),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"PACK_RGBA", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			IMAGE_RGBAU8_REF(0) = image_pack(IMAGE_U8_REF(1), IMAGE_U8_REF(2), IMAGE_U8_REF(3), IMAGE_U8_REF(4));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"Red", true, DataType_ImageU8),
+		ArgSig(U"Green", true, DataType_ImageU8),
+		ArgSig(U"Blue", true, DataType_ImageU8),
+		ArgSig(U"Alpha", true, DataType_ImageU8)
+	),
+	InsSig::create(U"LINE", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			draw_line(IMAGE_U8_REF(0), INT_VALUE(1), INT_VALUE(2), INT_VALUE(3), INT_VALUE(4), INT_VALUE(5));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"X1", true, DataType_FixedPoint),
+		ArgSig(U"Y1", true, DataType_FixedPoint),
+		ArgSig(U"X2", true, DataType_FixedPoint),
+		ArgSig(U"Y2", true, DataType_FixedPoint),
+		ArgSig(U"Luma", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"LINE", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			draw_line(
+			  IMAGE_RGBAU8_REF(0),
+			  INT_VALUE(1), INT_VALUE(2), INT_VALUE(3), INT_VALUE(4),
+			  ColorRgbaI32(INT_VALUE(5), INT_VALUE(6), INT_VALUE(7), INT_VALUE(8))
+			);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"X1", true, DataType_FixedPoint),
+		ArgSig(U"Y1", true, DataType_FixedPoint),
+		ArgSig(U"X2", true, DataType_FixedPoint),
+		ArgSig(U"Y2", true, DataType_FixedPoint),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"FADE_LINEAR", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_fade_linear(IMAGE_U8_REF(0), SCALAR_VALUE(1), SCALAR_VALUE(2), SCALAR_VALUE(3), SCALAR_VALUE(4), SCALAR_VALUE(5), SCALAR_VALUE(6));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"X1", true, DataType_FixedPoint),
+		ArgSig(U"Y1", true, DataType_FixedPoint),
+		ArgSig(U"Luma1", true, DataType_FixedPoint), // At x1, y1
+		ArgSig(U"X2", true, DataType_FixedPoint),
+		ArgSig(U"Y2", true, DataType_FixedPoint),
+		ArgSig(U"Luma2", true, DataType_FixedPoint) // At x2, y2
+	),
+	InsSig::create(U"FADE_REGION_LINEAR", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_fade_region_linear(IMAGE_U8_REF(0), IRect(INT_VALUE(1), INT_VALUE(2), INT_VALUE(3), INT_VALUE(4)), SCALAR_VALUE(5), SCALAR_VALUE(6), SCALAR_VALUE(7), SCALAR_VALUE(8), SCALAR_VALUE(9), SCALAR_VALUE(10));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"Left", true, DataType_FixedPoint),
+		ArgSig(U"Top", true, DataType_FixedPoint),
+		ArgSig(U"Width", true, DataType_FixedPoint),
+		ArgSig(U"Height", true, DataType_FixedPoint),
+		ArgSig(U"X1", true, DataType_FixedPoint), // Relative to Left
+		ArgSig(U"Y1", true, DataType_FixedPoint), // Relative to Top
+		ArgSig(U"Luma1", true, DataType_FixedPoint), // At Left + X1, Top + Y1
+		ArgSig(U"X2", true, DataType_FixedPoint), // Relative to Left
+		ArgSig(U"Y2", true, DataType_FixedPoint), // Relative to Top
+		ArgSig(U"Luma2", true, DataType_FixedPoint)  // At Left + X2, Top + Y2
+	),
+	//void media_fade_radial(ImageU8& targetImage, FixedPoint centerX, FixedPoint centerY, FixedPoint innerRadius, FixedPoint innerLuma, FixedPoint outerRadius, FixedPoint outerLuma);
+	InsSig::create(U"FADE_RADIAL", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_fade_radial(IMAGE_U8_REF(0), SCALAR_VALUE(1), SCALAR_VALUE(2), SCALAR_VALUE(3), SCALAR_VALUE(4), SCALAR_VALUE(5), SCALAR_VALUE(6));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"CenterX", true, DataType_FixedPoint),
+		ArgSig(U"CenterY", true, DataType_FixedPoint),
+		ArgSig(U"InnerRadius", true, DataType_FixedPoint),
+		ArgSig(U"InnerLuma", true, DataType_FixedPoint),
+		ArgSig(U"OuterRadius", true, DataType_FixedPoint),
+		ArgSig(U"OuterLuma", true, DataType_FixedPoint)
+	),
+	// void media_fade_region_radial(ImageU8& targetImage, const IRect& viewport, FixedPoint centerX, FixedPoint centerY, FixedPoint innerRadius, FixedPoint innerLuma, FixedPoint outerRadius, FixedPoint outerLuma);
+	InsSig::create(U"FADE_REGION_RADIAL", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			media_fade_region_radial(IMAGE_U8_REF(0), IRect(INT_VALUE(1), INT_VALUE(2), INT_VALUE(3), INT_VALUE(4)), SCALAR_VALUE(5), SCALAR_VALUE(6), SCALAR_VALUE(7), SCALAR_VALUE(8), SCALAR_VALUE(9), SCALAR_VALUE(10));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"Left", true, DataType_FixedPoint),
+		ArgSig(U"Top", true, DataType_FixedPoint),
+		ArgSig(U"Width", true, DataType_FixedPoint),
+		ArgSig(U"Height", true, DataType_FixedPoint),
+		ArgSig(U"CenterX", true, DataType_FixedPoint),
+		ArgSig(U"CenterY", true, DataType_FixedPoint),
+		ArgSig(U"InnerRadius", true, DataType_FixedPoint),
+		ArgSig(U"InnerLuma", true, DataType_FixedPoint),
+		ArgSig(U"OuterRadius", true, DataType_FixedPoint),
+		ArgSig(U"OuterLuma", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"WRITE_PIXEL", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			image_writePixel(IMAGE_U8_REF(0), INT_VALUE(1), INT_VALUE(2), INT_VALUE(3));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageU8),
+		ArgSig(U"X", true, DataType_FixedPoint),
+		ArgSig(U"Y", true, DataType_FixedPoint),
+		ArgSig(U"Luma", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"WRITE_PIXEL", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			image_writePixel(
+			  IMAGE_RGBAU8_REF(0),
+			  INT_VALUE(1), INT_VALUE(2),
+			  ColorRgbaI32(INT_VALUE(3), INT_VALUE(4), INT_VALUE(5), INT_VALUE(6))
+			);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"Target", false, DataType_ImageRgbaU8),
+		ArgSig(U"X", true, DataType_FixedPoint),
+		ArgSig(U"Y", true, DataType_FixedPoint),
+		ArgSig(U"Red", true, DataType_FixedPoint),
+		ArgSig(U"Green", true, DataType_FixedPoint),
+		ArgSig(U"Blue", true, DataType_FixedPoint),
+		ArgSig(U"Alpha", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"READ_PIXEL_BORDER", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(image_readPixel_border(IMAGE_U8_REF(1), INT_VALUE(2), INT_VALUE(3), INT_VALUE(4)));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"LumaOutput", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageU8),
+		ArgSig(U"X", true, DataType_FixedPoint),
+		ArgSig(U"Y", true, DataType_FixedPoint),
+		ArgSig(U"LumaBorder", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"READ_PIXEL_BORDER", 4,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			ColorRgbaI32 result = image_readPixel_border(
+			  IMAGE_RGBAU8_REF(4),
+			  INT_VALUE(5), INT_VALUE(6),
+			  ColorRgbaI32(INT_VALUE(7), INT_VALUE(8), INT_VALUE(9), INT_VALUE(10))
+			);
+			SCALAR_REF(0) = FixedPoint::fromWhole(result.red);
+			SCALAR_REF(1) = FixedPoint::fromWhole(result.green);
+			SCALAR_REF(2) = FixedPoint::fromWhole(result.blue);
+			SCALAR_REF(3) = FixedPoint::fromWhole(result.alpha);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"RedOutput", false, DataType_FixedPoint),
+		ArgSig(U"GreenOutput", false, DataType_FixedPoint),
+		ArgSig(U"BlueOutput", false, DataType_FixedPoint),
+		ArgSig(U"AlphaOutput", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8),
+		ArgSig(U"X", true, DataType_FixedPoint),
+		ArgSig(U"Y", true, DataType_FixedPoint),
+		ArgSig(U"RedBorder", true, DataType_FixedPoint),
+		ArgSig(U"GreenBorder", true, DataType_FixedPoint),
+		ArgSig(U"BlueBorder", true, DataType_FixedPoint),
+		ArgSig(U"AlphaBorder", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"READ_PIXEL_CLAMP", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(image_readPixel_clamp(IMAGE_U8_REF(1), INT_VALUE(2), INT_VALUE(3)));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"LumaOutput", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageU8),
+		ArgSig(U"X", true, DataType_FixedPoint),
+		ArgSig(U"Y", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"READ_PIXEL_CLAMP", 4,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			ColorRgbaI32 result = image_readPixel_clamp(
+			  IMAGE_RGBAU8_REF(4),
+			  INT_VALUE(5), INT_VALUE(6)
+			);
+			SCALAR_REF(0) = FixedPoint::fromWhole(result.red);
+			SCALAR_REF(1) = FixedPoint::fromWhole(result.green);
+			SCALAR_REF(2) = FixedPoint::fromWhole(result.blue);
+			SCALAR_REF(3) = FixedPoint::fromWhole(result.alpha);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"RedOutput", false, DataType_FixedPoint),
+		ArgSig(U"GreenOutput", false, DataType_FixedPoint),
+		ArgSig(U"BlueOutput", false, DataType_FixedPoint),
+		ArgSig(U"AlphaOutput", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8),
+		ArgSig(U"X", true, DataType_FixedPoint),
+		ArgSig(U"Y", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"READ_PIXEL_TILE", 1,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			SCALAR_REF(0) = FixedPoint::fromWhole(image_readPixel_tile(IMAGE_U8_REF(1), INT_VALUE(2), INT_VALUE(3)));
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"LumaOutput", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageU8),
+		ArgSig(U"X", true, DataType_FixedPoint),
+		ArgSig(U"Y", true, DataType_FixedPoint)
+	),
+	InsSig::create(U"READ_PIXEL_TILE", 4,
+		[](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+			ColorRgbaI32 result = image_readPixel_tile(
+			  IMAGE_RGBAU8_REF(4),
+			  INT_VALUE(5), INT_VALUE(6)
+			);
+			SCALAR_REF(0) = FixedPoint::fromWhole(result.red);
+			SCALAR_REF(1) = FixedPoint::fromWhole(result.green);
+			SCALAR_REF(2) = FixedPoint::fromWhole(result.blue);
+			SCALAR_REF(3) = FixedPoint::fromWhole(result.alpha);
+			NEXT_INSTRUCTION
+		},
+		ArgSig(U"RedOutput", false, DataType_FixedPoint),
+		ArgSig(U"GreenOutput", false, DataType_FixedPoint),
+		ArgSig(U"BlueOutput", false, DataType_FixedPoint),
+		ArgSig(U"AlphaOutput", false, DataType_FixedPoint),
+		ArgSig(U"Source", true, DataType_ImageRgbaU8),
+		ArgSig(U"X", true, DataType_FixedPoint),
+		ArgSig(U"Y", true, DataType_FixedPoint)
+	)
+};
+
+// API implementation
+
+static void checkMethodIndex(MediaMachine& machine, int methodIndex) {
+	if (methodIndex < 0 || methodIndex >= machine->methods.length()) {
+		throwError("Invalid method index ", methodIndex, " of 0..", (machine->methods.length() - 1), ".");
+	}
+}
+
+MediaMachine machine_create(const ReadableString& code) {
+	std::shared_ptr<PlanarMemory> memory = std::make_shared<MediaMemory>();
+	static const int mediaMachineInstructionCount = sizeof(mediaMachineInstructions) / sizeof(InsSig);
+	static const int mediaMachineTypeCount = sizeof(mediaMachineTypes) / sizeof(VMTypeDef);
+	return MediaMachine(std::make_shared<VirtualMachine>(code, memory, mediaMachineInstructions, mediaMachineInstructionCount, mediaMachineTypes, mediaMachineTypeCount));
+}
+
+void machine_executeMethod(MediaMachine& machine, int methodIndex) {
+	checkMethodIndex(machine, methodIndex);
+	machine->executeMethod(methodIndex);
+}
+
+template <typename T>
+static void setInputByIndex(MemoryPlane<T>& stack, int framePointer, Method& method, DataType givenType, int inputIndex, const T& value) {
+	if (inputIndex < 0 || inputIndex >= method.inputCount) {
+		throwError("Invalid input index ", inputIndex, " of 0..", (method.inputCount - 1), ".");
+	}
+	Variable* variable = &method.locals[inputIndex];
+	DataType expected = variable->typeDescription->dataType;
+	if (givenType != expected) {
+		throwError("Cannot assign ", getMediaTypeName(givenType), " to ", variable->name, " of ", getMediaTypeName(expected), ".");
+	}
+	stack.accessByStackIndex(framePointer + variable->typeLocalIndex) = value;
+}
+template <typename T>
+static T& accessOutputByIndex(MemoryPlane<T>& stack, int framePointer, Method& method, DataType wantedType, int outputIndex) {
+	if (outputIndex < 0 || outputIndex >= method.outputCount) {
+		throwError("Invalid output index ", outputIndex, " of 0..", (method.outputCount - 1), ".");
+	}
+	Variable* variable = &method.locals[method.inputCount + outputIndex];
+	DataType foundType = variable->typeDescription->dataType;
+	if (wantedType != foundType) {
+		throwError("Cannot get ", variable->name, " of ", getMediaTypeName(wantedType), " as ", getMediaTypeName(wantedType), ".");
+	}
+	return stack.accessByStackIndex(framePointer + variable->typeLocalIndex);
+}
+
+// Set input by argument index
+//   Indexed arguments are confirmed to be inputs during compilation of the script
+void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, int32_t input) {
+	checkMethodIndex(machine, methodIndex);
+	setInputByIndex(((MediaMemory*)machine->memory.get())->FixedPointMemory, machine->memory->current.framePointer[DataType_FixedPoint], machine->methods[methodIndex], DataType_FixedPoint, inputIndex, FixedPoint::fromWhole(input));
+}
+void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, const FixedPoint& input) {
+	checkMethodIndex(machine, methodIndex);
+	setInputByIndex(((MediaMemory*)machine->memory.get())->FixedPointMemory, machine->memory->current.framePointer[DataType_FixedPoint], machine->methods[methodIndex], DataType_FixedPoint, inputIndex, input);
+}
+void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, const AlignedImageU8& input) {
+	checkMethodIndex(machine, methodIndex);
+	setInputByIndex(((MediaMemory*)machine->memory.get())->AlignedImageU8Memory, machine->memory->current.framePointer[DataType_ImageU8], machine->methods[methodIndex], DataType_ImageU8, inputIndex, input);
+}
+void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, const OrderedImageRgbaU8& input) {
+	checkMethodIndex(machine, methodIndex);
+	setInputByIndex(((MediaMemory*)machine->memory.get())->OrderedImageRgbaU8Memory, machine->memory->current.framePointer[DataType_ImageRgbaU8], machine->methods[methodIndex], DataType_ImageRgbaU8, inputIndex, input);
+}
+
+// Get output by index
+FixedPoint machine_getFixedPointOutputByIndex(MediaMachine& machine, int methodIndex, int outputIndex) {
+	checkMethodIndex(machine, methodIndex);
+	return accessOutputByIndex<FixedPoint>(((MediaMemory*)machine->memory.get())->FixedPointMemory, machine->memory->current.framePointer[DataType_FixedPoint], machine->methods[methodIndex], DataType_FixedPoint, outputIndex);
+}
+AlignedImageU8 machine_getImageU8OutputByIndex(MediaMachine& machine, int methodIndex, int outputIndex) {
+	checkMethodIndex(machine, methodIndex);
+	return accessOutputByIndex<AlignedImageU8>(((MediaMemory*)machine->memory.get())->AlignedImageU8Memory, machine->memory->current.framePointer[DataType_ImageU8], machine->methods[methodIndex], DataType_ImageU8, outputIndex);
+}
+OrderedImageRgbaU8 machine_getImageRgbaU8OutputByIndex(MediaMachine& machine, int methodIndex, int outputIndex) {
+	checkMethodIndex(machine, methodIndex);
+	return accessOutputByIndex<OrderedImageRgbaU8>(((MediaMemory*)machine->memory.get())->OrderedImageRgbaU8Memory, machine->memory->current.framePointer[DataType_ImageRgbaU8], machine->methods[methodIndex], DataType_ImageRgbaU8, outputIndex);
+}
+
+int machine_findMethod(MediaMachine& machine, const ReadableString& methodName) {
+	return machine->findMethod(methodName);
+}
+
+MediaMethod machine_getMethod(MediaMachine& machine, const ReadableString& methodName) {
+	return MediaMethod(machine, machine_findMethod(machine, methodName));
+}
+
+String machine_getMethodName(MediaMachine& machine, int methodIndex) {
+	checkMethodIndex(machine, methodIndex);
+	return machine->methods[methodIndex].name;
+}
+
+int machine_getInputCount(MediaMachine& machine, int methodIndex) {
+	checkMethodIndex(machine, methodIndex);
+	return machine->methods[methodIndex].inputCount;
+}
+
+int machine_getOutputCount(MediaMachine& machine, int methodIndex) {
+	checkMethodIndex(machine, methodIndex);
+	return machine->methods[methodIndex].outputCount;
+}
+
+}

+ 137 - 0
Source/DFPSR/api/mediaMachineAPI.h

@@ -0,0 +1,137 @@
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_API_MEDIA_MACHINE
+#define DFPSR_API_MEDIA_MACHINE
+
+#include "../math/FixedPoint.h"
+#include "../api/types.h"
+
+namespace dsr {
+
+MediaMachine machine_create(const ReadableString& code);
+
+// Low-level call API
+int machine_findMethod(MediaMachine& machine, const ReadableString& methodName);
+void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, int32_t input);
+void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, const FixedPoint& input);
+void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, const AlignedImageU8& input);
+void machine_setInputByIndex(MediaMachine& machine, int methodIndex, int inputIndex, const OrderedImageRgbaU8& input);
+void machine_executeMethod(MediaMachine& machine, int methodIndex);
+FixedPoint machine_getFixedPointOutputByIndex(MediaMachine& machine, int methodIndex, int outputIndex);
+AlignedImageU8 machine_getImageU8OutputByIndex(MediaMachine& machine, int methodIndex, int outputIndex);
+OrderedImageRgbaU8 machine_getImageRgbaU8OutputByIndex(MediaMachine& machine, int methodIndex, int outputIndex);
+String machine_getMethodName(MediaMachine& machine, int methodIndex);
+int machine_getInputCount(MediaMachine& machine, int methodIndex);
+int machine_getOutputCount(MediaMachine& machine, int methodIndex);
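+// Example (illustrative sketch; "Generate" is a hypothetical method name taking two scalars and returning one image):
+//   int methodIndex = machine_findMethod(machine, U"Generate");
+//   machine_setInputByIndex(machine, methodIndex, 0, 64);
+//   machine_setInputByIndex(machine, methodIndex, 1, 64);
+//   machine_executeMethod(machine, methodIndex);
+//   AlignedImageU8 result = machine_getImageU8OutputByIndex(machine, methodIndex, 0);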
+
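+// Count the number of arguments in a parameter pack at compile time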
+inline constexpr int argCount() {
+	return 0;
+}
+template<typename HEAD, typename... TAIL>
+inline constexpr int argCount(HEAD& first, TAIL&... args) {
+	return argCount(args...) + 1;
+}
+
+// TODO: Prevent saving the result to avoid reading after another call
+class MediaResult {
+private:
+	MediaMachine machine;
+	int methodIndex;
+	void writeResult(int outputIndex, int8_t& target) {
+		target = fixedPoint_round(machine_getFixedPointOutputByIndex(this->machine, this->methodIndex, outputIndex));
+	}
+	void writeResult(int outputIndex, int16_t& target) {
+		target = fixedPoint_round(machine_getFixedPointOutputByIndex(this->machine, this->methodIndex, outputIndex));
+	}
+	void writeResult(int outputIndex, int32_t& target) {
+		target = fixedPoint_round(machine_getFixedPointOutputByIndex(this->machine, this->methodIndex, outputIndex));
+	}
+	void writeResult(int outputIndex, int64_t& target) {
+		target = fixedPoint_round(machine_getFixedPointOutputByIndex(this->machine, this->methodIndex, outputIndex));
+	}
+	void writeResult(int outputIndex, FixedPoint& target) {
+		target = machine_getFixedPointOutputByIndex(this->machine, this->methodIndex, outputIndex);
+	}
+	void writeResult(int outputIndex, AlignedImageU8& target) {
+		target = machine_getImageU8OutputByIndex(this->machine, this->methodIndex, outputIndex);
+	}
+	void writeResult(int outputIndex, OrderedImageRgbaU8& target) {
+		target = machine_getImageRgbaU8OutputByIndex(this->machine, this->methodIndex, outputIndex);
+	}
+	inline void writeResults(int firstOutputIndex) {}
+	template<typename HEAD, typename... TAIL>
+	inline void writeResults(int firstOutputIndex, HEAD& first, TAIL&... args) {
+		this->writeResult(firstOutputIndex, first);
+		this->writeResults(firstOutputIndex + 1, args...);
+	}
+public:
+	MediaResult(const MediaMachine& machine, int methodIndex)
+ 	: machine(machine), methodIndex(methodIndex) {}
+	// Pass target references within () after a call to receive multiple outputs
+	template <typename... ARGS>
+	void operator () (ARGS&... args) {
+		int givenCount = argCount(args...);
+		int expectedCount = machine_getOutputCount(this->machine, this->methodIndex);
+		if (givenCount != expectedCount) {
+			throwError("The call to ", machine_getMethodName(this->machine, this->methodIndex), " expected ", expectedCount, " outputs, but ", givenCount, " references were assigned.\n");
+		}
+		this->writeResults(0, args...);
+	}
+};
+
+class MediaMethod {
+public:
+	MediaMachine machine;
+	int methodIndex;
+private:
+	inline void setInputs(int firstInputIndex) {}
+	template<typename HEAD, typename... TAIL>
+	inline void setInputs(int firstInputIndex, HEAD first, TAIL... args) {
+		machine_setInputByIndex(this->machine, this->methodIndex, firstInputIndex, first);
+		this->setInputs(firstInputIndex + 1, args...);
+	}
+public:
+	MediaMethod()
+ 	: methodIndex(-1) {}
+	MediaMethod(const MediaMachine& machine, int methodIndex)
+ 	: machine(machine), methodIndex(methodIndex) {}
+	// MediaMethod can be called like a function using arguments
+	template <typename... ARGS>
+	MediaResult operator () (ARGS... args) {
+		int givenCount = argCount(args...);
+		int expectedCount = machine_getInputCount(this->machine, this->methodIndex);
+		if (givenCount != expectedCount) {
+			throwError("The call to ", machine_getMethodName(this->machine, this->methodIndex), " expected ", expectedCount, " inputs, but ", givenCount, " values were given.\n");
+		}
+		this->setInputs(0, args...);
+		machine_executeMethod(this->machine, this->methodIndex);
+		return MediaResult(this->machine, this->methodIndex);
+	}
+};
+
+MediaMethod machine_getMethod(MediaMachine& machine, const ReadableString& methodName);
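+// Example (illustrative sketch; "Brighten" is a hypothetical script method taking an image and an offset and returning one image):
+//   MediaMethod brighten = machine_getMethod(machine, U"Brighten");
+//   AlignedImageU8 result;
+//   brighten(inputImage, 25)(result); // Inputs within the first (), output references within the second ()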
+
+}
+
+#endif

+ 274 - 0
Source/DFPSR/api/modelAPI.cpp

@@ -0,0 +1,274 @@
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "modelAPI.h"
+#include "imageAPI.h"
+#include "../render/model/Model.h"
+
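+// Guard macro throwing a descriptive error when a null Model handle is passed to the API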
+#define MUST_EXIST(OBJECT, METHOD) if (OBJECT.get() == nullptr) { throwError("The " #OBJECT " handle was null in " #METHOD "\n"); }
+
+namespace dsr {
+
+Model model_create() {
+	return std::make_shared<ModelImpl>();
+}
+
+Model model_clone(const Model& model) {
+	MUST_EXIST(model,model_clone);
+	return std::make_shared<ModelImpl>(model->filter, model->partBuffer, model->positionBuffer);
+}
+
+void model_setFilter(const Model& model, Filter filter) {
+	MUST_EXIST(model,model_setFilter);
+	model->filter = filter;
+}
+
+Filter model_getFilter(const Model& model) {
+	MUST_EXIST(model,model_getFilter);
+	return model->filter;
+}
+
+bool model_exists(const Model& model) {
+	return model.get() != nullptr;
+}
+
+int model_addEmptyPart(Model& model, const String &name) {
+	MUST_EXIST(model,model_addEmptyPart);
+	return model->addEmptyPart(name);
+}
+
+int model_getNumberOfParts(const Model& model) {
+	MUST_EXIST(model,model_getNumberOfParts);
+	return model->getNumberOfParts();
+}
+
+void model_setPartName(Model& model, int partIndex, const String &name) {
+	MUST_EXIST(model,model_setPartName);
+	model->setPartName(partIndex, name);
+}
+
+String model_getPartName(const Model& model, int partIndex) {
+	MUST_EXIST(model,model_getPartName);
+	return model->getPartName(partIndex);
+}
+
+int model_getNumberOfPoints(const Model& model) {
+	MUST_EXIST(model,model_getNumberOfPoints);
+	return model->getNumberOfPoints();
+}
+
+FVector3D model_getPoint(const Model& model, int pointIndex) {
+	MUST_EXIST(model,model_getPoint);
+	return model->getPoint(pointIndex);
+}
+
+void model_setPoint(Model& model, int pointIndex, const FVector3D& position) {
+	MUST_EXIST(model,model_setPoint);
+	model->setPoint(pointIndex, position);
+}
+
+int model_findPoint(const Model& model, const FVector3D &position, float treshold) {
+	MUST_EXIST(model,model_findPoint);
+	return model->findPoint(position, treshold);
+}
+
+int model_addPoint(const Model& model, const FVector3D &position) {
+	MUST_EXIST(model,model_addPoint);
+	return model->addPoint(position);
+}
+
+int model_addPointIfNeeded(Model& model, const FVector3D &position, float treshold) {
+	MUST_EXIST(model,model_addPointIfNeeded);
+	return model->addPointIfNeeded(position, treshold);
+}
+
+int model_getVertexPointIndex(const Model& model, int partIndex, int polygonIndex, int vertexIndex) {
+	MUST_EXIST(model,model_getVertexPointIndex);
+	return model->getVertexPointIndex(partIndex, polygonIndex, vertexIndex);
+}
+
+void model_setVertexPointIndex(Model& model, int partIndex, int polygonIndex, int vertexIndex, int pointIndex) {
+	MUST_EXIST(model,model_setVertexPointIndex);
+	model->setVertexPointIndex(partIndex, polygonIndex, vertexIndex, pointIndex);
+}
+
+FVector3D model_getVertexPosition(const Model& model, int partIndex, int polygonIndex, int vertexIndex) {
+	MUST_EXIST(model,model_getVertexPosition);
+	return model->getVertexPosition(partIndex, polygonIndex, vertexIndex);
+}
+
+FVector4D model_getVertexColor(const Model& model, int partIndex, int polygonIndex, int vertexIndex) {
+	MUST_EXIST(model,model_getVertexColor);
+	return model->getVertexColor(partIndex, polygonIndex, vertexIndex);
+}
+
+void model_setVertexColor(Model& model, int partIndex, int polygonIndex, int vertexIndex, const FVector4D& color) {
+	MUST_EXIST(model,model_setVertexColor);
+	model->setVertexColor(partIndex, polygonIndex, vertexIndex, color);
+}
+
+FVector4D model_getTexCoord(const Model& model, int partIndex, int polygonIndex, int vertexIndex) {
+	MUST_EXIST(model,model_getTexCoord);
+	return model->getTexCoord(partIndex, polygonIndex, vertexIndex);
+}
+
+void model_setTexCoord(Model& model, int partIndex, int polygonIndex, int vertexIndex, const FVector4D& texCoord) {
+	MUST_EXIST(model,model_setTexCoord);
+	model->setTexCoord(partIndex, polygonIndex, vertexIndex, texCoord);
+}
+
+int model_addTriangle(Model& model, int partIndex, int pointA, int pointB, int pointC) {
+	MUST_EXIST(model,model_addTriangle);
+	return model->addPolygon(Polygon(pointA, pointB, pointC), partIndex);
+}
+
+int model_addQuad(Model& model, int partIndex, int pointA, int pointB, int pointC, int pointD) {
+	MUST_EXIST(model,model_addQuad);
+	return model->addPolygon(Polygon(pointA, pointB, pointC, pointD), partIndex);
+}
+
+int model_getNumberOfPolygons(const Model& model, int partIndex) {
+	MUST_EXIST(model,model_getNumberOfPolygons);
+	return model->getNumberOfPolygons(partIndex);
+}
+
+int model_getPolygonVertexCount(const Model& model, int partIndex, int polygonIndex) {
+	MUST_EXIST(model,model_getPolygonVertexCount);
+	return model->getPolygonVertexCount(partIndex, polygonIndex);
+}
+
+ImageRgbaU8 model_getDiffuseMap(const Model& model, int partIndex) {
+	MUST_EXIST(model,model_getDiffuseMap);
+	return model->getDiffuseMap(partIndex);
+}
+
+// TODO: Change the backend's argument order for partIndex or simply inline all of its functionality
+void model_setDiffuseMap(Model& model, int partIndex, const ImageRgbaU8 &diffuseMap) {
+	MUST_EXIST(model,model_setDiffuseMap);
+	model->setDiffuseMap(diffuseMap, partIndex);
+}
+
+// TODO: Change the backend's argument order for partIndex or simply inline all of its functionality
+void model_setDiffuseMapByName(Model& model, int partIndex, ResourcePool &pool, const String &filename) {
+	MUST_EXIST(model,model_setDiffuseMapByName);
+	model->setDiffuseMapByName(pool, filename, partIndex);
+}
+
+ImageRgbaU8 model_getLightMap(Model& model, int partIndex) {
+	MUST_EXIST(model,model_getLightMap);
+	return model->getLightMap(partIndex);
+}
+
+// TODO: Change the backend's argument order for partIndex or simply inline all of its functionality
+void model_setLightMap(Model& model, int partIndex, const ImageRgbaU8 &lightMap) {
+	MUST_EXIST(model,model_setLightMap);
+	model->setLightMap(lightMap, partIndex);
+}
+
+// TODO: Change the backend's argument order for partIndex or simply inline all of its functionality
+void model_setLightMapByName(Model& model, int partIndex, ResourcePool &pool, const String &filename) {
+	MUST_EXIST(model,model_setLightMapByName);
+	model->setLightMapByName(pool, filename, partIndex);
+}
+
+// Single-threaded rendering for the simple cases where you just want it to work
+void model_render(const Model& model, const Transform3D &modelToWorldTransform, ImageRgbaU8& colorBuffer, ImageF32& depthBuffer, const Camera &camera) {
+	MUST_EXIST(model,model_render);
+	model->render((CommandQueue*)nullptr, colorBuffer, depthBuffer, modelToWorldTransform, camera);
+}
+void model_renderDepth(const Model& model, const Transform3D &modelToWorldTransform, ImageF32& depthBuffer, const Camera &camera) {
+	MUST_EXIST(model,model_renderDepth);
+	model->renderDepth(depthBuffer, modelToWorldTransform, camera);
+}
+
+// Context for rendering multiple models at the same time for improved speed
+class RendererImpl {
+private:
+	bool receiving = false; // Enforcing the expected call order, so that callers cannot depend on behavior that may change between versions
+	ImageRgbaU8 colorBuffer;
+	ImageF32 depthBuffer;
+	CommandQueue commandQueue;
+public:
+	RendererImpl() {}
+	void beginFrame(ImageRgbaU8& colorBuffer, ImageF32& depthBuffer) {
+		if (this->receiving) {
+			throwError("Called renderer_begin on the same renderer twice without ending the previous batch!\n");
+		}
+		this->receiving = true;
+		this->colorBuffer = colorBuffer;
+		this->depthBuffer = depthBuffer;
+	}
+	void giveTask(const Model& model, const Transform3D &modelToWorldTransform, const Camera &camera) {
+		if (!this->receiving) {
+			throwError("Cannot call renderer_giveTask before renderer_begin!\n");
+		}
+		// TODO: Make an algorithm for selecting if the model should be queued as an instance or triangulated at once
+		//       An extra argument may choose to force an instance directly into the command queue
+		//           Because the model is being borrowed for vertex animation
+		//           To prevent the command queue from getting full hold as much as possible in a sorted list of instances
+		//           When the command queue is full, the solid
+		model->render(&this->commandQueue, this->colorBuffer, this->depthBuffer, modelToWorldTransform, camera);
+	}
+	void endFrame() {
+		if (!this->receiving) {
+			throwError("Called renderer_end without renderer_begin!\n");
+		}
+		this->receiving = false;
+		if (image_exists(this->colorBuffer)) {
+			this->commandQueue.execute(IRect::FromSize(image_getWidth(this->colorBuffer), image_getHeight(this->colorBuffer)));
+		} else if (image_exists(this->depthBuffer)) {
+			this->commandQueue.execute(IRect::FromSize(image_getWidth(this->depthBuffer), image_getHeight(this->depthBuffer)));
+		}
+		this->commandQueue.clear();
+	}
+};
+
+Renderer renderer_create() {
+	return std::make_shared<RendererImpl>();
+}
+
+void renderer_begin(Renderer& renderer, ImageRgbaU8& colorBuffer, ImageF32& depthBuffer) {
+	MUST_EXIST(renderer,renderer_begin);
+	renderer->beginFrame(colorBuffer, depthBuffer);
+}
+
+// TODO: Synchronous setting
+//       * Asynchronous (default)
+//         Only works on models that are locked from further editing
+//         Locked models can then also be safely pooled for reuse (ResourcePool)
+//       * Synced (for animation)
+//         Dispatch triangles directly to the command queue so that the current state of the model is captured
+//         This allows rendering many instances of the same model at different points in time
+//         Enabling vertex lighting, reflection maps and bone animation
+void renderer_giveTask(Renderer& renderer, const Model& model, const Transform3D &modelToWorldTransform, const Camera &camera) {
+	MUST_EXIST(renderer,renderer_giveTask);
+	renderer->giveTask(model, modelToWorldTransform, camera);
+}
+
+void renderer_end(Renderer& renderer) {
+	MUST_EXIST(renderer,renderer_end);
+	renderer->endFrame();
+}
+
+}
+

+ 118 - 0
Source/DFPSR/api/modelAPI.h

@@ -0,0 +1,118 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_API_MODEL
+#define DFPSR_API_MODEL
+
+#include "types.h"
+#include "../math/FVector.h"
+
+// TODO: How should these be exposed to the caller?
+#include "../render/Camera.h"
+#include "../render/ResourcePool.h"
+
+namespace dsr {
+
+	// TODO: Document the API
+
+	// Construction
+	Model model_create();
+	Model model_clone(const Model& model);
+
+	// Whole model
+	void model_setFilter(const Model& model, Filter filter);
+	Filter model_getFilter(const Model& model);
+	bool model_exists(const Model& model);
+
+	// Part
+	int model_addEmptyPart(Model& model, const String &name);
+	int model_getNumberOfParts(const Model& model);
+	void model_setPartName(Model& model, int partIndex, const String &name);
+	String model_getPartName(const Model& model, int partIndex);
+
+	// Point
+	int model_getNumberOfPoints(const Model& model);
+	FVector3D model_getPoint(const Model& model, int pointIndex);
+	void model_setPoint(Model& model, int pointIndex, const FVector3D& position);
+	int model_findPoint(const Model& model, const FVector3D &position, float treshold);
+	int model_addPoint(const Model& model, const FVector3D &position);
+	int model_addPointIfNeeded(Model& model, const FVector3D &position, float treshold);
+
+	// Vertex
+	int model_getVertexPointIndex(const Model& model, int partIndex, int polygonIndex, int vertexIndex);
+	void model_setVertexPointIndex(Model& model, int partIndex, int polygonIndex, int vertexIndex, int pointIndex);
+	FVector3D model_getVertexPosition(const Model& model, int partIndex, int polygonIndex, int vertexIndex);
+	FVector4D model_getVertexColor(const Model& model, int partIndex, int polygonIndex, int vertexIndex);
+	void model_setVertexColor(Model& model, int partIndex, int polygonIndex, int vertexIndex, const FVector4D& color);
+	FVector4D model_getTexCoord(const Model& model, int partIndex, int polygonIndex, int vertexIndex);
+	void model_setTexCoord(Model& model, int partIndex, int polygonIndex, int vertexIndex, const FVector4D& texCoord);
+
+	// Polygon
+	int model_addTriangle(Model& model, int partIndex, int pointA, int pointB, int pointC);
+	int model_addQuad(Model& model, int partIndex, int pointA, int pointB, int pointC, int pointD);
+	int model_getNumberOfPolygons(const Model& model, int partIndex);
+	int model_getPolygonVertexCount(const Model& model, int partIndex, int polygonIndex);
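+	// Minimal usage sketch for building a single triangle (assuming that String accepts U"" literals and that
+	// FVector3D/FVector4D have per-component constructors from ../math/FVector.h):
+	//   Model triangle = model_create();
+	//   int part = model_addEmptyPart(triangle, U"side");
+	//   int a = model_addPoint(triangle, FVector3D(0.0f, 0.0f, 0.0f));
+	//   int b = model_addPoint(triangle, FVector3D(1.0f, 0.0f, 0.0f));
+	//   int c = model_addPoint(triangle, FVector3D(0.0f, 1.0f, 0.0f));
+	//   int polygon = model_addTriangle(triangle, part, a, b, c);
+	//   model_setTexCoord(triangle, part, polygon, 0, FVector4D(0.0f, 0.0f, 0.0f, 0.0f));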
+
+	// Texture
+	ImageRgbaU8 model_getDiffuseMap(const Model& model, int partIndex);
+	void model_setDiffuseMap(Model& model, int partIndex, const ImageRgbaU8 &diffuseMap);
+	void model_setDiffuseMapByName(Model& model, int partIndex, ResourcePool &pool, const String &filename);
+	ImageRgbaU8 model_getLightMap(Model& model, int partIndex);
+	void model_setLightMap(Model& model, int partIndex, const ImageRgbaU8 &lightMap);
+	void model_setLightMapByName(Model& model, int partIndex, ResourcePool &pool, const String &filename);
+
+	// Single-threaded rendering
+	//   Can be executed from multiple threads at once if colorBuffer and depthBuffer don't share memory (such as cache lines) between the threads
+	void model_render(const Model& model, const Transform3D &modelToWorldTransform, ImageRgbaU8& colorBuffer, ImageF32& depthBuffer, const Camera &camera);
+	// Simpler rendering without colorBuffer, for shadows and other depth effects
+	//   Equivalent to model_render with a non-existing colorBuffer and filter forced to solid.
+	//   Skip this call conditionally for filtered models (using model_getFilter) if you want full equivalence with model_render.
+	void model_renderDepth(const Model& model, const Transform3D &modelToWorldTransform, ImageF32& depthBuffer, const Camera &camera);
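+	//   Minimal usage sketch (assuming that a Camera, a colorBuffer and a depthBuffer of the same size already exist,
+	//   and that Transform3D() constructs an identity transform; both assumptions come from headers outside this file):
+	//     model_render(myModel, Transform3D(), colorBuffer, depthBuffer, camera);
+	//     model_renderDepth(myModel, Transform3D(), shadowDepthBuffer, lightCamera);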
+
+	// Returns a new rendering context
+	//   After creating a renderer, you may execute a number of batches using it
+	//   Each batch may execute a number of tasks in parallel
+	//   Call pattern:
+	//     create (begin giveTask* end)*
+	Renderer renderer_create();
+	// Begin rendering to target color and depth buffers of the same dimensions
+	void renderer_begin(Renderer& renderer, ImageRgbaU8& colorBuffer, ImageF32& depthBuffer);
+	// Once an object has passed game-specific occlusion tests, give it to the renderer using renderer_giveTask
+	// The queued work is executed during the next call to renderer_end
+	void renderer_giveTask(Renderer& renderer, const Model& model, const Transform3D &modelToWorldTransform, const Camera &camera);
+	// Finish all the jobs in the rendering context
+	void renderer_end(Renderer& renderer);
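+	// Minimal usage sketch of one render batch (the model handles, transforms, image buffers and camera are
+	// assumed to exist already):
+	//   Renderer renderer = renderer_create();
+	//   // Once per frame:
+	//   renderer_begin(renderer, colorBuffer, depthBuffer);
+	//   renderer_giveTask(renderer, houseModel, houseTransform, camera);
+	//   renderer_giveTask(renderer, treeModel, treeTransform, camera);
+	//   renderer_end(renderer); // Executes the queued work for the whole batch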
+
+	// How to import from the DMF1 format:
+	//   * Only use M_Diffuse_0Tex, M_Diffuse_1Tex or M_Diffuse_2Tex as shaders.
+	//       Place any diffuse texture in texture slot 0 and any lightmap in slot 1.
+	//       Remove any textures that are not used by the shaders.
+	//       The fixed pipeline only checks which textures are used.
+	//   * Make sure that texture names use the correct letter case, or they might not be found on case-sensitive file systems such as those commonly used on Linux.
+	//   See dmf1.cpp for the implementation
+	Model importFromContent_DMF1(const String &fileContent, ResourcePool &pool, int detailLevel = 2);
+
+}
+
+#endif
+

+ 54 - 0
Source/DFPSR/api/timeAPI.cpp

@@ -0,0 +1,54 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "timeAPI.h"
+#include <chrono>
+#include <thread>
+#include <stdint.h>
+
+static bool started = false;
+static std::chrono::time_point<std::chrono::steady_clock> startTime;
+
+double dsr::time_getSeconds() {
+	if (!started) {
+		started = true;
+		startTime = std::chrono::steady_clock::now();
+		return 0.0;
+	} else {
+		auto currentTime = std::chrono::steady_clock::now();
+		std::chrono::duration<double> diff = currentTime - startTime;
+		return diff.count();
+	}
+}
+
+void dsr::time_sleepSeconds(double seconds) {
+	// Returns instantly if there's no delay
+	if (seconds > 0.0) {
+		// Limits the time
+		if (seconds > 9000000000000.0) {
+			seconds = 9000000000000.0;
+		}
+		// The seconds are converted into integer microseconds without any risk of overflow
+		std::this_thread::sleep_for(std::chrono::microseconds((int64_t)(seconds * 1000000.0)));
+	}
+}

+ 37 - 0
Source/DFPSR/api/timeAPI.h

@@ -0,0 +1,37 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_API_TIME
+#define DFPSR_API_TIME
+
+namespace dsr {
+	// Returns the time in seconds since the first call.
+	double time_getSeconds();
+	// Sleeps for a number of seconds to save power and prevent overheating.
+	//   For example, time_sleepSeconds(0.001) will sleep for a millisecond.
+	//   Limits the time to a maximum of nine trillion seconds (roughly 285000 years).
+	void time_sleepSeconds(double seconds);
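+	// Minimal usage sketch of a fixed frame-rate loop (updateAndDraw and running are placeholder names):
+	//   double frameStart = time_getSeconds();
+	//   while (running) {
+	//       updateAndDraw();
+	//       double nextFrame = frameStart + (1.0 / 60.0);
+	//       time_sleepSeconds(nextFrame - time_getSeconds()); // Negative durations return instantly
+	//       frameStart = nextFrame;
+	//   }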
+}
+
+#endif
+

+ 55 - 0
Source/DFPSR/api/types.cpp

@@ -0,0 +1,55 @@
+
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#define DFPSR_INTERNAL_ACCESS
+
+#include "types.h"
+#include "../image/Image.h"
+#include "../image/ImageU8.h"
+#include "../image/ImageU16.h"
+#include "../image/ImageF32.h"
+#include "../image/ImageRgbaU8.h"
+#include "../image/PackOrder.h"
+
+using namespace dsr;
+
+// Null
+ImageU8::ImageU8() {}
+ImageU16::ImageU16() {}
+ImageF32::ImageF32() {}
+ImageRgbaU8::ImageRgbaU8() {}
+MediaMachine::MediaMachine() {}
+
+// Existing shared pointer
+ImageU8::ImageU8(const std::shared_ptr<ImageU8Impl>& image) : std::shared_ptr<ImageU8Impl>(image) {}
+ImageU16::ImageU16(const std::shared_ptr<ImageU16Impl>& image) : std::shared_ptr<ImageU16Impl>(image) {}
+ImageF32::ImageF32(const std::shared_ptr<ImageF32Impl>& image) : std::shared_ptr<ImageF32Impl>(image) {}
+ImageRgbaU8::ImageRgbaU8(const std::shared_ptr<ImageRgbaU8Impl>& image) : std::shared_ptr<ImageRgbaU8Impl>(image) {}
+MediaMachine::MediaMachine(const std::shared_ptr<VirtualMachine>& machine) : std::shared_ptr<VirtualMachine>(machine) {}
+
+// Shallow copy
+ImageU8::ImageU8(const ImageU8Impl& image) : std::shared_ptr<ImageU8Impl>(std::make_shared<ImageU8Impl>(image)) {}
+ImageU16::ImageU16(const ImageU16Impl& image) : std::shared_ptr<ImageU16Impl>(std::make_shared<ImageU16Impl>(image)) {}
+ImageF32::ImageF32(const ImageF32Impl& image) : std::shared_ptr<ImageF32Impl>(std::make_shared<ImageF32Impl>(image)) {}
+ImageRgbaU8::ImageRgbaU8(const ImageRgbaU8Impl& image) : std::shared_ptr<ImageRgbaU8Impl>(std::make_shared<ImageRgbaU8Impl>(image)) {}

+ 201 - 0
Source/DFPSR/api/types.h

@@ -0,0 +1,201 @@
+
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_API_TYPES
+#define DFPSR_API_TYPES
+
+#include <stdint.h>
+#include <memory>
+#include "../image/Color.h"
+#include "../math/IRect.h"
+#include "../base/text.h"
+
+#ifdef DFPSR_INTERNAL_ACCESS
+	#define IMPL_ACCESS public
+#else
+	#define IMPL_ACCESS protected
+#endif
+
+namespace dsr {
+
+enum class PackOrderIndex {
+	RGBA, // Windows
+	BGRA, // Ubuntu
+	ARGB,
+	ABGR
+};
+
+enum class Sampler {
+	Nearest,
+	Linear
+};
+
+enum class ReturnCode {
+	Good,
+	KeyNotFound,
+	ParsingFailure
+};
+
+// A handle to a model.
+class ModelImpl;
+using Model = std::shared_ptr<ModelImpl>;
+
+// A handle to a multi-threaded rendering context.
+class RendererImpl;
+using Renderer = std::shared_ptr<RendererImpl>;
+
+// A handle to a window.
+//  The Window wraps itself around native window backends to abstract away platform-specific details.
+//  It also makes it easy to load and use a graphical interface using the optional component system.
+class DsrWindow;
+using Window = std::shared_ptr<DsrWindow>;
+
+// A handle to a GUI component.
+//   Components are an abstraction for graphical user interfaces, but the abstraction might not always be powerful enough.
+//   * If you're making something advanced that components cannot do,
+//     you can use draw calls and input events directly against the window without using Component.
+class VisualComponent;
+using Component = std::shared_ptr<VisualComponent>;
+
+// A handle to a GUI theme.
+//   Themes describe the visual appearance of an interface.
+//   By having more than one theme for your interface, you can let the user select one.
+class VisualThemeImpl;
+using VisualTheme = std::shared_ptr<VisualThemeImpl>;
+
+// A handle to a media machine.
+//   Media machines can be used to generate, filter and analyze images.
+//   Everything running in a media machine is guaranteed to be 100% deterministic to the last bit.
+//     This reduces the amount of code where maintenance has to be performed during porting.
+//     It also means that any use of float or double is forbidden.
+class VirtualMachine;
+struct MediaMachine : IMPL_ACCESS std::shared_ptr<VirtualMachine> {
+	MediaMachine(); // Defaults to null
+IMPL_ACCESS:
+	explicit MediaMachine(const std::shared_ptr<VirtualMachine>& machine);
+};
+
+// Images
+// Points to a buffer region holding at least height * stride bytes.
+// Each row contains:
+//   * A number of visible pixels
+//   * A number of unused bytes
+//     New or cloned images have their stride aligned to 16 bytes
+//       Stride is the number of bytes from the start of one row to the next
+//     Sub-images share the stride and buffer of their parent
+//       A sub-image's padding bytes may therefore hold pixels that are visible somewhere else in the parent
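+//     Example (assuming a row stride of S bytes): the first byte of the pixel at column x and row y starts
+//       S * y + x * pixelSize bytes into the image's buffer region, where pixelSize is 1 for ImageU8,
+//       2 for ImageU16 and 4 for ImageF32 and ImageRgbaU8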
+
+// 8-bit unsigned integer grayscale image
+class ImageU8Impl;
+struct ImageU8 : IMPL_ACCESS std::shared_ptr<ImageU8Impl> {
+	ImageU8(); // Defaults to null
+IMPL_ACCESS:
+	explicit ImageU8(const std::shared_ptr<ImageU8Impl>& image);
+	explicit ImageU8(const ImageU8Impl& image);
+};
+// Invariant:
+//    * Each row's start and stride are aligned to 16 bytes in memory (16 bytes = 16 pixels)
+//      This allows reading a full SIMD vector at the end of each row without violating memory bounds
+//    * No other image displays pixels from its padding
+//      This allows writing a full SIMD vector at the end of each row without making visible changes outside of the image's bounds
+struct AlignedImageU8 : public ImageU8 {
+	AlignedImageU8() {} // Defaults to null
+IMPL_ACCESS:
+	explicit AlignedImageU8(const std::shared_ptr<ImageU8Impl>& image) : ImageU8(image) {}
+	explicit AlignedImageU8(const ImageU8Impl& image) : ImageU8(image) {}
+};
+
+// 16-bit unsigned integer grayscale image
+class ImageU16Impl;
+struct ImageU16 : IMPL_ACCESS std::shared_ptr<ImageU16Impl> {
+	ImageU16(); // Defaults to null
+IMPL_ACCESS:
+	explicit ImageU16(const std::shared_ptr<ImageU16Impl>& image);
+	explicit ImageU16(const ImageU16Impl& image);
+};
+// Invariant:
+//    * Each row's start and stride are aligned to 16 bytes in memory (16 bytes = 8 pixels)
+//      This allows reading a full SIMD vector at the end of each row without violating memory bounds
+//    * No other image displays pixels from its padding
+//      This allows writing a full SIMD vector at the end of each row without making visible changes outside of the image's bounds
+struct AlignedImageU16 : public ImageU16 {
+	AlignedImageU16() {} // Defaults to null
+IMPL_ACCESS:
+	explicit AlignedImageU16(const std::shared_ptr<ImageU16Impl>& image) : ImageU16(image) {}
+	explicit AlignedImageU16(const ImageU16Impl& image) : ImageU16(image) {}
+};
+
+// 32-bit floating-point grayscale image
+class ImageF32Impl;
+struct ImageF32 : IMPL_ACCESS std::shared_ptr<ImageF32Impl> {
+	ImageF32(); // Defaults to null
+IMPL_ACCESS:
+	explicit ImageF32(const std::shared_ptr<ImageF32Impl>& image);
+	explicit ImageF32(const ImageF32Impl& image);
+};
+// Invariant:
+//    * Each row's start and stride are aligned to 16 bytes in memory (16 bytes = 4 pixels)
+//      This allows reading a full SIMD vector at the end of each row without violating memory bounds
+//    * No other image displays pixels from its padding
+//      This allows writing a full SIMD vector at the end of each row without making visible changes outside of the image's bounds
+struct AlignedImageF32 : public ImageF32 {
+	AlignedImageF32() {} // Defaults to null
+IMPL_ACCESS:
+	explicit AlignedImageF32(const std::shared_ptr<ImageF32Impl>& image) : ImageF32(image) {}
+	explicit AlignedImageF32(const ImageF32Impl& image) : ImageF32(image) {}
+};
+
+// 4x8-bit unsigned integer RGBA color image
+class ImageRgbaU8Impl;
+struct ImageRgbaU8 : IMPL_ACCESS std::shared_ptr<ImageRgbaU8Impl> {
+	ImageRgbaU8(); // Defaults to null
+IMPL_ACCESS:
+	explicit ImageRgbaU8(const std::shared_ptr<ImageRgbaU8Impl>& image);
+	explicit ImageRgbaU8(const ImageRgbaU8Impl& image);
+};
+// Invariant:
+//    * Each row's start and stride are aligned to 16 bytes in memory (16 bytes = 4 pixels)
+//      This allows reading a full SIMD vector at the end of each row without violating memory bounds
+//    * No other image displays pixels from its padding
+//      This allows writing a full SIMD vector at the end of each row without making visible changes outside of the image's bounds
+struct AlignedImageRgbaU8 : public ImageRgbaU8 {
+	AlignedImageRgbaU8() {} // Defaults to null
+IMPL_ACCESS:
+	explicit AlignedImageRgbaU8(const std::shared_ptr<ImageRgbaU8Impl>& image) : ImageRgbaU8(image) {}
+	explicit AlignedImageRgbaU8(const ImageRgbaU8Impl& image) : ImageRgbaU8(image) {}
+};
+// Invariant:
+//    * Using the default RGBA pack order
+//      This removes the need to implement filters for different pack orders when RGBA can be safely assumed
+//      Use AlignedImageRgbaU8 instead if any pack order is acceptable
+struct OrderedImageRgbaU8 : public AlignedImageRgbaU8 {
+	OrderedImageRgbaU8() {} // Defaults to null
+IMPL_ACCESS:
+	explicit OrderedImageRgbaU8(const std::shared_ptr<ImageRgbaU8Impl>& image) : AlignedImageRgbaU8(image) {}
+	explicit OrderedImageRgbaU8(const ImageRgbaU8Impl& image) : AlignedImageRgbaU8(image) {}
+};
+
+}
+
+#endif

+ 88 - 0
Source/DFPSR/base/Buffer.cpp

@@ -0,0 +1,88 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "Buffer.h"
+#include "../math/scalar.h"
+
+using namespace dsr;
+
+// buffer_alignment must be a power of two for buffer_alignment_mask to work
+static const int buffer_alignment = 16;
+static const uintptr_t buffer_alignment_mask = ~((uintptr_t)(buffer_alignment - 1));
+
+// If the C standard library exposes the C11 features (indicated by _ISOC11_SOURCE), we may assume that aligned_alloc is available
+#ifdef _ISOC11_SOURCE
+	// Allocate data of newSize and write the corresponding destructor function to targetDestructor
+	static uint8_t* buffer_allocate(int32_t newSize, std::function<void(uint8_t *)>& targetDestructor) {
+		uint8_t* allocation = (uint8_t*)aligned_alloc(buffer_alignment, newSize);
+		targetDestructor = [](uint8_t *data) { free(data); };
+		return allocation;
+	}
+#else
+	// Allocate data of newSize and write the corresponding destructor function to targetDestructor
+	static uint8_t* buffer_allocate(int32_t newSize, std::function<void(uint8_t *)>& targetDestructor) {
+		uintptr_t padding = buffer_alignment - 1;
+		uint8_t* allocation = (uint8_t*)malloc(newSize + padding);
+		uint8_t* aligned = (uint8_t*)(((uintptr_t)allocation + padding) & buffer_alignment_mask);
+		// Remember how many bytes the pointer was moved forward, so that the destructor can free the original allocation
+		uintptr_t offset = (uintptr_t)(aligned - allocation);
+		targetDestructor = [offset](uint8_t *data) { free(data - offset); };
+		return aligned;
+	}
+#endif
+
+Buffer::Buffer(int32_t newSize) :
+  size(newSize),
+  bufferSize(roundUp(newSize, buffer_alignment)) {
+	this->data = buffer_allocate(this->bufferSize, this->destructor);
+	this->set(0);
+}
+
+Buffer::Buffer(int32_t newSize, uint8_t *newData)
+: size(newSize), bufferSize(newSize), data(newData), destructor([](uint8_t *data) { free(data); }) {}
+
+Buffer::~Buffer() {
+	this->destructor(this->data);
+}
+
+void Buffer::replaceDestructor(const std::function<void(uint8_t *)>& newDestructor) {
+	this->destructor = newDestructor;
+}
+
+void Buffer::set(uint8_t value) {
+	memset(this->data, value, this->bufferSize);
+}
+
+std::shared_ptr<Buffer> Buffer::clone() const {
+	std::shared_ptr<Buffer> newBuffer = std::make_shared<Buffer>(this->size);
+	memcpy(newBuffer->data, this->data, this->size);
+	return newBuffer;
+}
+
+std::shared_ptr<Buffer> Buffer::create(int32_t newSize) {
+	return std::make_shared<Buffer>(newSize);
+}
+
+std::shared_ptr<Buffer> Buffer::create(int32_t newSize, uint8_t *newData) {
+	return std::make_shared<Buffer>(newSize, newData);
+}
+

+ 77 - 0
Source/DFPSR/base/Buffer.h

@@ -0,0 +1,77 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_BUFFER
+#define DFPSR_BUFFER
+
+#include <stdint.h>
+#include <memory>
+#include <functional>
+#include "SafePointer.h"
+
+namespace dsr {
+
+class Buffer {
+public:
+	const int32_t size; // The actually used data
+	const int32_t bufferSize; // The accessible data
+private:
+	uint8_t *data;
+	std::function<void(uint8_t *)> destructor;
+public:
+	explicit Buffer(int32_t newSize);
+	Buffer(int32_t newSize, uint8_t *newData);
+	~Buffer();
+public:
+	void replaceDestructor(const std::function<void(uint8_t *)>& newDestructor);
+	void set(uint8_t value);
+	uint8_t *getUnsafeData() {
+		return this->data;
+	}
+	// Get the buffer
+	template <typename T>
+	SafePointer<T> getSafeData(const char *name) {
+		return SafePointer<T>(name, (T*)this->data, this->bufferSize, (T*)this->data);
+	}
+	// Get the buffer
+	template <typename T>
+	const SafePointer<T> getSafeData(const char *name) const {
+		return SafePointer<T>(name, (T*)this->data, this->bufferSize, (T*)this->data);
+	}
+	// Get a part of the buffer
+	template <typename T>
+	SafePointer<T> getSafeSlice(const char *name, int offset, int size) {
+		return SafePointer<T>(name, (T*)this->data, this->bufferSize, (T*)this->data).slice(name, offset, size);
+	}
+	std::shared_ptr<Buffer> clone() const;
+	static std::shared_ptr<Buffer> create(int32_t newSize);
+	static std::shared_ptr<Buffer> create(int32_t newSize, uint8_t *newData);
+	// No implicit copies, only pass by reference or pointer
+	Buffer(const Buffer&) = delete;
+	Buffer& operator=(const Buffer&) = delete;
+};
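+
+// Minimal usage sketch (the element type, size and name are arbitrary):
+//   std::shared_ptr<Buffer> pixels = Buffer::create(256);
+//   SafePointer<uint32_t> safePixels = pixels->getSafeData<uint32_t>("Pixel buffer");
+//   safePixels[0] = 0xFF00FF00u; // Bound checked in debug mode through SafePointer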
+
+}
+
+#endif
+

+ 78 - 0
Source/DFPSR/base/SafePointer.cpp

@@ -0,0 +1,78 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "SafePointer.h"
+#include "text.h"
+
+using namespace dsr;
+
+void dsr::assertNonNegativeSize(int size) {
+	if (size < 0) {
+		throwError(U"Negative size of SafePointer!\n");
+	}
+}
+
+void dsr::assertInsideSafePointer(const char* method, const char* name, const uint8_t* pointer, const uint8_t* data, const uint8_t* regionStart, const uint8_t* regionEnd, int claimedSize, int elementSize) {
+	if (pointer < regionStart || pointer + claimedSize > regionEnd) {
+		String message;
+		if (data == nullptr) {
+			string_append(message, U"\n _____________________ SafePointer null exception! _____________________\n");
+		} else {
+			string_append(message, U"\n _________________ SafePointer out of bound exception! _________________\n");
+		}
+		string_append(message, U"/\n");
+		string_append(message, U"|  Name: ", name, U"\n");
+		string_append(message, U"|  Method: ", method, U"\n");
+		string_append(message, U"|  Region: ", (uintptr_t)regionStart, U" to ", (uintptr_t)regionEnd, U"\n");
+		string_append(message, U"|  Region size: ", (intptr_t)(regionEnd - regionStart), U" bytes\n");
+		string_append(message, U"|  Base pointer: ", (uintptr_t)data, U"\n");
+		string_append(message, U"|  Requested pointer: ", (uintptr_t)pointer, U"\n");
+		string_append(message, U"|  Requested size: ", claimedSize, U" bytes\n");
+
+		int startOffset = (int)((intptr_t)pointer - (intptr_t)regionStart);
+		int baseOffset = (int)((intptr_t)pointer - (intptr_t)data);
+
+		// Index relative to allocation start
+		//   regionStart is the start of the accessible memory region
+		if (startOffset != baseOffset) {
+			string_append(message, U"|  Start offset: ", startOffset, U" bytes\n");
+			if (startOffset % elementSize == 0) {
+				int index = startOffset / elementSize;
+				int elementCount = ((int)((intptr_t)regionEnd - (intptr_t)regionStart)) / elementSize;
+				string_append(message, U"|    Start index: ", index, U" [0..", (elementCount - 1), U"]\n");
+			}
+		}
+
+		// Base index relative to the stored pointer within the region
+		//   data is the base of the allocation at index zero
+		string_append(message, U"|  Base offset: ", baseOffset, U" bytes\n");
+		if (baseOffset % elementSize == 0) {
+			int index = baseOffset / elementSize;
+			int elementCount = ((int)((intptr_t)regionEnd - (intptr_t)data)) / elementSize;
+			string_append(message, U"|    Base index: ", index, U" [0..", (elementCount - 1), U"]\n");
+		}
+		string_append(message, U"\\_______________________________________________________________________\n\n");
+		throwError(message);
+	}
+}
+

+ 243 - 0
Source/DFPSR/base/SafePointer.h

@@ -0,0 +1,243 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_SAFE_POINTER
+#define DFPSR_SAFE_POINTER
+
+#include <cstring>
+#include <cassert>
+#include <stdint.h>
+
+// Disabled in release mode
+#ifndef NDEBUG
+	#define SAFE_POINTER_CHECKS
+#endif
+
+namespace dsr {
+
+// Generic implementations
+void assertInsideSafePointer(const char* method, const char* name, const uint8_t* pointer, const uint8_t* data, const uint8_t* regionStart, const uint8_t* regionEnd, int claimedSize, int elementSize);
+void assertNonNegativeSize(int size);
+
+template<typename T>
+class SafePointer {
+private:
+	// A pointer from regionStart to regionEnd
+	//   Mutable because only the data being pointed to is write protected in a const SafePointer
+	mutable T *data;
+	#ifdef SAFE_POINTER_CHECKS
+		mutable T *regionStart;
+		mutable T *regionEnd;
+		mutable const char * name;
+	#endif
+public:
+	#ifdef SAFE_POINTER_CHECKS
+	SafePointer() : data(nullptr), regionStart(nullptr), regionEnd(nullptr), name("Unnamed null pointer") {}
+	explicit SafePointer(const char* name) : data(nullptr), regionStart(nullptr), regionEnd(nullptr), name(name) {}
+	SafePointer(const char* name, T* regionStart, int regionByteSize = sizeof(T)) : data(regionStart), regionStart(regionStart), regionEnd((T*)(((uint8_t*)regionStart) + (intptr_t)regionByteSize)), name(name) {
+		assertNonNegativeSize(regionByteSize);
+	}
+	SafePointer(const char* name, T* regionStart, int regionByteSize, T* data) : data(data), regionStart(regionStart), regionEnd((T*)(((uint8_t*)regionStart) + (intptr_t)regionByteSize)), name(name) {
+		assertNonNegativeSize(regionByteSize);
+	}
+	#else
+	SafePointer() : data(nullptr) {}
+	explicit SafePointer(const char* name) : data(nullptr) {}
+	SafePointer(const char* name, T* regionStart, int regionByteSize = sizeof(T)) : data(regionStart) {}
+	SafePointer(const char* name, T* regionStart, int regionByteSize, T* data) : data(data) {}
+	#endif
+public:
+	#ifdef SAFE_POINTER_CHECKS
+	inline void assertInside(const char* method, const T* pointer, int size = (int)sizeof(T)) const {
+		assertInsideSafePointer(method, this->name, (const uint8_t*)pointer, (const uint8_t*)this->data, (const uint8_t*)this->regionStart, (const uint8_t*)this->regionEnd, size, sizeof(T));
+	}
+	inline void assertInside(const char* method) const {
+		this->assertInside(method, this->data);
+	}
+	#endif
+public:
+	// Back to unsafe pointer with a clearly visible method name as a warning
+	// The same can be done by mistake using the & operator on a reference
+	// p.getUnsafe() = &(*p) = &(p[0])
+	inline T* getUnsafe() {
+		#ifdef SAFE_POINTER_CHECKS
+		this->assertInside("getUnsafe");
+		#endif
+		return this->data;
+	}
+	inline const T* getUnsafe() const {
+		#ifdef SAFE_POINTER_CHECKS
+		this->assertInside("getUnsafe");
+		#endif
+		return this->data;
+	}
+	// Returns the pointer's address modulo byteAlignment
+	// Returns 0 if the pointer is aligned to byteAlignment
+	inline int getAlignmentOffset(int byteAlignment) const {
+		return ((uintptr_t)this->data) % byteAlignment;
+	}
+	inline bool isNull() const {
+		return this->data == nullptr;
+	}
+	inline bool isNotNull() const {
+		return this->data != nullptr;
+	}
+	// Get a new safe pointer covering size bytes, starting byteOffset bytes after the current pointer
+	inline SafePointer<T> slice(const char* name, int byteOffset, int size) {
+		T *newStart = (T*)(((uint8_t*)(this->data)) + (intptr_t)byteOffset);
+		#ifdef SAFE_POINTER_CHECKS
+		assertInside("getSlice", newStart, size);
+		return SafePointer<T>(name, newStart, size);
+		#else
+		return SafePointer<T>(name, newStart);
+		#endif
+	}
+	inline const SafePointer<T> slice(const char* name, int byteOffset, int size) const {
+		T *newStart = (T*)(((uint8_t*)(this->data)) + (intptr_t)byteOffset);
+		#ifdef SAFE_POINTER_CHECKS
+		assertInside("getSlice", newStart, size);
+		return SafePointer<T>(name, newStart, size);
+		#else
+		return SafePointer<T>(name, newStart);
+		#endif
+	}
+	// Dereference
+	template <typename S = T>
+	inline S& get() {
+		#ifdef SAFE_POINTER_CHECKS
+		assertInside("get", this->data, sizeof(S));
+		#endif
+		return *((S*)this->data);
+	}
+	template <typename S = T>
+	inline const S& get() const {
+		#ifdef SAFE_POINTER_CHECKS
+		assertInside("get", this->data, sizeof(S));
+		#endif
+		return *((const S*)this->data);
+	}
+	inline T& operator*() {
+		#ifdef SAFE_POINTER_CHECKS
+		assertInside("operator*");
+		#endif
+		return *(this->data);
+	}
+	inline const T& operator*() const {
+		#ifdef SAFE_POINTER_CHECKS
+		assertInside("operator*");
+		#endif
+		return *(this->data);
+	}
+	inline T& operator[] (int index) {
+		T* address = this->data + index;
+		#ifdef SAFE_POINTER_CHECKS
+		assertInside("operator[]", address);
+		#endif
+		return *address;
+	}
+	inline const T& operator[] (int index) const {
+		T* address = this->data + index;
+		#ifdef SAFE_POINTER_CHECKS
+		assertInside("operator[]", address);
+		#endif
+		return *address;
+	}
+	inline void increaseBytes(intptr_t byteOffset) const {
+		this->data = (T*)(((uint8_t*)(this->data)) + byteOffset);
+	}
+	inline void increaseElements(intptr_t elementOffset) const {
+		this->data += elementOffset;
+	}
+	inline SafePointer<T>& operator+=(intptr_t elementOffset) {
+		this->data += elementOffset;
+		return *this;
+	}
+	inline const SafePointer<T>& operator+=(intptr_t elementOffset) const {
+		this->data += elementOffset;
+		return *this;
+	}
+	inline SafePointer<T>& operator-=(intptr_t elementOffset) {
+		this->data -= elementOffset;
+		return *this;
+	}
+	inline const SafePointer<T>& operator-=(intptr_t elementOffset) const {
+		this->data -= elementOffset;
+		return *this;
+	}
+	inline SafePointer<T> operator+(intptr_t elementOffset) {
+		SafePointer<T> result = *this;
+		result += elementOffset;
+		return result;
+	}
+	inline const SafePointer<T> operator+(intptr_t elementOffset) const {
+		SafePointer<T> result = *this;
+		result += elementOffset;
+		return result;
+	}
+	inline SafePointer<T> operator-(intptr_t elementOffset) {
+		SafePointer<T> result = *this;
+		result -= elementOffset;
+		return result;
+	}
+	inline const SafePointer<T> operator-(intptr_t elementOffset) const {
+		SafePointer<T> result = *this;
+		result -= elementOffset;
+		return result;
+	}
+	inline const SafePointer<T>& operator=(const SafePointer<T>& source) const {
+		this->data = source.data;
+		#ifdef SAFE_POINTER_CHECKS
+			this->regionStart = source.regionStart;
+			this->regionEnd = source.regionEnd;
+			this->name = source.name;
+		#endif
+		return *this;
+	}
+};
+
+template <typename T, typename S>
+inline void safeMemoryCopy(SafePointer<T> target, const SafePointer<S>& source, int byteSize) {
+	#ifdef SAFE_POINTER_CHECKS
+		// Both target and source must be in valid memory
+		target.assertInside("memoryCopy (target)", target.getUnsafe(), (size_t)byteSize);
+		source.assertInside("memoryCopy (source)", source.getUnsafe(), (size_t)byteSize);
+		// memcpy doesn't allow pointer aliasing
+		// TODO: Make a general assertion with the same style as out of bound exceptions
+		assert(((const uint8_t*)target.getUnsafe()) + byteSize <= (uint8_t*)source.getUnsafe() || ((const uint8_t*)source.getUnsafe()) + byteSize <= (uint8_t*)target.getUnsafe());
+	#endif
+	std::memcpy(target.getUnsafe(), source.getUnsafe(), (size_t)byteSize);
+}
+
+template <typename T>
+inline void safeMemorySet(SafePointer<T>& target, uint8_t value, int byteSize) {
+	#ifdef SAFE_POINTER_CHECKS
+		// Target must be in valid memory
+		target.assertInside("memoryCopy (target)", target.getUnsafe(), byteSize);
+	#endif
+	std::memset(target.getUnsafe(), value, (size_t)byteSize);
+}
+
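+// Minimal usage sketch (the backing array, names and sizes are arbitrary):
+//   uint8_t backing[64];
+//   SafePointer<uint8_t> bytes("Example region", backing, 64);
+//   safeMemorySet(bytes, 0, 64);                              // Checked against the 64 byte region in debug mode
+//   SafePointer<uint8_t> tail = bytes.slice("Example tail", 32, 32);
+//   tail += 8;                                                // Moving the pointer does not change the region used for debug bound checks
+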
+}
+
+#endif
+

+ 56 - 0
Source/DFPSR/base/endian.h

@@ -0,0 +1,56 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+// Endianness abstraction layer for manipulating byte arrays within unsigned integers
+//   ENDIAN_POS_ADDR
+//     Bit-shift in the positive direction of addresses
+//     Precondition: OFFSET % 8 == 0
+//   ENDIAN_NEG_ADDR
+//     Bit-shift in the negative direction of addresses
+//     Precondition: OFFSET % 8 == 0
+//   ENDIAN32_BYTE_0, A mask from the byte array {255, 0, 0, 0}
+//   ENDIAN32_BYTE_1, A mask from the byte array {0, 255, 0, 0}
+//   ENDIAN32_BYTE_2, A mask from the byte array {0, 0, 255, 0}
+//   ENDIAN32_BYTE_3, A mask from the byte array {0, 0, 0, 255}
+//   The DSR_BIG_ENDIAN flag should be given manually as a compiler argument when compiling for big-endian hardware
+
+#ifndef DFPSR_ENDIAN
+#define DFPSR_ENDIAN
+	#include <stdint.h>
+	// TODO: Detect endianness automatically
+	#ifdef DSR_BIG_ENDIAN
+		// TODO: Not yet tested on a big-endian machine!
+		#define ENDIAN_POS_ADDR(VALUE,OFFSET) ((VALUE) >> (OFFSET))
+		#define ENDIAN_NEG_ADDR(VALUE,OFFSET) ((VALUE) << (OFFSET))
+		#define ENDIAN32_BYTE_0 0xFF000000u
+		static_assert(false, "Big-endian mode has not been officially tested!");
+	#else
+		#define ENDIAN_POS_ADDR(VALUE,OFFSET) ((VALUE) << (OFFSET))
+		#define ENDIAN_NEG_ADDR(VALUE,OFFSET) ((VALUE) >> (OFFSET))
+		#define ENDIAN32_BYTE_0 0x000000FFu
+	#endif
+	#define ENDIAN32_BYTE_1 ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, 8)
+	#define ENDIAN32_BYTE_2 ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, 16)
+	#define ENDIAN32_BYTE_3 ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, 24)
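+	// Example: ENDIAN32_BYTE_3 masks the byte stored at address offset 3 on both little-endian and big-endian
+	// targets, so a test like ((packedColor & ENDIAN32_BYTE_3) == 0u) inspects the same byte in memory
+	// regardless of endianness (packedColor is just a placeholder name).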
+#endif
+

+ 1624 - 0
Source/DFPSR/base/simd.h

@@ -0,0 +1,1624 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+// Hardware abstraction layer for portable SIMD math.
+//    Covers a small intersection of SSE2 and NEON in order to reduce the number
+//    of bugs from multiple implementations when nothing advanced is required.
+
+#ifndef DFPSR_SIMD
+#define DFPSR_SIMD
+	#include <stdint.h>
+	#include <cassert>
+	#include "SafePointer.h"
+	#include "../math/FVector.h"
+	#include "../math/IVector.h"
+	#include "../math/UVector.h"
+	#define ALIGN16  __attribute__((aligned(16)))
+
+	// To allow turning off SIMD intrinsics for testing
+	#ifdef __SSE2__
+		// Comment out this line to test without SSE2
+		#define USE_SSE2
+	#elif __ARM_NEON
+		// Comment out this line to test without NEON
+		#define USE_NEON
+	#endif
+
+	// Everything declared in here handles things specific for SSE.
+	// Direct use of the macros will not provide portability to all hardware.
+	#ifdef USE_SSE2
+		#define USE_BASIC_SIMD
+		#define USE_DIRECT_SIMD_MEMORY_ACCESS
+		#include <emmintrin.h> // SSE2
+
+		#ifdef __AVX2__
+			#include <immintrin.h> // AVX2
+			#define GATHER_U32_AVX2(SOURCE, FOUR_OFFSETS, SCALE) _mm_i32gather_epi32((const int32_t*)(SOURCE), FOUR_OFFSETS, SCALE)
+			// Comment out this line to test without AVX2
+			#define USE_AVX2
+		#endif
+
+		// Vector types
+		#define SIMD_F32x4 __m128
+		#define SIMD_U8x16 __m128i
+		#define SIMD_U16x8 __m128i
+		#define SIMD_U32x4 __m128i
+		#define SIMD_I32x4 __m128i
+
+		// Vector uploads in address order
+		#define LOAD_VECTOR_F32_SIMD(A, B, C, D) _mm_set_ps(D, C, B, A)
+		#define LOAD_SCALAR_F32_SIMD(A) _mm_set1_ps(A)
+		#define LOAD_VECTOR_U8_SIMD(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) _mm_set_epi8(P, O, N, M, L, K, J, I, H, G, F, E, D, C, B, A)
+		#define LOAD_SCALAR_U8_SIMD(A) _mm_set1_epi8(A)
+		#define LOAD_VECTOR_U16_SIMD(A, B, C, D, E, F, G, H) _mm_set_epi16(H, G, F, E, D, C, B, A)
+		#define LOAD_SCALAR_U16_SIMD(A) _mm_set1_epi16(A)
+		#define LOAD_VECTOR_U32_SIMD(A, B, C, D) _mm_set_epi32(D, C, B, A)
+		#define LOAD_SCALAR_U32_SIMD(A) _mm_set1_epi32(A)
+		#define LOAD_VECTOR_I32_SIMD(A, B, C, D) _mm_set_epi32(D, C, B, A)
+		#define LOAD_SCALAR_I32_SIMD(A) _mm_set1_epi32(A)
+
+		// Conversions
+		#define F32_TO_I32_SIMD(A) _mm_cvttps_epi32(A)
+		#define F32_TO_U32_SIMD(A) _mm_cvttps_epi32(A)
+		#define I32_TO_F32_SIMD(A) _mm_cvtepi32_ps(A)
+		#define U32_TO_F32_SIMD(A) _mm_cvtepi32_ps(A)
+
+		// Unpacking conversions
+		#define U8_LOW_TO_U16_SIMD(A) _mm_unpacklo_epi8(A, _mm_set1_epi8(0))
+		#define U8_HIGH_TO_U16_SIMD(A) _mm_unpackhi_epi8(A, _mm_set1_epi8(0))
+		#define U16_LOW_TO_U32_SIMD(A) _mm_unpacklo_epi16(A, _mm_set1_epi16(0))
+		#define U16_HIGH_TO_U32_SIMD(A) _mm_unpackhi_epi16(A, _mm_set1_epi16(0))
+
+		// Saturated packing
+		//   Credit: Using ideas from Victoria Zhislina's NEON_2_SSE.h header from the Intel corporation, but not trying to emulate NEON
+		inline SIMD_U8x16 PACK_SAT_U16_TO_U8(const SIMD_U16x8& a, const SIMD_U16x8& b) {
+			SIMD_U16x8 mask, a2, b2;
+			mask = _mm_set1_epi16(0x7fff);
+			a2 = _mm_and_si128(a, mask);
+			a2 = _mm_or_si128(a2, _mm_and_si128(_mm_cmpgt_epi16(a2, a), mask));
+			b2 = _mm_and_si128(b, mask);
+			b2 = _mm_or_si128(b2, _mm_and_si128(_mm_cmpgt_epi16(b2, b), mask));
+			return _mm_packus_epi16(a2, b2);
+		}
+
+		// Reinterpret casting
+		#define REINTERPRET_U32_TO_U8_SIMD(A) (A)
+		#define REINTERPRET_U32_TO_U16_SIMD(A) (A)
+		#define REINTERPRET_U8_TO_U32_SIMD(A) (A)
+		#define REINTERPRET_U16_TO_U32_SIMD(A) (A)
+		#define REINTERPRET_U32_TO_I32_SIMD(A) (A)
+		#define REINTERPRET_I32_TO_U32_SIMD(A) (A)
+
+		// Vector float operations returning SIMD_F32x4
+		#define ADD_F32_SIMD(A, B) _mm_add_ps(A, B)
+		#define SUB_F32_SIMD(A, B) _mm_sub_ps(A, B)
+		#define MUL_F32_SIMD(A, B) _mm_mul_ps(A, B)
+
+		// Vector integer operations returning SIMD_I32x4
+		#define ADD_I32_SIMD(A, B) _mm_add_epi32(A, B)
+		#define SUB_I32_SIMD(A, B) _mm_sub_epi32(A, B)
+		// 32-bit integer multiplications are not available on SSE2.
+
+		// Vector integer operations returning SIMD_U32x4
+		#define ADD_U32_SIMD(A, B) _mm_add_epi32(A, B)
+		#define SUB_U32_SIMD(A, B) _mm_sub_epi32(A, B)
+		// 32-bit integer multiplications are not available on SSE2.
+
+		// Vector integer operations returning SIMD_U16x8
+		#define ADD_U16_SIMD(A, B) _mm_add_epi16(A, B)
+		#define SUB_U16_SIMD(A, B) _mm_sub_epi16(A, B)
+		#define MUL_U16_SIMD(A, B) _mm_mullo_epi16(A, B)
+
+		// Vector integer operations returning SIMD_U8x16
+		#define ADD_U8_SIMD(A, B) _mm_add_epi8(A, B)
+		#define ADD_SAT_U8_SIMD(A, B) _mm_adds_epu8(A, B) // Saturated addition
+		#define SUB_U8_SIMD(A, B) _mm_sub_epi8(A, B)
+		// No 8-bit multiplications
+
+		// Statistics
+		#define MIN_F32_SIMD(A, B) _mm_min_ps(A, B)
+		#define MAX_F32_SIMD(A, B) _mm_max_ps(A, B)
+
+		// Bitwise
+		#define BITWISE_AND_U32_SIMD(A, B) _mm_and_si128(A, B)
+		#define BITWISE_OR_U32_SIMD(A, B) _mm_or_si128(A, B)
+	#endif
+
+	// Everything declared in here handles things specific for NEON.
+	// Direct use of the macros will not provide portability to all hardware.
+	#ifdef USE_NEON
+		#define USE_BASIC_SIMD
+		#include <arm_neon.h> // NEON
+
+		// Vector types
+		#define SIMD_F32x4 float32x4_t
+		#define SIMD_U8x16 uint8x16_t
+		#define SIMD_U16x8 uint16x8_t
+		#define SIMD_U32x4 uint32x4_t
+		#define SIMD_I32x4 int32x4_t
+
+		// Vector uploads in address order
+		inline SIMD_F32x4 LOAD_VECTOR_F32_SIMD(float a, float b, float c, float d) {
+			float data[4] ALIGN16 = {a, b, c, d};
+			return vld1q_f32(data);
+		}
+		inline SIMD_F32x4 LOAD_SCALAR_F32_SIMD(float a) {
+			return vdupq_n_f32(a);
+		}
+		inline SIMD_U8x16 LOAD_VECTOR_U8_SIMD(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e, uint8_t f, uint8_t g, uint8_t h,
+		                                      uint8_t i, uint8_t j, uint8_t k, uint8_t l, uint8_t m, uint8_t n, uint8_t o, uint8_t p) {
+			uint8_t data[16] ALIGN16 = {a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p};
+			return vld1q_u8(data);
+		}
+		inline SIMD_U8x16 LOAD_SCALAR_U8_SIMD(uint8_t a) {
+			return vdupq_n_u8(a);
+		}
+		inline SIMD_U16x8 LOAD_VECTOR_U16_SIMD(uint16_t a, uint16_t b, uint16_t c, uint16_t d, uint16_t e, uint16_t f, uint16_t g, uint16_t h) {
+			uint16_t data[8] ALIGN16 = {a, b, c, d, e, f, g, h};
+			return vld1q_u16(data);
+		}
+		inline SIMD_U16x8 LOAD_SCALAR_U16_SIMD(uint16_t a) {
+			return vdupq_n_u16(a);
+		}
+		inline SIMD_U32x4 LOAD_VECTOR_U32_SIMD(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+			uint32_t data[4] ALIGN16 = {a, b, c, d};
+			return vld1q_u32(data);
+		}
+		inline SIMD_U32x4 LOAD_SCALAR_U32_SIMD(uint32_t a) {
+			return vdupq_n_u32(a);
+		}
+		inline SIMD_I32x4 LOAD_VECTOR_I32_SIMD(int32_t a, int32_t b, int32_t c, int32_t d) {
+			int32_t data[4] ALIGN16 = {a, b, c, d};
+			return vld1q_s32(data);
+		}
+		inline SIMD_I32x4 LOAD_SCALAR_I32_SIMD(int32_t a) {
+			return vdupq_n_s32(a);
+		}
+
+		// Conversions
+		#define F32_TO_I32_SIMD(A) vcvtq_s32_f32(A)
+		#define F32_TO_U32_SIMD(A) vcvtq_u32_f32(A)
+		#define I32_TO_F32_SIMD(A) vcvtq_f32_s32(A)
+		#define U32_TO_F32_SIMD(A) vcvtq_f32_u32(A)
+
+		// Unpacking conversions
+		#define U8_LOW_TO_U16_SIMD(A) vmovl_u8(vget_low_u8(A))
+		#define U8_HIGH_TO_U16_SIMD(A) vmovl_u8(vget_high_u8(A))
+		#define U16_LOW_TO_U32_SIMD(A) vmovl_u16(vget_low_u16(A))
+		#define U16_HIGH_TO_U32_SIMD(A) vmovl_u16(vget_high_u16(A))
+
+		// Saturated packing
+		#define PACK_SAT_U16_TO_U8(A, B) vcombine_u8(vqmovn_u16(A), vqmovn_u16(B))
+
+		// Reinterpret casting
+		#define REINTERPRET_U32_TO_U8_SIMD(A) vreinterpretq_u8_u32(A)
+		#define REINTERPRET_U32_TO_U16_SIMD(A) vreinterpretq_u16_u32(A)
+		#define REINTERPRET_U8_TO_U32_SIMD(A) vreinterpretq_u32_u8(A)
+		#define REINTERPRET_U16_TO_U32_SIMD(A) vreinterpretq_u32_u16(A)
+		#define REINTERPRET_U32_TO_I32_SIMD(A) vreinterpretq_s32_u32(A)
+		#define REINTERPRET_I32_TO_U32_SIMD(A) vreinterpretq_u32_s32(A)
+
+		// Vector float operations returning SIMD_F32x4
+		#define ADD_F32_SIMD(A, B) vaddq_f32(A, B)
+		#define SUB_F32_SIMD(A, B) vsubq_f32(A, B)
+		#define MUL_F32_SIMD(A, B) vmulq_f32(A, B)
+
+		// Vector integer operations returning SIMD_I32x4
+		#define ADD_I32_SIMD(A, B) vaddq_s32(A, B)
+		#define SUB_I32_SIMD(A, B) vsubq_s32(A, B)
+		#define MUL_I32_NEON(A, B) vmulq_s32(A, B)
+
+		// Vector integer operations returning SIMD_U32x4
+		#define ADD_U32_SIMD(A, B) vaddq_u32(A, B)
+		#define SUB_U32_SIMD(A, B) vsubq_u32(A, B)
+		#define MUL_U32_NEON(A, B) vmulq_u32(A, B)
+
+		// Vector integer operations returning SIMD_U16x8
+		#define ADD_U16_SIMD(A, B) vaddq_u16(A, B)
+		#define SUB_U16_SIMD(A, B) vsubq_u16(A, B)
+		#define MUL_U16_SIMD(A, B) vmulq_u16(A, B)
+
+		// Vector integer operations returning SIMD_U8x16
+		#define ADD_U8_SIMD(A, B) vaddq_u8(A, B)
+		#define ADD_SAT_U8_SIMD(A, B) vqaddq_u8(A, B) // Saturated addition
+		#define SUB_U8_SIMD(A, B) vsubq_u8(A, B)
+		// No 8-bit multiplications
+
+		// Statistics
+		#define MIN_F32_SIMD(A, B) vminq_f32(A, B)
+		#define MAX_F32_SIMD(A, B) vmaxq_f32(A, B)
+
+		// Bitwise
+		#define BITWISE_AND_U32_SIMD(A, B) vandq_u32(A, B)
+		#define BITWISE_OR_U32_SIMD(A, B) vorrq_u32(A, B)
+	#endif
+
+	/*
+	The vector types below (F32x4, I32x4, U32x4, U16x8, U8x16) are portable across different CPU architectures.
+	When this abstraction layer is mixed with handwritten SIMD intrinsics:
+		Use "USE_SSE2" instead of "__SSE2__"
+		Use "USE_AVX2" instead of "__AVX2__"
+		Use "USE_NEON" instead of "__ARM_NEON"
+	Portability exceptions:
+		* The "v" variable is the native backend, which is only defined when SIMD is supported by hardware.
+			Only use when USE_BASIC_SIMD is defined.
+			Will not work on scalar emulation.
+		* The "shared_memory" array is only defined for targets with direct access to SIMD registers. (SSE)
+			Only use when USE_DIRECT_SIMD_MEMORY_ACCESS is defined.
+			Will not work on NEON or scalar emulation.
+		* The "emulated" array is only defined when SIMD is turned off.
+			Cannot be used when USE_BASIC_SIMD is defined.
+			Will not work when either SSE or NEON is enabled.
+	*/
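+
+	/*
+	Usage sketch (illustration only, not from the original source):
+	The same code compiles against SSE2, NEON or the scalar fallback, as long as only
+	the portable types and their operators are used. The example assumes the ALIGN16
+	macro defined earlier in this header and a hypothetical "buffer" array.
+
+		ALIGN16 float buffer[8] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+		F32x4 a = F32x4::readAlignedUnsafe(buffer);     // First four elements
+		F32x4 b = F32x4::readAlignedUnsafe(buffer + 4); // Last four elements
+		F32x4 average = (a + b) * 0.5f;                 // Element-wise average
+		average.writeAlignedUnsafe(buffer);             // Overwrite the first half
+	*/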
+
+	union F32x4 {
+		#ifdef USE_BASIC_SIMD
+			public:
+			#ifdef USE_DIRECT_SIMD_MEMORY_ACCESS
+				// Only use if USE_DIRECT_SIMD_MEMORY_ACCESS is defined!
+				// Direct access cannot be done on NEON!
+				float shared_memory[4];
+			#endif
+			// The native SIMD vector, whose concrete type depends on the target architecture
+			// Not accessible while emulating!
+			SIMD_F32x4 v;
+			// Construct a portable vector from a native SIMD vector
+			explicit F32x4(const SIMD_F32x4& v) : v(v) {}
+			// Construct a portable vector from a set of scalars
+			F32x4(float a1, float a2, float a3, float a4) : v(LOAD_VECTOR_F32_SIMD(a1, a2, a3, a4)) {}
+			// Construct a portable vector from a single duplicated scalar
+			explicit F32x4(float scalar) : v(LOAD_SCALAR_F32_SIMD(scalar)) {}
+		#else
+			public:
+			// Emulate a SIMD vector as an array of scalars without hardware support
+			// Only accessible while emulating!
+			float emulated[4];
+			// Construct a portable vector from a set of scalars
+			F32x4(float a1, float a2, float a3, float a4) {
+				this->emulated[0] = a1;
+				this->emulated[1] = a2;
+				this->emulated[2] = a3;
+				this->emulated[3] = a4;
+			}
+			// Construct a portable vector from a single duplicated scalar
+			explicit F32x4(float scalar) {
+				this->emulated[0] = scalar;
+				this->emulated[1] = scalar;
+				this->emulated[2] = scalar;
+				this->emulated[3] = scalar;
+			}
+		#endif
+		// Construct a portable SIMD vector from a pointer to aligned data
+		// data must be aligned to 16 bytes
+		static inline F32x4 readAlignedUnsafe(const float* data) {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					return F32x4(_mm_load_ps(data));
+				#elif USE_NEON
+					return F32x4(vld1q_f32(data));
+				#endif
+			#else
+				return F32x4(data[0], data[1], data[2], data[3]);
+			#endif
+		}
+		// Write to aligned memory from the existing vector
+		// data must be aligned to 16 bytes
+		inline void writeAlignedUnsafe(float* data) const {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					_mm_store_ps(data, this->v);
+				#elif USE_NEON
+					vst1q_f32(data, this->v);
+				#endif
+			#else
+				data[0] = this->emulated[0];
+				data[1] = this->emulated[1];
+				data[2] = this->emulated[2];
+				data[3] = this->emulated[3];
+			#endif
+		}
+		#ifdef DFPSR_GEOMETRY_FVECTOR
+			dsr::FVector4D get() const {
+				float data[4] ALIGN16;
+				this->writeAlignedUnsafe(data);
+				return dsr::FVector4D(data[0], data[1], data[2], data[3]);
+			}
+		#endif
+		// Bound and alignment checked reading
+		static inline F32x4 readAligned(const dsr::SafePointer<float> data, const char* methodName) {
+			const float* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			return F32x4::readAlignedUnsafe(pointer);
+		}
+		// Bound and alignment checked writing
+		inline void writeAligned(dsr::SafePointer<float> data, const char* methodName) const {
+			float* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			this->writeAlignedUnsafe(pointer);
+		}
+		// 1 / x
+		//   Useful for multiple divisions with the same denominator
+		//   Useless if the denominator is a constant
+		F32x4 reciprocal() const {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					// Approximate
+					SIMD_F32x4 lowQ = _mm_rcp_ps(this->v);
+					// Refine
+					return F32x4(SUB_F32_SIMD(ADD_F32_SIMD(lowQ, lowQ), MUL_F32_SIMD(this->v, MUL_F32_SIMD(lowQ, lowQ))));
+				#elif USE_NEON
+					// Approximate
+					SIMD_F32x4 result = vrecpeq_f32(this->v);
+					// Refine
+					result = MUL_F32_SIMD(vrecpsq_f32(this->v, result), result);
+					return F32x4(MUL_F32_SIMD(vrecpsq_f32(this->v, result), result));
+				#else
+					assert(false);
+					return F32x4(0);
+				#endif
+			#else
+				return F32x4(1.0f / this->emulated[0], 1.0f / this->emulated[1], 1.0f / this->emulated[2], 1.0f / this->emulated[3]);
+			#endif
+		}
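+		// Example (illustration, not from the original source): dividing several
+		// vectors by the same four denominators using a single reciprocal.
+		//   F32x4 inverse = denominators.reciprocal();
+		//   F32x4 q1 = a * inverse;
+		//   F32x4 q2 = b * inverse;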
+		// 1 / sqrt(x)
+		//   Useful for normalizing vectors
+		F32x4 reciprocalSquareRoot() const {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					// Approximate, then refine with one Newton-Raphson step
+					SIMD_F32x4 reciRoot = _mm_rsqrt_ps(this->v);
+					SIMD_F32x4 mul = MUL_F32_SIMD(MUL_F32_SIMD(this->v, reciRoot), reciRoot);
+					reciRoot = MUL_F32_SIMD(MUL_F32_SIMD(LOAD_SCALAR_F32_SIMD(0.5f), reciRoot), SUB_F32_SIMD(LOAD_SCALAR_F32_SIMD(3.0f), mul));
+					return F32x4(reciRoot);
+				#elif USE_NEON
+					// TODO: Test on ARM
+					// Approximate
+					SIMD_F32x4 reciRoot = vrsqrteq_f32(this->v);
+					// Refine
+					reciRoot = MUL_F32_SIMD(vrsqrtsq_f32(MUL_F32_SIMD(this->v, reciRoot), reciRoot), reciRoot);
+					return F32x4(reciRoot);
+				#else
+					assert(false);
+					return F32x4(0);
+				#endif
+			#else
+				return F32x4(1.0f / sqrt(this->emulated[0]), 1.0f / sqrt(this->emulated[1]), 1.0f / sqrt(this->emulated[2]), 1.0f / sqrt(this->emulated[3]));
+			#endif
+		}
+		// sqrt(x)
+		//   Useful for getting lengths of vectors
+		F32x4 squareRoot() const {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					SIMD_F32x4 half = LOAD_SCALAR_F32_SIMD(0.5f);
+					// Full-precision square root
+					SIMD_F32x4 root = _mm_sqrt_ps(this->v);
+					// One Newton-Raphson refinement step
+					root = _mm_mul_ps(_mm_add_ps(root, _mm_div_ps(this->v, root)), half);
+					return F32x4(root);
+				#elif USE_NEON
+					// TODO: Test on ARM
+					return F32x4(MUL_F32_SIMD(this->v, this->reciprocalSquareRoot().v));
+				#else
+					assert(false);
+					return F32x4(0);
+				#endif
+			#else
+				return F32x4(sqrt(this->emulated[0]), sqrt(this->emulated[1]), sqrt(this->emulated[2]), sqrt(this->emulated[3]));
+			#endif
+		}
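+		// Example (illustration, not from the original source): normalizing four 2D
+		// vectors at once, where x and y each hold one coordinate per lane.
+		//   F32x4 inverseLength = (x * x + y * y).reciprocalSquareRoot();
+		//   x = x * inverseLength;
+		//   y = y * inverseLength;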
+		F32x4 clamp(float min, float max) const {
+			#ifdef USE_BASIC_SIMD
+				return F32x4(MIN_F32_SIMD(MAX_F32_SIMD(this->v, LOAD_SCALAR_F32_SIMD(min)), LOAD_SCALAR_F32_SIMD(max)));
+			#else
+				float val0 = this->emulated[0];
+				float val1 = this->emulated[1];
+				float val2 = this->emulated[2];
+				float val3 = this->emulated[3];
+				if (min > val0) { val0 = min; }
+				if (max < val0) { val0 = max; }
+				if (min > val1) { val1 = min; }
+				if (max < val1) { val1 = max; }
+				if (min > val2) { val2 = min; }
+				if (max < val2) { val2 = max; }
+				if (min > val3) { val3 = min; }
+				if (max < val3) { val3 = max; }
+				return F32x4(val0, val1, val2, val3);
+			#endif
+		}
+		F32x4 clampLower(float min) const {
+			#ifdef USE_BASIC_SIMD
+				return F32x4(MAX_F32_SIMD(this->v, LOAD_SCALAR_F32_SIMD(min)));
+			#else
+				float val0 = this->emulated[0];
+				float val1 = this->emulated[1];
+				float val2 = this->emulated[2];
+				float val3 = this->emulated[3];
+				if (min > val0) { val0 = min; }
+				if (min > val1) { val1 = min; }
+				if (min > val2) { val2 = min; }
+				if (min > val3) { val3 = min; }
+				return F32x4(val0, val1, val2, val3);
+			#endif
+		}
+		F32x4 clampUpper(float max) const {
+			#ifdef USE_BASIC_SIMD
+				return F32x4(MIN_F32_SIMD(this->v, LOAD_SCALAR_F32_SIMD(max)));
+			#else
+				float val0 = this->emulated[0];
+				float val1 = this->emulated[1];
+				float val2 = this->emulated[2];
+				float val3 = this->emulated[3];
+				if (max < val0) { val0 = max; }
+				if (max < val1) { val1 = max; }
+				if (max < val2) { val2 = max; }
+				if (max < val3) { val3 = max; }
+				return F32x4(val0, val1, val2, val3);
+			#endif
+		}
+	};
+	inline dsr::String& string_toStreamIndented(dsr::String& target, const F32x4& source, const dsr::ReadableString& indentation) {
+		string_append(target, indentation, source.get());
+		return target;
+	}
+	inline bool operator==(const F32x4& left, const F32x4& right) {
+		float a[4] ALIGN16;
+		float b[4] ALIGN16;
+		left.writeAlignedUnsafe(a);
+		right.writeAlignedUnsafe(b);
+		return fabs(a[0] - b[0]) < 0.0001f && fabs(a[1] - b[1]) < 0.0001f && fabs(a[2] - b[2]) < 0.0001f && fabs(a[3] - b[3]) < 0.0001f;
+	}
+	inline bool operator!=(const F32x4& left, const F32x4& right) {
+		return !(left == right);
+	}
+	inline F32x4 operator+(const F32x4& left, const F32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(ADD_F32_SIMD(left.v, right.v));
+		#else
+			return F32x4(left.emulated[0] + right.emulated[0], left.emulated[1] + right.emulated[1], left.emulated[2] + right.emulated[2], left.emulated[3] + right.emulated[3]);
+		#endif
+	}
+	inline F32x4 operator+(float left, const F32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(ADD_F32_SIMD(LOAD_SCALAR_F32_SIMD(left), right.v));
+		#else
+			return F32x4(left + right.emulated[0], left + right.emulated[1], left + right.emulated[2], left + right.emulated[3]);
+		#endif
+	}
+	inline F32x4 operator+(const F32x4& left, float right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(ADD_F32_SIMD(left.v, LOAD_SCALAR_F32_SIMD(right)));
+		#else
+			return F32x4(left.emulated[0] + right, left.emulated[1] + right, left.emulated[2] + right, left.emulated[3] + right);
+		#endif
+	}
+	inline F32x4 operator-(const F32x4& left, const F32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(SUB_F32_SIMD(left.v, right.v));
+		#else
+			return F32x4(left.emulated[0] - right.emulated[0], left.emulated[1] - right.emulated[1], left.emulated[2] - right.emulated[2], left.emulated[3] - right.emulated[3]);
+		#endif
+	}
+	inline F32x4 operator-(float left, const F32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(SUB_F32_SIMD(LOAD_SCALAR_F32_SIMD(left), right.v));
+		#else
+			return F32x4(left - right.emulated[0], left - right.emulated[1], left - right.emulated[2], left - right.emulated[3]);
+		#endif
+	}
+	inline F32x4 operator-(const F32x4& left, float right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(SUB_F32_SIMD(left.v, LOAD_SCALAR_F32_SIMD(right)));
+		#else
+			return F32x4(left.emulated[0] - right, left.emulated[1] - right, left.emulated[2] - right, left.emulated[3] - right);
+		#endif
+	}
+	inline F32x4 operator*(const F32x4& left, const F32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(MUL_F32_SIMD(left.v, right.v));
+		#else
+			return F32x4(left.emulated[0] * right.emulated[0], left.emulated[1] * right.emulated[1], left.emulated[2] * right.emulated[2], left.emulated[3] * right.emulated[3]);
+		#endif
+	}
+	inline F32x4 operator*(float left, const F32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(MUL_F32_SIMD(LOAD_SCALAR_F32_SIMD(left), right.v));
+		#else
+			return F32x4(left * right.emulated[0], left * right.emulated[1], left * right.emulated[2], left * right.emulated[3]);
+		#endif
+	}
+	inline F32x4 operator*(const F32x4& left, float right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(MUL_F32_SIMD(left.v, LOAD_SCALAR_F32_SIMD(right)));
+		#else
+			return F32x4(left.emulated[0] * right, left.emulated[1] * right, left.emulated[2] * right, left.emulated[3] * right);
+		#endif
+	}
+	inline F32x4 min(const F32x4& left, const F32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(MIN_F32_SIMD(left.v, right.v));
+		#else
+			float v0 = left.emulated[0];
+			float v1 = left.emulated[1];
+			float v2 = left.emulated[2];
+			float v3 = left.emulated[3];
+			float r0 = right.emulated[0];
+			float r1 = right.emulated[1];
+			float r2 = right.emulated[2];
+			float r3 = right.emulated[3];
+			if (r0 < v0) { v0 = r0; }
+			if (r1 < v1) { v1 = r1; }
+			if (r2 < v2) { v2 = r2; }
+			if (r3 < v3) { v3 = r3; }
+			return F32x4(v0, v1, v2, v3);
+		#endif
+	}
+	inline F32x4 max(const F32x4& left, const F32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(MAX_F32_SIMD(left.v, right.v));
+		#else
+			float v0 = left.emulated[0];
+			float v1 = left.emulated[1];
+			float v2 = left.emulated[2];
+			float v3 = left.emulated[3];
+			float r0 = right.emulated[0];
+			float r1 = right.emulated[1];
+			float r2 = right.emulated[2];
+			float r3 = right.emulated[3];
+			if (r0 > v0) { v0 = r0; }
+			if (r1 > v1) { v1 = r1; }
+			if (r2 > v2) { v2 = r2; }
+			if (r3 > v3) { v3 = r3; }
+			return F32x4(v0, v1, v2, v3);
+		#endif
+	}
+	union I32x4 {
+		#ifdef USE_BASIC_SIMD
+			public:
+			#ifdef USE_DIRECT_SIMD_MEMORY_ACCESS
+				// Only use if USE_DIRECT_SIMD_MEMORY_ACCESS is defined!
+				// Direct access cannot be done on NEON!
+				int32_t shared_memory[4];
+			#endif
+			// The native SIMD vector, whose concrete type depends on the target architecture
+			// Not accessible while emulating!
+			SIMD_I32x4 v;
+			// Construct a portable vector from a native SIMD vector
+			explicit I32x4(const SIMD_I32x4& v) : v(v) {}
+			// Construct a portable vector from a set of scalars
+			I32x4(int32_t a1, int32_t a2, int32_t a3, int32_t a4) : v(LOAD_VECTOR_I32_SIMD(a1, a2, a3, a4)) {}
+			// Construct a portable vector from a single duplicated scalar
+			explicit I32x4(int32_t scalar) : v(LOAD_SCALAR_I32_SIMD(scalar)) {}
+		#else
+			public:
+			// Emulate a SIMD vector as an array of scalars without hardware support
+			// Only accessible while emulating!
+			int32_t emulated[4];
+			// Construct a portable vector from a set of scalars
+			I32x4(int32_t a1, int32_t a2, int32_t a3, int32_t a4) {
+				this->emulated[0] = a1;
+				this->emulated[1] = a2;
+				this->emulated[2] = a3;
+				this->emulated[3] = a4;
+			}
+			// Construct a portable vector from a single duplicated scalar
+			explicit I32x4(int32_t scalar) {
+				this->emulated[0] = scalar;
+				this->emulated[1] = scalar;
+				this->emulated[2] = scalar;
+				this->emulated[3] = scalar;
+			}
+		#endif
+		// Construct a portable SIMD vector from a pointer to aligned data
+		// data must be aligned to 16 bytes
+		static inline I32x4 readAlignedUnsafe(const int32_t* data) {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					return I32x4(_mm_load_si128((const __m128i*)data));
+				#elif USE_NEON
+					return I32x4(vld1q_s32(data));
+				#endif
+			#else
+				return I32x4(data[0], data[1], data[2], data[3]);
+			#endif
+		}
+		// Write to aligned memory from the existing vector
+		// data must be aligned to 16 bytes
+		inline void writeAlignedUnsafe(int32_t* data) const {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					_mm_store_si128((__m128i*)data, this->v);
+				#elif USE_NEON
+					vst1q_s32(data, this->v);
+				#endif
+			#else
+				data[0] = this->emulated[0];
+				data[1] = this->emulated[1];
+				data[2] = this->emulated[2];
+				data[3] = this->emulated[3];
+			#endif
+		}
+		#ifdef DFPSR_GEOMETRY_IVECTOR
+			dsr::IVector4D get() const {
+				int32_t data[4] ALIGN16;
+				this->writeAlignedUnsafe(data);
+				return dsr::IVector4D(data[0], data[1], data[2], data[3]);
+			}
+		#endif
+		// Bound and alignment checked reading
+		static inline I32x4 readAligned(const dsr::SafePointer<int32_t> data, const char* methodName) {
+			const int32_t* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			return I32x4::readAlignedUnsafe(pointer);
+		}
+		// Bound and alignment checked writing
+		inline void writeAligned(dsr::SafePointer<int32_t> data, const char* methodName) const {
+			int32_t* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			this->writeAlignedUnsafe(pointer);
+		}
+	};
+	inline dsr::String& string_toStreamIndented(dsr::String& target, const I32x4& source, const dsr::ReadableString& indentation) {
+		string_append(target, indentation, source.get());
+		return target;
+	}
+	inline bool operator==(const I32x4& left, const I32x4& right) {
+		int32_t a[4] ALIGN16;
+		int32_t b[4] ALIGN16;
+		left.writeAlignedUnsafe(a);
+		right.writeAlignedUnsafe(b);
+		return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3];
+	}
+	inline bool operator!=(const I32x4& left, const I32x4& right) {
+		return !(left == right);
+	}
+	inline I32x4 operator+(const I32x4& left, const I32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return I32x4(ADD_I32_SIMD(left.v, right.v));
+		#else
+			return I32x4(left.emulated[0] + right.emulated[0], left.emulated[1] + right.emulated[1], left.emulated[2] + right.emulated[2], left.emulated[3] + right.emulated[3]);
+		#endif
+	}
+	inline I32x4 operator+(int32_t left, const I32x4& right) {
+		return I32x4(left) + right;
+	}
+	inline I32x4 operator+(const I32x4& left, int32_t right) {
+		return left + I32x4(right);
+	}
+	inline I32x4 operator-(const I32x4& left, const I32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return I32x4(SUB_I32_SIMD(left.v, right.v));
+		#else
+			return I32x4(left.emulated[0] - right.emulated[0], left.emulated[1] - right.emulated[1], left.emulated[2] - right.emulated[2], left.emulated[3] - right.emulated[3]);
+		#endif
+	}
+	inline I32x4 operator-(int32_t left, const I32x4& right) {
+		return I32x4(left) - right;
+	}
+	inline I32x4 operator-(const I32x4& left, int32_t right) {
+		return left - I32x4(right);
+	}
+	inline I32x4 operator*(const I32x4& left, const I32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			#ifdef USE_SSE2
+				// Emulate a NEON instruction
+				return I32x4(left.shared_memory[0] * right.shared_memory[0], left.shared_memory[1] * right.shared_memory[1], left.shared_memory[2] * right.shared_memory[2], left.shared_memory[3] * right.shared_memory[3]);
+			#elif USE_NEON
+				return I32x4(MUL_I32_NEON(left.v, right.v));
+			#endif
+		#else
+			return I32x4(left.emulated[0] * right.emulated[0], left.emulated[1] * right.emulated[1], left.emulated[2] * right.emulated[2], left.emulated[3] * right.emulated[3]);
+		#endif
+	}
+	inline I32x4 operator*(int32_t left, const I32x4& right) {
+		return I32x4(left) * right;
+	}
+	inline I32x4 operator*(const I32x4& left, int32_t right) {
+		return left * I32x4(right);
+	}
+
+	union U32x4 {
+		#ifdef USE_BASIC_SIMD
+			public:
+			#ifdef USE_DIRECT_SIMD_MEMORY_ACCESS
+				// Only use if USE_DIRECT_SIMD_MEMORY_ACCESS is defined!
+				// Direct access cannot be done on NEON!
+				uint32_t shared_memory[4];
+			#endif
+			// The native SIMD vector, whose concrete type depends on the target architecture
+			// Not accessible while emulating!
+			SIMD_U32x4 v;
+			// Construct a portable vector from a native SIMD vector
+			explicit U32x4(const SIMD_U32x4& v) : v(v) {}
+			// Construct a portable vector from a set of scalars
+			U32x4(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4) : v(LOAD_VECTOR_U32_SIMD(a1, a2, a3, a4)) {}
+			// Construct a portable vector from a single duplicated scalar
+			explicit U32x4(uint32_t scalar) : v(LOAD_SCALAR_U32_SIMD(scalar)) {}
+		#else
+			public:
+			// Emulate a SIMD vector as an array of scalars without hardware support
+			// Only accessible while emulating!
+			uint32_t emulated[4];
+			// Construct a portable vector from a set of scalars
+			U32x4(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4) {
+				this->emulated[0] = a1;
+				this->emulated[1] = a2;
+				this->emulated[2] = a3;
+				this->emulated[3] = a4;
+			}
+			// Construct a portable vector from a single duplicated scalar
+			explicit U32x4(uint32_t scalar) {
+				this->emulated[0] = scalar;
+				this->emulated[1] = scalar;
+				this->emulated[2] = scalar;
+				this->emulated[3] = scalar;
+			}
+		#endif
+		// Construct a portable SIMD vector from a pointer to aligned data
+		// data must be aligned to 16 bytes
+		static inline U32x4 readAlignedUnsafe(const uint32_t* data) {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					return U32x4(_mm_load_si128((const __m128i*)data));
+				#elif USE_NEON
+					return U32x4(vld1q_u32(data));
+				#endif
+			#else
+				return U32x4(data[0], data[1], data[2], data[3]);
+			#endif
+		}
+		// Write to aligned memory from the existing vector
+		// data must be aligned to 16 bytes
+		inline void writeAlignedUnsafe(uint32_t* data) const {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					_mm_store_si128((__m128i*)data, this->v);
+				#elif USE_NEON
+					vst1q_u32(data, this->v);
+				#endif
+			#else
+				data[0] = this->emulated[0];
+				data[1] = this->emulated[1];
+				data[2] = this->emulated[2];
+				data[3] = this->emulated[3];
+			#endif
+		}
+		#ifdef DFPSR_GEOMETRY_UVECTOR
+			dsr::UVector4D get() const {
+				uint32_t data[4] ALIGN16;
+				this->writeAlignedUnsafe(data);
+				return dsr::UVector4D(data[0], data[1], data[2], data[3]);
+			}
+		#endif
+		// Bound and alignment checked reading
+		static inline U32x4 readAligned(const dsr::SafePointer<uint32_t> data, const char* methodName) {
+			const uint32_t* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			return U32x4::readAlignedUnsafe(pointer);
+		}
+		// Bound and alignment checked writing
+		inline void writeAligned(dsr::SafePointer<uint32_t> data, const char* methodName) const {
+			uint32_t* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			this->writeAlignedUnsafe(pointer);
+		}
+	};
+	inline dsr::String& string_toStreamIndented(dsr::String& target, const U32x4& source, const dsr::ReadableString& indentation) {
+		string_append(target, indentation, source.get());
+		return target;
+	}
+	inline bool operator==(const U32x4& left, const U32x4& right) {
+		uint32_t a[4] ALIGN16;
+		uint32_t b[4] ALIGN16;
+		left.writeAlignedUnsafe(a);
+		right.writeAlignedUnsafe(b);
+		return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3];
+	}
+	inline bool operator!=(const U32x4& left, const U32x4& right) {
+		return !(left == right);
+	}
+	inline U32x4 operator+(const U32x4& left, const U32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(ADD_U32_SIMD(left.v, right.v));
+		#else
+			return U32x4(left.emulated[0] + right.emulated[0], left.emulated[1] + right.emulated[1], left.emulated[2] + right.emulated[2], left.emulated[3] + right.emulated[3]);
+		#endif
+	}
+	inline U32x4 operator+(uint32_t left, const U32x4& right) {
+		return U32x4(left) + right;
+	}
+	inline U32x4 operator+(const U32x4& left, uint32_t right) {
+		return left + U32x4(right);
+	}
+	inline U32x4 operator-(const U32x4& left, const U32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(SUB_U32_SIMD(left.v, right.v));
+		#else
+			return U32x4(left.emulated[0] - right.emulated[0], left.emulated[1] - right.emulated[1], left.emulated[2] - right.emulated[2], left.emulated[3] - right.emulated[3]);
+		#endif
+	}
+	inline U32x4 operator-(uint32_t left, const U32x4& right) {
+		return U32x4(left) - right;
+	}
+	inline U32x4 operator-(const U32x4& left, uint32_t right) {
+		return left - U32x4(right);
+	}
+	inline U32x4 operator*(const U32x4& left, const U32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			#ifdef USE_SSE2
+				// Emulate a NEON instruction on SSE2 registers
+				return U32x4(left.shared_memory[0] * right.shared_memory[0], left.shared_memory[1] * right.shared_memory[1], left.shared_memory[2] * right.shared_memory[2], left.shared_memory[3] * right.shared_memory[3]);
+			#else // NEON
+				return U32x4(MUL_U32_NEON(left.v, right.v));
+			#endif
+		#else
+			return U32x4(left.emulated[0] * right.emulated[0], left.emulated[1] * right.emulated[1], left.emulated[2] * right.emulated[2], left.emulated[3] * right.emulated[3]);
+		#endif
+	}
+	inline U32x4 operator*(uint32_t left, const U32x4& right) {
+		return U32x4(left) * right;
+	}
+	inline U32x4 operator*(const U32x4& left, uint32_t right) {
+		return left * U32x4(right);
+	}
+	inline U32x4 operator&(const U32x4& left, const U32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(BITWISE_AND_U32_SIMD(left.v, right.v));
+		#else
+			return U32x4(left.emulated[0] & right.emulated[0], left.emulated[1] & right.emulated[1], left.emulated[2] & right.emulated[2], left.emulated[3] & right.emulated[3]);
+		#endif
+	}
+	inline U32x4 operator&(const U32x4& left, uint32_t mask) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(BITWISE_AND_U32_SIMD(left.v, LOAD_SCALAR_U32_SIMD(mask)));
+		#else
+			return U32x4(left.emulated[0] & mask, left.emulated[1] & mask, left.emulated[2] & mask, left.emulated[3] & mask);
+		#endif
+	}
+	inline U32x4 operator|(const U32x4& left, const U32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(BITWISE_OR_U32_SIMD(left.v, right.v));
+		#else
+			return U32x4(left.emulated[0] | right.emulated[0], left.emulated[1] | right.emulated[1], left.emulated[2] | right.emulated[2], left.emulated[3] | right.emulated[3]);
+		#endif
+	}
+	inline U32x4 operator|(const U32x4& left, uint32_t mask) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(BITWISE_OR_U32_SIMD(left.v, LOAD_SCALAR_U32_SIMD(mask)));
+		#else
+			return U32x4(left.emulated[0] | mask, left.emulated[1] | mask, left.emulated[2] | mask, left.emulated[3] | mask);
+		#endif
+	}
+	inline U32x4 operator<<(const U32x4& left, uint32_t bitOffset) {
+		#ifdef USE_SSE2
+			return U32x4(_mm_slli_epi32(left.v, bitOffset));
+		#else
+			#ifdef USE_NEON
+				return U32x4(vshlq_u32(left.v, LOAD_SCALAR_I32_SIMD(bitOffset)));
+			#else
+				return U32x4(left.emulated[0] << bitOffset, left.emulated[1] << bitOffset, left.emulated[2] << bitOffset, left.emulated[3] << bitOffset);
+			#endif
+		#endif
+	}
+	inline U32x4 operator>>(const U32x4& left, uint32_t bitOffset) {
+		#ifdef USE_SSE2
+			return U32x4(_mm_srli_epi32(left.v, bitOffset));
+		#else
+			#ifdef USE_NEON
+				return U32x4(vshlq_u32(left.v, LOAD_SCALAR_I32_SIMD(-(int32_t)bitOffset)));
+			#else
+				return U32x4(left.emulated[0] >> bitOffset, left.emulated[1] >> bitOffset, left.emulated[2] >> bitOffset, left.emulated[3] >> bitOffset);
+			#endif
+		#endif
+	}
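+	// Example (illustration, not from the original source): extracting one 8-bit
+	// channel from four packed 32-bit pixels with a shift and a mask. The shift by 8
+	// assumes the channel is stored in the second byte of each pixel, which depends
+	// on the pack order. "pixelPointer" is a hypothetical dsr::SafePointer<uint32_t>.
+	//   U32x4 pixels = U32x4::readAligned(pixelPointer, "example");
+	//   U32x4 channel = (pixels >> 8) & 255u;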
+
+	union U16x8 {
+		#ifdef USE_BASIC_SIMD
+			public:
+			#ifdef USE_DIRECT_SIMD_MEMORY_ACCESS
+				// Only use if USE_DIRECT_SIMD_MEMORY_ACCESS is defined!
+				// Direct access cannot be done on NEON!
+				uint16_t shared_memory[8];
+			#endif
+			// The native SIMD vector, whose concrete type depends on the target architecture
+			// Not accessible while emulating!
+			SIMD_U16x8 v;
+			// Construct a portable vector from a native SIMD vector
+			explicit U16x8(const SIMD_U16x8& v) : v(v) {}
+			// Construct a vector of 8 x 16-bit unsigned integers from a vector of 4 x 32-bit unsigned integers
+			//   Reinterpret casting is used
+			explicit U16x8(const U32x4& vector) : v(REINTERPRET_U32_TO_U16_SIMD(vector.v)) {}
+			// Construct a portable vector from a set of scalars
+			U16x8(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4, uint16_t a5, uint16_t a6, uint16_t a7, uint16_t a8) : v(LOAD_VECTOR_U16_SIMD(a1, a2, a3, a4, a5, a6, a7, a8)) {}
+			// Construct a vector of 8 x 16-bit unsigned integers from a single duplicated 32-bit unsigned integer
+			//   Reinterpret casting is used
+			// TODO: Remove all reinterprets from constructors to improve readability
+			explicit U16x8(uint32_t scalar) : v(REINTERPRET_U32_TO_U16_SIMD(LOAD_SCALAR_U32_SIMD(scalar))) {}
+			// Construct a portable vector from a single duplicated scalar
+			explicit U16x8(uint16_t scalar) : v(LOAD_SCALAR_U16_SIMD(scalar)) {}
+			// Reinterpret cast to a vector of 4 x 32-bit unsigned integers
+			U32x4 get_U32() const {
+				return U32x4(REINTERPRET_U16_TO_U32_SIMD(this->v));
+			}
+		#else
+			public:
+			// Emulate a SIMD vector as an array of scalars without hardware support
+			// Only accessible while emulating!
+			uint16_t emulated[8];
+			// Construct a vector of 8 x 16-bit unsigned integers from a vector of 4 x 32-bit unsigned integers
+			//   Reinterpret casting is used
+			explicit U16x8(const U32x4& vector) {
+				uint64_t *target = (uint64_t*)this->emulated;
+				uint64_t *source = (uint64_t*)vector.emulated;
+				target[0] = source[0];
+				target[1] = source[1];
+			}
+			// Construct a portable vector from a set of scalars
+			U16x8(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4, uint16_t a5, uint16_t a6, uint16_t a7, uint16_t a8) {
+				this->emulated[0] = a1;
+				this->emulated[1] = a2;
+				this->emulated[2] = a3;
+				this->emulated[3] = a4;
+				this->emulated[4] = a5;
+				this->emulated[5] = a6;
+				this->emulated[6] = a7;
+				this->emulated[7] = a8;
+			}
+			// Construct a vector of 8 x 16-bit unsigned integers from a single duplicated 32-bit unsigned integer
+			//   Reinterpret casting is used
+			explicit U16x8(uint32_t scalar) {
+				uint32_t *target = (uint32_t*)this->emulated;
+				target[0] = scalar;
+				target[1] = scalar;
+				target[2] = scalar;
+				target[3] = scalar;
+			}
+			// Construct a portable vector from a single duplicated scalar
+			explicit U16x8(uint16_t scalar) {
+				this->emulated[0] = scalar;
+				this->emulated[1] = scalar;
+				this->emulated[2] = scalar;
+				this->emulated[3] = scalar;
+				this->emulated[4] = scalar;
+				this->emulated[5] = scalar;
+				this->emulated[6] = scalar;
+				this->emulated[7] = scalar;
+			}
+			// Reinterpret cast to a vector of 4 x 32-bit unsigned integers
+			U32x4 get_U32() const {
+				U32x4 result(0);
+				uint64_t *target = (uint64_t*)result.emulated;
+				uint64_t *source = (uint64_t*)this->emulated;
+				target[0] = source[0];
+				target[1] = source[1];
+				return result;
+			}
+		#endif
+		// data must be aligned to 16 bytes
+		//static inline U16x8 readSlow(uint16_t* data) {
+		//	return U16x8(data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]);
+		//}
+		static inline U16x8 readAlignedUnsafe(const uint16_t* data) {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					return U16x8(_mm_load_si128((const __m128i*)data));
+				#elif USE_NEON
+					return U16x8(vld1q_u16(data));
+				#endif
+			#else
+				return U16x8(data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]);
+			#endif
+		}
+		// data must be aligned to 16 bytes
+		inline void writeAlignedUnsafe(uint16_t* data) const {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					_mm_store_si128((__m128i*)data, this->v);
+				#elif USE_NEON
+					vst1q_u16(data, this->v);
+				#endif
+			#else
+				data[0] = this->emulated[0];
+				data[1] = this->emulated[1];
+				data[2] = this->emulated[2];
+				data[3] = this->emulated[3];
+				data[4] = this->emulated[4];
+				data[5] = this->emulated[5];
+				data[6] = this->emulated[6];
+				data[7] = this->emulated[7];
+			#endif
+		}
+		// Bound and alignment checked reading
+		static inline U16x8 readAligned(const dsr::SafePointer<uint16_t> data, const char* methodName) {
+			const uint16_t* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			return U16x8::readAlignedUnsafe(pointer);
+		}
+		// Bound and alignment checked writing
+		inline void writeAligned(dsr::SafePointer<uint16_t> data, const char* methodName) const {
+			uint16_t* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			this->writeAlignedUnsafe(pointer);
+		}
+	};
+	inline dsr::String& string_toStreamIndented(dsr::String& target, const U16x8& source, const dsr::ReadableString& indentation) {
+		ALIGN16 uint16_t data[8];
+		source.writeAlignedUnsafe(data);
+		string_append(target, indentation, "(", data[0], ", ", data[1], ", ", data[2], ", ", data[3], ", ", data[4], ", ", data[5], ", ", data[6], ", ", data[7], ")");
+		return target;
+	}
+	inline bool operator==(const U16x8& left, const U16x8& right) {
+		ALIGN16 uint16_t a[8];
+		ALIGN16 uint16_t b[8];
+		left.writeAlignedUnsafe(a);
+		right.writeAlignedUnsafe(b);
+		return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3] && a[4] == b[4] && a[5] == b[5] && a[6] == b[6] && a[7] == b[7];
+	}
+	inline bool operator!=(const U16x8& left, const U16x8& right) {
+		return !(left == right);
+	}
+	inline U16x8 operator+(const U16x8& left, const U16x8& right) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(ADD_U16_SIMD(left.v, right.v));
+		#else
+			return U16x8(left.emulated[0] + right.emulated[0], left.emulated[1] + right.emulated[1], left.emulated[2] + right.emulated[2], left.emulated[3] + right.emulated[3],
+			           left.emulated[4] + right.emulated[4], left.emulated[5] + right.emulated[5], left.emulated[6] + right.emulated[6], left.emulated[7] + right.emulated[7]);
+		#endif
+	}
+	inline U16x8 operator+(uint16_t left, const U16x8& right) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(ADD_U16_SIMD(LOAD_SCALAR_U16_SIMD(left), right.v));
+		#else
+			return U16x8(left + right.emulated[0], left + right.emulated[1], left + right.emulated[2], left + right.emulated[3],
+			           left + right.emulated[4], left + right.emulated[5], left + right.emulated[6], left + right.emulated[7]);
+		#endif
+	}
+	inline U16x8 operator+(const U16x8& left, uint16_t right) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(ADD_U16_SIMD(left.v, LOAD_SCALAR_U16_SIMD(right)));
+		#else
+			return U16x8(left.emulated[0] + right, left.emulated[1] + right, left.emulated[2] + right, left.emulated[3] + right,
+			           left.emulated[4] + right, left.emulated[5] + right, left.emulated[6] + right, left.emulated[7] + right);
+		#endif
+	}
+	inline U16x8 operator-(const U16x8& left, const U16x8& right) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(SUB_U16_SIMD(left.v, right.v));
+		#else
+			return U16x8(left.emulated[0] - right.emulated[0], left.emulated[1] - right.emulated[1], left.emulated[2] - right.emulated[2], left.emulated[3] - right.emulated[3],
+			           left.emulated[4] - right.emulated[4], left.emulated[5] - right.emulated[5], left.emulated[6] - right.emulated[6], left.emulated[7] - right.emulated[7]);
+		#endif
+	}
+	inline U16x8 operator-(uint16_t left, const U16x8& right) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(SUB_U16_SIMD(LOAD_SCALAR_U16_SIMD(left), right.v));
+		#else
+			return U16x8(left - right.emulated[0], left - right.emulated[1], left - right.emulated[2], left - right.emulated[3],
+			           left - right.emulated[4], left - right.emulated[5], left - right.emulated[6], left - right.emulated[7]);
+		#endif
+	}
+	inline U16x8 operator-(const U16x8& left, uint16_t right) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(SUB_U16_SIMD(left.v, LOAD_SCALAR_U16_SIMD(right)));
+		#else
+			return U16x8(left.emulated[0] - right, left.emulated[1] - right, left.emulated[2] - right, left.emulated[3] - right,
+			           left.emulated[4] - right, left.emulated[5] - right, left.emulated[6] - right, left.emulated[7] - right);
+		#endif
+	}
+	inline U16x8 operator*(const U16x8& left, const U16x8& right) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(MUL_U16_SIMD(left.v, right.v));
+		#else
+			return U16x8(left.emulated[0] * right.emulated[0], left.emulated[1] * right.emulated[1], left.emulated[2] * right.emulated[2], left.emulated[3] * right.emulated[3],
+			           left.emulated[4] * right.emulated[4], left.emulated[5] * right.emulated[5], left.emulated[6] * right.emulated[6], left.emulated[7] * right.emulated[7]);
+		#endif
+	}
+	inline U16x8 operator*(uint16_t left, const U16x8& right) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(MUL_U16_SIMD(LOAD_SCALAR_U16_SIMD(left), right.v));
+		#else
+			return U16x8(left * right.emulated[0], left * right.emulated[1], left * right.emulated[2], left * right.emulated[3],
+			           left * right.emulated[4], left * right.emulated[5], left * right.emulated[6], left * right.emulated[7]);
+		#endif
+	}
+	inline U16x8 operator*(const U16x8& left, uint16_t right) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(MUL_U16_SIMD(left.v, LOAD_SCALAR_U16_SIMD(right)));
+		#else
+			return U16x8(
+			  left.emulated[0] * right, left.emulated[1] * right, left.emulated[2] * right, left.emulated[3] * right,
+			  left.emulated[4] * right, left.emulated[5] * right, left.emulated[6] * right, left.emulated[7] * right
+			);
+		#endif
+	}
+
+	union U8x16 {
+		#ifdef USE_BASIC_SIMD
+			public:
+			#ifdef USE_DIRECT_SIMD_MEMORY_ACCESS
+				// Only use if USE_DIRECT_SIMD_MEMORY_ACCESS is defined!
+				// Direct access cannot be done on NEON!
+				uint8_t shared_memory[16];
+			#endif
+			// The native SIMD vector, whose concrete type depends on the target architecture
+			// Not accessible while emulating!
+			SIMD_U8x16 v;
+			// Construct a portable vector from a native SIMD vector
+			explicit U8x16(const SIMD_U8x16& v) : v(v) {}
+			// Construct a portable vector from a set of scalars
+			U8x16(uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8,
+			      uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16)
+			: v(LOAD_VECTOR_U8_SIMD(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16)) {}
+			// Construct a portable vector from a single duplicated scalar
+			explicit U8x16(uint8_t scalar) : v(LOAD_SCALAR_U8_SIMD(scalar)) {}
+		#else
+			public:
+			// Emulate a SIMD vector as an array of scalars without hardware support
+			// Only accessible while emulating!
+			uint8_t emulated[16];
+			// Construct a portable vector from a set of scalars
+			U8x16(uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8,
+			      uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16) {
+				this->emulated[0] = a1;
+				this->emulated[1] = a2;
+				this->emulated[2] = a3;
+				this->emulated[3] = a4;
+				this->emulated[4] = a5;
+				this->emulated[5] = a6;
+				this->emulated[6] = a7;
+				this->emulated[7] = a8;
+				this->emulated[8] = a9;
+				this->emulated[9] = a10;
+				this->emulated[10] = a11;
+				this->emulated[11] = a12;
+				this->emulated[12] = a13;
+				this->emulated[13] = a14;
+				this->emulated[14] = a15;
+				this->emulated[15] = a16;
+			}
+			// Construct a portable vector from a single duplicated scalar
+			explicit U8x16(uint8_t scalar) {
+				this->emulated[0] = scalar;
+				this->emulated[1] = scalar;
+				this->emulated[2] = scalar;
+				this->emulated[3] = scalar;
+				this->emulated[4] = scalar;
+				this->emulated[5] = scalar;
+				this->emulated[6] = scalar;
+				this->emulated[7] = scalar;
+				this->emulated[8] = scalar;
+				this->emulated[9] = scalar;
+				this->emulated[10] = scalar;
+				this->emulated[11] = scalar;
+				this->emulated[12] = scalar;
+				this->emulated[13] = scalar;
+				this->emulated[14] = scalar;
+				this->emulated[15] = scalar;
+			}
+		#endif
+		static inline U8x16 readAlignedUnsafe(const uint8_t* data) {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					return U8x16(_mm_load_si128((const __m128i*)data));
+				#elif USE_NEON
+					return U8x16(vld1q_u8(data));
+				#endif
+			#else
+				return U8x16(
+				  data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
+				  data[8], data[9], data[10], data[11], data[12], data[13], data[14], data[15]
+				);
+			#endif
+		}
+		// data must be aligned to 16 bytes
+		inline void writeAlignedUnsafe(uint8_t* data) const {
+			#ifdef USE_BASIC_SIMD
+				#ifdef USE_SSE2
+					_mm_store_si128((__m128i*)data, this->v);
+				#elif USE_NEON
+					vst1q_u8(data, this->v);
+				#endif
+			#else
+				data[0] = this->emulated[0];
+				data[1] = this->emulated[1];
+				data[2] = this->emulated[2];
+				data[3] = this->emulated[3];
+				data[4] = this->emulated[4];
+				data[5] = this->emulated[5];
+				data[6] = this->emulated[6];
+				data[7] = this->emulated[7];
+				data[8] = this->emulated[8];
+				data[9] = this->emulated[9];
+				data[10] = this->emulated[10];
+				data[11] = this->emulated[11];
+				data[12] = this->emulated[12];
+				data[13] = this->emulated[13];
+				data[14] = this->emulated[14];
+				data[15] = this->emulated[15];
+			#endif
+		}
+		// Bound and alignment checked reading
+		static inline U8x16 readAligned(const dsr::SafePointer<uint8_t> data, const char* methodName) {
+			const uint8_t* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			return U8x16::readAlignedUnsafe(pointer);
+		}
+		// Bound and alignment checked writing
+		inline void writeAligned(dsr::SafePointer<uint8_t> data, const char* methodName) const {
+			uint8_t* pointer = data.getUnsafe();
+			assert(((uintptr_t)pointer & 15) == 0);
+			#ifdef SAFE_POINTER_CHECKS
+				data.assertInside(methodName, pointer, 16);
+			#endif
+			this->writeAlignedUnsafe(pointer);
+		}
+	};
+	inline dsr::String& string_toStreamIndented(dsr::String& target, const U8x16& source, const dsr::ReadableString& indentation) {
+		ALIGN16 uint8_t data[16];
+		source.writeAlignedUnsafe(data);
+		string_append(target, indentation,
+		  "(", data[0], ", ", data[1], ", ", data[2], ", ", data[3], ", ", data[4], ", ", data[5], ", ", data[6], ", ", data[7],
+		  ", ", data[8], ", ", data[9], ", ", data[10], ", ", data[11], ", ", data[12], ", ", data[13], ", ", data[14], ", ", data[15], ")"
+		);
+		return target;
+	}
+	inline bool operator==(const U8x16& left, const U8x16& right) {
+		ALIGN16 uint8_t a[16];
+		ALIGN16 uint8_t b[16];
+		left.writeAlignedUnsafe(a);
+		right.writeAlignedUnsafe(b);
+		return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3] && a[4] == b[4] && a[5] == b[5] && a[6] == b[6] && a[7] == b[7]
+		    && a[8] == b[8] && a[9] == b[9] && a[10] == b[10] && a[11] == b[11] && a[12] == b[12] && a[13] == b[13] && a[14] == b[14] && a[15] == b[15];
+	}
+	inline bool operator!=(const U8x16& left, const U8x16& right) {
+		return !(left == right);
+	}
+	inline U8x16 operator+(const U8x16& left, const U8x16& right) {
+		#ifdef USE_BASIC_SIMD
+			return U8x16(ADD_U8_SIMD(left.v, right.v));
+		#else
+			return U8x16(
+			  left.emulated[0] + right.emulated[0],
+			  left.emulated[1] + right.emulated[1],
+			  left.emulated[2] + right.emulated[2],
+			  left.emulated[3] + right.emulated[3],
+			  left.emulated[4] + right.emulated[4],
+			  left.emulated[5] + right.emulated[5],
+			  left.emulated[6] + right.emulated[6],
+			  left.emulated[7] + right.emulated[7],
+			  left.emulated[8] + right.emulated[8],
+			  left.emulated[9] + right.emulated[9],
+			  left.emulated[10] + right.emulated[10],
+			  left.emulated[11] + right.emulated[11],
+			  left.emulated[12] + right.emulated[12],
+			  left.emulated[13] + right.emulated[13],
+			  left.emulated[14] + right.emulated[14],
+			  left.emulated[15] + right.emulated[15]
+			);
+		#endif
+	}
+	inline U8x16 operator+(uint8_t left, const U8x16& right) {
+		#ifdef USE_BASIC_SIMD
+			return U8x16(ADD_U8_SIMD(LOAD_SCALAR_U8_SIMD(left), right.v));
+		#else
+			return U8x16(
+			  left + right.emulated[0],
+			  left + right.emulated[1],
+			  left + right.emulated[2],
+			  left + right.emulated[3],
+			  left + right.emulated[4],
+			  left + right.emulated[5],
+			  left + right.emulated[6],
+			  left + right.emulated[7],
+			  left + right.emulated[8],
+			  left + right.emulated[9],
+			  left + right.emulated[10],
+			  left + right.emulated[11],
+			  left + right.emulated[12],
+			  left + right.emulated[13],
+			  left + right.emulated[14],
+			  left + right.emulated[15]
+			);
+		#endif
+	}
+	inline U8x16 operator+(const U8x16& left, uint8_t right) {
+		#ifdef USE_BASIC_SIMD
+			return U8x16(ADD_U8_SIMD(left.v, LOAD_SCALAR_U8_SIMD(right)));
+		#else
+			return U8x16(
+			  left.emulated[0] + right,
+			  left.emulated[1] + right,
+			  left.emulated[2] + right,
+			  left.emulated[3] + right,
+			  left.emulated[4] + right,
+			  left.emulated[5] + right,
+			  left.emulated[6] + right,
+			  left.emulated[7] + right,
+			  left.emulated[8] + right,
+			  left.emulated[9] + right,
+			  left.emulated[10] + right,
+			  left.emulated[11] + right,
+			  left.emulated[12] + right,
+			  left.emulated[13] + right,
+			  left.emulated[14] + right,
+			  left.emulated[15] + right
+			);
+		#endif
+	}
+	inline U8x16 operator-(const U8x16& left, const U8x16& right) {
+		#ifdef USE_BASIC_SIMD
+			return U8x16(SUB_U8_SIMD(left.v, right.v));
+		#else
+			return U8x16(
+			  left.emulated[0] - right.emulated[0],
+			  left.emulated[1] - right.emulated[1],
+			  left.emulated[2] - right.emulated[2],
+			  left.emulated[3] - right.emulated[3],
+			  left.emulated[4] - right.emulated[4],
+			  left.emulated[5] - right.emulated[5],
+			  left.emulated[6] - right.emulated[6],
+			  left.emulated[7] - right.emulated[7],
+			  left.emulated[8] - right.emulated[8],
+			  left.emulated[9] - right.emulated[9],
+			  left.emulated[10] - right.emulated[10],
+			  left.emulated[11] - right.emulated[11],
+			  left.emulated[12] - right.emulated[12],
+			  left.emulated[13] - right.emulated[13],
+			  left.emulated[14] - right.emulated[14],
+			  left.emulated[15] - right.emulated[15]
+			);
+		#endif
+	}
+	inline U8x16 operator-(uint8_t left, const U8x16& right) {
+		#ifdef USE_BASIC_SIMD
+			return U8x16(SUB_U8_SIMD(LOAD_SCALAR_U8_SIMD(left), right.v));
+		#else
+			return U8x16(
+			  left - right.emulated[0],
+			  left - right.emulated[1],
+			  left - right.emulated[2],
+			  left - right.emulated[3],
+			  left - right.emulated[4],
+			  left - right.emulated[5],
+			  left - right.emulated[6],
+			  left - right.emulated[7],
+			  left - right.emulated[8],
+			  left - right.emulated[9],
+			  left - right.emulated[10],
+			  left - right.emulated[11],
+			  left - right.emulated[12],
+			  left - right.emulated[13],
+			  left - right.emulated[14],
+			  left - right.emulated[15]
+			);
+		#endif
+	}
+	inline U8x16 operator-(const U8x16& left, uint8_t right) {
+		#ifdef USE_BASIC_SIMD
+			return U8x16(SUB_U8_SIMD(left.v, LOAD_SCALAR_U8_SIMD(right)));
+		#else
+			return U8x16(
+			  left.emulated[0] - right,
+			  left.emulated[1] - right,
+			  left.emulated[2] - right,
+			  left.emulated[3] - right,
+			  left.emulated[4] - right,
+			  left.emulated[5] - right,
+			  left.emulated[6] - right,
+			  left.emulated[7] - right,
+			  left.emulated[8] - right,
+			  left.emulated[9] - right,
+			  left.emulated[10] - right,
+			  left.emulated[11] - right,
+			  left.emulated[12] - right,
+			  left.emulated[13] - right,
+			  left.emulated[14] - right,
+			  left.emulated[15] - right
+			);
+		#endif
+	}
+	inline uint8_t saturateToU8(uint32_t x) {
+		// No need to check lower bound for unsigned input
+		return x > 255 ? 255 : x;
+	}
+	inline U8x16 saturatedAddition(const U8x16& left, const U8x16& right) {
+		#ifdef USE_BASIC_SIMD
+			return U8x16(ADD_SAT_U8_SIMD(left.v, right.v));
+		#else
+			return U8x16(
+			  saturateToU8((uint32_t)left.emulated[0] + (uint32_t)right.emulated[0]),
+			  saturateToU8((uint32_t)left.emulated[1] + (uint32_t)right.emulated[1]),
+			  saturateToU8((uint32_t)left.emulated[2] + (uint32_t)right.emulated[2]),
+			  saturateToU8((uint32_t)left.emulated[3] + (uint32_t)right.emulated[3]),
+			  saturateToU8((uint32_t)left.emulated[4] + (uint32_t)right.emulated[4]),
+			  saturateToU8((uint32_t)left.emulated[5] + (uint32_t)right.emulated[5]),
+			  saturateToU8((uint32_t)left.emulated[6] + (uint32_t)right.emulated[6]),
+			  saturateToU8((uint32_t)left.emulated[7] + (uint32_t)right.emulated[7]),
+			  saturateToU8((uint32_t)left.emulated[8] + (uint32_t)right.emulated[8]),
+			  saturateToU8((uint32_t)left.emulated[9] + (uint32_t)right.emulated[9]),
+			  saturateToU8((uint32_t)left.emulated[10] + (uint32_t)right.emulated[10]),
+			  saturateToU8((uint32_t)left.emulated[11] + (uint32_t)right.emulated[11]),
+			  saturateToU8((uint32_t)left.emulated[12] + (uint32_t)right.emulated[12]),
+			  saturateToU8((uint32_t)left.emulated[13] + (uint32_t)right.emulated[13]),
+			  saturateToU8((uint32_t)left.emulated[14] + (uint32_t)right.emulated[14]),
+			  saturateToU8((uint32_t)left.emulated[15] + (uint32_t)right.emulated[15])
+			);
+		#endif
+	}
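+	// Example (illustration, not from the original source): adding brightness to
+	// sixteen 8-bit samples without wrapping around at 255, where "colors" is a
+	// hypothetical U8x16 of pixel data.
+	//   U8x16 brighter = saturatedAddition(colors, U8x16(uint8_t(64)));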
+
+	// TODO: Use overloading to only name the target type
+	inline I32x4 truncateToI32(const F32x4& vector) {
+		#ifdef USE_BASIC_SIMD
+			return I32x4(F32_TO_I32_SIMD(vector.v));
+		#else
+			return I32x4((int32_t)vector.emulated[0], (int32_t)vector.emulated[1], (int32_t)vector.emulated[2], (int32_t)vector.emulated[3]);
+		#endif
+	}
+	inline U32x4 truncateToU32(const F32x4& vector) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(F32_TO_U32_SIMD(vector.v));
+		#else
+			return U32x4((uint32_t)vector.emulated[0], (uint32_t)vector.emulated[1], (uint32_t)vector.emulated[2], (uint32_t)vector.emulated[3]);
+		#endif
+	}
+	inline F32x4 floatFromI32(const I32x4& vector) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(I32_TO_F32_SIMD(vector.v));
+		#else
+			return F32x4((float)vector.emulated[0], (float)vector.emulated[1], (float)vector.emulated[2], (float)vector.emulated[3]);
+		#endif
+	}
+	inline F32x4 floatFromU32(const U32x4& vector) {
+		#ifdef USE_BASIC_SIMD
+			return F32x4(U32_TO_F32_SIMD(vector.v));
+		#else
+			return F32x4((float)vector.emulated[0], (float)vector.emulated[1], (float)vector.emulated[2], (float)vector.emulated[3]);
+		#endif
+	}
+	inline I32x4 I32FromU32(const U32x4& vector) {
+		#ifdef USE_BASIC_SIMD
+			return I32x4(REINTERPRET_U32_TO_I32_SIMD(vector.v));
+		#else
+			return I32x4((int32_t)vector.emulated[0], (int32_t)vector.emulated[1], (int32_t)vector.emulated[2], (int32_t)vector.emulated[3]);
+		#endif
+	}
+	inline U32x4 U32FromI32(const I32x4& vector) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(REINTERPRET_I32_TO_U32_SIMD(vector.v));
+		#else
+			return U32x4((uint32_t)vector.emulated[0], (uint32_t)vector.emulated[1], (uint32_t)vector.emulated[2], (uint32_t)vector.emulated[3]);
+		#endif
+	}
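+	// Example (illustration, not from the original source): truncateToI32 rounds
+	// toward zero, so adding 0.5 first gives round-to-nearest for non-negative input.
+	//   I32x4 rounded = truncateToI32(F32x4(1.2f, 2.5f, 3.7f, 4.0f) + 0.5f);
+	//   F32x4 back = floatFromI32(rounded);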
+	// Warning! Behaviour depends on endianness.
+	inline U8x16 reinterpret_U8FromU32(const U32x4& vector) {
+		#ifdef USE_BASIC_SIMD
+			return U8x16(REINTERPRET_U32_TO_U8_SIMD(vector.v));
+		#else
+			const uint8_t *source = (const uint8_t*)vector.emulated;
+			return U8x16(
+			  source[0], source[1], source[2], source[3], source[4], source[5], source[6], source[7],
+			  source[8], source[9], source[10], source[11], source[12], source[13], source[14], source[15]
+			);
+		#endif
+	}
+	// Warning! Behaviour depends on endianness.
+	inline U32x4 reinterpret_U32FromU8(const U8x16& vector) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(REINTERPRET_U8_TO_U32_SIMD(vector.v));
+		#else
+			const uint32_t *source = (const uint32_t*)vector.emulated;
+			return U32x4(source[0], source[1], source[2], source[3]);
+		#endif
+	}
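+	// Example (illustration, not from the original source): on a little-endian target
+	// the lowest byte of each 32-bit element becomes the first of its four 8-bit lanes,
+	// so the same code produces a different byte order on a big-endian target.
+	//   U32x4 packed(0x04030201u);
+	//   U8x16 bytes = reinterpret_U8FromU32(packed); // 1, 2, 3, 4, ... on little-endian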
+
+	// Unpacking to larger integers
+	inline U32x4 lowerToU32(const U16x8& vector) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(U16_LOW_TO_U32_SIMD(vector.v));
+		#else
+			return U32x4(vector.emulated[0], vector.emulated[1], vector.emulated[2], vector.emulated[3]);
+		#endif
+	}
+	inline U32x4 higherToU32(const U16x8& vector) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(U16_HIGH_TO_U32_SIMD(vector.v));
+		#else
+			return U32x4(vector.emulated[4], vector.emulated[5], vector.emulated[6], vector.emulated[7]);
+		#endif
+	}
+	inline U16x8 lowerToU16(const U8x16& vector) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(U8_LOW_TO_U16_SIMD(vector.v));
+		#else
+			return U16x8(
+			  vector.emulated[0], vector.emulated[1], vector.emulated[2], vector.emulated[3],
+			  vector.emulated[4], vector.emulated[5], vector.emulated[6], vector.emulated[7]
+			);
+		#endif
+	}
+	inline U16x8 higherToU16(const U8x16& vector) {
+		#ifdef USE_BASIC_SIMD
+			return U16x8(U8_HIGH_TO_U16_SIMD(vector.v));
+		#else
+			return U16x8(
+			  vector.emulated[8], vector.emulated[9], vector.emulated[10], vector.emulated[11],
+			  vector.emulated[12], vector.emulated[13], vector.emulated[14], vector.emulated[15]
+			);
+		#endif
+	}
+
+	// Saturated packing
+	inline U8x16 saturateToU8(const U16x8& lower, const U16x8& upper) {
+		#ifdef USE_BASIC_SIMD
+			return U8x16(PACK_SAT_U16_TO_U8(lower.v, upper.v));
+		#else
+			return U8x16(
+			  saturateToU8(lower.emulated[0]),
+			  saturateToU8(lower.emulated[1]),
+			  saturateToU8(lower.emulated[2]),
+			  saturateToU8(lower.emulated[3]),
+			  saturateToU8(lower.emulated[4]),
+			  saturateToU8(lower.emulated[5]),
+			  saturateToU8(lower.emulated[6]),
+			  saturateToU8(lower.emulated[7]),
+			  saturateToU8(upper.emulated[0]),
+			  saturateToU8(upper.emulated[1]),
+			  saturateToU8(upper.emulated[2]),
+			  saturateToU8(upper.emulated[3]),
+			  saturateToU8(upper.emulated[4]),
+			  saturateToU8(upper.emulated[5]),
+			  saturateToU8(upper.emulated[6]),
+			  saturateToU8(upper.emulated[7])
+			);
+		#endif
+	}
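+	// Example (illustration, not from the original source): a common pixel pattern is
+	// to unpack bytes to 16 bits, do the arithmetic with headroom, and pack back with
+	// saturation. "pixels" is a hypothetical U8x16 of packed color data.
+	//   U16x8 low = lowerToU16(pixels) * uint16_t(2);
+	//   U16x8 high = higherToU16(pixels) * uint16_t(2);
+	//   U8x16 doubled = saturateToU8(low, high);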
+
+#endif
+

+ 114 - 0
Source/DFPSR/base/simd3D.h

@@ -0,0 +1,114 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "simd.h"
+#include "../math/FVector.h"
+
+// Linear 3D algebra for operating on 4 unrelated pixels in parallel.
+//   Unlike simd.h, this is not a hardware abstraction layer using assembly intrinsics directly.
+//     This module builds on top of simd.h for higher levels of abstraction.
+//   The 4D SIMD vectors are stored as matrix rows, but the 3D math vectors are stored as the columns.
+//     This allows treating each SIMD vector as if it were a single scalar element,
+//     abstracting away the fact that we are operating on 4 pixels at a time.
+//     It also wastes no space on padding when only 3 dimensions are needed, and it
+//     avoids relying on SIMD instructions that exist in one instruction set but not in the other.
+//     The only penalty is that all four lanes have to run the same operations together.
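+// Example (illustration, not from the original source): normalizing four direction
+// vectors in one call. Each FVector3D becomes one column, so all four columns share
+// the same sequence of SIMD operations.
+//   F32x4x3 directions(
+//     dsr::FVector3D(1.0f, 0.0f, 0.0f), dsr::FVector3D(0.0f, 2.0f, 0.0f),
+//     dsr::FVector3D(0.0f, 0.0f, 3.0f), dsr::FVector3D(1.0f, 1.0f, 1.0f));
+//   F32x4x3 unitDirections = normalize(directions);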
+
+#ifndef DFPSR_SIMD_3D
+#define DFPSR_SIMD_3D
+
+struct F32x4x3 {
+	F32x4 v1, v2, v3;
+	// Direct constructor given 3 rows of length 4
+	F32x4x3(const F32x4& v1, const F32x4& v2, const F32x4& v3)
+	: v1(v1), v2(v2), v3(v3) {}
+	// Transposed constructor given 4 columns of length 3
+	F32x4x3(const dsr::FVector3D& vx, const dsr::FVector3D& vy, const dsr::FVector3D& vz, const dsr::FVector3D& vw)
+	: v1(F32x4(vx.x, vy.x, vz.x, vw.x)),
+	  v2(F32x4(vx.y, vy.y, vz.y, vw.y)),
+	  v3(F32x4(vx.z, vy.z, vz.z, vw.z)) {}
+	// Transposed constructor given a single repeated column
+	F32x4x3(const dsr::FVector3D& v)
+	: v1(F32x4(v.x, v.x, v.x, v.x)),
+	  v2(F32x4(v.y, v.y, v.y, v.y)),
+	  v3(F32x4(v.z, v.z, v.z, v.z)) {}
+	// In-place math operations
+	inline F32x4x3& operator+=(const F32x4x3& offset) { this->v1 = this->v1 + offset.v1; this->v2 = this->v2 + offset.v2; this->v3 = this->v3 + offset.v3; return *this; }
+	inline F32x4x3& operator-=(const F32x4x3& offset) { this->v1 = this->v1 - offset.v1; this->v2 = this->v2 - offset.v2; this->v3 = this->v3 - offset.v3; return *this; }
+	inline F32x4x3& operator*=(const F32x4x3& offset) { this->v1 = this->v1 * offset.v1; this->v2 = this->v2 * offset.v2; this->v3 = this->v3 * offset.v3; return *this; }
+	inline F32x4x3& operator+=(const F32x4& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; }
+	inline F32x4x3& operator-=(const F32x4& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; }
+	inline F32x4x3& operator*=(const F32x4& offset) { this->v1 = this->v1 * offset; this->v2 = this->v2 * offset; this->v3 = this->v3 * offset; return *this; }
+	inline F32x4x3& operator+=(const float& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; }
+	inline F32x4x3& operator-=(const float& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; }
+	inline F32x4x3& operator*=(const float& offset) { this->v1 = this->v1 * offset; this->v2 = this->v2 * offset; this->v3 = this->v3 * offset; return *this; }
+};
+
+inline F32x4x3 operator+(const F32x4x3 &left, const F32x4x3 &right) {
+	return F32x4x3(left.v1 + right.v1, left.v2 + right.v2, left.v3 + right.v3);
+}
+inline F32x4x3 operator+(const F32x4x3 &left, const F32x4 &right) {
+	return F32x4x3(left.v1 + right, left.v2 + right, left.v3 + right);
+}
+inline F32x4x3 operator+(const F32x4x3 &left, const float &right) {
+	return F32x4x3(left.v1 + right, left.v2 + right, left.v3 + right);
+}
+
+inline F32x4x3 operator-(const F32x4x3 &left, const F32x4x3 &right) {
+	return F32x4x3(left.v1 - right.v1, left.v2 - right.v2, left.v3 - right.v3);
+}
+inline F32x4x3 operator-(const F32x4x3 &left, const F32x4 &right) {
+	return F32x4x3(left.v1 - right, left.v2 - right, left.v3 - right);
+}
+inline F32x4x3 operator-(const F32x4x3 &left, const float &right) {
+	return F32x4x3(left.v1 - right, left.v2 - right, left.v3 - right);
+}
+
+inline F32x4x3 operator*(const F32x4x3 &left, const F32x4x3 &right) {
+	return F32x4x3(left.v1 * right.v1, left.v2 * right.v2, left.v3 * right.v3);
+}
+inline F32x4x3 operator*(const F32x4x3 &left, const F32x4 &right) {
+	return F32x4x3(left.v1 * right, left.v2 * right, left.v3 * right);
+}
+inline F32x4x3 operator*(const F32x4x3 &left, const float &right) {
+	return F32x4x3(left.v1 * right, left.v2 * right, left.v3 * right);
+}
+
+inline F32x4 dotProduct(const F32x4x3 &a, const F32x4x3 &b) {
+	return (a.v1 * b.v1) + (a.v2 * b.v2) + (a.v3 * b.v3);
+}
+
+inline F32x4 squareLength(const F32x4x3 &v) {
+	return dotProduct(v, v);
+}
+
+inline F32x4 length(const F32x4x3 &v) {
+	return squareLength(v).squareRoot();
+}
+
+inline F32x4x3 normalize(const F32x4x3 &v) {
+	return v * squareLength(v).reciprocalSquareRoot();
+}
+
+#endif
+

+ 84 - 0
Source/DFPSR/base/simdExtra.h

@@ -0,0 +1,84 @@
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+// An advanced high performance extension to the simpler simd.h
+//    The caller is expected to write the reference implementation separately for unhandled target machines.
+//        Because the code is not as clean as the infix math operations from simd.h,
+//        you will need to write a separate scalar version anyway to document the behaviour.
+//    This module can only be used when the USE_SIMD_EXTRA macro is defined.
+//        This allows USE_SIMD_EXTRA to be more picky about which SIMD instruction sets to use
+//        in order to get access to a larger intersection between the platforms.
+//        It also keeps simd.h easy to port and emulate.
+//    Works directly with simd vectors using aliases, instead of the wrappers.
+//        This makes it easier to mix directly with SIMD intrinsics for a specific target.
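+//    Usage sketch, assuming SIMD_U32x4 registers a and b holding four packed 32-bit pixels each:
+//        SIMD_U32x4x2 both = ZIP_U32_SIMD(a, b);    // {a0,b0,a1,b1} and {a2,b2,a3,b3}
+//        SIMD_U32x4 low = ZIP_LOW_U32_SIMD(a, b);   // {a0,b0,a1,b1}
+//        SIMD_U32x4 high = ZIP_HIGH_U32_SIMD(a, b); // {a2,b2,a3,b3}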
+
+#ifndef DFPSR_SIMD_EXTRA
+#define DFPSR_SIMD_EXTRA
+	#include "simd.h"
+
+	#ifdef USE_SSE2
+		#define USE_SIMD_EXTRA
+		//struct SIMD_F32x4x2 {
+		//	SIMD_F32x4 val[2];
+		//};
+		//struct SIMD_U16x8x2 {
+		//	SIMD_U16x8 val[2];
+		//};
+		struct SIMD_U32x4x2 {
+			SIMD_U32x4 val[2];
+		};
+		//struct SIMD_I32x4x2 {
+		//	SIMD_I32x4 val[2];
+		//};
+		static inline SIMD_U32x4x2 ZIP_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
+			ALIGN16 SIMD_U32x4x2 result;
+			result.val[0] = _mm_unpacklo_epi32(lower, higher);
+			result.val[1] = _mm_unpackhi_epi32(lower, higher);
+			return result;
+		}
+		static inline SIMD_U32x4 ZIP_LOW_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
+			return _mm_unpacklo_epi32(lower, higher);
+		}
+		static inline SIMD_U32x4 ZIP_HIGH_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
+			return _mm_unpackhi_epi32(lower, higher);
+		}
+	#elif USE_NEON
+		#define USE_SIMD_EXTRA
+		// TODO: Write regression tests and try simdExtra.h with NEON activated
+		//#define SIMD_F32x4x2 float32x4x2_t
+		//#define SIMD_U16x8x2 uint16x8x2_t
+		#define SIMD_U32x4x2 uint32x4x2_t
+		//#define SIMD_I32x4x2 int32x4x2_t
+		static inline SIMD_U32x4x2 ZIP_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
+			return vzipq_u32(lower, higher);
+		}
+		static inline SIMD_U32x4 ZIP_LOW_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
+			// Interleave the lower halves into lower[0], higher[0], lower[1], higher[1]
+			uint32x2x2_t zipped = vzip_u32(vget_low_u32(lower), vget_low_u32(higher));
+			return vcombine_u32(zipped.val[0], zipped.val[1]);
+		}
+		static inline SIMD_U32x4 ZIP_HIGH_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
+			// Interleave the upper halves into lower[2], higher[2], lower[3], higher[3]
+			uint32x2x2_t zipped = vzip_u32(vget_high_u32(lower), vget_high_u32(higher));
+			return vcombine_u32(zipped.val[0], zipped.val[1]);
+		}
+	#endif
+#endif

+ 772 - 0
Source/DFPSR/base/text.cpp

@@ -0,0 +1,772 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "text.h"
+#include <fstream>
+#include <streambuf>
+#include <cstring>
+#include <cwctype>
+#include <stdexcept>
+
+using namespace dsr;
+
+static int strlen_utf16(const char32_t *content) {
+	int length = 0;
+	while (content[length] != 0) {
+		length++;
+	}
+	return length;
+}
+
+static char toAscii(DsrChar c) {
+	if (c > 127) {
+		return '?';
+	} else {
+		return c;
+	}
+}
+
+static bool isWhiteSpace(DsrChar c) {
+	// Space and all control characters (including tab and carriage return) count as white space
+	return c <= U' ';
+}
+
+int ReadableString::findFirst(DsrChar toFind, int startIndex) const {
+	for (int i = startIndex; i < this->length(); i++) {
+		if (this->readSection[i] == toFind) {
+			return i;
+		}
+	}
+	return -1;
+}
+
+int ReadableString::findLast(DsrChar toFind) const {
+	for (int i = this->length() - 1; i >= 0; i--) {
+		if (this->readSection[i] == toFind) {
+			return i;
+		}
+	}
+	return -1;
+}
+
+ReadableString ReadableString::exclusiveRange(int inclusiveStart, int exclusiveEnd) const {
+	return this->getRange(inclusiveStart, exclusiveEnd - inclusiveStart);
+}
+
+ReadableString ReadableString::inclusiveRange(int inclusiveStart, int inclusiveEnd) const {
+	return this->getRange(inclusiveStart, inclusiveEnd + 1 - inclusiveStart);
+}
+
+ReadableString ReadableString::before(int exclusiveEnd) const {
+	return this->exclusiveRange(0, exclusiveEnd);
+}
+
+ReadableString ReadableString::until(int inclusiveEnd) const {
+	return this->inclusiveRange(0, inclusiveEnd);
+}
+
+ReadableString ReadableString::from(int inclusiveStart) const {
+	return this->exclusiveRange(inclusiveStart, this->length());
+}
+
+ReadableString ReadableString::after(int exclusiveStart) const {
+	return this->from(exclusiveStart + 1);
+}
+
+List<ReadableString> ReadableString::split(DsrChar separator) const {
+	List<ReadableString> result;
+	int lineStart = 0;
+	for (int i = 0; i < this->length(); i++) {
+		DsrChar c = this->readSection[i];
+		if (c == separator) {
+			result.push(this->exclusiveRange(lineStart, i));
+			lineStart = i + 1;
+		}
+	}
+	if (this->length() > lineStart) {
+		result.push(this->exclusiveRange(lineStart, this->length()));
+	}
+	return result;
+}
+
+int64_t ReadableString::toInteger() const {
+	int64_t result;
+	bool negated;
+	result = 0;
+	negated = false;
+	for (int i = 0; i < this->length(); i++) {
+		DsrChar c = this->readSection[i];
+		if (c == '-' || c == '~') {
+			negated = !negated;
+		} else if (c >= '0' && c <= '9') {
+			result = (result * 10) + (int)(c - '0');
+		} else if (c == ',' || c == '.') {
+			// Truncate any decimals by ignoring them
+			break;
+		}
+	}
+	if (negated) {
+		return -result;
+	} else {
+		return result;
+	}
+}
+
+double ReadableString::toDouble() const {
+	double result;
+	bool negated;
+	bool reachedDecimal;
+	int digitDivider;
+	result = 0.0;
+	negated = false;
+	reachedDecimal = false;
+	digitDivider = 1;
+	for (int i = 0; i < this->length(); i++) {
+		DsrChar c = this->readSection[i];
+		if (c == '-' || c == '~') {
+			negated = !negated;
+		} else if (c >= '0' && c <= '9') {
+			if (reachedDecimal) {
+				digitDivider = digitDivider * 10;
+				result = result + ((double)(c - '0') / (double)digitDivider);
+			} else {
+				result = (result * 10) + (double)(c - '0');
+			}
+		} else if (c == ',' || c == '.') {
+			reachedDecimal = true;
+		}
+	}
+	if (negated) {
+		return -result;
+	} else {
+		return result;
+	}
+}
+
+String& Printable::toStream(String& target) const {
+	return this->toStreamIndented(target, U"");
+}
+
+String Printable::toStringIndented(const ReadableString& indentation) const {
+	String result;
+	this->toStreamIndented(result, indentation);
+	return result;
+}
+
+String Printable::toString() const {
+	return this->toStringIndented(U"");
+}
+
+std::ostream& Printable::toStreamIndented(std::ostream& out, const ReadableString& indentation) const {
+	String result;
+	this->toStreamIndented(result, indentation);
+	for (int i = 0; i < result.length(); i++) {
+		out.put(toAscii(result.read(i)));
+	}
+	return out;
+}
+
+std::ostream& Printable::toStream(std::ostream& out) const {
+	return this->toStreamIndented(out, U"");
+}
+
+std::string Printable::toStdString() const {
+	std::ostringstream result;
+	this->toStream(result);
+	return result.str();
+}
+
+bool dsr::string_match(const ReadableString& a, const ReadableString& b) {
+	if (a.length() != b.length()) {
+		return false;
+	} else {
+		for (int i = 0; i < a.length(); i++) {
+			if (a.read(i) != b.read(i)) {
+				return false;
+			}
+		}
+		return true;
+	}
+}
+
+bool dsr::string_caseInsensitiveMatch(const ReadableString& a, const ReadableString& b) {
+	if (a.length() != b.length()) {
+		return false;
+	} else {
+		for (int i = 0; i < a.length(); i++) {
+			if (towupper(a.read(i)) != towupper(b.read(i))) {
+				return false;
+			}
+		}
+		return true;
+	}
+}
+
+std::ostream& ReadableString::toStream(std::ostream& out) const {
+	for (int i = 0; i < this->length(); i++) {
+		out.put(toAscii(this->read(i)));
+	}
+	return out;
+}
+
+std::string ReadableString::toStdString() const {
+	std::ostringstream result;
+	this->toStream(result);
+	return result.str();
+}
+
+String dsr::string_upperCase(const ReadableString &text) {
+	String result;
+	result.reserve(text.length());
+	for (int i = 0; i < text.length(); i++) {
+		result.appendChar(towupper(text[i]));
+	}
+	return result;
+}
+
+String dsr::string_lowerCase(const ReadableString &text) {
+	String result;
+	result.reserve(text.length());
+	for (int i = 0; i < text.length(); i++) {
+		result.appendChar(towlower(text[i]));
+	}
+	return result;
+}
+
+String dsr::string_removeAllWhiteSpace(const ReadableString &text) {
+	String result;
+	result.reserve(text.length());
+	for (int i = 0; i < text.length(); i++) {
+		DsrChar c = text[i];
+		if (!isWhiteSpace(c)) {
+			result.appendChar(c);
+		}
+	}
+	return result;
+}
+
+ReadableString dsr::string_removeOuterWhiteSpace(const ReadableString &text) {
+	int first = -1;
+	int last = -1;
+	for (int i = 0; i < text.length(); i++) {
+		DsrChar c = text[i];
+		if (!isWhiteSpace(c)) {
+			first = i;
+			break;
+		}
+	}
+	for (int i = text.length() - 1; i >= 0; i--) {
+		DsrChar c = text[i];
+		if (!isWhiteSpace(c)) {
+			last = i;
+			break;
+		}
+	}
+	if (first == -1) {
+		// Only white space
+		return ReadableString();
+	} else {
+		// Subset
+		return text.inclusiveRange(first, last);
+	}
+}
+
+int64_t dsr::string_parseInteger(const ReadableString& content) {
+	return content.toInteger();
+}
+
+double dsr::string_parseDouble(const ReadableString& content) {
+	return content.toDouble();
+}
+
+String dsr::string_mangleQuote(const ReadableString &rawText) {
+	String result;
+	result.reserve(rawText.length() + 2);
+	result.appendChar(U'\"'); // Begin quote
+	for (int i = 0; i < rawText.length(); i++) {
+		DsrChar c = rawText[i];
+		if (c == U'\"') { // Double quote
+			result.append(U"\\\"");
+		} else if (c == U'\\') { // Backslash
+			result.append(U"\\\\");
+		} else if (c == U'\a') { // Audible bell
+			result.append(U"\\a");
+		} else if (c == U'\b') { // Backspace
+			result.append(U"\\b");
+		} else if (c == U'\f') { // Form feed
+			result.append(U"\\f");
+		} else if (c == U'\n') { // Line feed
+			result.append(U"\\n");
+		} else if (c == U'\r') { // Carriage return
+			result.append(U"\\r");
+		} else if (c == U'\t') { // Horizontal tab
+			result.append(U"\\t");
+		} else if (c == U'\v') { // Vertical tab
+			result.append(U"\\v");
+		} else if (c == U'\0') { // Null terminator
+			result.append(U"\\0");
+		} else {
+			result.appendChar(c);
+		}
+	}
+	result.appendChar(U'\"'); // End quote
+	return result;
+}
+
+String dsr::string_unmangleQuote(const ReadableString& mangledText) {
+	int firstQuote = mangledText.findFirst('\"');
+	int lastQuote = mangledText.findLast('\"');
+	String result;
+	if (firstQuote == -1 || lastQuote == -1 || firstQuote == lastQuote) {
+		throwError(U"Cannot unmangle using string_unmangleQuote without beginning and ending with quote signs!\n");
+	} else {
+		for (int i = firstQuote + 1; i < lastQuote; i++) {
+			DsrChar c = mangledText[i];
+			if (c == U'\\') { // Escape character
+				DsrChar c2 = mangledText[i + 1];
+				if (c2 == U'\"') { // Double quote
+					result.appendChar(U'\"');
+				} else if (c2 == U'\\') { // Back slash
+					result.appendChar(U'\\');
+				} else if (c2 == U'a') { // Audible bell
+					result.appendChar(U'\a');
+				} else if (c2 == U'b') { // Backspace
+					result.appendChar(U'\b');
+				} else if (c2 == U'f') { // Form feed
+					result.appendChar(U'\f');
+				} else if (c2 == U'n') { // Line feed
+					result.appendChar(U'\n');
+				} else if (c2 == U'r') { // Carriage return
+					result.appendChar(U'\r');
+				} else if (c2 == U't') { // Horizontal tab
+					result.appendChar(U'\t');
+				} else if (c2 == U'v') { // Vertical tab
+					result.appendChar(U'\v');
+				} else if (c2 == U'0') { // Null terminator
+					result.appendChar(U'\0');
+				}
+				i++; // Consume both characters
+			} else {
+				// Detect bad input
+				if (c == U'\"') { // Double quote
+					 throwError(U"Unmangled double quote sign detected in string_unmangleQuote!\n");
+				} else if (c == U'\\') { // Back slash
+					 throwError(U"Unmangled back slash detected in string_unmangleQuote!\n");
+				} else if (c == U'\a') { // Audible bell
+					 throwError(U"Unmangled audible bell detected in string_unmangleQuote!\n");
+				} else if (c == U'\b') { // Backspace
+					 throwError(U"Unmangled backspace detected in string_unmangleQuote!\n");
+				} else if (c == U'\f') { // Form feed
+					 throwError(U"Unmangled form feed detected in string_unmangleQuote!\n");
+				} else if (c == U'\n') { // Line feed
+					 throwError(U"Unmangled line feed detected in string_unmangleQuote!\n");
+				} else if (c == U'\r') { // Carriage return
+					 throwError(U"Unmangled carriage return detected in string_unmangleQuote!\n");
+				} else if (c == U'\t') { // Horizontal tab
+					 throwError(U"Unmangled horizontal tab detected in string_unmangleQuote!\n");
+				} else if (c == U'\v') { // Vertical tab
+					 throwError(U"Unmangled vertical tab detected in string_unmangleQuote!\n");
+				} else if (c == U'\0') { // Null terminator
+					 throwError(U"Unmangled null terminator detected in string_unmangleQuote!\n");
+				} else {
+					result.appendChar(c);
+				}
+			}
+		}
+	}
+	return result;
+}
+
+void dsr::uintToString_arabic(String& target, uint64_t value) {
+	static const int bufferSize = 20;
+	DsrChar digits[bufferSize];
+	int usedSize = 0;
+	if (value == 0) {
+		target.appendChar(U'0');
+	} else {
+		while (usedSize < bufferSize) {
+			DsrChar digit = U'0' + (value % 10u);
+			digits[usedSize] = digit;
+			usedSize++;
+			value /= 10u;
+			if (value == 0) {
+				break;
+			}
+		}
+		while (usedSize > 0) {
+			usedSize--;
+			target.appendChar(digits[usedSize]);
+		}
+	}
+}
+
+void dsr::intToString_arabic(String& target, int64_t value) {
+	if (value >= 0) {
+		uintToString_arabic(target, (uint64_t)value);
+	} else {
+		target.appendChar(U'-');
+		uintToString_arabic(target, (uint64_t)(-value));
+	}
+}
+
+// TODO: Implement own version to ensure that nothing strange is happening from buggy std implementations
+void dsr::doubleToString_arabic(String& target, double value) {
+	std::ostringstream buffer;
+	buffer << std::fixed << value; // Generate using a fixed number of decimals
+	std::string result = buffer.str();
+	// Remove trailing zero decimal digits
+	int decimalCount = 0;
+	int lastValueIndex = -1;
+	for (int c = 0; c < (int)result.length(); c++) {
+		if (result[c] == '.') {
+			decimalCount++;
+		} else if (result[c] == ',') {
+			result[c] = '.'; // Convert a localized decimal comma into an international decimal point
+			decimalCount++;
+		} else if (decimalCount > 0 && result[c] >= '1' && result[c] <= '9') {
+			lastValueIndex = c;
+		} else if (decimalCount == 0 && result[c] >= '0' && result[c] <= '9') {
+			lastValueIndex = c;
+		}
+	}
+	for (int c = 0; c <= lastValueIndex; c++) {
+		target.appendChar(result[c]);
+	}
+}
+
+#define TO_RAW_ASCII(TARGET, SOURCE) \
+	char TARGET[SOURCE.length() + 1]; \
+	for (int i = 0; i < SOURCE.length(); i++) { \
+		TARGET[i] = toAscii(SOURCE[i]); \
+	} \
+	TARGET[SOURCE.length()] = '\0';
+
+String dsr::string_load(const ReadableString& filename) {
+	// TODO: Load files using Unicode filenames
+	TO_RAW_ASCII(asciiFilename, filename);
+	std::ifstream inputFile(asciiFilename);
+	if (inputFile.is_open()) {
+		std::stringstream outputBuffer;
+		// TODO: Feed directly to String
+		outputBuffer << inputFile.rdbuf();
+		std::string content = outputBuffer.str();
+		String result;
+		result.reserve(content.size());
+		for (int i = 0; i < (int)(content.size()); i++) {
+			result.appendChar(content[i]);
+		}
+		inputFile.close();
+		return result;
+	} else {
+		throwError("Failed to load ", filename, "\n");
+		return U"";
+	}
+}
+
+void dsr::string_save(const ReadableString& filename, const ReadableString& content) {
+	// TODO: Save files using Unicode filenames
+	TO_RAW_ASCII(asciiFilename, filename);
+	TO_RAW_ASCII(asciiContent, content);
+	std::ofstream outputFile;
+	outputFile.open(asciiFilename);
+	if (outputFile.is_open()) {
+		outputFile << asciiContent;
+		outputFile.close();
+	} else {
+		throwError("Failed to save ", filename, "\n");
+	}
+}
+
+const char32_t* dsr::file_separator() {
+	#ifdef _WIN32
+		return U"\\";
+	#else
+		return U"/";
+	#endif
+}
+
+int ReadableString::length() const {
+	return this->sectionLength;
+}
+
+bool ReadableString::checkBound(int start, int length, bool warning) const {
+	if (start < 0 || start + length > this->length()) {
+		if (warning) {
+			String message;
+			string_append(message, U"\n");
+			string_append(message, U" _____________________ Sub-string bound exception! _____________________\n");
+			string_append(message, U"/\n");
+			string_append(message, U"|  Characters from ", start, U" to ", (start + length - 1), U" are out of bound!\n");
+			string_append(message, U"|  In source string of 0..", (this->length() - 1), U".\n");
+			string_append(message, U"\\_______________________________________________________________________\n");
+			throwError(message);
+		}
+		return false;
+	} else {
+		return true;
+	}
+}
+
+DsrChar ReadableString::read(int index) const {
+	if (index < 0 || index >= this->sectionLength) {
+		return '\0';
+	} else {
+		return this->readSection[index];
+	}
+}
+
+DsrChar ReadableString::operator[] (int index) const { return this->read(index); }
+
+Printable::~Printable() {}
+
+ReadableString::ReadableString() {}
+ReadableString::~ReadableString() {}
+
+ReadableString::ReadableString(const DsrChar *content, int sectionLength)
+: readSection(content), sectionLength(sectionLength) {}
+
+ReadableString::ReadableString(const DsrChar *content)
+: readSection(content), sectionLength(strlen_utf16(content)) {}
+
+String::String() {}
+String::String(const char* source) { this->append(source); }
+String::String(const char32_t* source) { this->append(source); }
+String::String(const std::string& source) { this->append(source); }
+String::String(const ReadableString& source) { this->append(source); }
+String::String(const String& source) { this->append(source); }
+
+String::String(std::shared_ptr<Buffer> buffer, DsrChar *content, int sectionLength)
+ : ReadableString(content, sectionLength), buffer(buffer), writeSection(content) {}
+
+int String::capacity() {
+	if (this->buffer.get() == nullptr) {
+		return 0;
+	} else {
+		// Get the parent allocation
+		uint8_t* parentBuffer = this->buffer->getUnsafeData();
+		// Get the offset from the parent
+		intptr_t offset = (uint8_t*)this->writeSection - parentBuffer;
+		// Subtract offset from the buffer size to get the remaining space
+		return (this->buffer->size - offset) / sizeof(DsrChar);
+	}
+}
+
+ReadableString ReadableString::getRange(int start, int length) const {
+	if (length < 1) {
+		return ReadableString();
+	} else if (this->checkBound(start, length)) {
+		return ReadableString(&(this->readSection[start]), length);
+	} else {
+		return ReadableString();
+	}
+}
+
+ReadableString String::getRange(int start, int length) const {
+	if (length < 1) {
+		return ReadableString();
+	} else if (this->checkBound(start, length)) {
+		return String(this->buffer, &(this->writeSection[start]), length);
+	} else {
+		return ReadableString();
+	}
+}
+
+static int32_t getNewBufferSize(int32_t minimumSize) {
+	if (minimumSize <= 128) {
+		return 128;
+	} else if (minimumSize <= 512) {
+		return 512;
+	} else if (minimumSize <= 2048) {
+		return 2048;
+	} else if (minimumSize <= 8192) {
+		return 8192;
+	} else if (minimumSize <= 32768) {
+		return 32768;
+	} else if (minimumSize <= 131072) {
+		return 131072;
+	} else if (minimumSize <= 524288) {
+		return 524288;
+	} else if (minimumSize <= 2097152) {
+		return 2097152;
+	} else if (minimumSize <= 8388608) {
+		return 8388608;
+	} else if (minimumSize <= 33554432) {
+		return 33554432;
+	} else if (minimumSize <= 134217728) {
+		return 134217728;
+	} else if (minimumSize <= 536870912) {
+		return 536870912;
+	} else {
+		return 2147483647;
+	}
+}
+void String::reallocateBuffer(int32_t newLength, bool preserve) {
+	// Holding oldData alive while copying to the new buffer
+	std::shared_ptr<Buffer> oldBuffer = this->buffer;
+	const char32_t* oldData = this->readSection;
+	this->buffer = std::make_shared<Buffer>(getNewBufferSize(newLength * sizeof(DsrChar)));
+	this->readSection = this->writeSection = reinterpret_cast<char32_t*>(this->buffer->getUnsafeData());
+	if (preserve && oldData) {
+		memcpy(this->writeSection, oldData, this->sectionLength * sizeof(DsrChar));
+	}
+}
+
+// Call before writing to the buffer
+//   This hides that Strings share buffers when assigning by value or taking partial strings
+void String::cloneIfShared() {
+	if (this->buffer.use_count() > 1) {
+		this->reallocateBuffer(this->sectionLength, true);
+	}
+}
+
+void String::expand(int32_t newLength, bool affectUsedLength) {
+	if (newLength > this->sectionLength) {
+		if (newLength > this->capacity()) {
+			this->reallocateBuffer(newLength, true);
+		}
+	}
+	if (affectUsedLength) {
+		this->sectionLength = newLength;
+	}
+}
+
+void String::reserve(int32_t minimumLength) {
+	this->expand(minimumLength, false);
+}
+
+void String::write(int index, DsrChar value) {
+	this->cloneIfShared();
+	if (index < 0 || index >= this->sectionLength) {
+		// TODO: Give a warning
+	} else {
+		this->writeSection[index] = value;
+	}
+}
+
+void String::clear() {
+	this->sectionLength = 0;
+}
+
+// This macro has to be used because a static template wouldn't be able to inherit access to private methods from the target class.
+//   Better to use a macro without type safety in the implementation than to expose yet another template in a global header.
+// Proof that appending to one string doesn't affect another:
+//   If it has to reallocate
+//     * Then it will have its own buffer without conflicts
+//   If it doesn't have to reallocate
+//     If it shares the buffer
+//       If source is empty
+//         * Then no risk of overwriting neighbor strings if we don't write
+//       If source isn't empty
+//         * Then the buffer will be cloned when the first character is written
+//     If it doesn't share the buffer
+//       * Then no risk of writing
+#define APPEND(TARGET, SOURCE, LENGTH) { \
+	int oldLength = (TARGET)->length(); \
+	(TARGET)->expand(oldLength + (int)(LENGTH), true); \
+	for (int i = 0; i < (int)(LENGTH); i++) { \
+		(TARGET)->write(oldLength + i, (SOURCE)[i]); \
+	} \
+}
+// TODO: See if ASCII literals can be checked for values above 127 at compile-time
+void String::append(const char* source) { APPEND(this, source, strlen(source)); }
+// TODO: Use memcpy when appending input of the same format
+void String::append(const ReadableString& source) { APPEND(this, source, source.length()); }
+void String::append(const char32_t* source) { APPEND(this, source, strlen_utf16(source)); }
+void String::append(const std::string& source) { APPEND(this, source.c_str(), (int)source.size()); }
+void String::appendChar(DsrChar source) { APPEND(this, &source, 1); }
+
+String& dsr::string_toStreamIndented(String& target, const Printable& source, const ReadableString& indentation) {
+	return source.toStreamIndented(target, indentation);
+}
+String& dsr::string_toStreamIndented(String& target, const char* value, const ReadableString& indentation) {
+	target.append(indentation);
+	target.append(value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const ReadableString& value, const ReadableString& indentation) {
+	target.append(indentation);
+	target.append(value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const char32_t* value, const ReadableString& indentation) {
+	target.append(indentation);
+	target.append(value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const std::string& value, const ReadableString& indentation) {
+	target.append(indentation);
+	target.append(value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const float& value, const ReadableString& indentation) {
+	target.append(indentation);
+	doubleToString_arabic(target, (double)value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const double& value, const ReadableString& indentation) {
+	target.append(indentation);
+	doubleToString_arabic(target, value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const int64_t& value, const ReadableString& indentation) {
+	target.append(indentation);
+	intToString_arabic(target, value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const uint64_t& value, const ReadableString& indentation) {
+	target.append(indentation);
+	uintToString_arabic(target, value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const int32_t& value, const ReadableString& indentation) {
+	target.append(indentation);
+	intToString_arabic(target, (int64_t)value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const uint32_t& value, const ReadableString& indentation) {
+	target.append(indentation);
+	uintToString_arabic(target, (uint64_t)value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const int16_t& value, const ReadableString& indentation) {
+	target.append(indentation);
+	intToString_arabic(target, (int64_t)value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const uint16_t& value, const ReadableString& indentation) {
+	target.append(indentation);
+	uintToString_arabic(target, (uint64_t)value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const int8_t& value, const ReadableString& indentation) {
+	target.append(indentation);
+	intToString_arabic(target, (int64_t)value);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const uint8_t& value, const ReadableString& indentation) {
+	target.append(indentation);
+	uintToString_arabic(target, (uint64_t)value);
+	return target;
+}
+
+void dsr::throwErrorMessage(const String& message) {
+	throw std::runtime_error(message.toStdString());
+}

+ 304 - 0
Source/DFPSR/base/text.h

@@ -0,0 +1,304 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_BASE_TEXT
+#define DFPSR_BASE_TEXT
+
+#include <stdint.h>
+#include <string>
+
+// TODO: Try to hide in the implementation
+#include <iostream>
+#include <sstream>
+
+#include "Buffer.h"
+#include "../collection/List.h"
+
+namespace dsr {
+
+using DsrChar = char32_t;
+
+class ReadableString {
+protected:
+	// A local pointer to the sub-allocation
+	const char32_t* readSection = nullptr;
+	// The length of the current string in characters
+	int sectionLength = 0;
+public:
+	int length() const;
+	DsrChar read(int index) const;
+	DsrChar operator[] (int index) const;
+public:
+	// Empty string
+	ReadableString();
+	// Destructor
+	virtual ~ReadableString();
+	// UTF-32 literal from U""
+	// WARNING! May crash if the content is freed, even if this ReadableString was freed before the content,
+	//          because ReadableString may share its pointer with sub-strings of the same type.
+	ReadableString(const DsrChar *content);
+protected:
+	// Returns true iff the range is safely inside of the string
+	bool checkBound(int start, int length, bool warning = true) const;
+	// Internal constructor
+	ReadableString(const DsrChar *content, int sectionLength);
+	// Create a string from an existing string
+	// When there's no reference counter, it's important that the memory remains allocated until the application terminates
+	// Just like when reading elements in a for loop, out-of-range only causes an exception if length > 0
+	//   Length lesser than 1 will always return an empty string
+	virtual ReadableString getRange(int start, int length) const;
+public:
+	// Converting to an unknown character encoding using only the ASCII character subset
+	// A bug in GCC linking forces these to be virtual
+	virtual std::ostream& toStream(std::ostream& out) const;
+	virtual std::string toStdString() const;
+public:
+	// Get the index of the first character in content matching toFind, or -1 if it doesn't exist.
+	int findFirst(DsrChar toFind, int startIndex = 0) const;
+	// Get the index of the last character in content matching toFind, or -1 if it doesn't exist.
+	int findLast(DsrChar toFind) const;
+	// Exclusive intervals represent the divisions between characters |⁰ A |¹ B |² C |³...
+	//   0..2 of "ABC" then equals "AB", which has length 2 just like the index difference
+	//   0..3 gets the whole "ABC" range, by starting from zero and ending with the character count
+	ReadableString exclusiveRange(int inclusiveStart, int exclusiveEnd) const;
+	// Inclusive intervals represent whole characters | A⁰ | B¹ | C² |...
+	//   0..2 of "ABC" then equals "ABC", by taking character 0 (A), 1 (B) and 2 (C)
+	ReadableString inclusiveRange(int inclusiveStart, int inclusiveEnd) const;
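+	//   Usage sketch, assuming a ReadableString named text containing U"ABCDE":
+	//     ReadableString firstTwo = text.exclusiveRange(0, 2);   // "AB"
+	//     ReadableString firstThree = text.inclusiveRange(0, 2); // "ABC"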
+	// Simplified ranges
+	ReadableString before(int exclusiveEnd) const;
+	ReadableString until(int inclusiveEnd) const;
+	ReadableString from(int inclusiveStart) const;
+	ReadableString after(int exclusiveStart) const;
+	// Split into a list of strings without allocating any new text buffers
+	//   The result can be kept after the original string has been freed, because the buffer is reference counted
+	List<ReadableString> split(DsrChar separator) const;
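+	//   Usage sketch, assuming a comma separated source:
+	//     List<ReadableString> cells = ReadableString(U"a,b,c").split(U',');
+	//     // cells holds "a", "b" and "c" pointing into the original characters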
+	// Value conversion
+	int64_t toInteger() const;
+	double toDouble() const;
+};
+
+class String;
+
+// Reusable conversion methods
+void uintToString_arabic(String& target, uint64_t value);
+void intToString_arabic(String& target, int64_t value);
+void doubleToString_arabic(String& target, double value);
+
+// Used as format tags around numbers passed to string_append or string_combine
+// New types can implement printing to String by making wrappers from this class
+class Printable {
+public:
+	// The method for appending the printable object into the target string
+	virtual String& toStreamIndented(String& target, const ReadableString& indentation) const = 0;
+	String& toStream(String& target) const;
+	String toStringIndented(const ReadableString& indentation) const;
+	String toString() const;
+	std::ostream& toStreamIndented(std::ostream& out, const ReadableString& indentation) const;
+	std::ostream& toStream(std::ostream& out) const;
+	std::string toStdString() const;
+	virtual ~Printable();
+};
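+// Usage sketch: a new type becomes printable by inheriting from Printable and implementing
+// toStreamIndented, after which it can be passed to string_append, string_combine and printText.
+// The Point type below is a hypothetical example.
+//   struct Point : public Printable {
+//       int x, y;
+//       Point(int x, int y) : x(x), y(y) {}
+//       String& toStreamIndented(String& target, const ReadableString& indentation) const override {
+//           string_append(target, indentation, U"(", x, U", ", y, U")");
+//           return target;
+//       }
+//   };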
+
+// A safe and simple string type
+//   Can be constructed from ASCII literals "", but U"" is more universal
+//   Can be used without ReadableString, but ReadableString can be wrapped over U"" literals without allocation
+//   UTF-32
+//     Endianness is native
+//     No combined characters allowed, use precomposed instead, so that the strings can guarantee a fixed character size
+class String : public ReadableString {
+protected:
+	// A reference counted pointer to the buffer, just to keep the allocation
+	std::shared_ptr<Buffer> buffer;
+	// Same as readSection, but with write access
+	char32_t* writeSection = nullptr;
+public:
+	// The number of DsrChar characters that can be contained in the allocation before reaching the buffer's end
+	//   This doesn't imply that it's always okay to write to the remaining space, because the buffer may be shared
+	int capacity();
+protected:
+	// Internal constructor
+	String(std::shared_ptr<Buffer> buffer, DsrChar *content, int sectionLength);
+	// Create a string from the existing buffer without allocating any heap memory
+	ReadableString getRange(int start, int length) const override;
+private:
+	// Replaces the buffer with a new buffer holding at least newLength characters
+	// Guarantees that the new buffer is not shared by other strings, so that it may be written to freely
+	void reallocateBuffer(int32_t newLength, bool preserve);
+	// Call before writing to the buffer
+	//   This hides that Strings share buffers when assigning by value or taking partial strings
+	void cloneIfShared();
+	void expand(int32_t newLength, bool affectUsedLength);
+public:
+	// Constructors
+	String();
+	String(const char* source);
+	String(const char32_t* source);
+	String(const std::string& source);
+	String(const ReadableString& source);
+	String(const String& source);
+public:
+	// Safely ensures that at least minimumLength characters can be held in the buffer
+	void reserve(int32_t minimumLength);
+	// Extend the String using more text
+	void append(const char* source);
+	void append(const ReadableString& source);
+	void append(const char32_t* source);
+	void append(const std::string& source);
+	// Extend the String using another character
+	void appendChar(DsrChar source);
+public:
+	// Access
+	void write(int index, DsrChar value);
+	void clear();
+};
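+// Usage sketch: building a String incrementally.
+//   String message;
+//   message.reserve(32);       // optional, avoids reallocations for short messages
+//   message.append(U"Result: ");
+//   message.append("ok");      // ASCII literals are also accepted
+//   message.appendChar(U'!');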
+
+// Define this overload for non-virtual source types that cannot inherit from Printable
+String& string_toStreamIndented(String& target, const Printable& source, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const char* value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const ReadableString& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const char32_t* value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const std::string& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const float& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const double& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const int64_t& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const uint64_t& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const int32_t& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const uint32_t& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const int16_t& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const uint16_t& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const int8_t& value, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const uint8_t& value, const ReadableString& indentation);
+
+// Procedural API
+// TODO: Create procedural constructors
+// TODO: Make wrappers around member methods
+String string_load(const ReadableString& filename);
+void string_save(const ReadableString& filename, const ReadableString& content);
+bool string_match(const ReadableString& a, const ReadableString& b);
+bool string_caseInsensitiveMatch(const ReadableString& a, const ReadableString& b);
+String string_upperCase(const ReadableString &text);
+String string_lowerCase(const ReadableString &text);
+String string_removeAllWhiteSpace(const ReadableString &text);
+ReadableString string_removeOuterWhiteSpace(const ReadableString &text);
+int64_t string_parseInteger(const ReadableString& content);
+double string_parseDouble(const ReadableString& content);
+String string_mangleQuote(const ReadableString &rawText);
+String string_unmangleQuote(const ReadableString& mangledText);
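+//   Round-trip sketch: string_mangleQuote(U"line one\nline two") returns the text surrounded by
+//   quote signs with the line break escaped as \n, and string_unmangleQuote turns it back again.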
+// Append one element
+template<typename TYPE>
+inline void string_append(String& target, TYPE value) {
+	string_toStream(target, value);
+}
+// Append multiple elements
+template<typename HEAD, typename... TAIL>
+inline void string_append(String& target, HEAD head, TAIL... tail) {
+	string_append(target, head);
+	string_append(target, tail...);
+}
+// Combine a number of strings, characters and numbers
+//   If an input type is rejected, create a Printable object to wrap around it
+template<typename... ARGS>
+inline String string_combine(ARGS... args) {
+	String result;
+	string_append(result, args...);
+	return result;
+}
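+// Usage sketch, with hypothetical frameIndex and milliseconds variables of integer and double type:
+//   String caption = string_combine(U"Frame ", frameIndex, U" took ", milliseconds, U" ms");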
+
+// Operations
+inline String operator+ (const ReadableString& a, const ReadableString& b) { return string_combine(a, b); }
+inline String operator+ (const char32_t* a, const ReadableString& b) { return string_combine(a, b); }
+inline String operator+ (const ReadableString& a, const char32_t* b) { return string_combine(a, b); }
+inline String operator+ (const String& a, const String& b) { return string_combine(a, b); }
+inline String operator+ (const char32_t* a, const String& b) { return string_combine(a, b); }
+inline String operator+ (const String& a, const char32_t* b) { return string_combine(a, b); }
+inline String operator+ (const String& a, const ReadableString& b) { return string_combine(a, b); }
+inline String operator+ (const ReadableString& a, const String& b) { return string_combine(a, b); }
+
+// Print information
+template<typename... ARGS>
+void printText(ARGS... args) {
+	String result = string_combine(args...);
+	result.toStream(std::cout);
+}
+
+// Use for text printing that is useful when debugging but should not be given out in a release build
+#ifdef NDEBUG
+	// Suppress debugText in release mode
+	template<typename... ARGS>
+	void debugText(ARGS... args) {}
+#else
+	// Print debugText in debug mode
+	template<typename... ARGS>
+	void debugText(ARGS... args) { printText(args...); }
+#endif
+
+// Raise an exception
+//   Only catch errors to display useful error messages, make emergency backups or write crash logs before terminating
+//   Further execution after a partial transaction will break object invariants
+void throwErrorMessage(const String& message);
+template<typename... ARGS>
+void throwError(ARGS... args) {
+	String result = string_combine(args...);
+	throwErrorMessage(result);
+}
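+// Usage sketch, with hypothetical levelIndex and levelCount variables:
+//   if (levelIndex < 0 || levelIndex >= levelCount) {
+//       throwError(U"Level index ", levelIndex, U" is out of bound 0..", levelCount - 1, U"!\n");
+//   }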
+
+
+// ---------------- Overloaded serialization ----------------
+
+
+// Templates reused for all types
+// The source must inherit from Printable or have its own string_toStreamIndented overload
+template<typename T>
+String& string_toStream(String& target, const T& source) {
+	return string_toStreamIndented(target, source, U"");
+}
+template<typename T>
+String string_toStringIndented(const T& source, const ReadableString& indentation) {
+	String result;
+	string_toStreamIndented(result, source, indentation);
+	return result;
+}
+template<typename T>
+String string_toString(const T& source) {
+	return string_toStringIndented(source, U"");
+}
+template<typename T>
+std::ostream& string_toStreamIndented(std::ostream& target, const T& source, const ReadableString& indentation) {
+	return target << string_toStringIndented(source, indentation);
+}
+template<typename T>
+std::ostream& string_toStream(std::ostream& target, const T& source) {
+	return target << string_toString(source);
+}
+
+// ---------------- Below uses hard-coded portability for specific operating systems ----------------
+
+
+// Get a path separator for the target operating system.
+const char32_t* file_separator();
+
+
+}
+
+#endif
+

+ 167 - 0
Source/DFPSR/base/threading.cpp

@@ -0,0 +1,167 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "threading.h"
+
+// Requires -pthread for linking
+#include <future>
+#include <thread>
+#include <mutex>
+#include <atomic>
+
+namespace dsr {
+
+// Enable this macro to disable multi-threading
+//   If your application still crashes when using a single thread, it's probably not a concurrency problem
+//#define DISABLE_MULTI_THREADING
+
+// Prevent doing other multi-threaded work at the same time
+//   As a side effect, this makes it safe to use global variables to prevent unsafe use of stack memory
+static std::mutex workLock, getTaskLock;
+static std::atomic<int> nextJobIndex{0};
+
+// TODO: This method really needs a thread pool for starting jobs faster,
+//       but third-party libraries often use low-level platform-specific solutions.
+// TODO: Let each worker have one future doing scheduling on its own to prevent stalling on a scheduling main thread.
+//       When a worker is done with a task, it will use a mutex-protected volatile variable to pick the next task from the queue.
+void threadedWorkFromArray(std::function<void()>* jobs, int jobCount) {
+	#ifdef DISABLE_MULTI_THREADING
+		// Reference implementation
+		for (int i = 0; i < jobCount; i++) {
+			jobs[i]();
+		}
+	#else
+		if (jobCount <= 0) {
+			return;
+		} else if (jobCount == 1) {
+			jobs[0]();
+		} else {
+			workLock.lock();
+				nextJobIndex = 0;
+				// Multi-threaded work loop
+				int workerCount = std::min((int)std::thread::hardware_concurrency() - 1, jobCount); // All used threads
+				if (workerCount < 1) { workerCount = 1; } // Fall back to one worker when hardware_concurrency is unknown or one
+				int helperCount = workerCount - 1; // Excluding the main thread
+				std::function<void()> workers[workerCount];
+				std::future<void> helpers[helperCount];
+				for (int w = 0; w < workerCount; w++) {
+					workers[w] = [jobs, jobCount]() {
+						while (true) {
+							getTaskLock.lock();
+							int taskIndex = nextJobIndex;
+							nextJobIndex++;
+							getTaskLock.unlock();
+							if (taskIndex < jobCount) {
+								jobs[taskIndex]();
+							} else {
+								break;
+							}
+						}
+					};
+				}
+				// Start working in the helper threads
+				for (int h = 0; h < helperCount; h++) {
+					helpers[h] = std::async(std::launch::async, workers[h]);
+				}
+				// Perform the same work on the main thread
+				workers[workerCount - 1]();
+				// Wait for all helpers to complete their work once all tasks have been handed out
+				for (int h = 0; h < helperCount; h++) {
+					if (helpers[h].valid()) {
+						helpers[h].wait();
+					}
+				}
+			workLock.unlock();
+		}
+	#endif
+}
+
+void threadedWorkFromList(List<std::function<void()>> jobs) {
+	threadedWorkFromArray(&jobs[0], jobs.length());
+	jobs.clear();
+}
+
+void threadedSplit(int startIndex, int stopIndex, std::function<void(int startIndex, int stopIndex)> task, int minimumJobSize, int jobsPerThread) {
+	int totalCount = stopIndex - startIndex;
+	int maxJobs = totalCount / minimumJobSize;
+	int jobCount = std::thread::hardware_concurrency() * jobsPerThread;
+	if (jobCount > maxJobs) { jobCount = maxJobs; }
+	if (jobCount < 1) { jobCount = 1; }
+	if (jobCount == 1) {
+		// Too little work for multi-threading
+		task(startIndex, stopIndex);
+	} else {
+		// Use multiple threads
+		std::function<void()> jobs[jobCount];
+		int givenRow = startIndex;
+		for (int s = 0; s < jobCount; s++) {
+			int remainingJobs = jobCount - s;
+			int remainingRows = stopIndex - givenRow;
+			int y1 = givenRow; // Inclusive
+			int taskSize = remainingRows / remainingJobs;
+			givenRow = givenRow + taskSize;
+			int y2 = givenRow; // Exclusive
+			jobs[s] = [task, y1, y2]() {
+				task(y1, y2);
+			};
+		}
+		threadedWorkFromArray(jobs, jobCount);
+	}
+}
+
+void threadedSplit_disabled(int startIndex, int stopIndex, std::function<void(int startIndex, int stopIndex)> task) {
+	task(startIndex, stopIndex);
+}
+
+void threadedSplit(const IRect& bound, std::function<void(const IRect& bound)> task, int minimumRowsPerJob, int jobsPerThread) {
+	int maxJobs = bound.height() / minimumRowsPerJob;
+	int jobCount = std::thread::hardware_concurrency() * jobsPerThread;
+	if (jobCount > maxJobs) { jobCount = maxJobs; }
+	if (jobCount < 1) { jobCount = 1; }
+	if (jobCount == 1) {
+		// Too little work for multi-threading
+		task(bound);
+	} else {
+		// Use multiple threads
+		std::function<void()> jobs[jobCount];
+		int givenRow = bound.top();
+		for (int s = 0; s < jobCount; s++) {
+			int remainingJobs = jobCount - s;
+			int remainingRows = bound.bottom() - givenRow;
+			int y1 = givenRow;
+			int taskSize = remainingRows / remainingJobs;
+			givenRow = givenRow + taskSize;
+			IRect subBound = IRect(bound.left(), y1, bound.width(), taskSize);
+			jobs[s] = [task, subBound]() {
+				task(subBound);
+			};
+		}
+		threadedWorkFromArray(jobs, jobCount);
+	}
+}
+
+void threadedSplit_disabled(const IRect& bound, std::function<void(const IRect& bound)> task) {
+	task(bound);
+}
+
+}
+

+ 67 - 0
Source/DFPSR/base/threading.h

@@ -0,0 +1,67 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_THREADING
+#define DFPSR_THREADING
+
+#include "../../DFPSR/collection/List.h"
+#include "../../DFPSR/math/IRect.h"
+#include <functional>
+
+namespace dsr {
+
+// Executes every function in the array of jobs from jobs[0] to jobs[jobCount - 1].
+void threadedWorkFromArray(std::function<void()>* jobs, int jobCount);
+
+// Executes every function in the list of jobs.
+//   Also clears the list when done.
+void threadedWorkFromList(List<std::function<void()>> jobs);
+
+// Calls the given function with sub-sets of the interval using multiple threads in parallel.
+//   Useful when you have lots of tiny jobs that can be grouped together into larger jobs.
+//     Otherwise the time to start a thread may exceed the cost of the computation.
+//   startIndex is inclusive but stopIndex is exclusive.
+//     X is within the interval iff startIndex <= X < stopIndex.
+//   Warning!
+//     * Only write to non-overlapping memory regions.
+//       This may require aligning the data or using padding depending on how cache works on the target platform.
+//       The longer the distance is, the safer it is against race conditions causing weird results.
+//       You may however read from write-protected shared input in any way you want.
+//         Because data that doesn't change cannot have race conditions.
+//     * Do not use for manipulation of pointers, stack memory from the calling thread or anything where corrupted output may lead to a crash.
+//       Drawing pixel values is okay, because a race condition would only be some noisy pixels that can be spotted and fixed.
+//       Race conditions cannot be tested nor proven away, so assume that they will happen and do your best to avoid them.
+void threadedSplit(int startIndex, int stopIndex, std::function<void(int startIndex, int stopIndex)> task, int minimumJobSize = 128, int jobsPerThread = 2);
+// Use as a place-holder if you want to disable multi-threading but easily turn it on and off for comparing performance
+void threadedSplit_disabled(int startIndex, int stopIndex, std::function<void(int startIndex, int stopIndex)> task);
+// A more convenient version for images looping over a rectangular bound of pixels.
+//   The same left and right sides are given to each sub-bound to make memory alignment easy.
+//   The top and bottoms are subdivided so that memory access is simple for cache prediction.
+void threadedSplit(const IRect& bound, std::function<void(const IRect& bound)> task, int minimumRowsPerJob = 128, int jobsPerThread = 2);
+// Use as a place-holder if you want to disable multi-threading but easily turn it on and off for comparing performance
+void threadedSplit_disabled(const IRect& bound, std::function<void(const IRect& bound)> task);
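+// Usage sketch, clearing the rows of a hypothetical pixel buffer in parallel:
+//   threadedSplit(0, height, [&buffer, width](int startRow, int stopRow) {
+//       for (int y = startRow; y < stopRow; y++) {
+//           for (int x = 0; x < width; x++) {
+//               buffer[y * width + x] = 0; // each job writes only to its own rows
+//           }
+//       }
+//   });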
+
+}
+
+#endif
+

+ 79 - 0
Source/DFPSR/collection/Array.h

@@ -0,0 +1,79 @@
+
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_COLLECTION_ARRAY
+#define DFPSR_COLLECTION_ARRAY
+
+#include <stdint.h>
+
+namespace dsr {
+
+// Inlined Boundchecks.h
+void nonZeroLengthCheck(int64_t length, const char* property);
+void baseZeroBoundCheck(int64_t index, int64_t length, const char* property);
+
+// The simplest possible automatically deallocating array with bound checks.
+//   Indices are signed, so that they can be used directly from high-level algorithms.
+// Created because std::vector is a growing list of members, not a fixed-size array of values.
+//   Using a list where a fixed-size array is intended makes the code both dangerous and unreadable.
+//   Unsigned indices would either force dangerous casting from signed integers, or prevent
+//   looping backwards, because the exit condition x < 0u can never become true for an unsigned x.
+template <typename T>
+class Array {
+private:
+	const int32_t elementCount;
+	T *elements = nullptr;
+public:
+	// Constructor
+	Array(const int32_t newLength, const T& defaultValue)
+	  : elementCount(newLength) {
+		nonZeroLengthCheck(newLength, "New array length");
+		this->elements = new T[newLength];
+		for (int32_t index = 0; index < newLength; index++) {
+			this->elements[index] = defaultValue;
+		}
+	}
+	// No implicit copies, only pass by reference
+	Array(const Array&) = delete;
+	Array& operator=(const Array&) = delete;
+	// Destructor
+	~Array() { delete[] this->elements; }
+	// Element access
+	T& operator[] (const int32_t index) {
+		baseZeroBoundCheck(index, this->length(), "Array index");
+		return this->elements[index];
+	}
+	const T& operator[] (const int32_t index) const {
+		baseZeroBoundCheck(index, this->length(), "Array index");
+		return this->elements[index];
+	}
+	int32_t length() const {
+		return this->elementCount;
+	}
+};
+
+}
+
+#endif
+
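A brief usage sketch for Array; the element type and values are made up for illustration.

	#include "Array.h"

	void arrayExample() {
		dsr::Array<int32_t> values(5, 0); // Five elements initialized to zero
		for (int32_t i = 0; i < values.length(); i++) {
			values[i] = i * i; // Bound checked write
		}
		// Signed indices allow looping backwards safely; i >= 0 terminates as expected.
		for (int32_t i = values.length() - 1; i >= 0; i--) {
			int32_t value = values[i]; // Bound checked read
			(void)value;
		}
	}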

+ 39 - 0
Source/DFPSR/collection/BoundChecks.cpp

@@ -0,0 +1,39 @@
+
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "../base/text.h"
+
+using namespace dsr;
+
+void dsr::nonZeroLengthCheck(int64_t length, const char* property) {
+	if (length <= 0) {
+		throwError(property, " may not be zero or negative!\n");
+	}
+}
+
+void dsr::baseZeroBoundCheck(int64_t index, int64_t length, const char* property) {
+	if (index < 0 || index >= length) {
+		throwError(property, " ", index, " is out of bound 0..", (length - 1), "!\n");
+	}
+}

+ 106 - 0
Source/DFPSR/collection/Field.h

@@ -0,0 +1,106 @@
+
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_COLLECTION_FIELD
+#define DFPSR_COLLECTION_FIELD
+
+#include <stdint.h>
+#include "../math/IVector.h"
+
+namespace dsr {
+
+// Inlined Boundchecks.h
+void nonZeroLengthCheck(int64_t length, const char* property);
+void baseZeroBoundCheck(int64_t index, int64_t length, const char* property);
+
+// A 2D version of Array with built-in support for accessing elements out of bounds.
+template <typename T>
+class Field {
+private:
+	const int32_t elementWidth, elementHeight;
+	T *elements = nullptr;
+public:
+	// Constructor
+	Field(const int32_t width, const int32_t height, const T& defaultValue)
+	  : elementWidth(width), elementHeight(height) {
+		nonZeroLengthCheck(width, "New array width");
+  		nonZeroLengthCheck(height, "New array height");
+		int32_t size = width * height;
+		this->elements = new T[size];
+		for (int32_t index = 0; index < size; index++) {
+			this->elements[index] = defaultValue;
+		}
+	}
+private:
+	// Direct memory access
+	// Precondition: this->inside(location.x, location.y)
+	T& writeAccess(const IVector2D& location) {
+		return this->elements[location.x + location.y * this->elementWidth];
+	}
+	const T& readAccess(const IVector2D& location) const {
+		return this->elements[location.x + location.y * this->elementWidth];
+	}
+public:
+	// No implicit copies, only pass by reference
+	Field(const Field&) = delete;
+	Field& operator=(const Field&) = delete;
+	// Destructor
+	~Field() { delete[] this->elements; }
+	// Bound check
+	bool inside(const IVector2D& location) const {
+		return location.x >= 0 && location.x < this->elementWidth && location.y >= 0 && location.y < this->elementHeight;
+	}
+	// Read access
+	T read_border(const IVector2D& location, const T& outside) const {
+		if (this->inside(location)) {
+			return this->readAccess(location);
+		} else {
+			return outside;
+		}
+	}
+	T read_clamp(IVector2D location) const {
+		if (location.x < 0) location.x = 0;
+		if (location.x >= this->elementWidth) location.x = this->elementWidth - 1;
+		if (location.y < 0) location.y = 0;
+		if (location.y >= this->elementHeight) location.y = this->elementHeight - 1;
+		return this->readAccess(location);
+	}
+	// Write access
+	void write_ignore(const IVector2D& location, const T& value) {
+		if (this->inside(location)) {
+			this->writeAccess(location) = value;
+		}
+	}
+	int32_t width() const {
+		return this->elementWidth;
+	}
+	int32_t height() const {
+		return this->elementHeight;
+	}
+};
+
+}
+
+#endif
+
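A brief sketch of the out-of-bound read and write policies above; the dimensions and values are made up for illustration.

	#include "Field.h"

	void fieldExample() {
		dsr::Field<uint8_t> heights(4, 3, 0); // 4x3 elements initialized to zero
		heights.write_ignore(dsr::IVector2D(1, 2), 7); // Inside the bound, so the value is written
		heights.write_ignore(dsr::IVector2D(9, 9), 7); // Outside the bound, so the write is ignored
		uint8_t a = heights.read_border(dsr::IVector2D(-1, 0), 255); // Outside, so the border value 255 is returned
		uint8_t b = heights.read_clamp(dsr::IVector2D(99, 1)); // Outside, so the location is clamped to (3, 1)
		(void)a; (void)b;
	}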

+ 114 - 0
Source/DFPSR/collection/List.h

@@ -0,0 +1,114 @@
+
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_COLLECTION_LIST
+#define DFPSR_COLLECTION_LIST
+
+#include <stdint.h>
+#include <vector>
+
+namespace dsr {
+
+// Inlined Boundchecks.h
+void nonZeroLengthCheck(int64_t length, const char* property);
+void baseZeroBoundCheck(int64_t index, int64_t length, const char* property);
+
+// An array list with constant time random access to elements and LIFO push/pop at the end.
+// Technically, there's nothing wrong with the internals of std::vector, but its interface is horrible.
+//   * Forced use of iterators for cloning and element removal is overly complex and bloats the code.
+//   * Unsigned indices either force dangerous casting from signed, or prevent
+//     the ability to loop backwards, because the x < 0u criterion can never be met.
+template <typename T>
+class List {
+private:
+	std::vector<T> backend;
+public:
+	// Constructor
+	List() {}
+	// Clonable
+	// TODO: Make an optional performance warning
+	List(const List& source) : backend(std::vector<T>(source.backend.begin(), source.backend.end())) {}
+	int64_t length() const {
+		return (int64_t)this->backend.size();
+	}
+	// Element access
+	//   Warning! Do not push more elements to the list while a reference is being used
+	T& operator[] (int64_t index) {
+		baseZeroBoundCheck(index, this->length(), "List index");
+		return this->backend[index];
+	}
+	const T& operator[] (int64_t index) const {
+		baseZeroBoundCheck(index, this->length(), "List index");
+		return this->backend[index];
+	}
+	T& first() {
+		nonZeroLengthCheck(this->length(), "Length");
+		return this->backend[0];
+	}
+	const T& first() const {
+		nonZeroLengthCheck(this->length(), "Length");
+		return this->backend[0];
+	}
+	T& last() {
+		nonZeroLengthCheck(this->length(), "Length");
+		return this->backend[this->length() - 1];
+	}
+	const T& last() const {
+		nonZeroLengthCheck(this->length(), "Length");
+		return this->backend[this->length() - 1];
+	}
+	void clear() {
+		this->backend.clear();
+	}
+	void reserve(int64_t minimumLength) {
+		this->backend.reserve(minimumLength);
+	}
+	// Warning! Reallocation may invalidate pointers and references to elements in the replaced buffer
+	T& push(const T& newValue) {
+		// Grow the capacity aggressively to reduce the number of reallocations
+		if (this->length() == 0) {
+			this->backend.reserve(32);
+		} else if (this->length() >= (int64_t)this->backend.capacity()) {
+			this->backend.reserve((int64_t)this->backend.capacity() * 4);
+		}
+		this->backend.push_back(newValue);
+		return this->last();
+	}
+	template<typename... ARGS>
+	T& pushConstruct(ARGS... args) {
+		this->backend.emplace_back(args...);
+		return this->last();
+	}
+	void remove(int64_t removedIndex) {
+		this->backend.erase(this->backend.begin() + removedIndex);
+	}
+	void pop() {
+		this->backend.pop_back();
+	}
+};
+
+}
+
+#endif
+
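A brief usage sketch for List; the content is made up for illustration.

	#include "List.h"

	void listExample() {
		dsr::List<int> values;
		values.push(10);
		values.push(20);
		values.pushConstruct(30); // Constructed in place from the arguments
		// Signed 64-bit indices make it safe to loop backwards while removing elements.
		for (int64_t i = values.length() - 1; i >= 0; i--) {
			if (values[i] == 20) {
				values.remove(i);
			}
		}
	}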

+ 7 - 0
Source/DFPSR/collection/includeCollection.h

@@ -0,0 +1,7 @@
+
+// Header for including collections
+
+#include "Array.h"
+#include "Field.h"
+#include "List.h"
+

+ 70 - 0
Source/DFPSR/gui/BackendWindow.cpp

@@ -0,0 +1,70 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "BackendWindow.h"
+
+using namespace dsr;
+
+bool BackendWindow::executeEvents() {
+	bool executedEvent = false;
+	this->prefetchEvents();
+	// Execute any resize first
+	//   This makes sure that following events get a canvas size synchronized with the window size
+	if (this->requestingResize) {
+		executedEvent = true;
+		this->callback_resizeEvent(this->requestedWidth, this->requestedHeight);
+		this->requestingResize = false;
+	}
+	// Look for events
+	for (int e = 0; e < this->eventQueue.length(); e++) {
+		InputEvent* event = this->eventQueue[e];
+		if (event) {
+			executedEvent = true;
+			KeyboardEvent* kEvent = dynamic_cast<KeyboardEvent*>(event);
+			MouseEvent* mEvent = dynamic_cast<MouseEvent*>(event);
+			WindowEvent* wEvent = dynamic_cast<WindowEvent*>(event);
+			if (kEvent) {
+				this->callback_keyboardEvent(*kEvent);
+			} else if (mEvent) {
+				this->callback_mouseEvent(*mEvent);
+			} else if (wEvent) {
+				if (wEvent->windowEventType == WindowEventType::Close) {
+					this->callback_closeEvent();
+				} else if (wEvent->windowEventType == WindowEventType::Redraw) {
+					this->showCanvas();
+				}
+			}
+		}
+		delete event;
+	}
+	// Check for resize again in case one was triggered by a callback
+	if (this->requestingResize) {
+		this->callback_resizeEvent(this->requestedWidth, this->requestedHeight);
+		this->requestingResize = false;
+	}
+	// Clear the event queue to avoid repeating events
+	this->eventQueue.clear();
+	// Tell the caller if we did something
+	return executedEvent;
+}
+

+ 96 - 0
Source/DFPSR/gui/BackendWindow.h

@@ -0,0 +1,96 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_BACKEND_WINDOW
+#define DFPSR_BACKEND_WINDOW
+
+#include <stdint.h>
+#include <memory>
+#include "InputEvent.h"
+#include "../image/ImageRgbaU8.h"
+#include "../api/imageAPI.h"
+#include "../base/text.h"
+#include "../collection/List.h"
+
+namespace dsr {
+
+// The class to use when porting the window manager to another operating system.
+//   A simple interface for the most basic operations that a window can do.
+//     * Show an image over the whole window
+//     * Take input events
+//   Minimalism reduces the cost of porting core functionality to new operating systems.
+//     All other features should be optional.
+class BackendWindow {
+protected:
+	String title;
+	// Events
+	List<InputEvent*> eventQueue;
+	void queueInputEvent(InputEvent* event) {
+		this->eventQueue.push(event);
+	}
+private:
+	bool requestingResize = false;
+	int requestedWidth = 0;
+	int requestedHeight = 0;
+protected:
+	// Report that the window has been resized.
+	//   When the implementation receives a resize from the window manager, call receivedWindowResize with the new dimensions.
+	//     If requestingResize is already true, the old request is simply overwritten.
+	//   The next call to executeEvents will then use the request to resize the canvas.
+	void receivedWindowResize(int width, int height) {
+		this->requestingResize = true;
+		this->requestedWidth = width;
+		this->requestedHeight = height;
+	}
+public:
+	BackendWindow() {}
+	virtual ~BackendWindow() {}
+	virtual void setFullScreen(bool enabled) = 0;
+	virtual bool isFullScreen() = 0;
+	virtual int getWidth() const = 0;
+	virtual int getHeight() const = 0;
+protected:
+	// Back-end interface
+	// Responsible for adding events to eventQueue
+	virtual void prefetchEvents() = 0;
+public:
+	// Canvas interface
+	virtual AlignedImageRgbaU8 getCanvas() = 0;
+	virtual void showCanvas() = 0;
+	virtual void resizeCanvas(int width, int height) = 0;
+	virtual String getTitle() { return this->title; }
+	virtual void setTitle(const String &newTitle) = 0;
+	// Each callback declaration has a public variable and a public getter and setter
+	DECLARE_CALLBACK(closeEvent, emptyCallback);
+	DECLARE_CALLBACK(resizeEvent, sizeCallback);
+	DECLARE_CALLBACK(keyboardEvent, keyboardCallback);
+	DECLARE_CALLBACK(mouseEvent, mouseCallback);
+	// Call executeEvents to run all callbacks collected in eventQueue
+	//   Returns true if any event was processed
+	bool executeEvents();
+};
+
+}
+
+#endif
+
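A rough sketch of what a new port must implement; the class name, the canvas handling and the pack order index 0 are placeholders, since a real backend would talk to the native window system.

	class StubWindow : public dsr::BackendWindow {
	private:
		dsr::AlignedImageRgbaU8 canvas;
	public:
		void setFullScreen(bool enabled) override {}
		bool isFullScreen() override { return false; }
		int getWidth() const override { return dsr::image_getWidth(this->canvas); }
		int getHeight() const override { return dsr::image_getHeight(this->canvas); }
		void prefetchEvents() override {
			// Poll the native event queue here and translate each native event into a
			// KeyboardEvent, MouseEvent or WindowEvent passed to queueInputEvent.
			// Call receivedWindowResize when the window manager reports a new size.
		}
		dsr::AlignedImageRgbaU8 getCanvas() override { return this->canvas; }
		void showCanvas() override { /* Upload this->canvas to the native window here */ }
		void resizeCanvas(int width, int height) override {
			this->canvas = dsr::image_create_RgbaU8_native(width, height, 0); // Pack order index 0 is a placeholder
		}
		void setTitle(const dsr::String &newTitle) override { this->title = newTitle; }
	};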

+ 259 - 0
Source/DFPSR/gui/DsrWindow.cpp

@@ -0,0 +1,259 @@
+
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "DsrWindow.h"
+#include "components/Panel.h"
+#include "components/Button.h"
+#include "../math/scalar.h"
+#include "../math/IVector.h"
+#include "../api/imageAPI.h"
+#include "../api/drawAPI.h"
+
+using namespace dsr;
+
+static bool initialized = false;
+static void initializeGui() {
+	if (!initialized) {
+		// Register built-in components by name
+		REGISTER_PERSISTENT_CLASS(Panel)
+		REGISTER_PERSISTENT_CLASS(Button)
+		initialized = true;
+	}
+}
+
+DsrWindow::DsrWindow(std::shared_ptr<BackendWindow> backend)
+ : backend(backend), innerWidth(backend->getWidth()), innerHeight(backend->getHeight()) {
+	// Initialize the GUI system if needed
+	initializeGui();
+	// Listen to mouse and keyboard events from the backend window
+	this->backend->mouseEvent() = [this](const MouseEvent& event) {
+		this->sendMouseEvent(event);
+	};
+	this->backend->keyboardEvent() = [this](const KeyboardEvent& event) {
+		this->sendKeyboardEvent(event);
+	};
+	this->backend->closeEvent() = [this]() {
+		this->sendCloseEvent();
+	};
+	// Receiving notifications about resizing should be done in the main panel
+	this->backend->resizeEvent() = [this](int width, int height) {
+		BackendWindow *backend = this->backend.get();
+		ImageRgbaU8 canvas = backend->getCanvas();
+		this->innerWidth = width;
+		this->innerHeight = height;
+		if (image_getWidth(canvas) != width || image_getHeight(canvas) != height) {
+			// Resize the image that holds everything drawn on the window
+			backend->resizeCanvas(width, height);
+			// Remove the old depth buffer, so that it will resize when being requested again
+			this->removeDepthBuffer();
+		}
+		this->applyLayout();
+	};
+	this->resetInterface();
+}
+
+DsrWindow::~DsrWindow() {}
+
+void DsrWindow::applyLayout() {
+	this->mainPanel->applyLayout(IVector2D(this->getCanvasWidth(), this->getCanvasHeight()));
+}
+
+std::shared_ptr<VisualComponent> DsrWindow::findComponentByName(ReadableString name, bool mustExist) const {
+	if (string_match(this->mainPanel->getName(), name)) {
+		return this->mainPanel;
+	} else {
+		return this->mainPanel->findChildByName(name, mustExist);
+	}
+}
+
+std::shared_ptr<VisualComponent> DsrWindow::findComponentByNameAndIndex(ReadableString name, int index, bool mustExist) const {
+	if (string_match(this->mainPanel->getName(), name) && this->mainPanel->getIndex() == index) {
+		return this->mainPanel;
+	} else {
+		return this->mainPanel->findChildByNameAndIndex(name, index, mustExist);
+	}
+}
+
+std::shared_ptr<VisualComponent> DsrWindow::getRootComponent() const {
+	return this->mainPanel;
+}
+
+void DsrWindow::resetInterface() {
+	// Create an empty main panel
+	this->mainPanel = std::dynamic_pointer_cast<VisualComponent>(createPersistentClass("Panel"));
+	if (this->mainPanel.get() == nullptr) {
+		throwError(U"DsrWindow::resetInterface: The window's Panel could not be created!");
+	}
+	this->mainPanel->setName("mainPanel");
+	this->applyLayout();
+}
+
+void DsrWindow::loadInterfaceFromString(String layout) {
+	// Load a tree structure of visual components from text
+	this->mainPanel = std::dynamic_pointer_cast<VisualComponent>(createPersistentClassFromText(layout));
+	if (this->mainPanel.get() == nullptr) {
+		throwError(U"DsrWindow::loadInterfaceFromString: The window's root component could not be created!");
+	}
+	this->applyLayout();
+}
+
+String DsrWindow::saveInterfaceToString() {
+	return this->mainPanel->toString();
+}
+
+bool DsrWindow::executeEvents() {
+	return this->backend->executeEvents();
+}
+
+void DsrWindow::sendMouseEvent(const MouseEvent& event) {
+	this->lastMousePosition = event.position;
+	// Components receive scaled mouse coordinates, because they are drawn to the low-resolution canvas
+	MouseEvent scaledEvent = event / this->pixelScale;
+	// Send the global event
+	this->callback_windowMouseEvent(scaledEvent);
+	// Send to the main panel and its components
+	this->mainPanel->sendMouseEvent(scaledEvent);
+}
+
+void DsrWindow::sendKeyboardEvent(const KeyboardEvent& event) {
+	// Send the global event
+	this->callback_windowKeyboardEvent(event);
+	// Send to the main panel and its components
+	this->mainPanel->sendKeyboardEvent(event);
+}
+
+void DsrWindow::sendCloseEvent() {
+	this->callback_windowCloseEvent();
+}
+
+int DsrWindow::getInnerWidth() {
+	return this->innerWidth;
+}
+
+int DsrWindow::getInnerHeight() {
+	return this->innerHeight;
+}
+
+int DsrWindow::getCanvasWidth() {
+	return std::max(1, this->innerWidth / this->pixelScale);
+}
+
+int DsrWindow::getCanvasHeight() {
+	return std::max(1, this->innerHeight / this->pixelScale);
+}
+
+AlignedImageF32 DsrWindow::getDepthBuffer() {
+	auto fullResolutionCanvas = this->backend->getCanvas();
+	int smallWidth = getCanvasWidth();
+	int smallHeight = getCanvasHeight();
+	if (!image_exists(this->depthBuffer)
+	  || image_getWidth(this->depthBuffer) != smallWidth
+	  || image_getHeight(this->depthBuffer) != smallHeight) {
+		this->depthBuffer = image_create_F32(smallWidth, smallHeight);
+	}
+	return this->depthBuffer;
+}
+
+void DsrWindow::removeDepthBuffer() {
+	this->depthBuffer = AlignedImageF32();
+}
+
+int DsrWindow::getPixelScale() const {
+	return this->pixelScale;
+}
+
+void DsrWindow::setPixelScale(int scale) {
+	if (this->pixelScale != scale) {
+		this->pixelScale = scale;
+		// Update layout
+		this->applyLayout();
+		// The mouse moves relative to the canvas when scale changes
+		this->sendMouseEvent(MouseEvent(MouseEventType::MouseMove, MouseKeyEnum::NoKey, this->lastMousePosition));
+	}
+}
+
+void DsrWindow::setFullScreen(bool enabled) {
+	if (this->backend->isFullScreen() != enabled) {
+		this->backend->setFullScreen(enabled);
+		// TODO: The mouse moves relative to the canvas when the window moves, but the new mouse location was never given.
+		// How can mouse-move events be made consistent in applications when toggling full-screen without resorting to hacks?
+		// Return the moved pixel offset from backend's setFullScreen?
+	}
+}
+
+bool DsrWindow::isFullScreen() {
+	return this->backend->isFullScreen();
+}
+
+void DsrWindow::drawComponents() {
+	auto canvas = this->getCanvas();
+	this->mainPanel->draw(canvas, IVector2D(0, 0));
+}
+
+AlignedImageRgbaU8 DsrWindow::getCanvas() {
+	// TODO: Query if the backend has an optimized upload for a smaller canvas
+	//       Useful if a window backend has GPU accelerated or native upscaling
+	auto fullResolutionCanvas = this->backend->getCanvas();
+	if (this->pixelScale > 1) {
+		// Get low resolution canvas in deterministic RGBA pack order
+		int smallWidth = getCanvasWidth();
+		int smallHeight = getCanvasHeight();
+		if (!image_exists(this->lowResolutionCanvas)
+		 || image_getWidth(this->lowResolutionCanvas) != smallWidth
+ 		 || image_getHeight(this->lowResolutionCanvas) != smallHeight) {
+			this->lowResolutionCanvas = image_create_RgbaU8_native(smallWidth, smallHeight, image_getPackOrderIndex(fullResolutionCanvas));
+		}
+		return this->lowResolutionCanvas;
+	} else {
+		// Get full resolution canvas in arbitrary pack order
+		return fullResolutionCanvas;
+	}
+}
+
+void DsrWindow::showCanvas() {
+	if (this->pixelScale > 1 && image_exists(this->lowResolutionCanvas)) {
+		// Use an exact pixel size, cutting off the last rows and columns when the window size is not evenly divisible
+		//   This makes it easy to convert mouse coordinates using multiplication and division with pixelScale
+		auto target = this->backend->getCanvas();
+		auto source = this->getCanvas();
+		filter_blockMagnify(target, source, this->pixelScale, this->pixelScale);
+	}
+	this->backend->showCanvas();
+}
+
+String DsrWindow::getTitle() {
+	return this->backend->getTitle();
+}
+
+void DsrWindow::setTitle(const String &newTitle) {
+	this->backend->setTitle(newTitle);
+}
+
+void DsrWindow::applyTheme(VisualTheme theme) {
+	this->mainPanel->applyTheme(theme);
+}
+
+VisualTheme DsrWindow::getTheme() {
+	return this->mainPanel->getTheme();
+}
+
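A short sketch of how the pixel scale above is meant to be used from an application; the scale value is arbitrary and the window is assumed to already exist.

	void lowResolutionExample(dsr::DsrWindow &window) {
		window.setPixelScale(2); // Draw in half resolution, shown magnified by 2x2 blocks
		auto canvas = window.getCanvas(); // Roughly half of the inner width and height, at least 1x1
		// ... draw custom content or 3D graphics to canvas here ...
		window.drawComponents(); // Components are drawn in canvas resolution, so mouse events are divided by the scale
		window.showCanvas(); // Magnifies the low-resolution canvas to the backend canvas and presents it
	}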

+ 156 - 0
Source/DFPSR/gui/DsrWindow.h

@@ -0,0 +1,156 @@
+
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GUI_DSRWINDOW
+#define DFPSR_GUI_DSRWINDOW
+
+#include <memory>
+#include "../gui/VisualComponent.h"
+#include "../gui/BackendWindow.h"
+#include "../base/text.h"
+#include "../api/types.h"
+
+// The DSR window is responsible for connecting visual interfaces with the backend window.
+//   An optional depth buffer is allocated on demand when requested, and kept until the window resizes.
+
+namespace dsr {
+
+class DsrWindow {
+private:
+	// Window backend
+	std::shared_ptr<BackendWindow> backend;
+	// The root component
+	std::shared_ptr<VisualComponent> mainPanel;
+	AlignedImageF32 depthBuffer;
+	// The inner window dimensions that are synchronized with the canvas.
+	//   The backend, by contrast, may have its size changed before the resize event has been fetched,
+	//   so reading the backend's dimensions directly would not be synchronized with the canvas.
+	int innerWidth, innerHeight;
+	// The last mouse position is used to create new mouse-move events when pixelScale changes.
+	IVector2D lastMousePosition;
+public:
+	// Constructor
+	explicit DsrWindow(std::shared_ptr<BackendWindow> backend);
+	// Destructor
+	virtual ~DsrWindow();
+public:
+	// GUI layout
+		void applyLayout();
+
+		// Component getters
+		std::shared_ptr<VisualComponent> findComponentByName(ReadableString name, bool mustExist = true) const;
+		template <typename T>
+		std::shared_ptr<T> findComponentByName(ReadableString name, bool mustExist = true) const {
+			return std::dynamic_pointer_cast<T>(this->findComponentByName(name, mustExist));
+		}
+		std::shared_ptr<VisualComponent> findComponentByNameAndIndex(ReadableString name, int index, bool mustExist = true) const;
+		template <typename T>
+		std::shared_ptr<T> findComponentByNameAndIndex(ReadableString name, int index, bool mustExist = true) const {
+			return std::dynamic_pointer_cast<T>(this->findComponentByNameAndIndex(name, index, mustExist));
+		}
+
+		// Get the root component that contains all other components in the window
+		std::shared_ptr<VisualComponent> getRootComponent() const;
+		void resetInterface();
+		void loadInterfaceFromString(String layout);
+		String saveInterfaceToString();
+
+public:
+	// Events
+		// Call to listen for all events given to the window
+		//   This will interact with components and call registered events
+		//   Returns true if any event was processed
+		bool executeEvents();
+
+		// Callback for any mouse event given to the window, before components receive the event
+		DECLARE_CALLBACK(windowMouseEvent, mouseCallback);
+		// Send a mouse event directly to the visual components
+		//   Can be called manually for automatic testing
+		void sendMouseEvent(const MouseEvent& event);
+
+		// Callback for any keyboard event given to the window, before components receive the event
+		DECLARE_CALLBACK(windowKeyboardEvent, keyboardCallback);
+		// Send a keyboard event directly to the visual components
+		//   Can be called manually for automatic testing
+		void sendKeyboardEvent(const KeyboardEvent& event);
+
+		// Callback for when the user tries to close the window
+		DECLARE_CALLBACK(windowCloseEvent, emptyCallback);
+		// Send a close event directly
+		//   Can be called manually for automatic testing
+		void sendCloseEvent();
+
+private:
+	// Upscaling information
+		int pixelScale = 1;
+		AlignedImageRgbaU8 lowResolutionCanvas;
+public:
+	// Upscaling interface
+		int getPixelScale() const;
+		void setPixelScale(int scale);
+
+public:
+	// Graphics
+		// Get the color buffer for drawing or 3D rendering
+		//   The resulting color buffer may be outdated after resizing the window and calling executeEvents()
+		AlignedImageRgbaU8 getCanvas();
+		// Get the depth buffer for 3D rendering
+		//   The resulting depth buffer may be outdated after resizing the window and calling executeEvents()
+		AlignedImageF32 getDepthBuffer();
+		// Detach the depth buffer so that it can be freed
+		//   Called automatically when the canvas resizes
+		void removeDepthBuffer();
+		// Draw components directly to the canvas in full resolution
+		void drawComponents();
+		// Show the canvas when an image is ready
+		void showCanvas();
+		// Canvas width in the pre-upscale resolution
+		int getCanvasWidth();
+		// Canvas height in the pre-upscale resolution
+		int getCanvasHeight();
+
+public:
+	// Full-screen
+	void setFullScreen(bool enabled);
+	bool isFullScreen();
+
+public:
+	// Theme
+		void applyTheme(VisualTheme theme);
+		VisualTheme getTheme();
+
+public:
+	// Access to backend window
+		// Full width after upscaling
+		int getInnerWidth();
+		// Full height after upscaling
+		int getInnerHeight();
+		String getTitle();
+		void setTitle(const String &newTitle);
+};
+
+}
+
+#endif
+
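A minimal application loop sketch using the interface above; creating the backend window is left out because it depends on the target platform.

	void runWindow(std::shared_ptr<dsr::BackendWindow> backend) {
		dsr::DsrWindow window(backend);
		bool running = true;
		window.windowCloseEvent() = [&running]() { running = false; };
		window.windowKeyboardEvent() = [](const dsr::KeyboardEvent& event) {
			// React to event.dsrKey here
		};
		while (running) {
			window.executeEvents(); // Dispatches queued input, close and resize events
			window.drawComponents(); // Draws the component tree to the canvas
			window.showCanvas(); // Presents the result (upscaled if a pixel scale is set)
		}
	}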

+ 68 - 0
Source/DFPSR/gui/FlexRegion.cpp

@@ -0,0 +1,68 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "FlexRegion.h"
+
+using namespace dsr;
+
+PERSISTENT_DEFINITION(FlexValue)
+
+bool FlexValue::assignValue(const ReadableString &text) {
+	int perCentIndex = text.findFirst('%');
+	if (perCentIndex > -1) {
+		// Explicit %
+		ReadableString leftSide = text.before(perCentIndex);
+		ReadableString rightSide = text.after(perCentIndex);
+		this->ratio = leftSide.toInteger();
+		this->offset = rightSide.toInteger();
+	} else {
+		// Implicitly 0%
+		this->ratio = 0;
+		this->offset = text.toInteger();
+	}
+	return true; // TODO: Discriminate bad input
+}
+
+String& FlexValue::toStreamIndented(String& out, const ReadableString& indentation) const {
+	string_append(out, indentation);
+	if (this->ratio != 0) {
+		string_append(out, this->ratio, U"%");
+	}
+	if (this->ratio == 0 || this->offset != 0) {
+		if (this->offset > 0) {
+			string_append(out, U"+");
+		}
+		string_append(out, this->offset);
+	}
+	return out;
+}
+
+IRect FlexRegion::getNewLocation(const IVector2D &parentSize) {
+	return IRect::FromBounds(
+		this->sides[0].getValue(parentSize.x),
+		this->sides[1].getValue(parentSize.y),
+		this->sides[2].getValue(parentSize.x),
+		this->sides[3].getValue(parentSize.y)
+	);
+}
+
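A few illustrative inputs for assignValue, following the parsing above; this assumes that ReadableString accepts U prefixed literals as elsewhere in the framework, and that toInteger accepts a leading plus sign to match the serialization in toStreamIndented.

	void flexValueExample() {
		dsr::FlexValue value;
		value.assignValue(U"50%+10"); // ratio 50, offset 10
		value.assignValue(U"100%-4"); // ratio 100, offset -4
		value.assignValue(U"25"); // No '%', so ratio 0 and offset 25
	}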

+ 107 - 0
Source/DFPSR/gui/FlexRegion.h

@@ -0,0 +1,107 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GUI_FLEXREGION
+#define DFPSR_GUI_FLEXREGION
+
+#include <stdint.h>
+#include "../math/IVector.h"
+#include "../math/IRect.h"
+#include "../base/text.h"
+#include "../persistent/ClassFactory.h"
+
+namespace dsr {
+
+struct FlexValue : public Persistent {
+PERSISTENT_DECLARATION(FlexValue)
+private:
+	int32_t ratio = 0; // 0% to 100%
+	int32_t offset = 0; // +- offset
+public:
+	FlexValue() {}
+	FlexValue(int ratio, int offset) : ratio(std::min(std::max(0, ratio), 100)), offset(offset) {}
+public:
+	bool assignValue(const ReadableString &text) override;
+	String& toStreamIndented(String& out, const ReadableString& indentation) const override;
+public:
+	int32_t getRatio() const { return this->ratio; }
+	int32_t getOffset() const { return this->offset; }
+	int32_t getValue(int32_t parentValue) const { return (parentValue * this->ratio) / 100 + this->offset; }
+};
+inline bool operator==(const FlexValue &left, const FlexValue &right) {
+	return left.getRatio() == right.getRatio() && left.getOffset() == right.getOffset();
+}
+inline bool operator!=(const FlexValue &left, const FlexValue &right) {
+	return !(left == right);
+}
+
+struct FlexRegion {
+public:
+	// Indices: 0 = left, 1 = top, 2 = right, 3 = bottom
+	FlexValue sides[4];
+public:
+	void setLeft(const FlexValue &left) { this->sides[0] = left; }
+	void setTop(const FlexValue &top) { this->sides[1] = top; }
+	void setRight(const FlexValue &right) { this->sides[2] = right; }
+	void setBottom(const FlexValue &bottom) { this->sides[3] = bottom; }
+	void setLeft(const ReadableString &left) { this->sides[0] = FlexValue(left); }
+	void setTop(const ReadableString &top) { this->sides[1] = FlexValue(top); }
+	void setRight(const ReadableString &right) { this->sides[2] = FlexValue(right); }
+	void setBottom(const ReadableString &bottom) { this->sides[3] = FlexValue(bottom); }
+public:
+	// Full region
+	FlexRegion() {
+		this->sides[0] = FlexValue(0, 0);
+		this->sides[1] = FlexValue(0, 0);
+		this->sides[2] = FlexValue(100, 0);
+		this->sides[3] = FlexValue(100, 0);
+	}
+	// Upper left aligned region
+	explicit FlexRegion(const IRect &location) {
+		this->sides[0] = FlexValue(0, location.left());
+		this->sides[1] = FlexValue(0, location.top());
+		this->sides[2] = FlexValue(0, location.right());
+		this->sides[3] = FlexValue(0, location.bottom());
+	}
+	// Flexible region
+	FlexRegion(int leftRatio, int leftOffset, int topRatio, int topOffset, int rightRatio, int rightOffset, int bottomRatio, int bottomOffset) {
+		this->sides[0] = FlexValue(leftRatio, leftOffset);
+		this->sides[1] = FlexValue(topRatio, topOffset);
+		this->sides[2] = FlexValue(rightRatio, rightOffset);
+		this->sides[3] = FlexValue(bottomRatio, bottomOffset);
+	}
+	// Parse individual flex values from text
+	FlexRegion(const ReadableString &left, const ReadableString &top, const ReadableString &right, const ReadableString &bottom) {
+		this->setLeft(left);
+		this->setTop(top);
+		this->setRight(right);
+		this->setBottom(bottom);
+	}
+public:
+	virtual IRect getNewLocation(const IVector2D &parentSize);
+};
+
+}
+
+#endif
+
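A small worked example of resolving a flexible region against a parent size; the numbers are made up for illustration.

	void flexRegionExample() {
		// left = 25% of parent width, top = 10 pixels, right = 75% of parent width, bottom = parent height - 10
		dsr::FlexRegion region(25, 0, 0, 10, 75, 0, 100, -10);
		dsr::IRect location = region.getNewLocation(dsr::IVector2D(400, 300));
		// For a 400x300 parent, the bounds become left 100, top 10, right 300, bottom 290.
		(void)location;
	}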

+ 157 - 0
Source/DFPSR/gui/Font.cpp

@@ -0,0 +1,157 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include <stdint.h>
+#include "Font.h"
+#include "defaultFont.h"
+#include "../api/imageAPI.h"
+#include "../api/drawAPI.h"
+
+using namespace dsr;
+
+std::shared_ptr<RasterFont> defaultFont = RasterFont::createLatinOne(U"UbuntuMono", image_fromAscii(defaultFontAscii));
+
+std::shared_ptr<RasterFont> dsr::font_getDefault() {
+	return defaultFont;
+}
+
+
+RasterCharacter::RasterCharacter(const ImageU8& image, DsrChar unicodeValue, int32_t offsetY)
+: image(image), unicodeValue(unicodeValue), width(image_getWidth(image)), offsetY(offsetY) {}
+
+RasterFont::RasterFont(const String& name, int32_t size, int32_t spacing, int32_t spaceWidth)
+ : name(name), size(size), spacing(spacing), spaceWidth(spaceWidth), tabWidth(spaceWidth * 4) {
+	for (int i = 0; i < 65536; i++) {
+		this->indices[i] = -1;
+	}
+}
+
+RasterFont::~RasterFont() {}
+
+std::shared_ptr<RasterFont> RasterFont::createLatinOne(const String& name, const ImageU8& atlas) {
+	int32_t size = image_getHeight(atlas) / 16;
+	std::shared_ptr<RasterFont> result = std::make_shared<RasterFont>(name, size, size / 16, size / 2);
+	result->registerLatinOne16x16(atlas);
+	return result;
+}
+
+void RasterFont::registerCharacter(const ImageU8& image, DsrChar unicodeValue, int32_t offsetY) {
+	// Only consult the look-up table for characters within its 16-bit range
+	if (unicodeValue >= 65536 || this->indices[unicodeValue] == -1) {
+		// Add the unicode character
+		this->characters.pushConstruct(image, unicodeValue, offsetY);
+		// Add to the 16-bit look-up table if inside the range
+		if (unicodeValue < 65536) {
+			this->indices[unicodeValue] = this->characters.length() - 1;
+		}
+	}
+}
+
+static IRect getCharacterBound(const ImageU8& image, const IRect& searchRegion) {
+	// Inclusive intervals for speed
+	int32_t minX = searchRegion.right();
+	int32_t maxX = searchRegion.left();
+	int32_t minY = searchRegion.bottom();
+	int32_t maxY = searchRegion.top();
+	for (int y = searchRegion.top(); y < searchRegion.bottom(); y++) {
+		for (int x = searchRegion.left(); x < searchRegion.right(); x++) {
+			if (image_readPixel_border(image, x, y)) {
+				if (x < minX) minX = x;
+				if (x > maxX) maxX = x;
+				if (y < minY) minY = y;
+				if (y > maxY) maxY = y;
+			}
+		}
+	}
+	// Convert to width and height
+	return IRect(minX, minY, (maxX + 1) - minX, (maxY + 1) - minY);
+}
+
+// Call after construction to register up to 256 characters in a 16x16 grid from the atlas
+void RasterFont::registerLatinOne16x16(const ImageU8& atlas) {
+	int32_t charWidth = image_getWidth(atlas) / 16;
+	int32_t charHeight = image_getHeight(atlas) / 16;
+	for (int y = 0; y < 16; y++) {
+		for (int x = 0; x < 16; x++) {
+			IRect searchRegion = IRect(x * charWidth, y * charHeight, charWidth, charHeight);
+			IRect croppedRegion = getCharacterBound(atlas, searchRegion);
+			if (croppedRegion.hasArea()) {
+				int32_t offsetY = croppedRegion.top() - searchRegion.top();
+				ImageU8 fullImage = image_getSubImage(atlas, croppedRegion);
+				this->registerCharacter(fullImage, y * 16 + x, offsetY);
+			}
+		}
+	}
+}
+
+int32_t RasterFont::getCharacterWidth(DsrChar unicodeValue) const {
+	// Characters outside of the 16-bit look-up table have no width for now
+	if (unicodeValue >= 65536) {
+		return 0;
+	}
+	int32_t index = this->indices[unicodeValue];
+	if (index > -1) {
+		return this->characters[index].width + this->spacing;
+	} else {
+		return spaceWidth;
+	}
+}
+
+// Prints a character and returns the horizontal stride in pixels
+int32_t RasterFont::printCharacter(ImageRgbaU8& target, DsrChar unicodeValue, const IVector2D& location, const ColorRgbaI32& color) const {
+	if (unicodeValue < 65536) {
+		int32_t index = this->indices[unicodeValue];
+		if (index > -1) {
+			const RasterCharacter *source = &(this->characters[index]);
+			draw_silhouette(target, source->image, color, location.x, location.y + source->offsetY);
+		}
+		return this->getCharacterWidth(unicodeValue);
+	} else {
+		// TODO: Look up characters outside of the 16-bit range from a sparse data structure
+		return 0;
+	}
+}
+
+void RasterFont::printLine(ImageRgbaU8& target, const ReadableString& content, const IVector2D& location, const ColorRgbaI32& color) const {
+	IVector2D currentLocation = location;
+	for (int i = 0; i < (int)(content.length()); i++) {
+		DsrChar code = (DsrChar)(content[i]);
+		if (code == 9) { // Tab
+			// TODO: Jump to the next tab-stop in pixels relative to a tab line given from the caller
+			currentLocation.x += this->tabWidth;
+		} else {
+			// TODO: Would right to left printing of Arabic text be too advanced to have in the core framework?
+			currentLocation.x += this->printCharacter(target, code, currentLocation, color);
+		}
+	}
+}
+
+int32_t RasterFont::getLineWidth(const ReadableString& content) const {
+	int32_t result = 0;
+	for (int i = 0; i < content.length(); i++) {
+		DsrChar code = content[i];
+		if (code == 9) { // Tab
+			// TODO: Jump to the next tab-stop in pixels relative to a tab line given from the caller
+			result += this->tabWidth;
+		} else {
+			result += this->getCharacterWidth(code);
+		}
+	}
+	return result;
+}
+

+ 93 - 0
Source/DFPSR/gui/Font.h

@@ -0,0 +1,93 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GUI_FONT
+#define DFPSR_GUI_FONT
+
+//#include "../persistent/includePersistent.h"
+#include "../api/types.h"
+
+namespace dsr {
+
+struct RasterCharacter {
+public:
+	// Image to draw
+	ImageU8 image;
+	// Look-up value
+	DsrChar unicodeValue = 0;
+	// The width of the character
+	int32_t width = 0;
+	// Y offset
+	int32_t offsetY = 0;
+public:
+	// Constructor
+	RasterCharacter() {}
+	RasterCharacter(const ImageU8& image, DsrChar unicodeValue, int32_t offsetY);
+	// Destructor
+	~RasterCharacter() {}
+};
+
+class RasterFont {
+public:
+	// Font identity
+	const String name;
+	const int32_t size = 0; // From the top of one row to another
+	// Settings
+	int32_t spacing = 0; // The extra pixels between each character
+	int32_t spaceWidth = 0; // The size of a whole space character including spacing
+	int32_t tabWidth = 0; // The size of a whole tab including spacing
+	// A list of character images with their unicode keys
+	List<RasterCharacter> characters;
+	// TODO: A way to map all UTF-32 characters
+	// Indices to characters for the 16-bit range (UTF-16 basic plane)
+	//  indices[x] = -1 for character codes that have not been registered
+	//  The range indices[0..255] contains the Latin-1 subset
+	int32_t indices[65536];
+public:
+	// Constructor
+	RasterFont(const String& name, int32_t size, int32_t spacing, int32_t spaceWidth);
+	static std::shared_ptr<RasterFont> createLatinOne(const String& name, const ImageU8& atlas);
+	// Destructor
+	~RasterFont();
+public:
+	// Already registered unicode characters will be ignored if reused, so load overlapping sets in order of priority
+	void registerCharacter(const ImageU8& image, DsrChar unicodeValue, int32_t offsetY);
+	// Call after construction to register up to 256 characters in a 16x16 grid from the atlas
+	void registerLatinOne16x16(const ImageU8& atlas);
+	// Gets the width of a character including spacing
+	int32_t getCharacterWidth(DsrChar unicodeValue) const;
+	// Gets the width of a whole line
+	// Precondition: No linebreaks in content, just a single line
+	int32_t getLineWidth(const ReadableString& content) const;
+	// Prints a character and returns the horizontal stride in pixels
+	int32_t printCharacter(ImageRgbaU8& target, DsrChar unicodeValue, const IVector2D& location, const ColorRgbaI32& color) const;
+	void printLine(ImageRgbaU8& target, const ReadableString& content, const IVector2D& location, const ColorRgbaI32& color) const;
+};
+
+// Font API
+std::shared_ptr<RasterFont> font_getDefault();
+
+}
+
+#endif
+
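A short sketch of printing centered text with the default font above; the message, the four-argument ColorRgbaI32 constructor and the implicit String to ReadableString conversion are assumptions for illustration.

	void printCentered(dsr::ImageRgbaU8 &target) {
		std::shared_ptr<dsr::RasterFont> font = dsr::font_getDefault();
		dsr::String message = U"Hello";
		int32_t width = font->getLineWidth(message); // Pixel width of the line including spacing
		dsr::IVector2D location((dsr::image_getWidth(target) - width) / 2, 10);
		font->printLine(target, message, location, dsr::ColorRgbaI32(255, 255, 255, 255)); // Assumed to be opaque white
	}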

+ 169 - 0
Source/DFPSR/gui/InputEvent.cpp

@@ -0,0 +1,169 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "InputEvent.h"
+
+using namespace dsr;
+
+inline String dsr::getName(DsrKey v) {
+	if (v == DsrKey_Unhandled) {
+		return U"Other";
+	} else if (v == DsrKey_Escape) {
+		return U"Escape";
+	} else if (v == DsrKey_F1) {
+		return U"F1";
+	} else if (v == DsrKey_F2) {
+		return U"F2";
+	} else if (v == DsrKey_F3) {
+		return U"F3";
+	} else if (v == DsrKey_F4) {
+		return U"F4";
+	} else if (v == DsrKey_F5) {
+		return U"F5";
+	} else if (v == DsrKey_F6) {
+		return U"F6";
+	} else if (v == DsrKey_F7) {
+		return U"F7";
+	} else if (v == DsrKey_F8) {
+		return U"F8";
+	} else if (v == DsrKey_F9) {
+		return U"F9";
+	} else if (v == DsrKey_F10) {
+		return U"F10";
+	} else if (v == DsrKey_F11) {
+		return U"F11";
+	} else if (v == DsrKey_F12) {
+		return U"F12";
+	} else if (v == DsrKey_Pause) {
+		return U"Pause";
+	} else if (v == DsrKey_Space) {
+		return U"Space";
+	} else if (v == DsrKey_Tab) {
+		return U"Tab";
+	} else if (v == DsrKey_Return) {
+		return U"Return";
+	} else if (v == DsrKey_BackSpace) {
+		return U"BackSpace";
+	} else if (v == DsrKey_LeftShift) {
+		return U"LeftShift";
+	} else if (v == DsrKey_RightShift) {
+		return U"RightShift";
+	} else if (v == DsrKey_LeftControl) {
+		return U"LeftControl";
+	} else if (v == DsrKey_RightControl) {
+		return U"RightControl";
+	} else if (v == DsrKey_LeftAlt) {
+		return U"LeftAlt";
+	} else if (v == DsrKey_RightAlt) {
+		return U"RightAlt";
+	} else if (v == DsrKey_Delete) {
+		return U"Delete";
+	} else if (v == DsrKey_LeftArrow) {
+		return U"LeftArrow";
+	} else if (v == DsrKey_RightArrow) {
+		return U"RightArrow";
+	} else if (v == DsrKey_UpArrow) {
+		return U"UpArrow";
+	} else if (v == DsrKey_DownArrow) {
+		return U"DownArrow";
+	} else if (v == DsrKey_0) {
+		return U"0";
+	} else if (v == DsrKey_1) {
+		return U"1";
+	} else if (v == DsrKey_2) {
+		return U"2";
+	} else if (v == DsrKey_3) {
+		return U"3";
+	} else if (v == DsrKey_4) {
+		return U"4";
+	} else if (v == DsrKey_5) {
+		return U"5";
+	} else if (v == DsrKey_6) {
+		return U"6";
+	} else if (v == DsrKey_7) {
+		return U"7";
+	} else if (v == DsrKey_8) {
+		return U"8";
+	} else if (v == DsrKey_9) {
+		return U"9";
+	} else if (v == DsrKey_A) {
+		return U"A";
+	} else if (v == DsrKey_B) {
+		return U"B";
+	} else if (v == DsrKey_C) {
+		return U"C";
+	} else if (v == DsrKey_D) {
+		return U"D";
+	} else if (v == DsrKey_E) {
+		return U"E";
+	} else if (v == DsrKey_F) {
+		return U"F";
+	} else if (v == DsrKey_G) {
+		return U"G";
+	} else if (v == DsrKey_H) {
+		return U"H";
+	} else if (v == DsrKey_I) {
+		return U"I";
+	} else if (v == DsrKey_J) {
+		return U"J";
+	} else if (v == DsrKey_K) {
+		return U"K";
+	} else if (v == DsrKey_L) {
+		return U"L";
+	} else if (v == DsrKey_M) {
+		return U"M";
+	} else if (v == DsrKey_N) {
+		return U"N";
+	} else if (v == DsrKey_O) {
+		return U"O";
+	} else if (v == DsrKey_P) {
+		return U"P";
+	} else if (v == DsrKey_Q) {
+		return U"Q";
+	} else if (v == DsrKey_R) {
+		return U"R";
+	} else if (v == DsrKey_S) {
+		return U"S";
+	} else if (v == DsrKey_T) {
+		return U"T";
+	} else if (v == DsrKey_U) {
+		return U"U";
+	} else if (v == DsrKey_V) {
+		return U"V";
+	} else if (v == DsrKey_W) {
+		return U"W";
+	} else if (v == DsrKey_X) {
+		return U"X";
+	} else if (v == DsrKey_Y) {
+		return U"Y";
+	} else if (v == DsrKey_Z) {
+		return U"Z";
+	} else {
+		return U"Invalid virtual key code";
+	}
+}
+
+String& dsr::string_toStreamIndented(String& target, const DsrKey& source, const ReadableString& indentation) {
+	string_append(target, indentation, getName(source));
+	return target;
+}

+ 132 - 0
Source/DFPSR/gui/InputEvent.h

@@ -0,0 +1,132 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GUI_INPUT_EVENT
+#define DFPSR_GUI_INPUT_EVENT
+
+#include "../math/IVector.h"
+#include <functional>
+
+namespace dsr {
+
+class InputEvent {
+public:
+	InputEvent() {}
+	virtual ~InputEvent() {}
+};
+
+enum class KeyboardEventType { KeyDown, KeyUp, KeyType };
+
+// The DsrKey enumeration is convertible to integers to allow certain well defined math operations
+// Safe assumptions:
+//   * DsrKey_0 to DsrKey_9 are guaranteed to be in an increasing serial order (so that "key - DsrKey_0" is the key's number)
+//   * DsrKey_F1 to DsrKey_F12 are guaranteed to be in an increasing serial order (so that "key - (DsrKey_F1 - 1)" is the key's number)
+//   * DsrKey_A to DsrKey_Z are guaranteed to be in an increasing serial order
+enum DsrKey {
+	DsrKey_LeftArrow, DsrKey_RightArrow, DsrKey_UpArrow, DsrKey_DownArrow,
+	DsrKey_LeftControl, DsrKey_RightControl, DsrKey_LeftShift, DsrKey_RightShift, DsrKey_LeftAlt, DsrKey_RightAlt,
+	DsrKey_Escape, DsrKey_Pause, DsrKey_Space, DsrKey_Tab, DsrKey_Return, DsrKey_BackSpace, DsrKey_Delete,
+	DsrKey_0, DsrKey_1, DsrKey_2, DsrKey_3, DsrKey_4, DsrKey_5, DsrKey_6, DsrKey_7, DsrKey_8, DsrKey_9,
+	DsrKey_F1, DsrKey_F2, DsrKey_F3, DsrKey_F4, DsrKey_F5, DsrKey_F6, DsrKey_F7, DsrKey_F8, DsrKey_F9, DsrKey_F10, DsrKey_F11, DsrKey_F12,
+	DsrKey_A, DsrKey_B, DsrKey_C, DsrKey_D, DsrKey_E, DsrKey_F, DsrKey_G, DsrKey_H, DsrKey_I, DsrKey_J, DsrKey_K, DsrKey_L, DsrKey_M,
+	DsrKey_N, DsrKey_O, DsrKey_P, DsrKey_Q, DsrKey_R, DsrKey_S, DsrKey_T, DsrKey_U, DsrKey_V, DsrKey_W, DsrKey_X, DsrKey_Y, DsrKey_Z,
+	// TODO: Add any missing essential keys.
+	DsrKey_Unhandled
+};
+
+inline String getName(DsrKey v);
+String& string_toStreamIndented(String& target, const DsrKey& source, const ReadableString& indentation);
+
+class KeyboardEvent : public InputEvent {
+public:
+	// What the user did to the key
+	KeyboardEventType keyboardEventType;
+	// The actual typed character, used for text input that has no dedicated DsrKey
+	char character;
+	// Minimal set of keys for portability
+	DsrKey dsrKey;
+	KeyboardEvent(KeyboardEventType keyboardEventType, char character, DsrKey dsrKey)
+	 : keyboardEventType(keyboardEventType), character(character), dsrKey(dsrKey) {}
+};
+
+enum class MouseKeyEnum { NoKey, Left, Right, Middle, ScrollUp, ScrollDown };
+enum class MouseEventType { MouseDown, MouseUp, MouseMove, Scroll };
+class MouseEvent : public InputEvent {
+public:
+	MouseEventType mouseEventType;
+	MouseKeyEnum key;
+	IVector2D position; // Pixel coordinates relative to upper left corner of parent container
+	MouseEvent(MouseEventType mouseEventType, MouseKeyEnum key, IVector2D position)
+	: mouseEventType(mouseEventType), key(key), position(position) {}
+};
+inline MouseEvent operator+(const MouseEvent &old, const IVector2D &offset) {
+	MouseEvent result = old;
+	result.position = result.position + offset;
+	return result;
+}
+inline MouseEvent operator-(const MouseEvent &old, const IVector2D &offset) {
+	MouseEvent result = old;
+	result.position = result.position - offset;
+	return result;
+}
+inline MouseEvent operator*(const MouseEvent &old, int scale) {
+	MouseEvent result = old;
+	result.position = result.position * scale;
+	return result;
+}
+inline MouseEvent operator/(const MouseEvent &old, int scale) {
+	MouseEvent result = old;
+	result.position = result.position / scale;
+	return result;
+}
+
+enum class WindowEventType { Close, Redraw };
+class WindowEvent : public InputEvent {
+public:
+	WindowEventType windowEventType;
+	int width, height;
+	WindowEvent(WindowEventType windowEventType, int width, int height)
+	: windowEventType(windowEventType), width(width), height(height) {}
+};
+
+// A macro for declaring a virtual callback from the base method
+//   Use the getter for registering methods so that they can be forwarded to a wrapper without inheritance
+//   Use the actual variable beginning with `callback_` when calling the method from inside
+#define DECLARE_CALLBACK(NAME, LAMBDA) \
+	decltype(LAMBDA) callback_##NAME = LAMBDA; \
+	decltype(LAMBDA)& NAME() { return callback_##NAME; }
+
+// The callback templates and types
+static std::function<void()> emptyCallback = []() {};
+using EmptyCallback = decltype(emptyCallback);
+static std::function<void(int, int)> sizeCallback = [](int width, int height) {};
+using SizeCallback = decltype(sizeCallback);
+static std::function<void(const KeyboardEvent&)> keyboardCallback = [](const KeyboardEvent& event) {};
+using KeyboardCallback = decltype(keyboardCallback);
+static std::function<void(const MouseEvent&)> mouseCallback = [](const MouseEvent& event) {};
+using MouseCallback = decltype(mouseCallback);
+
+}
+
+#endif
+

+ 352 - 0
Source/DFPSR/gui/VisualComponent.cpp

@@ -0,0 +1,352 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include <stdint.h>
+#include "VisualComponent.h"
+#include "../image/internal/imageInternal.h"
+
+using namespace dsr;
+
+PERSISTENT_DEFINITION(VisualComponent)
+
+VisualComponent::VisualComponent() {}
+
+VisualComponent::~VisualComponent() {
+	// Let the children know that the parent component no longer exists.
+	for (int i = 0; i < this->getChildCount(); i++) {
+		this->children[i]->parent = nullptr;
+	}
+}
+
+bool VisualComponent::isContainer() const {
+	return true;
+}
+
+IRect VisualComponent::getLocation() const {
+	return this->location;
+}
+
+IVector2D VisualComponent::getSize() const {
+	return this->location.size();
+}
+
+void VisualComponent::setRegion(FlexRegion newRegion) {
+	this->region = newRegion;
+}
+
+FlexRegion VisualComponent::getRegion() const {
+	return this->region;
+}
+
+void VisualComponent::setHidden(bool hidden) {
+	this->hidden.value = hidden;
+}
+
+bool VisualComponent::getHidden() const {
+	return this->hidden.value;
+}
+
+void VisualComponent::setName(const String& newName) {
+	this->name.value = newName;
+}
+
+String VisualComponent::getName() const {
+	return this->name.value;
+}
+
+void VisualComponent::setIndex(int newIndex) {
+	this->index.value = newIndex;
+}
+
+int VisualComponent::getIndex() const {
+	return this->index.value;
+}
+
+void VisualComponent::setLocation(IRect newLocation) {
+	IRect oldLocation = this->location;
+	this->location = newLocation;
+	if (oldLocation != newLocation) {
+		this->updateLocationEvent(oldLocation, newLocation);
+	}
+}
+
+void VisualComponent::applyLayout(IVector2D parentSize) {
+	this->setLocation(this->region.getNewLocation(parentSize));
+}
+
+void VisualComponent::updateLocationEvent(const IRect& oldLocation, const IRect& newLocation) {
+	// Place each child component
+	for (int i = 0; i < this->getChildCount(); i++) {
+		this->children[i]->applyLayout(newLocation.size());
+	}
+}
+
+// The offset may become non-zero when the component's origin ends up outside of targetImage after being clipped against the parent region
+void VisualComponent::draw(ImageRgbaU8& targetImage, const IVector2D& offset) {
+	if (!this->getHidden()) {
+		IRect containerBound = this->getLocation() + offset;
+		this->drawSelf(targetImage, containerBound);
+		// Draw each child component
+		for (int i = 0; i < this->getChildCount(); i++) {
+			this->children[i]->drawClipped(targetImage, containerBound.upperLeft(), containerBound);
+		}
+	}
+}
+
+void VisualComponent::drawClipped(ImageRgbaU8& targetImage, const IVector2D& offset, const IRect& clipRegion) {
+	IRect finalRegion = IRect::cut(clipRegion, IRect(0, 0, image_getWidth(targetImage), image_getHeight(targetImage)));
+	if (finalRegion.hasArea()) {
+		ImageRgbaU8 target = image_getSubImage(targetImage, finalRegion);
+		this->draw(target, offset - finalRegion.upperLeft());
+	}
+}
+
+// A red rectangle is drawn as a placeholder if the class couldn't be found
+// TODO: Should the type name be remembered in the base class for serializing missing components?
+void VisualComponent::drawSelf(ImageRgbaU8& targetImage, const IRect &relativeLocation) {
+	draw_rectangle(targetImage, relativeLocation, ColorRgbaI32(200, 50, 50, 255));
+}
+
+// Manual use with the correct type
+void VisualComponent::addChildComponent(std::shared_ptr<VisualComponent> child) {
+	if (!this->isContainer()) {
+		throwError(U"Cannot attach a child to a non-container parent component!\n");
+	} else if (child.get() == this) {
+		throwError(U"Cannot attach a component to itself!\n");
+	} else if (child->hasChild(this)) {
+		throwError(U"Cannot attach an ancestor as a child component, because it would create a cycle!\n");
+	} else {
+		// Remove from any previous parent
+		child->detachFromParent();
+		// Update layout based on the new parent size
+		child->applyLayout(this->getSize());
+		// Connect to the new parent
+		this->children.push(child);
+		child->parent = this;
+	}
+}
+
+// Automatic insertion from loading
+bool VisualComponent::addChild(std::shared_ptr<Persistent> child) {
+	// Try to cast from base class Persistent to derived class VisualComponent
+	std::shared_ptr<VisualComponent> visualComponent = std::dynamic_pointer_cast<VisualComponent>(child);
+	if (visualComponent.get() == nullptr) {
+		return false; // Wrong type!
+	} else {
+		this->addChildComponent(visualComponent);
+		return true; // Success!
+	}
+}
+
+int VisualComponent::getChildCount() const {
+	return this->children.length();
+}
+
+std::shared_ptr<Persistent> VisualComponent::getChild(int index) const {
+	return this->children[index];
+}
+
+void VisualComponent::detachFromParent() {
+	// Check if there's a parent component
+	VisualComponent *parent = this->parent;
+	if (parent != nullptr) {
+		// If the removed component is focused from the parent, then remove focus
+		if (parent->focusComponent.get() == this) {
+			parent->focusComponent = std::shared_ptr<VisualComponent>();
+		}
+		// Iterate over all children in the parent component
+		for (int i = 0; i < parent->getChildCount(); i++) {
+			std::shared_ptr<VisualComponent> current = parent->children[i];
+			if (current.get() == this) {
+				current->parent = nullptr; // Assign null
+				parent->children.remove(i);
+				return;
+			}
+		}
+	}
+}
+
+bool VisualComponent::hasChild(VisualComponent *child) const {
+	for (int i = 0; i < this->getChildCount(); i++) {
+		std::shared_ptr<VisualComponent> current = this->children[i];
+		if (current.get() == child) {
+			return true; // Found the component
+		} else {
+			if (current->hasChild(child)) {
+				return true; // Found the component recursively
+			}
+		}
+	}
+	return false; // Could not find the component
+}
+
+bool VisualComponent::hasChild(std::shared_ptr<VisualComponent> child) const {
+	return this->hasChild(child.get());
+}
+
+std::shared_ptr<VisualComponent> VisualComponent::findChildByName(ReadableString name, bool mustExist) const {
+	for (int i = 0; i < this->getChildCount(); i++) {
+		std::shared_ptr<VisualComponent> current = this->children[i];
+		if (string_match(current->getName(), name)) {
+			return current; // Found the component
+		} else {
+			std::shared_ptr<VisualComponent> searchResult = current->findChildByName(name, mustExist);
+			if (searchResult.get() != nullptr) {
+				return searchResult; // Found the component recursively
+			}
+		}
+	}
+	return std::shared_ptr<VisualComponent>(); // Could not find the component
+}
+
+std::shared_ptr<VisualComponent> VisualComponent::findChildByNameAndIndex(ReadableString name, int index, bool mustExist) const {
+	for (int i = 0; i < this->getChildCount(); i++) {
+		std::shared_ptr<VisualComponent> current = this->children[i];
+		if (string_match(current->getName(), name) && current->getIndex() == index) {
+			return current; // Found the component
+		} else {
+			std::shared_ptr<VisualComponent> searchResult = current->findChildByNameAndIndex(name, index, mustExist);
+			if (searchResult.get() != nullptr) {
+				return searchResult; // Found the component recursively
+			}
+		}
+	}
+	return std::shared_ptr<VisualComponent>(); // Could not find the component
+}
+
+bool VisualComponent::pointIsInside(const IVector2D& pixelPosition) {
+	// Include the left and top edges so that the pixel at the upper left corner counts as inside.
+	return pixelPosition.x >= this->location.left() && pixelPosition.x < this->location.right()
+	    && pixelPosition.y >= this->location.top() && pixelPosition.y < this->location.bottom();
+}
+
+// Non-recursive top-down search
+std::shared_ptr<VisualComponent> VisualComponent::getDirectChild(const IVector2D& pixelPosition, bool includeInvisible) {
+	// Iterate child components in reverse drawing order
+	for (int i = this->getChildCount() - 1; i >= 0; i--) {
+		std::shared_ptr<VisualComponent> currentChild = this->children[i];
+		// Check if the point is inside the child component
+		if ((!currentChild->getHidden() || includeInvisible) && currentChild->pointIsInside(pixelPosition)) {
+			return currentChild;
+		}
+	}
+	// Return nothing if the point missed all child components
+	return std::shared_ptr<VisualComponent>();
+}
+
+// Recursive top-down search
+std::shared_ptr<VisualComponent> VisualComponent::getTopChild(const IVector2D& pixelPosition, bool includeInvisible) {
+	// Iterate child components in reverse drawing order
+	for (int i = this->getChildCount() - 1; i >= 0; i--) {
+		std::shared_ptr<VisualComponent> currentChild = this->children[i];
+		// Check if the point is inside the child component
+		if ((!currentChild->getHidden() || includeInvisible) && currentChild->pointIsInside(pixelPosition)) {
+			// Check if a component inside the child component is even higher up
+			std::shared_ptr<VisualComponent> subChild = currentChild->getTopChild(pixelPosition - this->getLocation().upperLeft(), includeInvisible);
+			if (subChild.get() != nullptr) {
+				return subChild;
+			} else {
+				return currentChild;
+			}
+		}
+	}
+	// Return nothing if the point missed all child components
+	return std::shared_ptr<VisualComponent>();
+}
+
+void VisualComponent::sendMouseEvent(const MouseEvent& event) {
+	// Convert to local coordinates recursively
+	MouseEvent localEvent = event - this->getLocation().upperLeft();
+	std::shared_ptr<VisualComponent> childComponent;
+	// Grab a component on mouse down
+	if (event.mouseEventType == MouseEventType::MouseDown) {
+		childComponent = this->dragComponent = this->focusComponent = this->getDirectChild(localEvent.position, false);
+		this->holdCount++;
+	}
+	if (this->holdCount > 0) {
+		// If we're grabbing a component, keep sending events to it
+		childComponent = this->dragComponent;
+	} else if (!this->getHidden() && this->pointIsInside(event.position)) {
+		// If we're not grabbing a component, see if we can send the action to another component
+		childComponent = this->getDirectChild(localEvent.position, false);
+	}
+	// Send the signal to a child component or itself
+	if (childComponent.get() != nullptr) {
+		childComponent->sendMouseEvent(localEvent);
+	} else {
+		this->receiveMouseEvent(event);
+	}
+	// Release a component on mouse up
+	if (event.mouseEventType == MouseEventType::MouseUp) {
+		this->dragComponent = std::shared_ptr<VisualComponent>(); // Abort drag
+		this->holdCount--;
+		if (this->holdCount < 0) {
+			this->holdCount = 0;
+		}
+	}
+}
+
+void VisualComponent::receiveMouseEvent(const MouseEvent& event) {
+	if (event.mouseEventType == MouseEventType::MouseDown) {
+		this->callback_mouseDownEvent(event);
+	} else if (event.mouseEventType == MouseEventType::MouseUp) {
+		this->callback_mouseUpEvent(event);
+	} else if (event.mouseEventType == MouseEventType::MouseMove) {
+		this->callback_mouseMoveEvent(event);
+	} else if (event.mouseEventType == MouseEventType::Scroll) {
+		this->callback_mouseScrollEvent(event);
+	}
+}
+
+void VisualComponent::sendKeyboardEvent(const KeyboardEvent& event) {
+	// Send the signal to a focused component or itself
+	if (this->focusComponent.get() != nullptr) {
+		this->focusComponent->sendKeyboardEvent(event);
+	} else {
+		this->receiveKeyboardEvent(event);
+	}
+}
+
+void VisualComponent::receiveKeyboardEvent(const KeyboardEvent& event) {
+	if (event.keyboardEventType == KeyboardEventType::KeyDown) {
+		this->callback_keyDownEvent(event);
+	} else if (event.keyboardEventType == KeyboardEventType::KeyUp) {
+		this->callback_keyUpEvent(event);
+	} else if (event.keyboardEventType == KeyboardEventType::KeyType) {
+		this->callback_keyTypeEvent(event);
+	}
+}
+
+void VisualComponent::applyTheme(VisualTheme theme) {
+	this->theme = theme;
+	this->changedTheme(theme);
+	for (int i = 0; i < this->getChildCount(); i++) {
+		this->children[i]->applyTheme(theme);
+	}
+}
+
+VisualTheme VisualComponent::getTheme() const {
+	return this->theme;
+}
+
+void VisualComponent::changedTheme(VisualTheme newTheme) {}
+

+ 209 - 0
Source/DFPSR/gui/VisualComponent.h

@@ -0,0 +1,209 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GUI_VISUALCOMPONENT
+#define DFPSR_GUI_VISUALCOMPONENT
+
+#include "../persistent/includePersistent.h"
+#include "BackendWindow.h" // TODO: Separate event types from the window module
+#include "FlexRegion.h"
+#include "InputEvent.h"
+#include "VisualTheme.h"
+#include "../api/imageAPI.h"
+#include "../api/drawAPI.h"
+
+namespace dsr {
+
+class VisualComponent : public Persistent {
+PERSISTENT_DECLARATION(VisualComponent)
+protected:
+	// Parent component
+	VisualComponent *parent = nullptr;
+	// Child components
+	List<std::shared_ptr<VisualComponent>> children;
+	// Remember the component used for a drag event
+	//   Ensures that mouse down events are followed by mouse up events on the same component
+	int holdCount = 0;
+	std::shared_ptr<VisualComponent> dragComponent;
+	// Remember the focused component for keyboard input
+	std::shared_ptr<VisualComponent> focusComponent;
+	// Saved properties
+	FlexRegion region;
+	PersistentString name;
+	PersistentInteger index;
+	PersistentBoolean hidden = PersistentBoolean(false);
+	void declareAttributes(StructureDefinition &target) const override {
+		target.declareAttribute(U"Name");
+		target.declareAttribute(U"Index");
+		target.declareAttribute(U"Hidden");
+		target.declareAttribute(U"Left");
+		target.declareAttribute(U"Top");
+		target.declareAttribute(U"Right");
+		target.declareAttribute(U"Bottom");
+	}
+public:
+	Persistent* findAttribute(const ReadableString &name) override {
+		if (string_caseInsensitiveMatch(name, U"Name")) {
+			return &(this->name);
+		} else if (string_caseInsensitiveMatch(name, U"Index")) {
+			return &(this->index);
+		} else if (string_caseInsensitiveMatch(name, U"Hidden")) {
+			return &(this->hidden);
+		} else if (string_caseInsensitiveMatch(name, U"Left")) {
+			return &(this->region.sides[0]);
+		} else if (string_caseInsensitiveMatch(name, U"Top")) {
+			return &(this->region.sides[1]);
+		} else if (string_caseInsensitiveMatch(name, U"Right")) {
+			return &(this->region.sides[2]);
+		} else if (string_caseInsensitiveMatch(name, U"Bottom")) {
+			return &(this->region.sides[3]);
+		} else {
+			return nullptr;
+		}
+	}
+protected:
+	// Generated automatically from region in applyLayout
+	IRect location;
+	void setLocation(IRect newLocation);
+	// Applied recursively while selecting the correct theme
+	VisualTheme theme;
+public:
+	void applyTheme(VisualTheme theme);
+	VisualTheme getTheme() const;
+public:
+	// Constructor
+	VisualComponent();
+	// Destructor
+	virtual ~VisualComponent();
+public:
+	virtual bool isContainer() const;
+	IRect getLocation() const;
+	IVector2D getSize() const;
+	void setRegion(FlexRegion newRegion);
+	FlexRegion getRegion() const;
+	void setHidden(bool hidden);
+	bool getHidden() const;
+	void setName(const String& newName);
+	String getName() const;
+	void setIndex(int index);
+	int getIndex() const;
+public:
+	// Callbacks
+	DECLARE_CALLBACK(pressedEvent, emptyCallback);
+	DECLARE_CALLBACK(mouseDownEvent, mouseCallback);
+	DECLARE_CALLBACK(mouseUpEvent, mouseCallback);
+	DECLARE_CALLBACK(mouseMoveEvent, mouseCallback);
+	DECLARE_CALLBACK(mouseScrollEvent, mouseCallback);
+	DECLARE_CALLBACK(keyDownEvent, keyboardCallback);
+	DECLARE_CALLBACK(keyUpEvent, keyboardCallback);
+	DECLARE_CALLBACK(keyTypeEvent, keyboardCallback);
+private:
+	std::shared_ptr<VisualComponent> getDirectChild(const IVector2D& pixelPosition, bool includeInvisible);
+public:
+	// Draw the component
+	//   The component is responsible for drawing the component at this->location + offset.
+	//   The caller is responsible for drawing the background for any pixels in the component that might not be fully opaque.
+	//   If drawing out of bound, the pixels that are outside should be skipped without any warning nor crash.
+	//   To clip the drawing of a component when calling this, give a sub-image and adjust for the new coordinate system using offset.
+	//   If not implemented, a rectangle will mark the region where the component will be drawn as a reference.
+	// targetImage is the image being drawn to.
+	// offset is the upper left corner of the parent container relative to the image.
+	//   Clipping will affect the offset by being relative to the new sub-image.
+	void draw(ImageRgbaU8& targetImage, const IVector2D& offset);
+	// A basic request to have the component itself drawn to targetImage at relativeLocation.
+	//   The method is responsible for clipping without a warning when relativeLocation is outside of targetImage.
+	//   Clipping will be common if the component is drawn using multiple dirty rectangles to save time.
+	virtual void drawSelf(ImageRgbaU8& targetImage, const IRect &relativeLocation);
+	// Draw the component while skipping pixels outside of clipRegion
+	//   Multiple calls with non-overlapping clip regions should be equivalent to one call with the union of all clip regions.
+	//     This means that the draw methods should handle border clipping so that no extra borderlines or rounded edges appear from nowhere.
+	//     Example:
+	//       drawClipped(i, o, IRect(0, 0, 20, 20)) // Full region
+	//           <=>
+	//       drawClipped(i, o, IRect(0, 0, 10, 20)) // Left half
+	//       drawClipped(i, o, IRect(10, 0, 10, 20)) // Right half
+	//   Drawing with the whole target image as a clip region should be equivalent to a corresponding call to draw with the same targetImage and offset.
+	//     draw(i, o) <=> drawClipped(i, o, IRect(0, 0, i.width(), i.height()))
+	void drawClipped(ImageRgbaU8& targetImage, const IVector2D& offset, const IRect& clipRegion);
+
+// TODO: Distinguish from the generic version
+	// Add a child component
+	//   Preconditions:
+	//     The parent's component type is a container.
+	//     The child does not already have a parent.
+	void addChildComponent(std::shared_ptr<VisualComponent> child);
+	// Called with any persistent type when constructing child components from text
+	bool addChild(std::shared_ptr<Persistent> child) override;
+	// Called when saving to text
+	int getChildCount() const override;
+	std::shared_ptr<Persistent> getChild(int index) const override;
+
+// TODO: Reuse in Persistent
+	// Returns true iff child is a member of the component
+	//   Searches recursively
+	bool hasChild(VisualComponent *child) const;
+	bool hasChild(std::shared_ptr<VisualComponent> child) const;
+
+	// Find the first child component with the requested name using a case sensitive match.
+	//   If mustExist is true, failure will raise an exception directly.
+	//   Returns: A shared pointer to the child or null if not found.
+	std::shared_ptr<VisualComponent> findChildByName(ReadableString name, bool mustExist) const;
+	std::shared_ptr<VisualComponent> findChildByNameAndIndex(ReadableString name, int index, bool mustExist) const;
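+	//   Usage sketch (illustrative, hypothetical names):
+	//     std::shared_ptr<VisualComponent> okButton = rootPanel->findChildByName(U"OkButton", false);
+	//     if (okButton.get() != nullptr) { okButton->setHidden(true); }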
+	// Detach the component from any parent
+	void detachFromParent();
+
+	// Adapt the location based on the region
+	//   parentSize must be the current size (width and height) of the parent container
+	// Override to apply a custom behaviour, which may be useful for fixed-size components.
+	virtual void applyLayout(IVector2D parentSize);
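+	//   Override sketch (illustrative, hypothetical FixedSizeLabel class and assumed IRect(x, y, width, height) constructor):
+	//     void FixedSizeLabel::applyLayout(IVector2D parentSize) {
+	//         this->setLocation(IRect(0, 0, 100, 20));   // ignore the flexible region and keep a constant size
+	//     }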
+	// Called after the component has been created, moved or resized.
+	virtual void updateLocationEvent(const IRect& oldLocation, const IRect& newLocation);
+	// Returns true iff the pixel with its upper left corner at pixelPosition is inside the component.
+	// A rectangular bound check with location is used by default.
+	// The caller is responsible for checking if the component is visible when needed.
+	virtual bool pointIsInside(const IVector2D& pixelPosition);
+	// Get a reference to the topmost child
+	// Invisible components are ignored by default, but includeInvisible can be enabled to change that.
+	// Returns an empty reference if the pixel position didn't hit anything inside.
+	// Since the root component might not be heap allocated, it cannot return itself by reference.
+	//   Use pointIsInside if your root component doesn't cover the whole window.
+	std::shared_ptr<VisualComponent> getTopChild(const IVector2D& pixelPosition, bool includeInvisible = false);
+	// Send a mouse event to the component
+	//   The event's position is relative to the parent container.
+	//   The component is responsible for bound checking, which can be used to either block the signal or pass it to components below.
+	void sendMouseEvent(const MouseEvent& event);
+	void sendKeyboardEvent(const KeyboardEvent& event);
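+	//   Usage sketch (illustrative): a window backend forwarding a native click to a root component:
+	//     rootComponent->sendMouseEvent(MouseEvent(MouseEventType::MouseDown, MouseKeyEnum::Left, IVector2D(x, y)));
+	//     // rootComponent, x and y are hypothetical; the position is relative to the root's parent container.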
+	// Defines what the component does when it has received an event that didn't hit any sub components on the way.
+	//   The event's position is relative to the parent container.
+	//   This is not a callback event.
+	virtual void receiveMouseEvent(const MouseEvent& event);
+	virtual void receiveKeyboardEvent(const KeyboardEvent& event);
+	// Notifies when the theme has been changed, so that temporary data depending on the theme can be replaced
+	virtual void changedTheme(VisualTheme newTheme);
+};
+
+}
+
+#endif
+

+ 142 - 0
Source/DFPSR/gui/VisualTheme.cpp

@@ -0,0 +1,142 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include <stdint.h>
+#include "VisualTheme.h"
+#include "../api/imageAPI.h"
+#include "../api/drawAPI.h"
+
+namespace dsr {
+
+// TODO: A sub-routine call drawing a rounded rectangle with a solid background
+// The default theme
+//   Copy, modify and compile with theme_create to get a custom theme
+static const ReadableString defaultThemeCode =
+UR"QUOTE(
+# Helper methods
+BEGIN: generate_rounded_rectangle
+	# Dimensions of the result image
+	INPUT: FixedPoint, width
+	INPUT: FixedPoint, height
+	# The whole pixel radius from center points to the end of the image
+	INPUT: FixedPoint, corner
+	# The subtracted offset from the radius to create a border on certain channels
+	INPUT: FixedPoint, border
+	# Create the result image
+	OUTPUT: ImageU8, resultImage
+	CREATE: resultImage, width, height
+	# Limit outer radius to half of the image's minimum dimension
+	MIN: radius<FixedPoint>, width, height
+	MUL: radius, radius, 0.5
+	MIN: radius, radius, corner
+	ROUND: radius, radius
+	# Place the inner radius for drawing
+	SUB: innerRadius<FixedPoint>, corner, border
+	# Use +- 0.5 pixel offsets for fake anti-aliasing
+	ADD: radiusOut<FixedPoint>, innerRadius, 0.5
+	ADD: radiusIn<FixedPoint>, innerRadius, -0.5
+	# Calculate dimensions for drawing
+	SUB: w2<FixedPoint>, width, radius
+	SUB: w3<FixedPoint>, w2, radius
+	SUB: w4<FixedPoint>, width, border
+	SUB: w4, w4, border
+	SUB: h2<FixedPoint>, height, radius
+	SUB: h3<FixedPoint>, h2, radius
+	SUB: r2<FixedPoint>, radius, border
+	# Draw
+	FADE_REGION_RADIAL: resultImage,   0,  0,  radius, radius,  radius, radius,  radiusIn, 255,  radiusOut, 0
+	FADE_REGION_RADIAL: resultImage,  w2,  0,  radius, radius,       0, radius,  radiusIn, 255,  radiusOut, 0
+	FADE_REGION_RADIAL: resultImage,   0, h2,  radius, radius,  radius,      0,  radiusIn, 255,  radiusOut, 0
+	FADE_REGION_RADIAL: resultImage,  w2, h2,  radius, radius,       0,      0,  radiusIn, 255,  radiusOut, 0
+	RECTANGLE: resultImage, radius, border, w3, r2, 255
+	RECTANGLE: resultImage, radius, h2, w3, r2, 255
+	RECTANGLE: resultImage, border, radius, w4, h3, 255
+	END:
+
+BEGIN: Button
+	INPUT: FixedPoint, width
+	INPUT: FixedPoint, height
+	INPUT: FixedPoint, pressed
+	INPUT: FixedPoint, red
+	INPUT: FixedPoint, green
+	INPUT: FixedPoint, blue
+	OUTPUT: ImageRgbaU8, colorImage
+	# Scale by 2 / 255 so that 127.5 represents full intensity in patternImage
+	MUL: normRed<FixedPoint>, red, 0.007843138
+	MUL: normGreen<FixedPoint>, green, 0.007843138
+	MUL: normBlue<FixedPoint>, blue, 0.007843138
+	CREATE: patternImage<ImageU8>, width, height
+	MUL: pressDarknessHigh<FixedPoint>, pressed, 80
+	MUL: pressDarknessLow<FixedPoint>, pressed, 10
+	SUB: highLuma<FixedPoint>, 150, pressDarknessHigh
+	SUB: lowLuma<FixedPoint>, 100, pressDarknessLow
+	FADE_LINEAR: patternImage,  0, 0, highLuma,  0, height, lowLuma
+	CALL: generate_rounded_rectangle, lumaImage<ImageU8>, width, height, 12, 2
+	MUL: lumaImage, lumaImage, patternImage, 0.003921569
+	CALL: generate_rounded_rectangle, visImage<ImageU8>, width, height, 12, 0
+	MUL: redImage<ImageU8>, lumaImage, normRed
+	MUL: greenImage<ImageU8>, lumaImage, normGreen
+	MUL: blueImage<ImageU8>, lumaImage, normBlue
+	PACK_RGBA: colorImage, redImage, greenImage, blueImage, visImage
+	END:
+
+BEGIN: Panel
+	INPUT: FixedPoint, width
+	INPUT: FixedPoint, height
+	INPUT: FixedPoint, red
+	INPUT: FixedPoint, green
+	INPUT: FixedPoint, blue
+	OUTPUT: ImageRgbaU8, colorImage
+	CREATE: colorImage, width, height
+	SUB: w2<FixedPoint>, width, 2
+	SUB: h2<FixedPoint>, height, 2
+	RECTANGLE: colorImage, 1, 1, w2, h2, red, green, blue, 255
+	END:
+)QUOTE";
+
+class VisualThemeImpl {
+public:
+	MediaMachine machine;
+	// Constructor
+	VisualThemeImpl(const ReadableString& mediaCode) : machine(machine_create(mediaCode)) {}
+	// Destructor
+	virtual ~VisualThemeImpl() {}
+};
+
+static VisualTheme defaultTheme;
+VisualTheme theme_getDefault() {
+	if (!(defaultTheme.get())) {
+		defaultTheme = theme_create(defaultThemeCode);
+	}
+	return defaultTheme;
+}
+
+VisualTheme theme_create(const ReadableString& mediaCode) {
+	return std::make_shared<VisualThemeImpl>(mediaCode);
+}
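+
+// Usage sketch (illustrative, not part of the original file): compile a custom theme from media code
+// written in the same format as defaultThemeCode above and apply it to a component tree:
+//   VisualTheme myTheme = theme_create(myThemeCode);   // myThemeCode is a hypothetical ReadableString
+//   rootComponent->applyTheme(myTheme);                // rootComponent is a hypothetical VisualComponent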
+
+MediaMethod theme_getScalableImage(const VisualTheme& theme, const ReadableString &name) {
+	return machine_getMethod(theme->machine, name);
+}
+
+}

+ 40 - 0
Source/DFPSR/gui/VisualTheme.h

@@ -0,0 +1,40 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GUI_VISUALTHEME
+#define DFPSR_GUI_VISUALTHEME
+
+#include "../api/mediaMachineAPI.h"
+
+namespace dsr {
+
+// TODO: Move to the API folder
+
+// Theme API
+VisualTheme theme_create(const ReadableString& mediaCode);
+VisualTheme theme_getDefault();
+MediaMethod theme_getScalableImage(const VisualTheme& theme, const ReadableString &name);
+
+}
+
+#endif

+ 121 - 0
Source/DFPSR/gui/components/Button.cpp

@@ -0,0 +1,121 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "Button.h"
+#include <math.h>
+
+using namespace dsr;
+
+PERSISTENT_DEFINITION(Button)
+
+void Button::declareAttributes(StructureDefinition &target) const {
+	VisualComponent::declareAttributes(target);
+	target.declareAttribute(U"Color");
+	target.declareAttribute(U"Text");
+}
+
+Persistent* Button::findAttribute(const ReadableString &name) {
+	if (string_caseInsensitiveMatch(name, U"Color")) {
+		return &(this->color);
+	} else if (string_caseInsensitiveMatch(name, U"Text")) {
+		return &(this->text);
+	} else {
+		return VisualComponent::findAttribute(name);
+	}
+}
+
+Button::Button() {}
+
+bool Button::isContainer() const {
+	return true;
+}
+
+static OrderedImageRgbaU8 generateButtonImage(MediaMethod imageGenerator, int pressed, int width, int height, ColorRgbI32 backColor, String text, const std::shared_ptr<RasterFont>& font) {
+	// Create a scaled image
+	OrderedImageRgbaU8 result;
+	imageGenerator(width, height, pressed, backColor.red, backColor.green, backColor.blue)(result);
+	if (text.length() > 0) {
+		int left = (image_getWidth(result) - font->getLineWidth(text)) / 2;
+		int top = (image_getHeight(result) - font->size) / 2;
+		if (pressed) {
+			top += 1;
+		}
+		font->printLine(result, text, IVector2D(left, top), ColorRgbaI32(0, 0, 0, 255));
+	}
+	return result;
+}
+
+void Button::generateGraphics() {
+	int width = this->location.width();
+	int height = this->location.height();
+	if (!this->hasImages || this->lastWidth != width || this->lastHeight != height || !string_match(this->text.value, this->lastText) || this->color.value != this->lastColor) {
+		completeAssets();
+		this->imageUp = generateButtonImage(this->button, 0, width, height, this->color.value, this->text.value, this->font);
+		this->imageDown = generateButtonImage(this->button, 1, width, height, this->color.value, this->text.value, this->font);
+		this->lastWidth = width;
+		this->lastHeight = height;
+		this->lastText = this->text.value;
+		this->lastColor = this->color.value;
+		this->hasImages = true;
+	}
+}
+
+void Button::drawSelf(ImageRgbaU8& targetImage, const IRect &relativeLocation) {
+	this->generateGraphics();
+	draw_alphaFilter(targetImage, (this->pressed && this->inside) ? this->imageDown : this->imageUp, relativeLocation.left(), relativeLocation.top());
+}
+
+void Button::receiveMouseEvent(const MouseEvent& event) {
+	if (event.mouseEventType == MouseEventType::MouseDown) {
+		this->pressed = true;
+	} else if (this->pressed && event.mouseEventType == MouseEventType::MouseUp) {
+		this->pressed = false;
+		if (this->inside) {
+			this->callback_pressedEvent();
+		}
+	}
+	this->inside = this->pointIsInside(event.position);
+	VisualComponent::receiveMouseEvent(event);
+}
+
+bool Button::pointIsInside(const IVector2D& pixelPosition) {
+	this->generateGraphics();
+	// Get the point relative to the component instead of its direct container
+	IVector2D localPoint = pixelPosition - this->location.upperLeft();
+	// Sample opacity at the location
+	return dsr::image_readPixel_border(this->imageUp, localPoint.x, localPoint.y).alpha > 127;
+}
+
+void Button::changedTheme(VisualTheme newTheme) {
+	this->button = theme_getScalableImage(newTheme, U"Button");
+	this->hasImages = false;
+}
+
+void Button::completeAssets() {
+	if (this->button.methodIndex == -1) {
+		this->button = theme_getScalableImage(theme_getDefault(), U"Button");
+	}
+	if (this->font.get() == nullptr) {
+		this->font = font_getDefault();
+	}
+}

+ 70 - 0
Source/DFPSR/gui/components/Button.h

@@ -0,0 +1,70 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GUI_COMPONENT_BUTTON
+#define DFPSR_GUI_COMPONENT_BUTTON
+
+#include "../VisualComponent.h"
+#include "../Font.h"
+
+namespace dsr {
+
+class Button : public VisualComponent {
+PERSISTENT_DECLARATION(Button)
+public:
+	// Attributes
+	PersistentColor color;
+	PersistentString text;
+	void declareAttributes(StructureDefinition &target) const override;
+	Persistent* findAttribute(const ReadableString &name) override;
+private:
+	// Temporary
+	bool pressed = false;
+	bool inside = false;
+	// Given from the style
+	MediaMethod button;
+	std::shared_ptr<RasterFont> font;
+	void completeAssets();
+	void generateGraphics();
+	// Generated
+	bool hasImages = false;
+	int lastWidth = 0;
+	int lastHeight = 0;
+	String lastText;
+	ColorRgbI32 lastColor;
+	OrderedImageRgbaU8 imageUp;
+	OrderedImageRgbaU8 imageDown;
+public:
+	Button();
+public:
+	bool isContainer() const override;
+	void drawSelf(ImageRgbaU8& targetImage, const IRect &relativeLocation) override;
+	void receiveMouseEvent(const MouseEvent& event) override;
+	bool pointIsInside(const IVector2D& pixelPosition) override;
+	void changedTheme(VisualTheme newTheme) override;
+};
+
+}
+
+#endif
+

+ 83 - 0
Source/DFPSR/gui/components/Panel.cpp

@@ -0,0 +1,83 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "Panel.h"
+
+using namespace dsr;
+
+PERSISTENT_DEFINITION(Panel)
+
+void Panel::declareAttributes(StructureDefinition &target) const {
+	VisualComponent::declareAttributes(target);
+	target.declareAttribute(U"Solid");
+	target.declareAttribute(U"Color");
+}
+
+Persistent* Panel::findAttribute(const ReadableString &name) {
+	if (string_caseInsensitiveMatch(name, U"Solid")) {
+		return &(this->solid);
+	} else if (string_caseInsensitiveMatch(name, U"Color")) {
+		return &(this->color);
+	} else {
+		return VisualComponent::findAttribute(name);
+	}
+}
+
+Panel::Panel() {}
+
+bool Panel::isContainer() const {
+	return true;
+}
+
+void Panel::generateGraphics() {
+	int width = this->location.width();
+	int height = this->location.height();
+	if (!this->hasImages || this->lastWidth != width || this->lastHeight != height || this->color.value != this->lastColor) {
+		completeAssets();
+		this->background(width, height, this->color.value.red, this->color.value.green, this->color.value.blue)(this->imageBackground);
+		this->lastWidth = width;
+		this->lastHeight = height;
+		this->lastColor = this->color.value;
+		this->hasImages = true;
+	}
+}
+
+// Fill the background with a solid color
+void Panel::drawSelf(ImageRgbaU8& targetImage, const IRect &relativeLocation) {
+	if (this->solid.value) {
+		this->generateGraphics();
+		draw_copy(targetImage, this->imageBackground, relativeLocation.left(), relativeLocation.top());
+		//draw_rectangle(targetImage, relativeLocation, ColorRgbaI32(this->color.value, 255));
+	}
+}
+
+void Panel::changedTheme(VisualTheme newTheme) {
+	this->background = theme_getScalableImage(newTheme, U"Panel");
+	this->hasImages = false;
+}
+
+void Panel::completeAssets() {
+	if (this->background.methodIndex == -1) {
+		this->background = theme_getScalableImage(theme_getDefault(), U"Panel");
+	}
+}

+ 61 - 0
Source/DFPSR/gui/components/Panel.h

@@ -0,0 +1,61 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GUI_COMPONENT_PANEL
+#define DFPSR_GUI_COMPONENT_PANEL
+
+#include "../VisualComponent.h"
+
+namespace dsr {
+
+class Panel : public VisualComponent {
+PERSISTENT_DECLARATION(Panel)
+public:
+	// Attributes
+	PersistentBoolean solid; // If true, the panel itself will be drawn.
+	PersistentColor color; // The color used when solid is set to true.
+	void declareAttributes(StructureDefinition &target) const override;
+	Persistent* findAttribute(const ReadableString &name) override;
+private:
+	void completeAssets();
+	void generateGraphics();
+	MediaMethod background;
+	OrderedImageRgbaU8 imageBackground; // Alpha is copied to the target and should be 255
+	//OrderedImageRgbaU8 imagePassive; // TODO: Use for passive rendering of child components in cluttered panels
+	// Generated
+	bool hasImages = false;
+	int lastWidth = 0;
+	int lastHeight = 0;
+	ColorRgbI32 lastColor;
+public:
+	Panel();
+public:
+	bool isContainer() const override;
+	void drawSelf(ImageRgbaU8& targetImage, const IRect &relativeLocation) override;
+	void changedTheme(VisualTheme newTheme) override;
+};
+
+}
+
+#endif
+

+ 266 - 0
Source/DFPSR/gui/defaultFont.h

@@ -0,0 +1,266 @@
+
+// A rasterization of Ubuntu Mono size 16, with some minor pixel alignment for increased sharpness in the specific resolution.
+// The font was selected because it has a reusable generic style and permits both modification and embedding.
+// See the official website for full license terms
+//   https://www.ubuntu.com/legal/font-licence
+
+static const char* defaultFontAscii =
+R"QUOTE(< .,-_':;!+~=^?*abcdefghijklmnopqrstuvwxyz()[]{}|&@#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ>
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                      [  [                             g[                                              g[                                                                                                                                'y     >
+<                       [g             X  X            [f h(            [Z            jx'  q*          j(o.             )X                g            g                f)                                                                                t3     >
+<                       Z[             R  R           !Zt 0R          !6UZQj         dR79 bM,         &ZZZ7             xU               {Xe          hX]               vS                                                                                Fh     >
+<                       Z[             D  D           cZk 9H          DU@]Al         y|_V Fm          V8-5W             qM              yXj            pZy            3|rFmEc                                                                            ^O      >
+<                       Z)             k  k          [ZZZZZZ[         X&             y|_VqB           N2 C5             hD             ^Xy              {X^           &GSXNAg           Z                                                                [{      >
+<                       Wy                           gAZ}[VGg         FX2h           dR7DO?           rYIB'                            #R.              ,S#            'EIq             Z                                                                O?      >
+<                       Qs                            )X  X)          +AZZG=          jx22xi          &UXe)M                           N3                4M            A9cXf            Z                              [[[g                             fG       >
+<                       Gj                            0R !Zs            =)VF           ?OE7Rd        aZc6UAA                           X]                ]X            ^~ h          ZZZZZZZ                           ZZZ[                             1v       >
+<                                                    [ZZZZZZ[             &W           BqV-&y        xZ..9Zp                           U&                |U                             Z                                                              ,S:       >
+<                      ,ue                           gRK[6Z#g         92[@UH          mE V-&y        uZu=9Z#                           DF                FC                             Z               kj                              kk             o8        >
+<                      vZS                            R0 tZ!          }OXZHc         ,Mb A7Rd        :QZZNzX!                          sZc              aZr                             Z               UU                              UU             Am        >
+<                      eS6                            (h f[             [Z           *q  :xj          ;vu'-[b                          .IK_            ,HI.                                             )M                              kk            ;R.        >
+<                                                                       [Z                                                              =QG;          'CR=                                             @Ri                                            x#         >
+<                                                                                                                                        ~Da          *D~                                              p+                                             Ie         >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<      ^ww^              vg            fzt;           ,ozs'              ~[g          '[[[[g             aub          [[[[[[g          ^wyc            =xu!                                                                                            fxzh      >
+<     mXZZXm           '5Z[           #ZZZR!          IZZZQ;            ;NZ[          *ZZZZ[           !CZZy          ZZZZZZ[         mXZZZx          eVZZT^                                                                                          pZZZZ|     >
+<    ,SG++GR,         hS9Z[           #j,wZv          g+.pZv            E#Z[          eZ?             -LMn;               sU=         OG;+GT          DO+?S4                                                                                          ;g-!GV     >
+<    jZi  iZj         !g Z[              .Zw             'Zt           }A Z[          kZ-             xU!                ~Vt          W0  &O          W}  1R            kk              kk                jAl                         B);                 0I     >
+<    wZ:EE:Zw            Z[              jU!            ]IB,          fQ: Z[          sZUGr           FROWA;             3J           AX]qRk          U2  ]X            UU              UU             ,tKOw-        [ZZZZZZ[         e6V2b              rRa     >
+<    (Z FG.Z(            Z[             ~Qm             ZZ3:         .Ln  Z[          f}5RZw          UQ&1X5            ~Xp           +UZZL~          6X1&PU            kk              kk            {SGo                              =&UCd           uWd      >
+<    wZ:  :Zw            Z[            +Mr              .eOB         rV[[[ZDg            ,GP          X[  3T            ]S.           {K?yVC          !BWPRH                                          VOj                                ;@Zy           U0       >
+<    jZi  iZj            Z[           -Kv                 {X         [ZZZZZZ[             }X          R1  |W            M0            S{  &W             :T]                                          :zQKs,         [ZZZZZZ[          *2V8h                     >
+<    ,SG++GS,            Z[           {8              i: aMM              Z[          i: cOH          6S?+OD           ^Zn            TG;!HP           'lIR;            kk             :w_              .oGRh                         6U&=             :x:       >
+<     mXZZXn           ZZZZZ[         RZZZZZ          RZZZWl              Z[          RZZZUh          *UZZVe           pZ=            zZZZZt          (ZZKa             UU             xZw                 e+                         i_               xZx       >
+<      ^ww^            [[[[[g         )[[[[[          !s(s+               [g          +s(s;            +vx=            f[              eyyc           eve               kk             !Ro                                                             :x:       >
+<                                                                                                                                                                                     gFA                                                                        >
+<                                                                                                                                                                                     ?k                                                                         >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<      ,ozl            i[).           pz(p_             gywc          ozyj            g[[[[[          g[[[[[            hyxf         g[    [g         g[[[[[           [[[[g          [g  mv          g[              tt  tu          [t  g[           ^ww=      >
+<     !LZZZ7           GWZi           ZZZZQ;          :EZZZZo         ZZZZL=          [ZZZZZ          [ZZZZZ          :EZZZZo        [Z    Z[         [ZZZZZ           ZZZZ[          Z[ ~Vu          [Z              OV,,WR          ZX+ [Z          nXZZXl     >
+<     7Tf,[Zh         ;Y&V&           Z[.rZv          9Wu_'k;         Z[.jRI          [Z              [Z              9Xv'-i'        [Z    Z[           [Z                Z[          Z[ AG           [Z              XQhhQZ.         ZU[ [Z         ,SE;+GR.    >
+<    !Yy  'Zz         oZhBK           Z[ 'Zt         *Zz              Z[  oZe         [Z              [Z             *Z)             [Z    Z[           [Z                Z[          Z[tV?           [Z             :Z1{{0Z+         Z1O [Z         jZh  iZi    >
+<    mZ*?GXZ[         1W,(Z;          ZD{IA,         uZ+              Z[  :Zv         [Z[[[g          [Z[[[g         uZ+             [Z[[[[Z[           [Z                Z[          Z5Vu            [Z             ?ZmGGlZb         Z[9i[Z         wZ_  'Zw    >
+<    wZ,GT}Z[         ME gZo          ZZZY4:         (Z               Z[   Z(         [ZZZZ[          [ZZZZ[         (Z    [g        [ZZZZZZ[           [Z                Z[          ZXZj            [Z             gZ+RR!Zh         Z[m2[Z         (Z    Z(    >
+<    (Z X| Z[        !Z[ ,X@          Z[ *LD         uZ!              Z[  ;Zv         [Z              [Z             uZ!   Z[        [Z    Z[           [Z                Z[          Z{8V*           [Z             lZ;pq:Zl         Z[.M{Z         wZ_  'Zw    >
+<    vZ;RA,Z[        nZZZZZI          Z[  {X         dZw              Z[  pZe         [Z              [Z             dZv   Z[        [Z    Z[           [Z               _Zz          Z[,HL,          [Z             qZ'  -Zq         Z[ #FZ         jZg  iZi    >
+<    hZl(ZZZ[        &X[[[BY'         Z[ aMM          GVr--g!         Z].iRI          [Z              [Z              GVq,'Z[        [Z    Z[           [Z            g!-{Zm          Z[ ?W]          [Z             uZ,  ,Zu         Z[ gYZ         ,SE;+GR.    >
+<    .RE i(u~        GE   hZj         ZZZZWm          =MZZZZq         ZZZZM^          [ZZZZZ[         [Z              ~LZZZZ[        [Z    Z[         [ZZZZZ          KZZZH.          Z[  }V!         [ZZZZZ[        yZ.   Zx         Z[  RZ          nXZZXm     >
+<     tZ7?.!         zj   ,[e         pz(s+            .lzwe          ozyk.           g[[[[[g         g[               .lzym'        g[    [g         g[[[[[          'r(o,           [g  :[c         g[[[[[g        f[    [f         [g  n[           ?ww^      >
+<      )XZZT                                                                                                                                                                                                                                                     >
+<       ^uzm                                                                                                                                                                                                                                                     >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                     y'                                                         >
+<     nx)wc            ^ww=           oy(s;            bxxg           [[[[[[g         [g  g[         zj   ,[e        i)    (i        a[:  :[a        c[-   lx         [[[[[[           g[[[g          3t              g[[[g             ez                       >
+<     ZZZZZ2          mXZZXl          ZZZZUf          tZZZZq          ZZZZZZ[         Z[  [Z         EE   eZi        }X    V}        -R&  &R-        ~Yu  ,S0         ZZZZZZ           [Z[[g          hF              g[[Z[             LYp                      >
+<     Z[ '{Zj        ,SE;+GR.         Z[ aNG          QF+,g;            [Z            Z[  [Z         [X-  zW,        }X    V{         tX==Xt          9L  sZa            -O@           [Z              O=                Z[            tOwO.                     >
+<     Z[  ,Zz        jZh  iZh         Z[  {W          W2                [Z            Z[  [Z         fZn  BC         }X    V]          K55K           fZq M9             0O-           [Z              {[                Z[           ,Rq Lw                     >
+<     Z[  aZt        wZ_  'Zw         Z[  4R          6Z6g              [Z            Z[  [Z          T4 ,Vx         }X nm V)          lZZl            COuZc            gZj            [Z              ?O                Z[           (I  mS'                    >
+<     ZD[#RT;        (Z    Z(         ZD]8Z}          -0XZC!            [Z            Z[  [Z          5U,jZ?         }X PO V(           QQ             dZX6            .M6             [Z               Hf               Z[           6i   9f                    >
+<     ZZZUAb         xZ_  'Zw         ZZZZ7.            ![XA            [Z            Z[  [Z          pZm@M          }XfCCfVy          oZZp             6Z^            zV+             [Z               v1               Z[                                      >
+<     Z[             mZg  iZi         Z[ 2J.              @W            [Z            V@  #V          'X8P}          }X&uu&Vw         -Q10R-            [Z            !V)              [Z               :S,              Z[                                      >
+<     Z[             :WE;+GR,         Z[ :Tw          i~ ~GR            [Z            GQ~~QG           BZZc          }XP!;PWu         )U;:U]            [Z            0R,              [Z                8o              Z[                                      >
+<     Z[              yZZZXm          Z[  )S_         MZZZZt            [Z            lXZZXl           tZN           }ZF  DZs        'U]  {V:           [Z            XZZZZZ           [Z                mA              Z[                                      >
+<     [g               iVH?           [g  '[b         -nzw*             g[             *xx*            :[l           i[h  g[b        b['  :[b           g[            [[[[[[           [Z                .R;             Z[                                      >
+<                       [ZG}                                                                                                                                                           [Z                 #w             Z[                                      >
+<                        lAB                                                                                                                                                           [ZZZ[              eI          [ZZZ[                          ZZZZZZZZ    >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<       1c                            oe                                  ?v                            -p(s+                         oe                                              oe              [[[g                                                       >
+<       wK,                           Z[                                  [Z                           -LZZZR                         Z[               kUl              kUl           Z[              ZZZ[                                                       >
+<        @h                           Z[                                  [Z                           rZu,;i                         Z[               lUl              lUl           Z[                Z[                                                       >
+<                                     Z[                                  [Z                           )Z                             Z[                                              Z[                Z[                                                       >
+<                      BUXF*          ZKRWB?           mGWU9          ?BWRKZ          '#RWGf          ZZZZZZ          ~6UXRB          ZQUV8;          ZZZ[             ZZZZ           Z[ !Q#            Z[           oOXM2UE-         BQWU6;          !5UU5!     >
+<                      B}#WG          ZU#&TP,         nZM||6         ,PT&#UZ          GV1|PU;         [DZ[[[         ,NU@[HZ          ZK}3Z5          [[Z[             [[DZ           Z['I8             Z[           [Z}OX5Zs         ZH]3Z6         .KU@@VK.    >
+<                         &X          Z[  iZn         IK,            nZi  [Z         kZi  =Zs          [Z            oZh  [Z          Z[  7T            Z[               [Z           Z&HA,             Z[           [Z ]X Z)         Z[  6U         lZl  lZl    >
+<                     *CUWTZ          Z[  ,Zz         W{             zZ,  [Z         zZZZZZZ)          [Z            (Z.  [Z          Z[  ]Z            Z[               [Z           ZXR-              Z[           [Z [Z Z[         Z[  [Z         zZ.  .Zz    >
+<                     MB; |Z          Z[  +Zu         T1             uZ+  [Z         uZ~               [Z            sZe  [Z          Z[  [Z            Z[               [Z           Z|8D_             Z[           [Z g[ Z[         Z[  [Z         uZ!  +Zu    >
+<                     VA: {Z          Z| ~9X=         4Wr--g         =X9+ }Z         =XEb 'h           [Z            ;UR&@PZ          Z[  [Z            UB,d             [Z           Z[ 3H-            UB,d         [Z    Z[         Z[  [Z         =X5::6X=    >
+<                     &ZZZZZ          ZZZZXv          !LZZZU          vXZZZZ          rWZZZU           [Z             eGXRMZ          Z[  [Z            5ZZU             [Z           Z[  AB            5ZZU         [Z    Z[         Z[  [Z          wYZZYw     >
+<                      gwztj          fu(v^            .mzvb           ^v(uf           !r(u*           g[                 5T          [g  g[            ,szd             |X           [g  ;[?           ,syd         g[    [g         [g  g[           *xw*      >
+<                                                                                                                     B#[6Z4                                          i6|UG                                                                                      >
+<                                                                                                                     6SXR0_                                          fIXLc                                                                                      >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                      .o([             g[             [(o.                                      >
+<                                                                      ?v                                                                                                              5S}[             [Z             [}T5                                      >
+<                                                                      [Z                                                                                                              V|               [Z               |V                                      >
+<                                                                      [Z                                                                                                              Z[               [Z               [Z                                      >
+<    oKVXIo           oIXVKn          hAQXWH           ;s(ue          ZZZZZZ          Z[  [Z         qZ?  ^Zq        wZ,  ,Zw        *Vs  rV*         O4  ~Zt         [ZZZZZ           Z[               [Z               [Z                                      >
+<    [Z|]IZo         oZI{|Z[          [Z5{[t          ^TZZZR          [DZ[[[          Z[  [Z         !Zw  vZ!        oZ=??=Zn         tV??Vt          2R  pZb         g[[[RJ           Zz               [Z               yZ                                      >
+<    [Z  .II         II.  X[          [Z              yZe ;h           [Z             Z[  [Z          HD  DH         dZg))hZb          6II6           nZd #V             {O!          jZf               [Z               dZi          1WEl-An                    >
+<    [Z   {W         X{   X[          [Z              fXR#g            [Z             Z[  [Z          )X;;Xz         'ZrAAsZ-          'SS'           -W| KC            tU*          [Z0                [Z                1Z[        jU|HZZU+                    >
+<    [Z   1T         T2   X[          [Z               f9XZB-          )Z             X&  [Z          ?ZvvZ=          R@AB@O           pXOp            6Q!Z(           gXl            iZg               [Z               jZi         =h  hza                     >
+<    [Zd.gU6         5Vg.dY[          [Z                 -tZv          wZk.e          KP= |Z           GIIF           BU(]U7          aVwdUa           lZ8Ze          ;R&              Z(               [Z               )Z                                      >
+<    [ZZZZR=         =RZZZZ[          [Z              nF|[7Zq          ?XZZU          oYZZZZ           tZZs           {ZchZ(         ,K9  1K,           PZR           (ZZZZZ           Z[               [Z               [Z                                      >
+<    [Zizt'           'szhZ[          g[              fFVXO{            *xyc           ax(uf           -[[-           b( .[?         *[;  ;[*           4Z0           g[[[[[           Z[               [Z               [Z                                      >
+<    [Z                   Z[                                                                                                                          (2ZU~                            R9,              [Z              ,9R                                      >
+<    [Z                   Z[                                                                                                                          KXGd                             mNYZ             [Z             ZXMl                                      >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                       [g                                                                                                                                                       >
+<                                                       cxyc                         b[_   mv           Z[             +vzl,          MqrM                             r(m                                                                             ZZZZ      >
+<                                       [Z             qZZZR                         !Xu  :Uy           Z[            ^UZZZt          q==q                             }{Rk                                                                            [[[[      >
+<                                       [Z             KM~_g         ,f   ,e          5J  |Q,           Z[            yZb.d~                                           jzC)                                                                                      >
+<                      FO             +5UZQk           X}            fWCVMIA          dZj^Xr            Z[            kZ9o,                           ;4UU5;          rM]E[            -l -l                                          ;4UU5;                     >
+<                      FO            .LY7]6l           Z[             D&-dRf           CFAG             Z[            -IXZRk                          I|::|I          sK[E[            {E {E                                          I|::|I                     >
+<                                    mZu             [ZZZZZ[          V,  }y          [9ZZ7[g                         AA-oQR                         lE+MK El          l(xa           +Vh+Vi          ZZZZZZZ          g[[[          lEyXOcEl                    >
+<                      ~r            zZ-             g[ZC[[g          Oa  7q          ZZZZZZ[                         W9. &O                         z{x1, {z                         nX'nX:               [Z          [ZZZ          z{[DIs{z                    >
+<                      nK            wZ-              ,Zx            ,JR|9Zq            [Z              [g            2ZR3Kc                         u#nMz 0u                          I) I[               [Z                        u#[HG 0u                    >
+<                      vT            aZu              +Zo            eAgyqr6          ZZZZZZ[           Z[             qDXM;                         =S~qt~R~                          i8 i8               [Z                        =SqgliR~                    >
+<                      (X             )X7]4r          gZZZZZ[                         [[DZ[[g           Z[               *Zx                          vR&&Rt                                               gg                         vR&&Rt                     >
+<                      [Z              bBZx+          a[[[[[g                           g[              Z[            A#[8Zo                           ?ww^                                                                            ?ww^                      >
+<                      [Z               [Z                                                              Z[            #RXR}                                                                                                                                      >
+<                      [Z               g[                                                              Z[                                                                                                                                                       >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<      -uu-                                                             d1                                                                                                                                                                                       >
+<      988A                                            axx=            ,Lw                            .jGHF                                             hg             !vv!                           -y   n*         -z   n*         ~wv-  n*                   >
+<      X--X             2              cMV|            A&#P            h@                            ^MZZSR                                            7WZ            !R11Q!                         hOZ  =K,        hOZ  =K,         ^[Jw =K,                   >
+<      988A             Z              ;b+V             [67                                          CZZZ I                                            i]Z            w1  1w                         ~^Z  Ak         ~^Z  Ak           ZZf Al                    >
+<      ,uu,             Z                C{             ]CE                           Z[  wZ         WZZZ I                                              Z            w1  1v          _h f!            Z l9            Z l9           *[Kul9            kUk      >
+<                    2ZZZZZ2            B)             h.*V                           Z[  wZ         TZZZ I                                              Z            !R11Q!          dV!#B            [,K=            [,K=           ?zs_L=            lUl      >
+<                       Z              nQEE            DXRt                           Z[  wZ         1ZZZ I             :x:                            ZZZZ            !ww!            C#-My            [[              [[               [[                      >
+<                       Z                                                             Z[  wZ         ,|RZ I             wZw                                                            }K 2F           =K,i[           =K:tt.           =L,i[           h[       >
+<                       2                                                             Z&  wZ            Z I             :x:                                                           -TmhU=           9l+BZ           9lh3Iv           9m+BZ           IH       >
+<                                                                                     ZO~ (Z            Z I                                                                           hA 6q           lA 4dZ          lA  :4^          lA 4dZ          4I+       >
+<                                                                                     ZZZZZZ            Z I                                                                                          ,K= ZZZ[        ,K= cK}g         ,K^ ZZZ[        &I-        >
+<                    2ZZZZZ2                                                          Z|w(te            Z I                             =8                                                           *n    [         *n  h||i         *n    [         V&         >
+<                                                                                     Z)                Z I                            ,gHi                                                                                                           KW0}Eh     >
+<                                                                                     Z[                Z I                            pWRh                                                                                                           aEWU9*     >
+<      b2               d1                                                                                                              1c               d1             .c                              1c               3b              .c                      >
+<      :Mr             ,Lw              g6            =MO&6            MqrM            =xn                                              wK,             ,Lw             6Ue            qM Mq            wK,             rM:              6Ue           MqrM      >
+<       ?7             h@              bO83           ?=ey~            q==q            R~2t                                              @h             h@             h#c6            =q q=             @h             7?              h#c6           q==q      >
+<      i[).            i[).            ;? f            i[).            i[).            I2Mk            t[[[[            ;rzo_         g[[[[[          g[[[[[          g[[[[[          g[[[[[          g[[[[[          g[[[[[           g[[[[[         g[[[[[     >
+<      GWZi            GWZi            ~ZZ1            GWZi            GWZi            KSZj           ,WYZZZ           rVZZZL         [ZZZZZ          [ZZZZZ          [ZZZZZ          [ZZZZZ          [ZZZZZ          [ZZZZZ           [ZZZZZ         [ZZZZZ     >
+<     ;Y&V&           ;Y&V&            vZHR           ;Y&V&           ;Y&V&           ~Z)U#           iZJZ            dXDa.^i         [Z              [Z              [Z              [Z                [Z              [Z               [Z             [Z       >
+<     oZhBK           oZhBK            ARqZc          oZhBK           oZhBK           tZe9M           ]Z#Z            6R-             [Z              [Z              [Z              [Z                [Z              [Z               [Z             [Z       >
+<     1W,(Z;          1W,(Z;          .V3!Zy          1W,(Z;          1W,(Z;          4V.zZ+          EO[Z[g          R1              [Z[[[g          [Z[[[g          [Z[[[g          [Z[[[g            [Z              [Z               [Z             [Z       >
+<     ME gZo          ME gZo          fZr NA          ME gZo          ME gZo          RC fZq         .W4[ZZ[          X]              [ZZZZ[          [ZZZZ[          [ZZZZ[          [ZZZZ[            [Z              [Z               [Z             [Z       >
+<    !Z[ ,X@         !Z[ ,X@          (Z= 3U         !Z[ ,X@         !Z[ ,X@         =ZF[}Z1         cZBDZ            S0              [Z              [Z              [Z              [Z                [Z              [Z               [Z             [Z       >
+<    nZZZZZI         nZZZZZI          AX[[BZc        nZZZZZI         nZZZZZI         qZZZZZK         tZZZZ            BR,             [Z              [Z              [Z              [Z                [Z              [Z               [Z             [Z       >
+<    &X[[[BY'        &X[[[BY'         UZZZZZw        &X[[[BY'        &X[[[BY'        #W,  [Z'        1V [Z            lZA*.~h         [Z              [Z              [Z              [Z                [Z              [Z               [Z             [Z       >
+<    GE   hZj        GE   hZj        *Zs   P6        GE   hZj        GE   hZj        HD   hZi        IC [ZZZ           [YZZZN         [ZZZZZ[         [ZZZZZ[         [ZZZZZ[         [ZZZZZ[         [ZZZZZ          [ZZZZZ           [ZZZZZ         [ZZZZZ     >
+<    zj   ,[e        zj   ,[e        sZ=   3Q        zj   ,[e        zj   ,[e        zj   ,[e        (j g[[[            ^|Gq:         g[[[[[g         g[[[[[g         g[[[[[g         g[[[[[g         g[[[[[          g[[[[[           g[[[[[         g[[[[[     >
+<                                                                                                                       ,gHi                                                                                                                                     >
+<                                                                                                                       pWRh                                                                                                                                     >
+<                                       1c               3b             c.                                                                             b2                3b             c.                               3b                                      >
+<                     =MO&6             wK,             rM:            eU6            =MO&6           qM Mq                                            :Mr              rM:            eU6             MqrM             rM:                                      >
+<                     ?=ey~              @h             7?             6c#h           ?=ey~           =q q=                                             ?7              7?             6c#h            q==q             7?                             bzv=      >
+<     pzyk.           [t  g[           ^ww=            ^ww=            ^ww=            ^ww=           .nzm                             *yx04          [g  g[          [g  g[          [g  g[          [g  g[         c[-   lx          [g             nYZZV*     >
+<     ZZZZM^          ZX+ [Z          nXZZXl          nXZZXl          nXZZXl          nXZZXl         ,FZZZE.                          oXZTZ4          Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z         ~Yu  ,S0          Z[,            KM;bZz     >
+<     Z],kRI          ZU[ [Z         ,SE;+GR.        ,SE;+GR.        ,SE;+GR.        ,SE;+GR.        xZp,sZw                         ,SF;tZU,         Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z          9L  sZa          ZZZWCf         X| sXd     >
+<     Z[  pZe         Z1O [Z         jZh  iZi        jZh  iZi        jZh  iZi        jZh  iZi        GE   FF          d9- -9d        jZi L7Zk         Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z          fZq M9           ZC(@RW^        Z[wVm      >
+<    [ZD[ :Zv         Z[9i[Z         wZ_  'Zw        wZ_  'Zw        wZ_  'Zw        wZ_  'Zw        U|   &T          ,EG?GE,        wZ'kZcZw         Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z           COuZc           Z[  ^Zx        Z[V@       >
+<    ZZZZ  Z(         Z[m2[Z         (Z    Z(        (Z    Z(        (Z    Z(        (Z    Z(        X[   [X           'IZI'         (Z CC Z(         Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z           dZX6            Z[  'Zx        Z[4X}'     >
+<     Z[  :Zv         Z[.M{Z         wZ_  'Zw        wZ_  'Zw        wZ_  'Zw        wZ_  'Zw        U|   &T           'IYI'         xZcZj:Zw         Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z            6Z^            Z[ +3Zd        Z[ mNQ+    >
+<     Z[  oZe         Z[ #FZ         jZg  iZi        jZg  iZi        jZg  iZi        jZg  iZi        GE   FF          ,EG^GE,        nZ6K jZi         V@  #V          V@  #V          V@  #V          V@  #V            [Z             ZZZZZ}         Z[  !Zy    >
+<     Z[.iRI          Z[ gYZ         ,SE;+GR.        ,SE;+GR.        ,SE;+GR.        ,SE;+GR.        yZo.sZw          d9- -9d        'WZs!GS,         GQ~~QG          GQ~~QG          GQ~~QG          GQ~~QG            [Z             ZD)u*          Z[d,hZu    >
+<     ZZZZM^          Z[  RZ          nXZZXm          nXZZXm          nXZZXm          nXZZXm         ,GZZZE,                          6ZRZXq          lXZZXl          lXZZXl          lXZZXl          lXZZXl            [Z             Z[             Z[SZZS!    >
+<     pzyk.           [g  n[           ?ww^            ?ww^            ?ww^            ?ww^           .ozn.                           43zya            *xx*            *xx*            *xx*            *xx*             g[             [g             [gdyu!     >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<       1c               3b             .c                                                                                             b2                3b             .c                             b2               d1               c.                      >
+<       wK,             rM:             6Ue            [VA3j           MqrM             ]}e                                            :Mr              rM:             6Ue            MqrM            :Mr             ,Lw              eU6           qM Mq      >
+<        @h             7?             h#c6            j:so            q==q             }~v                                             ?7              7?             h#c6            q==q             ?7             h@               6c#h          =q q=      >
+<                                                                                       ev,                                                                                                                                                                      >
+<      BUXF*           BUXF*           BUXF*           BUXF*           BUXF*           BUXF*         oUJ?IOc           :0RXMk         '#RWGf          '#RWGf          '#RWGf          '#RWGf          ZZZ[            ZZZ[             ZZZ[           ZZZ[       >
+<      B}#WG           B}#WG           B}#WG           B}#WG           B}#WG           B}#WG         o|RS9RB          .IX5]6k         GV1|PU;         GV1|PU;         GV1|PU;         GV1|PU;         [[Z[            [[Z[             [[Z[           [[Z[       >
+<         &X              &X              &X              &X              &X              &X           [Z;|U          lZq            kZi  =Zs        kZi  =Zs        kZi  =Zs        kZi  =Zs           Z[              Z[               Z[             Z[       >
+<     *CUWTZ          *CUWTZ          *CUWTZ          *CUWTZ          *CUWTZ          *CUWTZ         oUUZZZZ          zZ,            zZZZZZZ)        zZZZZZZ)        zZZZZZZ)        zZZZZZZ)           Z[              Z[               Z[             Z[       >
+<     MB; |Z          MB; |Z          MB; |Z          MB; |Z          MB; |Z          MB; |Z         O2yZ'            wZ+            uZ~             uZ~             uZ~             uZ~                Z]              Z]               Z]             Z]       >
+<     VA: {Z          VA: {Z          VA: {Z          VA: {Z          VA: {Z          VA: {Z         V3jZ},e          aZB?.=^        =XEb 'h         =XEb 'h         =XEb 'h         =XEb 'h            UE_!;           UE_!;            UE_!;          UE_!;    >
+<     &ZZZZZ          &ZZZZZ          &ZZZZZ          &ZZZZZ          &ZZZZZ          &ZZZZZ         7ZZUZZU           (ZZZZw         rWZZZU          rWZZZU          rWZZZU          rWZZZU            1ZZZv           1ZZZv            1ZZZv          1ZZZv    >
+<      gwztj           gwztj           gwztj           gwztj           gwztj           gwztj         ,vr-pze            *J[m,          !r(u*           !r(u*           !r(u*           !r(u*            .o(p-           .o(p-            .o(p-          .o(p-    >
+<                                                                                                                       -@A                                                                                                                                      >
+<                                                                                                                       KX7                                                                                                                                      >
+<                                                                                                                                                                                                                                                                >
+<                                                                                                                                                                                                                                                                >
+<       e!                              1c               3b             c.                                                                             b2                3b             .c                               d1            oe                        >
+<       |T[Gm         =MO&6             wK,             rM:            eU6            =MO&6           qM Mq                                            :Mr              rM:             6Ue            MqrM             ,Lw            Z[              qM Mq     >
+<      gKXXo,         ?=ey~              @h             7?             6c#h           ?=ey~           =q q=                                             ?7              7?             h#c6            q==q             h@             Z[              =q q=     >
+<      +j?Yo                                                                                                                               b'                                                                                          Z[                        >
+<      bznF7          BQWU6;          !5UU5!          !5UU5!          !5UU5!          !5UU5!          qLXKq             UU            ;4UV9Md         Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z          O4  ~Zt          ZKPWB^         O4  ~Zt    >
+<     jXZZXP          ZH]3Z6         .KU@@VK.        .KU@@VK.        .KU@@VK.        .KU@@VK.        oZE]FZo            kk           .JV#&VP.         Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z          2R  pZb          ZS#&UO,        2R  pZb    >
+<     FQ~;BW          Z[  6U         lZl  lZl        lZl  lZl        lZl  lZl        lZl  lZl        IG   HI                         lZm uJZm         Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z          nZd #V           Z[  jZn        nZd #V     >
+<     V&  ]W          Z[  [Z         zZ.  .Zz        zZ.  .Zz        zZ.  .Zz        zZ.  .Zz        X{   {X         gZZZZZZg        zZ.qG'Zz         Z[  [Z          Z[  [Z          Z[  [Z          Z[  [Z          -W| KC           Z[  .Zz        -W| KC     >
+<     W|  4M          Z[  [Z         uZ!  +Zu        uZ!  +Zu        uZ!  +Zu        uZ!  +Zu        S0   1S                         vZsH'+Zu         X&  [Z          X&  [Z          X&  [Z          X&  [Z           6Q!Z(           Z[  !Zv         6Q!Z(     >
+<     CP+bU@          Z[  [Z         =X5::6X=        =X5::6X=        =X5::6X=        =X5::6X=        2Vi.jV2            kk           ?ZU=;7X=         KP= |Z          KP= |Z          KP= |Z          KP= |Z           lZ8Ze           Z5''3Z*         lZ8Ze     >
+<     dVZZQ+          Z[  [Z          wYZZYw          wYZZYw          wYZZYw          wYZZYw         ;KZZZK;            UU           ,PZZZXu          oYZZZZ          oYZZZZ          oYZZZZ          oYZZZZ            PZR            ZZZZZ|           PZR      >
+<      =xu:           [g  g[           *xw*            *xw*            *xw*            *xw*           ,ozo,                          g|byw?            ax(uf           ax(uf           ax(uf           ax(uf            4Z0            Z#uzf            4Z0      >
+<                                                                                                                                                                                                                     (2ZU~            Z[             (2ZU~      >
+<                                                                                                                                                                                                                     KXGd             Z[             KXGd       >
+)QUOTE";
+

+ 9 - 0
Source/DFPSR/gui/includeGui.h

@@ -0,0 +1,9 @@
+
+// Header for including the most commonly needed parts of the framework
+
+#include "VisualTheme.h"
+#include "VisualComponent.h"
+#include "BackendWindow.h"
+#include "DsrWindow.h"
+#include "components/Panel.h"
+#include "components/Button.h"

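An application would normally pull in all of these GUI headers through this one file; a minimal sketch, assuming a hypothetical include path that depends on where the application source sits relative to the Source directory:

// Hypothetical application file; adjust the relative path to your project layout.
#include "DFPSR/gui/includeGui.h" // brings in VisualTheme, VisualComponent, BackendWindow, DsrWindow, Panel and Button
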
+ 104 - 0
Source/DFPSR/image/Color.cpp

@@ -0,0 +1,104 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "Color.h"
+
+using namespace dsr;
+
+ColorRgbI32 ColorRgbI32::saturate() const {
+	int32_t red = this->red;
+	int32_t green = this->green;
+	int32_t blue = this->blue;
+	if (red < 0) { red = 0; }
+	if (red > 255) { red = 255; }
+	if (green < 0) { green = 0; }
+	if (green > 255) { green = 255; }
+	if (blue < 0) { blue = 0; }
+	if (blue > 255) { blue = 255; }
+	return ColorRgbI32(red, green, blue);
+}
+ColorRgbI32 ColorRgbI32::mix(const ColorRgbI32& colorA, const ColorRgbI32& colorB, float weight) {
+	float invWeight = 1.0f - weight;
+	return (colorA * invWeight) + (colorB * weight);
+}
+ColorRgbI32::ColorRgbI32(const ReadableString &content) : red(0), green(0), blue(0) {
+	List<ReadableString> elements = content.split(U',');
+	int givenChannels = elements.length();
+	if (givenChannels >= 1) {
+		this->red = string_parseInteger(elements[0]);
+		if (givenChannels >= 2) {
+			this->green = string_parseInteger(elements[1]);
+			if (givenChannels >= 3) {
+				this->blue = string_parseInteger(elements[2]);
+			}
+		}
+	}
+}
+ColorRgbaI32 ColorRgbaI32::saturate() const {
+	int32_t red = this->red;
+	int32_t green = this->green;
+	int32_t blue = this->blue;
+	int32_t alpha = this->alpha;
+	if (red < 0) { red = 0; }
+	if (red > 255) { red = 255; }
+	if (green < 0) { green = 0; }
+	if (green > 255) { green = 255; }
+	if (blue < 0) { blue = 0; }
+	if (blue > 255) { blue = 255; }
+	if (alpha < 0) { alpha = 0; }
+	if (alpha > 255) { alpha = 255; }
+	return ColorRgbaI32(red, green, blue, alpha);
+}
+ColorRgbaI32 ColorRgbaI32::mix(const ColorRgbaI32& colorA, const ColorRgbaI32& colorB, float weight) {
+	float invWeight = 1.0f - weight;
+	return (colorA * invWeight) + (colorB * weight);
+}
+ColorRgbaI32::ColorRgbaI32(const ReadableString &content) : red(0), green(0), blue(0), alpha(255) {
+	List<ReadableString> elements = content.split(U',');
+	int givenChannels = elements.length();
+	if (givenChannels >= 1) {
+		this->red = string_parseInteger(elements[0]);
+		if (givenChannels >= 2) {
+			this->green = string_parseInteger(elements[1]);
+			if (givenChannels >= 3) {
+				this->blue = string_parseInteger(elements[2]);
+				if (givenChannels >= 4) {
+					this->alpha = string_parseInteger(elements[3]);
+				}
+			}
+		}
+	}
+}
+
+String& dsr::string_toStreamIndented(String& target, const ColorRgbI32& source, const ReadableString& indentation) {
+	string_append(target, indentation, source.red, U",", source.green, U",", source.blue);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const ColorRgbaI32& source, const ReadableString& indentation) {
+	string_append(target, indentation, source.red, U",", source.green, U",", source.blue, U",", source.alpha);
+	return target;
+}
+String& dsr::string_toStreamIndented(String& target, const Color4xU8& source, const ReadableString& indentation) {
+	string_append(target, indentation, source.channels[0], U",", source.channels[1], U",", source.channels[2], U",", source.channels[3]);
+	return target;
+}

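A minimal usage sketch of the color types above (hypothetical example, not taken from the commit; it only relies on the constructors, operators and methods shown in Color.cpp and Color.h):

using namespace dsr;                    // the color types live in the dsr namespace

ColorRgbaI32 warm(255, 96, 0, 255);
ColorRgbaI32 cold(0, 96, 255, 255);
ColorRgbaI32 blended = ColorRgbaI32::mix(warm, cold, 0.25f); // 75% warm, 25% cold
ColorRgbaI32 doubled = warm * 2;        // channels may now exceed 255
ColorRgbaI32 safe = doubled.saturate(); // clamped back into the 0..255 range

The string constructor parses comma-separated channels in the same order, and any channel left out keeps its default, so a three-value string such as "128,64,32" leaves alpha at 255.
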
+ 120 - 0
Source/DFPSR/image/Color.h

@@ -0,0 +1,120 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_COLOR
+#define DFPSR_IMAGE_COLOR
+
+#include <stdint.h>
+#include "../base/text.h"
+
+namespace dsr {
+
+// RGB color with 32 bits per channel
+//   Values outside of the 0..255 byte range may cause unexpected behaviour
+struct ColorRgbI32 {
+	int32_t red, green, blue;
+	ColorRgbI32() : red(0), green(0), blue(0) {}
+	explicit ColorRgbI32(int32_t uniform) : red(uniform), green(uniform), blue(uniform) {}
+	ColorRgbI32(int32_t red, int32_t green, int32_t blue) : red(red), green(green), blue(blue) {}
+	// Clamp to the valid range
+	ColorRgbI32 saturate() const;
+	static ColorRgbI32 mix(const ColorRgbI32& colorA, const ColorRgbI32& colorB, float weight);
+	// Create a color from a string
+	explicit ColorRgbI32(const ReadableString &content);
+};
+inline ColorRgbI32 operator*(const ColorRgbI32& left, float right) {
+	return ColorRgbI32((float)left.red * right, (float)left.green * right, (float)left.blue * right);
+}
+inline ColorRgbI32 operator*(const ColorRgbI32& left, int32_t right) {
+	return ColorRgbI32(left.red * right, left.green * right, left.blue * right);
+}
+inline ColorRgbI32 operator+(const ColorRgbI32& left, const ColorRgbI32& right) {
+	return ColorRgbI32(left.red + right.red, left.green + right.green, left.blue + right.blue);
+}
+inline bool operator== (const ColorRgbI32& a, const ColorRgbI32& b) {
+	return a.red == b.red && a.green == b.green && a.blue == b.blue;
+}
+inline bool operator!= (const ColorRgbI32& a, const ColorRgbI32& b) {
+	return !(a == b);
+}
+
+// RGBA color with 32 bits per channel
+//   Values outside of the 0..255 byte range may cause unexpected behaviour
+struct ColorRgbaI32 {
+	int32_t red, green, blue, alpha;
+	ColorRgbaI32() : red(0), green(0), blue(0), alpha(0) {}
+	ColorRgbaI32(ColorRgbI32 rgb, int32_t alpha) : red(rgb.red), green(rgb.green), blue(rgb.blue), alpha(alpha) {}
+	explicit ColorRgbaI32(int32_t uniform) : red(uniform), green(uniform), blue(uniform), alpha(uniform) {}
+	ColorRgbaI32(int32_t red, int32_t green, int32_t blue, int32_t alpha) : red(red), green(green), blue(blue), alpha(alpha) {}
+	// Clamp to the valid range
+	ColorRgbaI32 saturate() const;
+	static ColorRgbaI32 mix(const ColorRgbaI32& colorA, const ColorRgbaI32& colorB, float weight);
+	// Create a color from a string
+	explicit ColorRgbaI32(const ReadableString &content);
+};
+inline ColorRgbaI32 operator*(const ColorRgbaI32& left, float right) {
+	return ColorRgbaI32((float)left.red * right, (float)left.green * right, (float)left.blue * right, (float)left.alpha * right);
+}
+inline ColorRgbaI32 operator*(const ColorRgbaI32& left, int32_t right) {
+	return ColorRgbaI32(left.red * right, left.green * right, left.blue * right, left.alpha * right);
+}
+inline ColorRgbaI32 operator+(const ColorRgbaI32& left, const ColorRgbaI32& right) {
+	return ColorRgbaI32(left.red + right.red, left.green + right.green, left.blue + right.blue, left.alpha + right.alpha);
+}
+inline bool operator== (const ColorRgbaI32& a, const ColorRgbaI32& b) {
+	return a.red == b.red && a.green == b.green && a.blue == b.blue && a.alpha == b.alpha;
+}
+inline bool operator!= (const ColorRgbaI32& a, const ColorRgbaI32& b) {
+	return !(a == b);
+}
+
+// TODO: Can this type be hidden from the external API?
+// RGBA color in arbitrary pack order for speed
+// Use ImageRgbaU8Impl::packRgba to construct for a specific pack order
+union Color4xU8 {
+	uint32_t packed;
+	uint8_t channels[4];
+	Color4xU8() : packed(0) {}
+	explicit Color4xU8(uint32_t packed) : packed(packed) {}
+	Color4xU8(uint8_t first, uint8_t second, uint8_t third, uint8_t fourth) : channels{first, second, third, fourth} {}
+	bool isUniformByte() {
+		int first = this->channels[0];
+		return this->channels[1] == first && this->channels[2] == first && this->channels[3] == first;
+	}
+};
+inline bool operator== (const Color4xU8& a, const Color4xU8& b) {
+	return a.packed == b.packed;
+}
+inline bool operator!= (const Color4xU8& a, const Color4xU8& b) {
+	return !(a == b);
+}
+
+// Serialization
+String& string_toStreamIndented(String& target, const ColorRgbI32& source, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const ColorRgbaI32& source, const ReadableString& indentation);
+String& string_toStreamIndented(String& target, const Color4xU8& source, const ReadableString& indentation);
+
+}
+
+#endif
+
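
A short sketch of what the Color4xU8 union above provides: the same four bytes seen either as one packed 32-bit word or as a channel array, which is what isUniformByte() and the memset-based fills rely on (illustration only, not part of the commit; the meaning of each byte position depends on the pack order used to build the value):

#include "Source/DFPSR/image/Color.h"
using namespace dsr;

int main() {
	// Four equal bytes: the packed word is one repeated byte, so a fill can use memset.
	Color4xU8 gray(128, 128, 128, 128);
	// Differing bytes: the word cannot be expressed as a single repeated byte.
	Color4xU8 mixed(10, 20, 30, 40);
	return (gray.isUniformByte() && !mixed.isUniformByte()) ? 0 : 1;
}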

+ 36 - 0
Source/DFPSR/image/Image.cpp

@@ -0,0 +1,36 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "Image.h"
+
+using namespace dsr;
+
+ImageImpl::ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, std::shared_ptr<Buffer> buffer, intptr_t startOffset) :
+  width(width), height(height), stride(stride), pixelSize(pixelSize), buffer(buffer), startOffset(startOffset), isSubImage(true) {
+	this->validate();
+}
+
+ImageImpl::ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize) :
+  width(width), height(height), stride(stride), pixelSize(pixelSize), buffer(Buffer::create(stride * height)), startOffset(0), isSubImage(false) {
+	this->validate();
+}

+ 70 - 0
Source/DFPSR/image/Image.h

@@ -0,0 +1,70 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE
+#define DFPSR_IMAGE
+
+#include <cassert>
+#include <stdint.h>
+#include "../base/SafePointer.h"
+#include "../base/Buffer.h"
+#include "../math/scalar.h"
+#include "../math/IRect.h"
+#include "PackOrder.h"
+
+namespace dsr {
+
+// See imageAPI.h for public methods
+// See imageInternal.h for protected methods
+class ImageImpl {
+public:
+	const int32_t width, height, stride, pixelSize;
+	std::shared_ptr<Buffer> buffer; // Content
+	const intptr_t startOffset; // Byte offset of the first pixel
+	bool isSubImage = false;
+private:
+	void validate() {
+		// Preconditions:
+		assert(this->width > 0);
+		assert(this->height > 0);
+		assert(this->stride >= this->width * this->pixelSize);
+		assert(this->pixelSize > 0);
+		// TODO: Assert that the buffer is large enough to fit padding after each row
+	}
+public:
+	// Sub-images
+	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize, std::shared_ptr<Buffer> buffer, intptr_t startOffset);
+	// New images
+	ImageImpl(int32_t width, int32_t height, int32_t stride, int32_t pixelSize);
+};
+
+#define IMAGE_DECLARATION(IMAGE_TYPE,CHANNELS,COLOR_TYPE,ELEMENT_TYPE) \
+	static void writePixel(IMAGE_TYPE &image, int32_t x, int32_t y, COLOR_TYPE color); \
+	static void writePixel_unsafe(IMAGE_TYPE &image, int32_t x, int32_t y, COLOR_TYPE color); \
+	static COLOR_TYPE readPixel_clamp(const IMAGE_TYPE &image, int32_t x, int32_t y); \
+	static COLOR_TYPE readPixel_unsafe(const IMAGE_TYPE &image, int32_t x, int32_t y);
+
+}
+
+#endif
+
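
The width, height, stride, pixelSize and startOffset members above fully determine where a pixel lives inside the shared buffer. A hedged sketch of the addressing convention they imply (editor's illustration; pixelOffset is a hypothetical helper, not a function from this header):

#include <cstdint>

// Byte offset of pixel (x, y): rows are stride bytes apart (possibly padded),
// pixels within a row are pixelSize bytes apart, and the image may start at a
// non-zero offset when it is a sub-image inside a larger buffer.
static intptr_t pixelOffset(intptr_t startOffset, int32_t stride, int32_t pixelSize, int32_t x, int32_t y) {
	return startOffset + (intptr_t)y * stride + (intptr_t)x * pixelSize;
}

int main() {
	// A 100x50 RGBA image whose 400-byte rows are padded to 416 bytes:
	// the last pixel (99, 49) starts at 49 * 416 + 99 * 4 = 20780 bytes in.
	return pixelOffset(0, 416, 4, 99, 49) == 20780 ? 0 : 1;
}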

+ 39 - 0
Source/DFPSR/image/ImageF32.cpp

@@ -0,0 +1,39 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "ImageF32.h"
+#include "internal/imageInternal.h"
+#include "internal/imageTemplate.h"
+
+using namespace dsr;
+
+ImageF32Impl::ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, std::shared_ptr<Buffer> buffer, intptr_t startOffset) :
+  ImageImpl(newWidth, newHeight, newStride, sizeof(float), buffer, startOffset) {
+	assert(buffer->size - startOffset >= imageInternal::getUsedBytes(this));
+}
+
+ImageF32Impl::ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(float), alignment), sizeof(float)) {
+}
+
+IMAGE_DEFINITION(ImageF32Impl, 1, float, float);

+ 46 - 0
Source/DFPSR/image/ImageF32.h

@@ -0,0 +1,46 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_F32
+#define DFPSR_IMAGE_F32
+
+#include "Image.h"
+
+namespace dsr {
+
+class ImageF32Impl : public ImageImpl {
+public:
+	static const int32_t channelCount = 1;
+	static const int32_t pixelSize = 4;
+	// Inherit constructors
+	using ImageImpl::ImageImpl;
+	ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, std::shared_ptr<Buffer> buffer, intptr_t startOffset);
+	ImageF32Impl(int32_t newWidth, int32_t newHeight, int32_t alignment = 16);
+	// Macro defined functions
+	IMAGE_DECLARATION(ImageF32Impl, 1, float, float);
+};
+
+}
+
+#endif
+
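
The alignment argument above controls row padding: ImageF32.cpp rounds width * sizeof(float) up to the next multiple of the alignment so that rows can start on SIMD-friendly boundaries. A small worked sketch of that arithmetic (editor's illustration; roundUpBytes is a local stand-in for the roundUp helper from ../math/scalar.h used in the .cpp file):

#include <cstdint>

// Stand-in for roundUp: the smallest multiple of alignment that is >= size.
static int32_t roundUpBytes(int32_t size, int32_t alignment) {
	return ((size + alignment - 1) / alignment) * alignment;
}

int main() {
	// A 5-pixel-wide float image: 5 * 4 = 20 payload bytes per row,
	// padded to 32 bytes so that every row starts 16-byte aligned.
	return roundUpBytes(5 * 4, 16) == 32 ? 0 : 1;
}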

+ 51 - 0
Source/DFPSR/image/ImageLoader.h

@@ -0,0 +1,51 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_LOADER
+#define DFPSR_IMAGE_LOADER
+
+#include "ImageRgbaU8.h"
+#include "../base/text.h"
+#include <stdio.h>
+
+namespace dsr {
+
+// When you want to load an image and be able to edit the content,
+// the image loader can be called directly instead of using the
+// resource pool where everything has to be write-protected for reuse.
+class ImageLoader {
+public:
+	// Load an image from a file. PNG support is a minimum requirement.
+	virtual ImageRgbaU8Impl loadAsRgba(const String& filename) const = 0;
+	// Save an image in the PNG format with the given filename.
+	// Returns true on success and false on failure.
+	virtual bool saveAsPng(const ImageRgbaU8Impl &image, const String& filename) const {
+		printText("saveAsPng is not yet implemented in the image loader!");
+		return false;
+	}
+};
+
+}
+
+#endif
+
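
Since ImageLoader is an abstract interface, a backend only has to override loadAsRgba (saveAsPng already has a fallback). A hedged sketch of the shape such a subclass takes (editor's illustration; DummyImageLoader is hypothetical and returns a placeholder instead of decoding anything):

#include "Source/DFPSR/image/ImageLoader.h"
using namespace dsr;

// Hypothetical loader: a real backend would decode the file here (PNG at minimum).
class DummyImageLoader : public ImageLoader {
public:
	ImageRgbaU8Impl loadAsRgba(const String& filename) const override {
		// The filename is ignored in this sketch; return a 1x1 placeholder image.
		return ImageRgbaU8Impl(1, 1);
	}
};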

+ 293 - 0
Source/DFPSR/image/ImageRgbaU8.cpp

@@ -0,0 +1,293 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "ImageRgbaU8.h"
+#include "internal/imageInternal.h"
+#include "internal/imageTemplate.h"
+#include <algorithm>
+
+using namespace dsr;
+
+IMAGE_DEFINITION(ImageRgbaU8Impl, 4, Color4xU8, uint8_t);
+
+ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, std::shared_ptr<Buffer> buffer, intptr_t startOffset, PackOrder packOrder) :
+  ImageImpl(newWidth, newHeight, newStride, sizeof(Color4xU8), buffer, startOffset), packOrder(packOrder) {
+	assert(buffer->size - startOffset >= imageInternal::getUsedBytes(this));
+	this->initializeRgbaImage();
+}
+
+ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), alignment), sizeof(Color4xU8)) {
+	this->initializeRgbaImage();
+}
+
+// Native canvas constructor
+ImageRgbaU8Impl::ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, PackOrderIndex packOrderIndex) :
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(Color4xU8), 16), sizeof(Color4xU8)) {
+	this->packOrder = PackOrder::getPackOrder(packOrderIndex);
+	this->initializeRgbaImage();
+}
+
+bool ImageRgbaU8Impl::isTexture() const {
+	return this->texture.exists();
+}
+
+bool ImageRgbaU8Impl::isTexture(const ImageRgbaU8Impl* image) {
+	return image ? image->texture.exists() : false;
+}
+
+ImageRgbaU8Impl ImageRgbaU8Impl::getWithoutPadding() const {
+	if (this->stride == this->width * this->pixelSize) {
+		// No padding
+		return *this;
+	} else {
+		// Copy each row without padding
+		ImageRgbaU8Impl result(this->width, this->height, 1);
+		const SafePointer<uint8_t> sourceRow = imageInternal::getSafeData<uint8_t>(*this);
+		int32_t sourceStride = this->stride;
+		SafePointer<uint8_t> targetRow = imageInternal::getSafeData<uint8_t>(result);
+		int32_t targetStride = result.stride;
+		for (int32_t y = 0; y < this->height; y++) {
+			safeMemoryCopy(targetRow, sourceRow, targetStride);
+			sourceRow += sourceStride;
+			targetRow += targetStride;
+		}
+		return result;
+	}
+}
+
+static void extractChannel(SafePointer<uint8_t> targetData, int targetStride, const SafePointer<uint8_t> sourceData, int sourceStride, int sourceChannels, int channelIndex, int width, int height) {
+	const SafePointer<uint8_t> sourceRow = sourceData + channelIndex;
+	SafePointer<uint8_t> targetRow = targetData;
+	for (int y = 0; y < height; y++) {
+		const SafePointer<uint8_t> sourceElement = sourceRow;
+		SafePointer<uint8_t> targetElement = targetRow;
+		for (int x = 0; x < width; x++) {
+			*targetElement = *sourceElement; // Copy one channel from the source
+			sourceElement += sourceChannels; // Jump to the same channel in the next source pixel
+			targetElement += 1; // Jump to the next monochrome target pixel
+		}
+		sourceRow.increaseBytes(sourceStride);
+		targetRow.increaseBytes(targetStride);
+	}
+}
+
+ImageU8Impl ImageRgbaU8Impl::getChannel(int32_t channelIndex) const {
+	// Warning for debug mode
+	assert(channelIndex >= 0 && channelIndex < channelCount);
+	// Safety for release mode
+	if (channelIndex < 0) { channelIndex = 0; }
+	if (channelIndex >= channelCount) { channelIndex = channelCount - 1; }
+	ImageU8Impl result(this->width, this->height);
+	extractChannel(imageInternal::getSafeData<uint8_t>(result), result.stride, imageInternal::getSafeData<uint8_t>(*this), this->stride, channelCount, channelIndex, this->width, this->height);
+	return result;
+}
+
+static int32_t getSizeGroup(int32_t size) {
+	int32_t group = -1;
+	if (size == 1) {
+		group = 0; // Too small for 16-byte alignment!
+	} else if (size == 2) {
+		group = 1; // Too small for 16-byte alignment!
+	} else if (size == 4) {
+		group = 2;
+	} else if (size == 8) {
+		group = 3;
+	} else if (size == 16) {
+		group = 4;
+	} else if (size == 32) {
+		group = 5;
+	} else if (size == 64) {
+		group = 6;
+	} else if (size == 128) {
+		group = 7;
+	} else if (size == 256) {
+		group = 8;
+	} else if (size == 512) {
+		group = 9;
+	} else if (size == 1024) {
+		group = 10;
+	} else if (size == 2048) {
+		group = 11;
+	} else if (size == 4096) {
+		group = 12;
+	} else if (size == 8192) {
+		group = 13;
+	} else if (size == 16384) {
+		group = 14; // Not recommended to use!
+	} else if (size == 32768) {
+		group = 15; // Exceeding the address space of 32-bit pointers!
+	}
+	return group;
+}
+
+static int32_t getPyramidSize(int32_t width, int32_t height, int32_t pixelSize, int32_t levels) {
+	uint32_t result = 0;
+	uint32_t byteCount = width * height * pixelSize;
+	for (int32_t l = 0; l < levels; l++) {
+		result += byteCount; // Add image size to pyramid size
+		byteCount = byteCount >> 2; // Divide size by 4
+	}
+	return (int32_t)result;
+}
+
+static void downScaleByTwo(SafePointer<uint8_t> targetData, const SafePointer<uint8_t> sourceData, int32_t targetWidth, int32_t targetHeight, int32_t pixelSize, int32_t targetStride) {
+	int32_t sourceStride = targetStride * 2;
+	int32_t doubleSourceStride = sourceStride * 2;
+	SafePointer<uint8_t> targetRow = targetData;
+	const SafePointer<uint8_t> sourceRow = sourceData;
+	for (int32_t y = 0; y < targetHeight; y++) {
+		const SafePointer<uint8_t> sourcePixel = sourceRow;
+		SafePointer<uint8_t> targetPixel = targetRow;
+		for (int32_t x = 0; x < targetWidth; x++) {
+			// TODO: Use pairwise and vector average functions for fixed channel counts (SSE has _mm_avg_epu8 for vector average)
+			for (int32_t c = 0; c < pixelSize; c++) {
+				uint8_t value = (uint8_t)((
+				    (uint16_t)(*sourcePixel)
+				  + (uint16_t)(*(sourcePixel + pixelSize))
+				  + (uint16_t)(*(sourcePixel + sourceStride))
+				  + (uint16_t)(*(sourcePixel + sourceStride + pixelSize))) / 4);
+				*targetPixel = value;
+				targetPixel += 1;
+				sourcePixel += 1;
+			}
+			sourcePixel += pixelSize;
+		}
+		targetRow += targetStride;
+		sourceRow += doubleSourceStride;
+	}
+}
+
+TextureRgbaLayer::TextureRgbaLayer() {}
+
+TextureRgbaLayer::TextureRgbaLayer(const uint8_t *data, int32_t width, int32_t height) :
+  data(data),
+  strideShift(getSizeGroup(width) + 2),
+  widthMask(width - 1),
+  heightMask(height - 1),
+  width(width),
+  height(height),
+  subWidth(width * 256),
+  subHeight(height * 256),
+  halfPixelOffsetU(1.0f - (0.5f / width)),
+  halfPixelOffsetV(1.0f - (0.5f / height)) {}
+
+void ImageRgbaU8Impl::generatePyramid() {
+	if (!this->isTexture()) {
+		if (this->width < 4 || this->height < 4) {
+			printText("Cannot generate a pyramid from an image smaller than 4x4 pixels.\n");
+		} else if (this->width > 32768 || this->height > 32768) {
+			printText("Cannot generate a pyramid from an image larger than 32768x32768 pixels.\n");
+		} else if (getSizeGroup(this->width) == -1 || getSizeGroup(this->height) == -1) {
+			printText("Cannot generate a pyramid from image dimensions that are not powers of two.\n");
+		} else if (this->stride > this->width * pixelSize) {
+			printText("Cannot generate a pyramid from an image that contains padding.\n");
+		} else if (this->stride < this->width * pixelSize) {
+			printText("Cannot generate a pyramid from an image with corrupted stride.\n");
+		} else {
+			printText("Cannot generate a pyramid from an image that has not been initialized correctly.\n");
+		}
+	} else {
+		int32_t pixelSize = this->pixelSize;
+		int32_t mipmaps = std::min(std::max(getSizeGroup(std::min(this->width, this->height)) - 1, 1), MIP_BIN_COUNT);
+		if (!this->texture.hasMipBuffer()) {
+			this->texture.pyramidBuffer = Buffer::create(getPyramidSize(this->width / 2, this->height / 2, pixelSize, mipmaps - 1));
+		}
+		// Point to the image's original buffer in mip level 0
+		SafePointer<uint8_t> currentStart = imageInternal::getSafeData<uint8_t>(*this);
+		int32_t currentWidth = this->width;
+		int32_t currentHeight = this->height;
+		this->texture.mips[0] = TextureRgbaLayer(currentStart.getUnsafe(), currentWidth, currentHeight);
+		// Create smaller pyramid images in the extra buffer
+		SafePointer<uint8_t> previousStart = currentStart;
+		currentStart = this->texture.pyramidBuffer->getSafeData<uint8_t>("Pyramid generation target");
+		for (int32_t m = 1; m < mipmaps; m++) {
+			currentWidth /= 2;
+			currentHeight /= 2;
+			this->texture.mips[m] = TextureRgbaLayer(currentStart.getUnsafe(), currentWidth, currentHeight);
+			int32_t size = currentWidth * currentHeight * pixelSize;
+			// Downscale the previous level by two into the current level of the pyramid buffer.
+			downScaleByTwo(currentStart, previousStart, currentWidth, currentHeight, pixelSize, currentWidth * pixelSize);
+			previousStart = currentStart;
+			currentStart.increaseBytes(size);
+		}
+		// Fill unused mip levels with duplicates of the last mip level
+		for (int32_t m = mipmaps; m < MIP_BIN_COUNT; m++) {
+			this->texture.mips[m] = this->texture.mips[m - 1];
+		}
+	}
+}
+
+void ImageRgbaU8Impl::initializeRgbaImage() {
+	// If the image meets the criteria for being used as a texture
+	if (getSizeGroup(this->width) >= 2
+	 && getSizeGroup(this->height) >= 2
+	 && this->stride == this->width * this->pixelSize) {
+		// Initialize each mip bin to show the original image
+		for (int32_t m = 0; m < MIP_BIN_COUNT; m++) {
+			this->texture.mips[m] = TextureRgbaLayer(imageInternal::getSafeData<uint8_t>(*this).getUnsafe(), this->width, this->height);
+		}
+	}
+};
+
+Color4xU8 ImageRgbaU8Impl::packRgba(uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha) const {
+	return Color4xU8(this->packOrder.packRgba(red, green, blue, alpha));
+}
+
+Color4xU8 ImageRgbaU8Impl::packRgba(ColorRgbaI32 color) const {
+	return Color4xU8(this->packOrder.packRgba(color.red, color.green, color.blue, color.alpha));
+}
+
+ColorRgbaI32 ImageRgbaU8Impl::unpackRgba(Color4xU8 rgba, const PackOrder& order) {
+	return ColorRgbaI32(
+	  getRed(rgba.packed, order),
+	  getGreen(rgba.packed, order),
+	  getBlue(rgba.packed, order),
+	  getAlpha(rgba.packed, order)
+	);
+}
+
+ColorRgbaI32 ImageRgbaU8Impl::unpackRgba(Color4xU8 rgba) const {
+	return unpackRgba(rgba, this->packOrder);
+}
+
+Color4xU8 ImageRgbaU8Impl::packRgb(uint8_t red, uint8_t green, uint8_t blue) const {
+	return Color4xU8(this->packOrder.packRgba(red, green, blue, 255));
+}
+
+Color4xU8 ImageRgbaU8Impl::packRgb(ColorRgbI32 color) const {
+	return Color4xU8(this->packOrder.packRgba(color.red, color.green, color.blue, 255));
+}
+
+ColorRgbI32 ImageRgbaU8Impl::unpackRgb(Color4xU8 rgb, const PackOrder& order) {
+	return ColorRgbI32(
+	  getRed(rgb.packed, order),
+	  getGreen(rgb.packed, order),
+	  getBlue(rgb.packed, order)
+	);
+}
+
+ColorRgbI32 ImageRgbaU8Impl::unpackRgb(Color4xU8 rgb) const {
+	return unpackRgb(rgb, this->packOrder);
+}
+
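
A worked sketch of the mip pyramid arithmetic used by generatePyramid() above, for a 256x64 texture (editor's illustration; the constants mirror getSizeGroup, MIP_BIN_COUNT and getPyramidSize as defined in this hunk):

#include <algorithm>
#include <cstdint>

int main() {
	// The smaller side (64) limits the pyramid depth: getSizeGroup(64) = 6, MIP_BIN_COUNT = 5.
	int32_t sizeGroup = 6;
	int32_t mipBinCount = 5;
	int32_t mipmaps = std::min(std::max(sizeGroup - 1, 1), mipBinCount); // 5 levels in total
	// The extra pyramid buffer holds levels 1..4, each a quarter of the previous level:
	// (128*32 + 64*16 + 32*8 + 16*4) pixels * 4 bytes per pixel = 21760 bytes.
	int32_t pyramidBytes = (128 * 32 + 64 * 16 + 32 * 8 + 16 * 4) * 4;
	return (mipmaps == 5 && pyramidBytes == 21760) ? 0 : 1;
}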

+ 98 - 0
Source/DFPSR/image/ImageRgbaU8.h

@@ -0,0 +1,98 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_RGBA_U8
+#define DFPSR_IMAGE_RGBA_U8
+
+#include "Color.h"
+#include "Image.h"
+#include "ImageU8.h"
+
+namespace dsr {
+
+// Pointing to the parent image using raw pointers for fast rendering. Must not outlive the parent image!
+struct TextureRgbaLayer {
+	const uint8_t *data = 0;
+	int32_t strideShift = 0;
+	uint32_t widthMask = 0, heightMask = 0;
+	int32_t width = 0, height = 0;
+	float subWidth = 0.0f, subHeight = 0.0f; // TODO: Better names?
+	float halfPixelOffsetU = 0.0f, halfPixelOffsetV = 0.0f;
+	TextureRgbaLayer();
+	TextureRgbaLayer(const uint8_t *data, int32_t width, int32_t height);
+	// Can it be sampled as a texture
+	bool exists() const { return this->data != nullptr; }
+};
+
+#define MIP_BIN_COUNT 5
+
+// Pointing to the parent image using raw pointers for fast rendering. Must not be separated from the image!
+struct TextureRgba {
+	std::shared_ptr<Buffer> pyramidBuffer; // Storing the smaller mip levels
+	TextureRgbaLayer mips[MIP_BIN_COUNT]; // Pointing to all mip levels including the original image
+	// Can it be sampled as a texture
+	bool exists() const { return this->mips[0].exists(); }
+	// Does it have a mip pyramid generated for smoother sampling
+	bool hasMipBuffer() const { return this->pyramidBuffer.get() != nullptr; }
+};
+
+class ImageRgbaU8Impl : public ImageImpl {
+public:
+	static const int32_t channelCount = 4;
+	static const int32_t pixelSize = channelCount;
+	PackOrder packOrder;
+	// Macro defined functions
+	IMAGE_DECLARATION(ImageRgbaU8Impl, 4, Color4xU8, uint8_t);
+	// Constructors
+	ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, std::shared_ptr<Buffer> buffer, intptr_t startOffset, PackOrder packOrder);
+	ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment = 16);
+	// Native canvas constructor
+	ImageRgbaU8Impl(int32_t newWidth, int32_t newHeight, PackOrderIndex packOrderIndex);
+	// Fast reading
+	TextureRgba texture; // The texture view
+	void initializeRgbaImage(); // Points to level 0 from all bins to allow rendering
+	void generatePyramid(); // Fills the following bins with smaller images
+	bool isTexture() const;
+	static bool isTexture(const ImageRgbaU8Impl* image); // Null cannot be sampled as a texture
+public:
+	// Conversion to monochrome by extracting a channel
+	ImageU8Impl getChannel(int32_t channelIndex) const;
+	// Clone the image without padding or return the same instance if there is no padding
+	// TODO: Return the unaligned image type, which is incompatible with SIMD operations
+	ImageRgbaU8Impl getWithoutPadding() const;
+	// Packs/unpacks the channels of an RGBA color in an unsigned 32-bit integer
+	Color4xU8 packRgba(uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha) const;
+	Color4xU8 packRgba(ColorRgbaI32 rgba) const;
+	static ColorRgbaI32 unpackRgba(Color4xU8 rgba, const PackOrder& order);
+	ColorRgbaI32 unpackRgba(Color4xU8 rgba) const;
+	// Packs/unpacks the channels of an RGB color in an unsigned 32-bit integer
+	Color4xU8 packRgb(uint8_t red, uint8_t green, uint8_t blue) const;
+	Color4xU8 packRgb(ColorRgbI32 rgb) const;
+	static ColorRgbI32 unpackRgb(Color4xU8 rgb, const PackOrder& order);
+	ColorRgbI32 unpackRgb(Color4xU8 rgb) const;
+};
+
+}
+
+#endif
+
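
TextureRgbaLayer above stores width - 1 and height - 1 as bit masks, which only works because textures are restricted to power-of-two dimensions: a bitwise AND then wraps any coordinate into range without a modulo. A hedged sketch of that wrap-around addressing (editor's illustration; the renderer's actual sampling code is not part of this diff):

#include <cstdint>

int main() {
	// A 256x64 texture layer: widthMask = 255, heightMask = 63.
	uint32_t widthMask = 256 - 1;
	uint32_t heightMask = 64 - 1;
	// Out-of-range coordinates wrap around: x = 260 samples column 4, y = 70 samples row 6.
	uint32_t wrappedX = 260u & widthMask;
	uint32_t wrappedY = 70u & heightMask;
	return (wrappedX == 4 && wrappedY == 6) ? 0 : 1;
}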

+ 39 - 0
Source/DFPSR/image/ImageU16.cpp

@@ -0,0 +1,39 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "ImageU16.h"
+#include "internal/imageInternal.h"
+#include "internal/imageTemplate.h"
+
+using namespace dsr;
+
+ImageU16Impl::ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, std::shared_ptr<Buffer> buffer, intptr_t startOffset) :
+  ImageImpl(newWidth, newHeight, newStride, sizeof(uint16_t), buffer, startOffset) {
+	assert(buffer->size - startOffset >= imageInternal::getUsedBytes(this));
+}
+
+ImageU16Impl::ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint16_t), alignment), sizeof(uint16_t)) {
+}
+
+IMAGE_DEFINITION(ImageU16Impl, 1, uint16_t, uint16_t);

+ 47 - 0
Source/DFPSR/image/ImageU16.h

@@ -0,0 +1,47 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_U16
+#define DFPSR_IMAGE_U16
+
+#include "Image.h"
+
+namespace dsr {
+
+// Warning! Reading or writing a 16-bit integer as a sequence of 8-bit integers may depend on endianness.
+class ImageU16Impl : public ImageImpl {
+public:
+	static const int32_t channelCount = 1;
+	static const int32_t pixelSize = 2;
+	// Inherit constructors
+	using ImageImpl::ImageImpl;
+	ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, std::shared_ptr<Buffer> buffer, intptr_t startOffset);
+	ImageU16Impl(int32_t newWidth, int32_t newHeight, int32_t alignment = 16);
+	// Macro defined functions
+	IMAGE_DECLARATION(ImageU16Impl, 1, uint16_t, uint16_t);
+};
+
+}
+
+#endif
+

+ 39 - 0
Source/DFPSR/image/ImageU8.cpp

@@ -0,0 +1,39 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "ImageU8.h"
+#include "internal/imageInternal.h"
+#include "internal/imageTemplate.h"
+
+using namespace dsr;
+
+ImageU8Impl::ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, std::shared_ptr<Buffer> buffer, intptr_t startOffset) :
+  ImageImpl(newWidth, newHeight, newStride, sizeof(uint8_t), buffer, startOffset) {
+	assert(buffer->size - startOffset >= imageInternal::getUsedBytes(this));
+}
+
+ImageU8Impl::ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment) :
+  ImageImpl(newWidth, newHeight, roundUp(newWidth * sizeof(uint8_t), alignment), sizeof(uint8_t)) {
+}
+
+IMAGE_DEFINITION(ImageU8Impl, 1, uint8_t, uint8_t);

+ 47 - 0
Source/DFPSR/image/ImageU8.h

@@ -0,0 +1,47 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_U8
+#define DFPSR_IMAGE_U8
+
+#include "Image.h"
+
+namespace dsr {
+
+class ImageU8Impl : public ImageImpl {
+public:
+	static const int32_t channelCount = 1;
+	static const int32_t pixelSize = channelCount;
+	// Inherit constructors
+	using ImageImpl::ImageImpl;
+	// Constructors
+	ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t newStride, std::shared_ptr<Buffer> buffer, intptr_t startOffset);
+	ImageU8Impl(int32_t newWidth, int32_t newHeight, int32_t alignment = 16);
+	// Macro defined functions
+	IMAGE_DECLARATION(ImageU8Impl, 1, uint8_t, uint8_t);
+};
+
+}
+
+#endif
+

+ 213 - 0
Source/DFPSR/image/PackOrder.h

@@ -0,0 +1,213 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_PACK_ORDER
+#define DFPSR_IMAGE_PACK_ORDER
+
+#include <stdint.h>
+#include "../api/types.h"
+#include "../base/simd.h"
+#include "../base/endian.h"
+#include "../base/text.h"
+
+namespace dsr {
+
+// See types.h for the definition of PackOrderIndex
+
+struct PackOrder {
+public:
+	// The index that it was constructed from
+	PackOrderIndex packOrderIndex;
+	// Byte array indices for each channel
+	// Indices are the locations of each color, not which color holds each location
+	//   Example:
+	//     The indices for ARGB are (1, 2, 3, 0)
+	//     Because red is second at byte[1], green is third at byte[2], blue is last at byte[3] and alpha is first at byte[0]
+	int redIndex, greenIndex, blueIndex, alphaIndex;
+	// Pre-multiplied bit offsets
+	int redOffset, greenOffset, blueOffset, alphaOffset;
+	uint32_t redMask, greenMask, blueMask, alphaMask;
+private:
+	PackOrder(PackOrderIndex packOrderIndex, int redIndex, int greenIndex, int blueIndex, int alphaIndex) :
+	  packOrderIndex(packOrderIndex),
+	  redIndex(redIndex), greenIndex(greenIndex), blueIndex(blueIndex), alphaIndex(alphaIndex),
+	  redOffset(redIndex * 8), greenOffset(greenIndex * 8), blueOffset(blueIndex * 8), alphaOffset(alphaIndex * 8),
+	  redMask(ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, this->redOffset)),
+	  greenMask(ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, this->greenOffset)),
+	  blueMask(ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, this->blueOffset)),
+	  alphaMask(ENDIAN_POS_ADDR(ENDIAN32_BYTE_0, this->alphaOffset)) {}
+public:
+	// Constructors
+	PackOrder() :
+	  packOrderIndex(PackOrderIndex::RGBA),
+	  redIndex(0), greenIndex(1), blueIndex(2), alphaIndex(3),
+	  redOffset(0), greenOffset(8), blueOffset(16), alphaOffset(24),
+	  redMask(ENDIAN32_BYTE_0), greenMask(ENDIAN32_BYTE_1), blueMask(ENDIAN32_BYTE_2), alphaMask(ENDIAN32_BYTE_3) {}
+	static PackOrder getPackOrder(PackOrderIndex index) {
+		if (index == PackOrderIndex::RGBA) {
+			return PackOrder(index, 0, 1, 2, 3);
+		} else if (index == PackOrderIndex::BGRA) {
+			return PackOrder(index, 2, 1, 0, 3);
+		} else if (index == PackOrderIndex::ARGB) {
+			return PackOrder(index, 1, 2, 3, 0);
+		} else if (index == PackOrderIndex::ABGR) {
+			return PackOrder(index, 3, 2, 1, 0);
+		} else {
+			printText("Warning! Unknown packing order index ", index, ". Falling back on RGBA.");
+			return PackOrder(index, 0, 1, 2, 3);
+		}
+	}
+	uint32_t packRgba(uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha) const {
+		uint32_t result;
+		uint8_t *channels = (uint8_t*)(&result);
+		channels[this->redIndex] = red;
+		channels[this->greenIndex] = green;
+		channels[this->blueIndex] = blue;
+		channels[this->alphaIndex] = alpha;
+		return result;
+	}
+};
+
+inline bool operator==(const PackOrder &left, const PackOrder &right) {
+	return left.packOrderIndex == right.packOrderIndex;
+}
+
+// Each input 32-bit element is from 0 to 255. Otherwise, the remainder will leak to other elements.
+inline static U32x4 packBytes(const U32x4 &s0, const U32x4 &s1, const U32x4 &s2) {
+	return s0 | ENDIAN_POS_ADDR(s1, 8) | ENDIAN_POS_ADDR(s2, 16);
+}
+// Using a specified packing order
+inline U32x4 packBytes(const U32x4 &s0, const U32x4 &s1, const U32x4 &s2, const PackOrder &order) {
+	return ENDIAN_POS_ADDR(s0, order.redOffset)
+	     | ENDIAN_POS_ADDR(s1, order.greenOffset)
+	     | ENDIAN_POS_ADDR(s2, order.blueOffset);
+}
+
+// Each input 32-bit element is from 0 to 255. Otherwise, the remainder will leak to other elements.
+inline static U32x4 packBytes(const U32x4 &s0, const U32x4 &s1, const U32x4 &s2, const U32x4 &s3) {
+	return s0 | ENDIAN_POS_ADDR(s1, 8) | ENDIAN_POS_ADDR(s2, 16) | ENDIAN_POS_ADDR(s3, 24);
+}
+// Using a specified packing order
+inline U32x4 packBytes(const U32x4 &s0, const U32x4 &s1, const U32x4 &s2, const U32x4 &s3, const PackOrder &order) {
+	return ENDIAN_POS_ADDR(s0, order.redOffset)
+	     | ENDIAN_POS_ADDR(s1, order.greenOffset)
+	     | ENDIAN_POS_ADDR(s2, order.blueOffset)
+	     | ENDIAN_POS_ADDR(s3, order.alphaOffset);
+}
+
+// Pack separate floats into saturated bytes
+inline static U32x4 floatToSaturatedByte(const F32x4 &s0, const F32x4 &s1, const F32x4 &s2, const F32x4 &s3) {
+	return packBytes(
+	  truncateToU32(s0.clamp(0.1f, 255.1f)),
+	  truncateToU32(s1.clamp(0.1f, 255.1f)),
+	  truncateToU32(s2.clamp(0.1f, 255.1f)),
+	  truncateToU32(s3.clamp(0.1f, 255.1f))
+	);
+}
+// Using a specified packing order
+inline U32x4 floatToSaturatedByte(const F32x4 &s0, const F32x4 &s1, const F32x4 &s2, const F32x4 &s3, const PackOrder &order) {
+	return packBytes(
+	  truncateToU32(s0.clamp(0.1f, 255.1f)),
+	  truncateToU32(s1.clamp(0.1f, 255.1f)),
+	  truncateToU32(s2.clamp(0.1f, 255.1f)),
+	  truncateToU32(s3.clamp(0.1f, 255.1f)),
+	  order
+	);
+}
+
+inline uint32_t getRed(uint32_t color) {
+	return color & ENDIAN32_BYTE_0;
+}
+inline uint32_t getRed(uint32_t color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.redMask, order.redOffset);
+}
+inline uint32_t getGreen(uint32_t color) {
+	return ENDIAN_NEG_ADDR(color & ENDIAN32_BYTE_1, 8);
+}
+inline uint32_t getGreen(uint32_t color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.greenMask, order.greenOffset);
+}
+inline uint32_t getBlue(uint32_t color) {
+	return ENDIAN_NEG_ADDR(color & ENDIAN32_BYTE_2, 16);
+}
+inline uint32_t getBlue(uint32_t color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.blueMask, order.blueOffset);
+}
+inline uint32_t getAlpha(uint32_t color) {
+	return ENDIAN_NEG_ADDR(color & ENDIAN32_BYTE_3, 24);
+}
+inline uint32_t getAlpha(uint32_t color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.alphaMask, order.alphaOffset);
+}
+
+inline U32x4 getRed(const U32x4 &color) {
+	return color & ENDIAN32_BYTE_0;
+}
+inline U32x4 getRed(const U32x4 &color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.redMask, order.redOffset);
+}
+inline U32x4 getGreen(const U32x4 &color) {
+	return ENDIAN_NEG_ADDR(color & ENDIAN32_BYTE_1, 8);
+}
+inline U32x4 getGreen(const U32x4 &color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.greenMask, order.greenOffset);
+}
+inline U32x4 getBlue(const U32x4 &color) {
+	return ENDIAN_NEG_ADDR(color & ENDIAN32_BYTE_2, 16);
+}
+inline U32x4 getBlue(const U32x4 &color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.blueMask, order.blueOffset);
+}
+inline U32x4 getAlpha(const U32x4 &color) {
+	return ENDIAN_NEG_ADDR(color & ENDIAN32_BYTE_3, 24);
+}
+inline U32x4 getAlpha(const U32x4 &color, const PackOrder &order) {
+	return ENDIAN_NEG_ADDR(color & order.alphaMask, order.alphaOffset);
+}
+
+inline String getName(PackOrderIndex index) {
+	if (index == PackOrderIndex::RGBA) {
+		return U"RGBA";
+	} else if (index == PackOrderIndex::BGRA) {
+		return U"BGRA";
+	} else if (index == PackOrderIndex::ARGB) {
+		return U"ARGB";
+	} else if (index == PackOrderIndex::ABGR) {
+		return U"ABGR";
+	} else {
+		return U"?";
+	}
+}
+inline String& string_toStreamIndented(String& target, const PackOrderIndex& source, const ReadableString& indentation) {
+	string_append(target, indentation, getName(source));
+	return target;
+}
+inline String& string_toStreamIndented(String& target, const PackOrder& source, const ReadableString& indentation) {
+	string_append(target, indentation, getName(source.packOrderIndex));
+	return target;
+}
+
+}
+
+#endif
+
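
A short sketch of what the pack order indices mean in practice: with PackOrderIndex::ARGB the indices are (1, 2, 3, 0), so packRgba places alpha in byte[0] and red in byte[1] of the packed word, and the getRed/getGreen/getBlue/getAlpha overloads that take a PackOrder recover the original channels regardless of byte layout (editor's illustration using only functions declared in this header; include path per the listing above):

#include "Source/DFPSR/image/PackOrder.h"
using namespace dsr;

int main() {
	PackOrder argb = PackOrder::getPackOrder(PackOrderIndex::ARGB);
	uint32_t packed = argb.packRgba(10, 20, 30, 40);
	// Unpacking through the same order gives back the channels that were packed in.
	bool ok = getRed(packed, argb) == 10
	       && getGreen(packed, argb) == 20
	       && getBlue(packed, argb) == 30
	       && getAlpha(packed, argb) == 40;
	return ok ? 0 : 1;
}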

+ 1226 - 0
Source/DFPSR/image/draw.cpp

@@ -0,0 +1,1226 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "../base/simdExtra.h"
+#include "draw.h"
+#include "internal/imageInternal.h"
+#include "../math/scalar.h"
+#include <limits>
+
+using namespace dsr;
+
+// -------------------------------- Drawing shapes --------------------------------
+
+template <typename COLOR_TYPE>
+static inline void drawSolidRectangleAssign(ImageImpl &target, int left, int top, int right, int bottom, COLOR_TYPE color) {
+	int leftBound = std::max(0, left);
+	int topBound = std::max(0, top);
+	int rightBound = std::min(right, target.width);
+	int bottomBound = std::min(bottom, target.height);
+	int stride = target.stride;
+	SafePointer<COLOR_TYPE> rowData = imageInternal::getSafeData<COLOR_TYPE>(target, topBound);
+	rowData += leftBound;
+	for (int y = topBound; y < bottomBound; y++) {
+		SafePointer<COLOR_TYPE> pixelData = rowData;
+		for (int x = leftBound; x < rightBound; x++) {
+			pixelData.get() = color;
+			pixelData += 1;
+		}
+		rowData.increaseBytes(stride);
+	}
+}
+
+template <typename COLOR_TYPE>
+static inline void drawSolidRectangleMemset(ImageImpl &target, int left, int top, int right, int bottom, uint8_t uniformByte) {
+	int leftBound = std::max(0, left);
+	int topBound = std::max(0, top);
+	int rightBound = std::min(right, target.width);
+	int bottomBound = std::min(bottom, target.height);
+	if (rightBound > leftBound && bottomBound > topBound) {
+		int stride = target.stride;
+		SafePointer<COLOR_TYPE> rowData = imageInternal::getSafeData<COLOR_TYPE>(target, topBound);
+		rowData += leftBound;
+		int filledWidth = rightBound - leftBound;
+		int rowSize = filledWidth * sizeof(COLOR_TYPE);
+		int rowCount = bottomBound - topBound;
+		if (!target.isSubImage && filledWidth == target.width) {
+			// Write over any padding for parent images that own the whole buffer.
+			// This includes parent images whose sub-images share the same data,
+			//   because no child image may display the parent image's padding bytes.
+			safeMemorySet(rowData, uniformByte, (stride * (rowCount - 1)) + rowSize);
+		} else if (rowSize == stride) {
+			// When the filled row stretches all the way from left to right in the main allocation,
+			//   there are no unseen pixels being overwritten in other images sharing the buffer.
+			// This case handles sub-images that use the full width of
+			//   a parent image that has no padding.
+			safeMemorySet(rowData, uniformByte, rowSize * rowCount);
+		} else {
+			// Fall back on using one memset operation per row.
+			// This case is for sub-images that must preserve interleaved pixel rows belonging
+			//   to other images; those rows are not visible from this image and therefore not owned by it.
+			for (int y = topBound; y < bottomBound; y++) {
+				safeMemorySet(rowData, uniformByte, rowSize);
+				rowData.increaseBytes(stride);
+			}
+		}
+	}
+}
+
+void dsr::imageImpl_draw_solidRectangle(ImageU8Impl& image, const IRect& bound, int color) {
+	if (color < 0) { color = 0; }
+	if (color > 255) { color = 255; }
+	drawSolidRectangleMemset<uint8_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), color);
+}
+
+void dsr::imageImpl_draw_solidRectangle(ImageU16Impl& image, const IRect& bound, int color) {
+	if (color < 0) { color = 0; }
+	if (color > 65535) { color = 65535; }
+	uint16_t uColor = color;
+	if (isUniformByteU16(uColor)) {
+		drawSolidRectangleMemset<uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), uColor & 0xFF);
+	} else {
+		drawSolidRectangleAssign<uint16_t>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), uColor);
+	}
+}
+
+void dsr::imageImpl_draw_solidRectangle(ImageF32Impl& image, const IRect& bound, float color) {
+	if (color == 0.0f) {
+		drawSolidRectangleMemset<float>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), 0);
+	} else {
+		drawSolidRectangleAssign<float>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), color);
+	}
+}
+
+void dsr::imageImpl_draw_solidRectangle(ImageRgbaU8Impl& image, const IRect& bound, const ColorRgbaI32& color) {
+	Color4xU8 packedColor = image.packRgba(color.saturate());
+	if (packedColor.isUniformByte()) {
+		drawSolidRectangleMemset<Color4xU8>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), packedColor.channels[0]);
+	} else {
+		drawSolidRectangleAssign<Color4xU8>(image, bound.left(), bound.top(), bound.right(), bound.bottom(), packedColor);
+	}
+}
+
+template <typename IMAGE_TYPE, typename COLOR_TYPE>
+inline void drawLineSuper(IMAGE_TYPE &target, int x1, int y1, int x2, int y2, COLOR_TYPE color) {
+	if (y1 == y2) {
+		// Sideways
+		int left = std::min(x1, x2);
+		int right = std::max(x1, x2);
+		for (int x = left; x <= right; x++) {
+			IMAGE_TYPE::writePixel(target, x, y1, color);
+		}
+	} else if (x1 == x2) {
+		// Down
+		int top = std::min(y1, y2);
+		int bottom = std::max(y1, y2);
+		for (int y = top; y <= bottom; y++) {
+			IMAGE_TYPE::writePixel(target, x1, y, color);
+		}
+	} else {
+		if (std::abs(y2 - y1) >= std::abs(x2 - x1)) {
+			if (y2 < y1) {
+				swap(x1, x2);
+				swap(y1, y2);
+			}
+			assert(y2 > y1);
+			if (x2 > x1) {
+				// Down right
+				int x = x1;
+				int y = y1;
+				int tilt = (x2 - x1) * 2;
+				int maxError = y2 - y1;
+				int error = 0;
+				while (y <= y2) {
+					IMAGE_TYPE::writePixel(target, x, y, color);
+					error += tilt;
+					if (error >= maxError) {
+						x++;
+						error -= maxError * 2;
+					}
+					y++;
+				}
+			} else {
+				// Down left
+				int x = x1;
+				int y = y1;
+				int tilt = (x1 - x2) * 2;
+				int maxError = y2 - y1;
+				int error = 0;
+				while (y <= y2) {
+					IMAGE_TYPE::writePixel(target, x, y, color);
+					error += tilt;
+					if (error >= maxError) {
+						x--;
+						error -= maxError * 2;
+					}
+					y++;
+				}
+			}
+		} else {
+			if (x2 < x1) {
+				swap(x1, x2);
+				swap(y1, y2);
+			}
+			assert(x2 > x1);
+			if (y2 > y1) {
+				// Down right
+				int x = x1;
+				int y = y1;
+				int tilt = (y2 - y1) * 2;
+				int maxError = x2 - x1;
+				int error = 0;
+				while (x <= x2) {
+					IMAGE_TYPE::writePixel(target, x, y, color);
+					error += tilt;
+					if (error >= maxError) {
+						y++;
+						error -= maxError * 2;
+					}
+					x++;
+				}
+			} else {
+				// Up right
+				int x = x1;
+				int y = y1;
+				int tilt = (y1 - y2) * 2;
+				int maxError = x2 - x1;
+				int error = 0;
+				while (x <= x2) {
+					IMAGE_TYPE::writePixel(target, x, y, color);
+					error += tilt;
+					if (error >= maxError) {
+						y--;
+						error -= maxError * 2;
+					}
+					x++;
+				}
+			}
+		}
+	}
+}
+
+void dsr::imageImpl_draw_line(ImageU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
+	if (color < 0) { color = 0; }
+	if (color > 255) { color = 255; }
+	drawLineSuper<ImageU8Impl, uint8_t>(image, x1, y1, x2, y2, color);
+}
+
+void dsr::imageImpl_draw_line(ImageU16Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color) {
+	if (color < 0) { color = 0; }
+	if (color > 65535) { color = 65535; }
+	drawLineSuper<ImageU16Impl, uint16_t>(image, x1, y1, x2, y2, color);
+}
+
+void dsr::imageImpl_draw_line(ImageF32Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color) {
+	drawLineSuper<ImageF32Impl, float>(image, x1, y1, x2, y2, color);
+}
+
+void dsr::imageImpl_draw_line(ImageRgbaU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color) {
+	drawLineSuper<ImageRgbaU8Impl, Color4xU8>(image, x1, y1, x2, y2, image.packRgba(color.saturate()));
+}
+
+// -------------------------------- Drawing images --------------------------------
+
+// A packet with the dimensions of an image
+struct ImageDimensions {
+	// width is the number of used pixels on each row.
+	// height is the number of rows.
+	// stride is the byte offset from one row to another including any padding.
+	// pixelSize is the byte offset from one pixel to another from left to right.
+	int32_t width, height, stride, pixelSize;
+	ImageDimensions() : width(0), height(0), stride(0), pixelSize(0) {}
+	ImageDimensions(const ImageImpl& image) :
+	  width(image.width), height(image.height), stride(image.stride), pixelSize(image.pixelSize) {}
+};
+
+struct ImageWriter : public ImageDimensions {
+	uint8_t *data;
+	ImageWriter(const ImageDimensions &dimensions, uint8_t *data) :
+	  ImageDimensions(dimensions), data(data) {}
+};
+
+struct ImageReader : public ImageDimensions {
+	const uint8_t *data;
+	ImageReader(const ImageDimensions &dimensions, const uint8_t *data) :
+	  ImageDimensions(dimensions), data(data) {}
+};
+
+static ImageWriter getWriter(ImageImpl &image) {
+	return ImageWriter(ImageDimensions(image), image.buffer->getUnsafeData() + image.startOffset);
+}
+
+static ImageReader getReader(const ImageImpl &image) {
+	return ImageReader(ImageDimensions(image), image.buffer->getUnsafeData() + image.startOffset);
+}
+
+static ImageImpl getGenericSubImage(const ImageImpl &image, int32_t left, int32_t top, int32_t width, int32_t height) {
+	assert(left >= 0 && top >= 0 && width >= 1 && height >= 1 && left + width <= image.width && top + height <= image.height);
+	intptr_t newOffset = image.startOffset + (left * image.pixelSize) + (top * image.stride);
+	return ImageImpl(width, height, image.stride, image.pixelSize, image.buffer, newOffset);
+}
+
+struct ImageIntersection {
+	ImageWriter subTarget;
+	ImageReader subSource;
+	ImageIntersection(const ImageWriter &subTarget, const ImageReader &subSource) :
+	  subTarget(subTarget), subSource(subSource) {}
+	static bool canCreate(ImageImpl &target, const ImageImpl &source, int32_t left, int32_t top) {
+		int32_t targetRegionRight = left + source.width;
+		int32_t targetRegionBottom = top + source.height;
+		return left < target.width && top < target.height && targetRegionRight > 0 && targetRegionBottom > 0;
+	}
+	// Only call if canCreate passed with the same arguments
+	static ImageIntersection create(ImageImpl &target, const ImageImpl &source, int32_t left, int32_t top) {
+		int32_t targetRegionRight = left + source.width;
+		int32_t targetRegionBottom = top + source.height;
+		assert(ImageIntersection::canCreate(target, source, left, top));
+		// Check if the source has to be clipped
+		if (left < 0 || top < 0 || targetRegionRight > target.width || targetRegionBottom > target.height) {
+			int32_t clipLeft = std::max(0, -left);
+			int32_t clipTop = std::max(0, -top);
+			int32_t clipRight = std::max(0, targetRegionRight - target.width);
+			int32_t clipBottom = std::max(0, targetRegionBottom - target.height);
+			int32_t newWidth = source.width - (clipLeft + clipRight);
+			int32_t newHeight = source.height - (clipTop + clipBottom);
+			assert(newWidth > 0 && newHeight > 0);
+			// Partial drawing
+			ImageImpl subTarget = getGenericSubImage(target, left + clipLeft, top + clipTop, newWidth, newHeight);
+			ImageImpl subSource = getGenericSubImage(source, clipLeft, clipTop, newWidth, newHeight);
+			return ImageIntersection(getWriter(subTarget), getReader(subSource));
+		} else {
+			// Full drawing
+			ImageImpl subTarget = getGenericSubImage(target, left, top, source.width, source.height);
+			return ImageIntersection(getWriter(subTarget), getReader(source));
+		}
+	}
+};
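+
+// Worked example of the clipping above (values chosen only for illustration):
+//   Drawing a 10x10 source at left = -3, top = 4 on an 8x8 target gives targetRegionRight = 7 and
+//   targetRegionBottom = 14, so canCreate passes. create then clips with clipLeft = 3, clipTop = 0,
+//   clipRight = 0 and clipBottom = 6, leaving a 7x4 intersection starting at (0, 4) in the target
+//   and at (3, 0) in the source.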
+
+#define ITERATE_ROWS(WRITER, READER, OPERATION) \
+{ \
+	uint8_t *targetRow = WRITER.data; \
+	const uint8_t *sourceRow = READER.data; \
+	for (int32_t y = 0; y < READER.height; y++) { \
+		OPERATION; \
+		targetRow += WRITER.stride; \
+		sourceRow += READER.stride; \
+	} \
+}
+
+#define ITERATE_PIXELS(WRITER, READER, OPERATION) \
+{ \
+	uint8_t *targetRow = WRITER.data; \
+	const uint8_t *sourceRow = READER.data; \
+	for (int32_t y = 0; y < READER.height; y++) { \
+		uint8_t *targetPixel = targetRow; \
+		const uint8_t *sourcePixel = sourceRow; \
+		for (int32_t x = 0; x < READER.width; x++) { \
+			{OPERATION;} \
+			targetPixel += WRITER.pixelSize; \
+			sourcePixel += READER.pixelSize; \
+		} \
+		targetRow += WRITER.stride; \
+		sourceRow += READER.stride; \
+	} \
+}
+
+#define ITERATE_PIXELS_2(WRITER1, READER1, WRITER2, READER2, OPERATION) \
+{ \
+	uint8_t *targetRow1 = WRITER1.data; \
+	uint8_t *targetRow2 = WRITER2.data; \
+	const uint8_t *sourceRow1 = READER1.data; \
+	const uint8_t *sourceRow2 = READER2.data; \
+	int minWidth = std::min(READER1.width, READER2.width); \
+	int minHeight = std::min(READER1.height, READER2.height); \
+	for (int32_t y = 0; y < minHeight; y++) { \
+		uint8_t *targetPixel1 = targetRow1; \
+		uint8_t *targetPixel2 = targetRow2; \
+		const uint8_t *sourcePixel1 = sourceRow1; \
+		const uint8_t *sourcePixel2 = sourceRow2; \
+		for (int32_t x = 0; x < minWidth; x++) { \
+			{OPERATION;} \
+			targetPixel1 += WRITER1.pixelSize; \
+			targetPixel2 += WRITER2.pixelSize; \
+			sourcePixel1 += READER1.pixelSize; \
+			sourcePixel2 += READER2.pixelSize; \
+		} \
+		targetRow1 += WRITER1.stride; \
+		targetRow2 += WRITER2.stride; \
+		sourceRow1 += READER1.stride; \
+		sourceRow2 += READER2.stride; \
+	} \
+}
+
+#define ITERATE_PIXELS_3(WRITER1, READER1, WRITER2, READER2, WRITER3, READER3, OPERATION) \
+{ \
+	uint8_t *targetRow1 = WRITER1.data; \
+	uint8_t *targetRow2 = WRITER2.data; \
+	uint8_t *targetRow3 = WRITER3.data; \
+	const uint8_t *sourceRow1 = READER1.data; \
+	const uint8_t *sourceRow2 = READER2.data; \
+	const uint8_t *sourceRow3 = READER3.data; \
+	int minWidth = std::min(std::min(READER1.width, READER2.width), READER3.width); \
+	int minHeight = std::min(std::min(READER1.height, READER2.height), READER3.height); \
+	for (int32_t y = 0; y < minHeight; y++) { \
+		uint8_t *targetPixel1 = targetRow1; \
+		uint8_t *targetPixel2 = targetRow2; \
+		uint8_t *targetPixel3 = targetRow3; \
+		const uint8_t *sourcePixel1 = sourceRow1; \
+		const uint8_t *sourcePixel2 = sourceRow2; \
+		const uint8_t *sourcePixel3 = sourceRow3; \
+		for (int32_t x = 0; x < minWidth; x++) { \
+			{OPERATION;} \
+			targetPixel1 += WRITER1.pixelSize; \
+			targetPixel2 += WRITER2.pixelSize; \
+			targetPixel3 += WRITER3.pixelSize; \
+			sourcePixel1 += READER1.pixelSize; \
+			sourcePixel2 += READER2.pixelSize; \
+			sourcePixel3 += READER3.pixelSize; \
+		} \
+		targetRow1 += WRITER1.stride; \
+		targetRow2 += WRITER2.stride; \
+		targetRow3 += WRITER3.stride; \
+		sourceRow1 += READER1.stride; \
+		sourceRow2 += READER2.stride; \
+		sourceRow3 += READER3.stride; \
+	} \
+}
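+
+// The OPERATION body passed to these macros can access the current location through the raw byte
+// pointers declared above: targetRow/sourceRow in ITERATE_ROWS, targetPixel/sourcePixel in
+// ITERATE_PIXELS, and the numbered variants targetPixel1, sourcePixel1, ... in the multi-image versions.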
+
+static inline int saturateFloat(float value) {
+	if (!(value >= 0.0f)) {
+		// NaN or negative
+		return 0;
+	} else if (value > 255.0f) {
+		// Too large
+		return 255;
+	} else {
+		// Round to closest
+		return (int)(value + 0.5f);
+	}
+}
+
+// Copy data from one image region to another of the same size.
+//   Packing order is reinterpreted without conversion.
+static void copyImageData(ImageWriter writer, ImageReader reader) {
+	assert(writer.width == reader.width && writer.height == reader.height && writer.pixelSize == reader.pixelSize);
+	ITERATE_ROWS(writer, reader, std::memcpy(targetRow, sourceRow, reader.width * reader.pixelSize));
+}
+
+void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		if (target.packOrder == source.packOrder) {
+			// No conversion needed
+			copyImageData(intersection.subTarget, intersection.subSource);
+		} else {
+			// Read and repack to convert between different color formats
+			ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+				targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
+				targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
+				targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
+				targetPixel[target.packOrder.alphaIndex] = sourcePixel[source.packOrder.alphaIndex];
+			);
+		}
+	}
+}
+void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		copyImageData(intersection.subTarget, intersection.subSource);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		copyImageData(intersection.subTarget, intersection.subSource);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		copyImageData(intersection.subTarget, intersection.subSource);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			uint8_t luma = *sourcePixel;
+			targetPixel[target.packOrder.redIndex]   = luma;
+			targetPixel[target.packOrder.greenIndex] = luma;
+			targetPixel[target.packOrder.blueIndex]  = luma;
+			targetPixel[target.packOrder.alphaIndex] = 255;
+		);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			int luma = *((const uint16_t*)sourcePixel);
+			if (luma > 255) { luma = 255; }
+			targetPixel[target.packOrder.redIndex]   = luma;
+			targetPixel[target.packOrder.greenIndex] = luma;
+			targetPixel[target.packOrder.blueIndex]  = luma;
+			targetPixel[target.packOrder.alphaIndex] = 255;
+		);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			int luma = saturateFloat(*((const float*)sourcePixel));
+			targetPixel[target.packOrder.redIndex]   = luma;
+			targetPixel[target.packOrder.greenIndex] = luma;
+			targetPixel[target.packOrder.blueIndex]  = luma;
+			targetPixel[target.packOrder.alphaIndex] = 255;
+		);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			*targetPixel = saturateFloat(*((const float*)sourcePixel));
+		);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageU8Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			int luma = *((const uint16_t*)sourcePixel);
+			if (luma > 255) { luma = 255; }
+			*targetPixel = luma;
+		);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			*((uint16_t*)targetPixel) = *sourcePixel;
+		);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageU16Impl& target, const ImageF32Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			int luma = *((const float*)sourcePixel);
+			if (luma < 0) { luma = 0; }
+			if (luma > 65535) { luma = 65535; }
+			*((uint16_t*)targetPixel) = luma;
+		);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageU8Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			*((float*)targetPixel) = (float)(*sourcePixel);
+		);
+	}
+}
+void dsr::imageImpl_drawCopy(ImageF32Impl& target, const ImageU16Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			int luma = *((const uint16_t*)sourcePixel);
+			*((float*)targetPixel) = (float)luma;
+		);
+	}
+}
+
+void dsr::imageImpl_drawAlphaFilter(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		// Read and repack to convert between different color formats
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			// Optimized for anti-aliasing, where most alpha values are 0 or 255
+			uint32_t sourceRatio = sourcePixel[source.packOrder.alphaIndex];
+			if (sourceRatio > 0) {
+				if (sourceRatio == 255) {
+					targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
+					targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
+					targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
+					targetPixel[target.packOrder.alphaIndex] = 255;
+				} else {
+					uint32_t targetRatio = 255 - sourceRatio;
+					targetPixel[target.packOrder.redIndex]   = mulByte_8(targetPixel[target.packOrder.redIndex], targetRatio) + mulByte_8(sourcePixel[source.packOrder.redIndex], sourceRatio);
+					targetPixel[target.packOrder.greenIndex] = mulByte_8(targetPixel[target.packOrder.greenIndex], targetRatio) + mulByte_8(sourcePixel[source.packOrder.greenIndex], sourceRatio);
+					targetPixel[target.packOrder.blueIndex]  = mulByte_8(targetPixel[target.packOrder.blueIndex], targetRatio) + mulByte_8(sourcePixel[source.packOrder.blueIndex], sourceRatio);
+					targetPixel[target.packOrder.alphaIndex] = mulByte_8(targetPixel[target.packOrder.alphaIndex], targetRatio) + sourceRatio;
+				}
+			}
+		);
+	}
+}
+
+void dsr::imageImpl_drawMaxAlpha(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top, int32_t sourceAlphaOffset) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		// Read and repack to convert between different color formats
+		if (sourceAlphaOffset == 0) {
+			ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+				int sourceAlpha = sourcePixel[source.packOrder.alphaIndex];
+				if (sourceAlpha > targetPixel[target.packOrder.alphaIndex]) {
+					targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
+					targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
+					targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
+					targetPixel[target.packOrder.alphaIndex] = sourceAlpha;
+				}
+			);
+		} else {
+			ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+				int sourceAlpha = sourcePixel[source.packOrder.alphaIndex];
+				if (sourceAlpha > 0) {
+					sourceAlpha += sourceAlphaOffset;
+					if (sourceAlpha > targetPixel[target.packOrder.alphaIndex]) {
+						targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
+						targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
+						targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
+						if (sourceAlpha < 0) { sourceAlpha = 0; }
+						if (sourceAlpha > 255) { sourceAlpha = 255; }
+						targetPixel[target.packOrder.alphaIndex] = sourceAlpha;
+					}
+				}
+			);
+		}
+	}
+}
+
+void dsr::imageImpl_drawAlphaClip(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left, int32_t top, int32_t threshold) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		// Read and repack to convert between different color formats
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			if (sourcePixel[source.packOrder.alphaIndex] > threshold) {
+				targetPixel[target.packOrder.redIndex]   = sourcePixel[source.packOrder.redIndex];
+				targetPixel[target.packOrder.greenIndex] = sourcePixel[source.packOrder.greenIndex];
+				targetPixel[target.packOrder.blueIndex]  = sourcePixel[source.packOrder.blueIndex];
+				targetPixel[target.packOrder.alphaIndex] = 255;
+			}
+		);
+	}
+}
+
+template <bool FULL_ALPHA>
+static void drawSilhouette_template(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
+	if (ImageIntersection::canCreate(target, source, left, top)) {
+		ImageIntersection intersection = ImageIntersection::create(target, source, left, top);
+		// Read and repack to convert between different color formats
+		ITERATE_PIXELS(intersection.subTarget, intersection.subSource,
+			uint32_t sourceRatio;
+			if (FULL_ALPHA) {
+				sourceRatio = *sourcePixel;
+			} else {
+				sourceRatio = mulByte_8(*sourcePixel, color.alpha);
+			}
+			if (sourceRatio > 0) {
+				if (sourceRatio == 255) {
+					targetPixel[target.packOrder.redIndex]   = color.red;
+					targetPixel[target.packOrder.greenIndex] = color.green;
+					targetPixel[target.packOrder.blueIndex]  = color.blue;
+					targetPixel[target.packOrder.alphaIndex] = 255;
+				} else {
+					uint32_t targetRatio = 255 - sourceRatio;
+					targetPixel[target.packOrder.redIndex]   = mulByte_8(targetPixel[target.packOrder.redIndex], targetRatio) + mulByte_8(color.red, sourceRatio);
+					targetPixel[target.packOrder.greenIndex] = mulByte_8(targetPixel[target.packOrder.greenIndex], targetRatio) + mulByte_8(color.green, sourceRatio);
+					targetPixel[target.packOrder.blueIndex]  = mulByte_8(targetPixel[target.packOrder.blueIndex], targetRatio) + mulByte_8(color.blue, sourceRatio);
+					targetPixel[target.packOrder.alphaIndex] = mulByte_8(targetPixel[target.packOrder.alphaIndex], targetRatio) + sourceRatio;
+				}
+			}
+		);
+	}
+}
+void dsr::imageImpl_drawSilhouette(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left, int32_t top) {
+	if (color.alpha > 0) {
+		ColorRgbaI32 saturatedColor = color.saturate();
+		if (color.alpha < 255) {
+			drawSilhouette_template<false>(target, source, saturatedColor, left, top);
+		} else {
+			drawSilhouette_template<true>(target, source, saturatedColor, left, top);
+		}
+	}
+}
+
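+// The drawHigher operations below act like a height based z-buffer: a source pixel is only written where
+// its offset height exceeds the height already stored in the target, and source heights of zero (U16)
+// or negative infinity (F32) are treated as empty and skipped.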
+void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+		ITERATE_PIXELS(intersectionH.subTarget, intersectionH.subSource,
+			int32_t sourceHeight = *((const uint16_t*)sourcePixel);
+			if (sourceHeight > 0) {
+				sourceHeight += sourceHeightOffset;
+				int32_t targetHeight = *((uint16_t*)targetPixel);
+				if (sourceHeight < 0) { sourceHeight = 0; }
+				if (sourceHeight > 65535) { sourceHeight = 65535; }
+				if (sourceHeight > 0 && sourceHeight > targetHeight) {
+					*((uint16_t*)targetPixel) = sourceHeight;
+				}
+			}
+		);
+	}
+}
+void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	assert(sourceA.width == sourceHeight.width);
+	assert(sourceA.height == sourceHeight.height);
+	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+		ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
+		ITERATE_PIXELS_2(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource,
+			int32_t sourceHeight = *((const uint16_t*)sourcePixel1);
+			if (sourceHeight > 0) {
+				sourceHeight += sourceHeightOffset;
+				int32_t targetHeight = *((uint16_t*)targetPixel1);
+				if (sourceHeight < 0) { sourceHeight = 0; }
+				if (sourceHeight > 65535) { sourceHeight = 65535; }
+				if (sourceHeight > targetHeight) {
+					*((uint16_t*)targetPixel1) = sourceHeight;
+					targetPixel2[targetA.packOrder.redIndex]   = sourcePixel2[sourceA.packOrder.redIndex];
+					targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
+					targetPixel2[targetA.packOrder.blueIndex]  = sourcePixel2[sourceA.packOrder.blueIndex];
+					targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
+				}
+			}
+		);
+	}
+}
+void dsr::imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left, int32_t top, int32_t sourceHeightOffset) {
+	assert(sourceA.width == sourceHeight.width);
+	assert(sourceA.height == sourceHeight.height);
+	assert(sourceB.width == sourceHeight.width);
+	assert(sourceB.height == sourceHeight.height);
+	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+		ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
+		ImageIntersection intersectionB = ImageIntersection::create(targetB, sourceB, left, top);
+		ITERATE_PIXELS_3(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource, intersectionB.subTarget, intersectionB.subSource,
+			int32_t sourceHeight = *((const uint16_t*)sourcePixel1);
+			if (sourceHeight > 0) {
+				sourceHeight += sourceHeightOffset;
+				int32_t targetHeight = *((uint16_t*)targetPixel1);
+				if (sourceHeight < 0) { sourceHeight = 0; }
+				if (sourceHeight > 65535) { sourceHeight = 65535; }
+				if (sourceHeight > targetHeight) {
+					*((uint16_t*)targetPixel1) = sourceHeight;
+					targetPixel2[targetA.packOrder.redIndex]   = sourcePixel2[sourceA.packOrder.redIndex];
+					targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
+					targetPixel2[targetA.packOrder.blueIndex]  = sourcePixel2[sourceA.packOrder.blueIndex];
+					targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
+					targetPixel3[targetB.packOrder.redIndex]   = sourcePixel3[sourceB.packOrder.redIndex];
+					targetPixel3[targetB.packOrder.greenIndex] = sourcePixel3[sourceB.packOrder.greenIndex];
+					targetPixel3[targetB.packOrder.blueIndex]  = sourcePixel3[sourceB.packOrder.blueIndex];
+					targetPixel3[targetB.packOrder.alphaIndex] = sourcePixel3[sourceB.packOrder.alphaIndex];
+				}
+			}
+		);
+	}
+}
+
+void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, int32_t left, int32_t top, float sourceHeightOffset) {
+	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+		ITERATE_PIXELS(intersectionH.subTarget, intersectionH.subSource,
+			float sourceHeight = *((const float*)sourcePixel);
+			if (sourceHeight > -std::numeric_limits<float>::infinity()) {
+				sourceHeight += sourceHeightOffset;
+				float targetHeight = *((float*)targetPixel);
+				if (sourceHeight > targetHeight) {
+					*((float*)targetPixel) = sourceHeight;
+				}
+			}
+		);
+	}
+}
+void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  int32_t left, int32_t top, float sourceHeightOffset) {
+	assert(sourceA.width == sourceHeight.width);
+	assert(sourceA.height == sourceHeight.height);
+	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+		ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
+		ITERATE_PIXELS_2(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource,
+			float sourceHeight = *((const float*)sourcePixel1);
+			if (sourceHeight > -std::numeric_limits<float>::infinity()) {
+				sourceHeight += sourceHeightOffset;
+				float targetHeight = *((float*)targetPixel1);
+				if (sourceHeight > targetHeight) {
+					*((float*)targetPixel1) = sourceHeight;
+					targetPixel2[targetA.packOrder.redIndex]   = sourcePixel2[sourceA.packOrder.redIndex];
+					targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
+					targetPixel2[targetA.packOrder.blueIndex]  = sourcePixel2[sourceA.packOrder.blueIndex];
+					targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
+				}
+			}
+		);
+	}
+}
+void dsr::imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left, int32_t top, float sourceHeightOffset) {
+	assert(sourceA.width == sourceHeight.width);
+	assert(sourceA.height == sourceHeight.height);
+	assert(sourceB.width == sourceHeight.width);
+	assert(sourceB.height == sourceHeight.height);
+	if (ImageIntersection::canCreate(targetHeight, sourceHeight, left, top)) {
+		ImageIntersection intersectionH = ImageIntersection::create(targetHeight, sourceHeight, left, top);
+		ImageIntersection intersectionA = ImageIntersection::create(targetA, sourceA, left, top);
+		ImageIntersection intersectionB = ImageIntersection::create(targetB, sourceB, left, top);
+		ITERATE_PIXELS_3(intersectionH.subTarget, intersectionH.subSource, intersectionA.subTarget, intersectionA.subSource, intersectionB.subTarget, intersectionB.subSource,
+			float sourceHeight = *((const float*)sourcePixel1);
+			if (sourceHeight > -std::numeric_limits<float>::infinity()) {
+				sourceHeight += sourceHeightOffset;
+				float targetHeight = *((float*)targetPixel1);
+				if (sourceHeight > targetHeight) {
+					*((float*)targetPixel1) = sourceHeight;
+					targetPixel2[targetA.packOrder.redIndex]   = sourcePixel2[sourceA.packOrder.redIndex];
+					targetPixel2[targetA.packOrder.greenIndex] = sourcePixel2[sourceA.packOrder.greenIndex];
+					targetPixel2[targetA.packOrder.blueIndex]  = sourcePixel2[sourceA.packOrder.blueIndex];
+					targetPixel2[targetA.packOrder.alphaIndex] = sourcePixel2[sourceA.packOrder.alphaIndex];
+					targetPixel3[targetB.packOrder.redIndex]   = sourcePixel3[sourceB.packOrder.redIndex];
+					targetPixel3[targetB.packOrder.greenIndex] = sourcePixel3[sourceB.packOrder.greenIndex];
+					targetPixel3[targetB.packOrder.blueIndex]  = sourcePixel3[sourceB.packOrder.blueIndex];
+					targetPixel3[targetB.packOrder.alphaIndex] = sourcePixel3[sourceB.packOrder.alphaIndex];
+				}
+			}
+		);
+	}
+}
+
+/*
+void imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0);
+void imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0);
+void imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0);
+*/
+
+// -------------------------------- Resize --------------------------------
+
+
+static inline U32x4 ColorRgbaI32_to_U32x4(const ColorRgbaI32& color) {
+	return U32x4(color.red, color.green, color.blue, color.alpha);
+}
+
+static inline ColorRgbaI32 U32x4_to_ColorRgbaI32(const U32x4& color) {
+	UVector4D vResult = color.get();
+	return ColorRgbaI32(vResult.x, vResult.y, vResult.z, vResult.w);
+}
+
+// Uniform linear interpolation of colors from a 16-bit sub-pixel weight
+// Pre-condition: 0 <= fineRatio <= 65536
+// Post-condition: Returns colorA * (1 - (fineRatio / 65536)) + colorB * (fineRatio / 65536)
+static inline U32x4 mixColorsUniform(const U32x4 &colorA, const U32x4 &colorB, uint32_t fineRatio) {
+	uint16_t ratio = fineRatio >> 8;
+	uint16_t invRatio = 256 - ratio;
+	ALIGN16 U16x8 weightA = U16x8(invRatio);
+	ALIGN16 U16x8 weightB = U16x8(ratio);
+	ALIGN16 U32x4 lowMask(0x00FF00FFu);
+	ALIGN16 U16x8 lowColorA = U16x8(colorA & lowMask);
+	ALIGN16 U16x8 lowColorB = U16x8(colorB & lowMask);
+	ALIGN16 U32x4 highMask(0xFF00FF00u);
+	ALIGN16 U16x8 highColorA = U16x8((colorA & highMask) >> 8);
+	ALIGN16 U16x8 highColorB = U16x8((colorB & highMask) >> 8);
+	ALIGN16 U32x4 lowColor = (((lowColorA * weightA) + (lowColorB * weightB))).get_U32();
+	ALIGN16 U32x4 highColor = (((highColorA * weightA) + (highColorB * weightB))).get_U32();
+	return (((lowColor >> 8) & lowMask) | (highColor & highMask));
+}
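+
+// Worked example (channel values chosen only for illustration): mixing colorA = 16 and colorB = 48 in one
+// channel with fineRatio = 32768 gives ratio = invRatio = 128, so the channel becomes
+// (16 * 128 + 48 * 128) >> 8 = 32, matching the exact blend 16 * 0.5 + 48 * 0.5 from the post-condition.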
+
+#define READ_CLAMP(X,Y) ImageRgbaU8Impl::unpackRgba(ImageRgbaU8Impl::readPixel_clamp(source, X, Y), source.packOrder)
+#define READ_CLAMP_SIMD(X,Y) ColorRgbaI32_to_U32x4(READ_CLAMP(X,Y))
+
+// Fixed-point coordinate system with 16-bit pixel indices and 16-bit sub-pixel weights
+static const uint32_t interpolationFullPixel = 65536;
+static const uint32_t interpolationHalfPixel = interpolationFullPixel / 2;
+// Modulo mask for values greater than or equal to 0 and less than interpolationFullPixel
+static const uint32_t interpolationWeightMask = interpolationFullPixel - 1;
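+
+// Worked example of the fixed-point mapping (numbers chosen only for illustration):
+//   Stretching a 2 pixel wide source over an 8 pixel wide scaleRegion starting at 0 gives
+//   offsetX = 65536 * 2 / 8 = 16384 and startX = offsetX / 2 = 8192, so in the nearest neighbor case
+//   target pixel 0 reads source pixel 8192 >> 16 = 0. Bilinear sampling first subtracts
+//   interpolationHalfPixel to center the filter and keeps the low 16 bits as the blend weight.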
+
+// BILINEAR: Enables linear interpolation
+// scaleRegion:
+//     The stretched location of the source image in the target image.
+//     Making it smaller than the target image fills the outside with stretched pixels,
+//     which allows the caller to crop away parts of the source image that are not interesting.
+//     It can also be used to round the region to a multiple of the input size for a fixed pixel size.
+template <bool BILINEAR>
+static void resize_reference(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, const IRect& scaleRegion) {
+	// Reference implementation
+
+	// Offset in source pixels per target pixel
+	int32_t offsetX = interpolationFullPixel * source.width / scaleRegion.width();
+	int32_t offsetY = interpolationFullPixel * source.height / scaleRegion.height();
+	int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
+	int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
+	if (BILINEAR) {
+		startX -= interpolationHalfPixel;
+		startY -= interpolationHalfPixel;
+	}
+	SafePointer<uint32_t> targetRow = imageInternal::getSafeData<uint32_t>(target);
+	int32_t readY = startY;
+	for (int32_t y = 0; y < target.height; y++) {
+		int32_t naturalY = readY;
+		if (naturalY < 0) { naturalY = 0; }
+		uint32_t sampleY = (uint32_t)naturalY;
+		uint32_t upperY = sampleY >> 16;
+		uint32_t lowerY = upperY + 1;
+		uint32_t lowerRatio = sampleY & interpolationWeightMask;
+		uint32_t upperRatio = 65536 - lowerRatio;
+		SafePointer<uint32_t> targetPixel = targetRow;
+		int32_t readX = startX;
+		for (int32_t x = 0; x < target.width; x++) {
+			int32_t naturalX = readX;
+			if (naturalX < 0) { naturalX = 0; }
+			uint32_t sampleX = (uint32_t)naturalX;
+			uint32_t leftX = sampleX >> 16;
+			uint32_t rightX = leftX + 1;
+			uint32_t rightRatio = sampleX & interpolationWeightMask;
+			uint32_t leftRatio = 65536 - rightRatio;
+			ColorRgbaI32 finalColor;
+			if (BILINEAR) {
+				ALIGN16 U32x4 vUpperLeftColor = READ_CLAMP_SIMD(leftX, upperY);
+				ALIGN16 U32x4 vUpperRightColor = READ_CLAMP_SIMD(rightX, upperY);
+				ALIGN16 U32x4 vLowerLeftColor = READ_CLAMP_SIMD(leftX, lowerY);
+				ALIGN16 U32x4 vLowerRightColor = READ_CLAMP_SIMD(rightX, lowerY);
+				ALIGN16 U32x4 vLeftRatio = U32x4(leftRatio);
+				ALIGN16 U32x4 vRightRatio = U32x4(rightRatio);
+				ALIGN16 U32x4 vUpperColor = ((vUpperLeftColor * vLeftRatio) + (vUpperRightColor * vRightRatio)) >> 16;
+				ALIGN16 U32x4 vLowerColor = ((vLowerLeftColor * vLeftRatio) + (vLowerRightColor * vRightRatio)) >> 16;
+				ALIGN16 U32x4 vCenterColor = ((vUpperColor * upperRatio) + (vLowerColor * lowerRatio)) >> 16;
+				finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
+			} else {
+				finalColor = READ_CLAMP(leftX, upperY);
+			}
+			*targetPixel = target.packRgba(finalColor).packed;
+			targetPixel += 1;
+			readX += offsetX;
+		}
+		targetRow.increaseBytes(target.stride);
+		readY += offsetY;
+	}
+}
+
+// BILINEAR: Enables linear interpolation
+// SIMD_ALIGNED: Each line starts 16-byte aligned, has a stride divisible with 16-bytes and is allowed to overwrite padding.
+template <bool BILINEAR, bool SIMD_ALIGNED>
+static void resize_optimized(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, const IRect& scaleRegion) {
+	// Get source information
+	// Compare dimensions
+	const bool sameWidth = source.width == scaleRegion.width() && scaleRegion.left() == 0;
+	const bool sameHeight = source.height == scaleRegion.height() && scaleRegion.top() == 0;
+	const bool samePackOrder = target.packOrder.packOrderIndex == source.packOrder.packOrderIndex;
+	if (sameWidth && sameHeight) {
+		// No need to resize, just make a copy to save time
+		imageImpl_drawCopy(target, source);
+	} else if (sameWidth && (samePackOrder || BILINEAR)) {
+		// Only vertical interpolation
+
+		// Offset in source pixels per target pixel
+		int32_t offsetY = interpolationFullPixel * source.height / scaleRegion.height();
+		int32_t startY = interpolationFullPixel * scaleRegion.top() + offsetY / 2;
+		if (BILINEAR) {
+			startY -= interpolationHalfPixel;
+		}
+		SafePointer<uint32_t> targetRow = imageInternal::getSafeData<uint32_t>(target);
+		int32_t readY = startY;
+		for (int32_t y = 0; y < target.height; y++) {
+			int32_t naturalY = readY;
+			if (naturalY < 0) { naturalY = 0; }
+			uint32_t sampleY = (uint32_t)naturalY;
+			uint32_t upperY = sampleY >> 16;
+			uint32_t lowerY = upperY + 1;
+			if (upperY >= (uint32_t)source.height) upperY = source.height - 1;
+			if (lowerY >= (uint32_t)source.height) lowerY = source.height - 1;
+			if (BILINEAR) {
+				uint32_t lowerRatio = sampleY & interpolationWeightMask;
+				uint32_t upperRatio = 65536 - lowerRatio;
+				SafePointer<uint32_t> targetPixel = targetRow;
+				if (SIMD_ALIGNED) {
+					const SafePointer<uint32_t> sourceRowUpper = imageInternal::getSafeData<uint32_t>(source, upperY);
+					const SafePointer<uint32_t> sourceRowLower = imageInternal::getSafeData<uint32_t>(source, lowerY);
+					for (int32_t x = 0; x < target.width; x += 4) {
+						ALIGN16 U32x4 vUpperPackedColor = U32x4::readAligned(sourceRowUpper, "resize_optimized @ read vUpperPackedColor");
+						ALIGN16 U32x4 vLowerPackedColor = U32x4::readAligned(sourceRowLower, "resize_optimized @ read vLowerPackedColor");
+						ALIGN16 U32x4 vCenterColor = mixColorsUniform(vUpperPackedColor, vLowerPackedColor, lowerRatio);
+						vCenterColor.writeAligned(targetPixel, "resize_optimized @ write vCenterColor");
+						sourceRowUpper += 4;
+						sourceRowLower += 4;
+						targetPixel += 4;
+					}
+				} else {
+					for (int32_t x = 0; x < target.width; x++) {
+						ALIGN16 U32x4 vUpperColor = READ_CLAMP_SIMD(x, upperY);
+						ALIGN16 U32x4 vLowerColor = READ_CLAMP_SIMD(x, lowerY);
+						ALIGN16 U32x4 vCenterColor = ((vUpperColor * upperRatio) + (vLowerColor * lowerRatio)) >> 16;
+						ColorRgbaI32 finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
+						*targetPixel = target.packRgba(finalColor).packed;
+						targetPixel += 1;
+					}
+				}
+			} else {
+				const SafePointer<uint32_t> sourceRowUpper = imageInternal::getSafeData<uint32_t>(source, upperY);
+				// Nearest neighbor sampling from a source of the same width can be done using one copy per row
+				safeMemoryCopy(targetRow, sourceRowUpper, source.width * 4);
+			}
+			targetRow.increaseBytes(target.stride);
+			readY += offsetY;
+		}
+	} else if (sameHeight) {
+		// Only horizontal interpolation
+
+		// Offset in source pixels per target pixel
+		int32_t offsetX = interpolationFullPixel * source.width / scaleRegion.width();
+		int32_t startX = interpolationFullPixel * scaleRegion.left() + offsetX / 2;
+		if (BILINEAR) {
+			startX -= interpolationHalfPixel;
+		}
+		SafePointer<uint32_t> targetRow = imageInternal::getSafeData<uint32_t>(target);
+		for (int32_t y = 0; y < target.height; y++) {
+			SafePointer<uint32_t> targetPixel = targetRow;
+			int32_t readX = startX;
+			for (int32_t x = 0; x < target.width; x++) {
+				int32_t naturalX = readX;
+				if (naturalX < 0) { naturalX = 0; }
+				uint32_t sampleX = (uint32_t)naturalX;
+				uint32_t leftX = sampleX >> 16;
+				uint32_t rightX = leftX + 1;
+				uint32_t rightRatio = sampleX & interpolationWeightMask;
+				uint32_t leftRatio = 65536 - rightRatio;
+				ColorRgbaI32 finalColor;
+				if (BILINEAR) {
+					ALIGN16 U32x4 vLeftColor = READ_CLAMP_SIMD(leftX, y);
+					ALIGN16 U32x4 vRightColor = READ_CLAMP_SIMD(rightX, y);
+					ALIGN16 U32x4 vCenterColor = ((vLeftColor * leftRatio) + (vRightColor * rightRatio)) >> 16;
+					finalColor = U32x4_to_ColorRgbaI32(vCenterColor);
+				} else {
+					finalColor = READ_CLAMP(leftX, y);
+				}
+				*targetPixel = target.packRgba(finalColor).packed;
+				targetPixel += 1;
+				readX += offsetX;
+			}
+			targetRow.increaseBytes(target.stride);
+		}
+	} else {
+		// Call the reference implementation
+		resize_reference<BILINEAR>(target, source, scaleRegion);
+	}
+}
+
+// Returns true iff each row of the image starts 16-byte aligned
+//   Often not the case for sub-images, even if the parent image is aligned
+static bool imageIs16ByteAligned(const ImageImpl& image) {
+	return (image.stride & 15) == 0 && ((uintptr_t)(imageInternal::getSafeData<uint8_t>(image).getUnsafe()) & 15) == 0;
+}
+
+// Converting run-time flags into compile-time constants
+static void resize_aux(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate, bool paddWrite, const IRect& scaleRegion) {
+	// If writing to padding is allowed and both images are 16-byte aligned with the same pack order
+	if (paddWrite && imageIs16ByteAligned(source) && imageIs16ByteAligned(target)) {
+		// Optimized resize allowed
+		if (interpolate) {
+			resize_optimized<true, true>(target, source, scaleRegion);
+		} else {
+			resize_optimized<false, true>(target, source, scaleRegion);
+		}
+	} else {
+		// Non-optimized resize
+		if (interpolate) {
+			resize_optimized<true, false>(target, source, scaleRegion);
+		} else {
+			resize_optimized<false, false>(target, source, scaleRegion);
+		}
+	}
+}
+
+void dsr::imageImpl_resizeInPlace(ImageRgbaU8Impl& target, ImageRgbaU8Impl* wideTempImage, const ImageRgbaU8Impl& source, bool interpolate, const IRect& scaleRegion) {
+	if (target.width != source.width && target.height > source.height) {
+		// Upscaling is faster in two steps by both reusing the horizontal interpolation and vectorizing the vertical interpolation.
+		int tempWidth = target.width;
+		int tempHeight = source.height;
+		PackOrderIndex tempPackOrder = target.packOrder.packOrderIndex;
+		IRect tempScaleRegion = IRect(scaleRegion.left(), 0, scaleRegion.width(), source.height);
+		if (wideTempImage == nullptr
+		 || wideTempImage->width != tempWidth
+		 || wideTempImage->height != tempHeight
+		 || wideTempImage->packOrder.packOrderIndex != tempPackOrder) {
+			// Performance warnings
+			// TODO: Make optional
+			if (wideTempImage != nullptr) {
+				if (wideTempImage->width != tempWidth) { printText("Ignored temp buffer of wrong width! Found ", wideTempImage->width, " instead of ", tempWidth, "\n"); }
+				if (wideTempImage->height != tempHeight) { printText("Ignored temp buffer of wrong height! Found ", wideTempImage->height, " instead of ", tempHeight, "\n"); }
+				if (wideTempImage->packOrder.packOrderIndex != tempPackOrder) { printText("Ignored temp buffer of wrong pack order!\n"); }
+			}
+			// Create a new buffer
+			ImageRgbaU8Impl newTempImage = ImageRgbaU8Impl(tempWidth, tempHeight, tempPackOrder);
+			resize_aux(newTempImage, source, interpolate, true, tempScaleRegion);
+			resize_aux(target, newTempImage, interpolate, true, scaleRegion);
+		} else {
+			// Use existing buffer
+			resize_aux(*wideTempImage, source, interpolate, true, tempScaleRegion);
+			resize_aux(target, *wideTempImage, interpolate, true, scaleRegion);
+		}
+	} else {
+		// Downscaling or only changing one dimension is faster in one step
+		resize_aux(target, source, interpolate, true, scaleRegion);
+	}
+}
+
+void dsr::imageImpl_resizeToTarget(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate) {
+	imageImpl_resizeInPlace(target, nullptr, source, interpolate, imageInternal::getBound(target));
+}
+
+template <bool CONVERT_COLOR>
+static inline Color4xU8 convertRead(const ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int x, int y) {
+	Color4xU8 result = ImageRgbaU8Impl::readPixel_clamp(source, x, y);
+	if (CONVERT_COLOR) {
+		result = target.packRgba(ImageRgbaU8Impl::unpackRgba(result, source.packOrder));
+	}
+	return result;
+}
+
+// Used for drawing large pixels
+static inline void fillRectangle(ImageRgbaU8Impl& target, int pixelLeft, int pixelRight, int pixelTop, int pixelBottom, const Color4xU8& packedColor) {
+	// TODO: Get target pointer in advance and add the correct offsets
+	SafePointer<Color4xU8> targetRow = imageInternal::getSafeData<Color4xU8>(target, pixelTop) + pixelLeft;
+	for (int y = pixelTop; y < pixelBottom; y++) {
+		SafePointer<Color4xU8> targetPixel = targetRow;
+		for (int x = pixelLeft; x < pixelRight; x++) {
+			*targetPixel = packedColor;
+			targetPixel += 1;
+		}
+		targetRow.increaseBytes(target.stride);
+	}
+}
+
+template <bool CONVERT_COLOR>
+static void blockMagnify_reference(
+  ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source,
+  int pixelWidth, int pixelHeight, int clipWidth, int clipHeight) {
+	int sourceY = 0;
+	int maxSourceX = source.width - 1;
+	int maxSourceY = source.height - 1;
+	if (clipWidth > target.width) { clipWidth = target.width; }
+	if (clipHeight > target.height) { clipHeight = target.height; }
+	for (int32_t pixelTop = 0; pixelTop < clipHeight; pixelTop += pixelHeight) {
+		int sourceX = 0;
+		for (int32_t pixelLeft = 0; pixelLeft < clipWidth; pixelLeft += pixelWidth) {
+			// Read the pixel once
+			Color4xU8 sourceColor = convertRead<CONVERT_COLOR>(target, source, sourceX, sourceY);
+			// Write to all target pixels in a conditionless loop
+			fillRectangle(target, pixelLeft, pixelLeft + pixelWidth, pixelTop, pixelTop + pixelHeight, sourceColor);
+			// Iterate and clamp the read coordinate
+			sourceX++;
+			if (sourceX > maxSourceX) { sourceX = maxSourceX; }
+		}
+		// Iterate and clamp the read coordinate
+		sourceY++;
+		if (sourceY > maxSourceY) { sourceY = maxSourceY; }
+	}
+}
+
+// Pre-condition:
+//   * The source and target images have the same pack order
+//   * Both source and target are 16-byte aligned, but do not have to own their padding
+//   * clipWidth % 2 == 0
+//   * clipHeight % 2 == 0
+static void blockMagnify_2x2(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
+	#ifdef USE_SIMD_EXTRA
+		const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
+		SafePointer<uint32_t> upperTargetRow = imageInternal::getSafeData<uint32_t>(target, 0);
+		SafePointer<uint32_t> lowerTargetRow = imageInternal::getSafeData<uint32_t>(target, 1);
+		int doubleTargetStride = target.stride * 2;
+		for (int upperTargetY = 0; upperTargetY + 2 <= clipHeight; upperTargetY+=2) {
+			// Carriage return
+			const SafePointer<uint32_t> sourcePixel = sourceRow;
+			SafePointer<uint32_t> upperTargetPixel = upperTargetRow;
+			SafePointer<uint32_t> lowerTargetPixel = lowerTargetRow;
+			// Write to whole multiples of 8 pixels
+			int writeLeftX = 0;
+			while (writeLeftX + 8 <= clipWidth) {
+				// Read pixels
+				ALIGN16 SIMD_U32x4 sourcePixels = U32x4::readAligned(sourcePixel, "blockMagnify_2x2 @ whole sourcePixels").v;
+				sourcePixel += 4;
+				// Double the pixels by zipping with itself
+				ALIGN16 SIMD_U32x4x2 doubledPixels = ZIP_U32_SIMD(sourcePixels, sourcePixels);
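+				// For source pixels [a, b, c, d], val[0] then holds [a, a, b, b] and val[1] holds [c, c, d, d]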
+				// Write the left half of the doubled pixels to both target rows
+				U32x4(doubledPixels.val[0]).writeAligned(upperTargetPixel, "blockMagnify_2x2 @ write upper left #1");
+				upperTargetPixel += 4;
+				U32x4(doubledPixels.val[0]).writeAligned(lowerTargetPixel, "blockMagnify_2x2 @ write lower left #1");
+				lowerTargetPixel += 4;
+				// Write the right half of the doubled pixels to both target rows
+				U32x4(doubledPixels.val[1]).writeAligned(upperTargetPixel, "blockMagnify_2x2 @ write upper right #1");
+				upperTargetPixel += 4;
+				U32x4(doubledPixels.val[1]).writeAligned(lowerTargetPixel, "blockMagnify_2x2 @ write lower right #1");
+				lowerTargetPixel += 4;
+				// Count
+				writeLeftX += 8;
+			}
+			// Fill the last pixels using scalar operations to avoid going out of bounds
+			while (writeLeftX + 2 <= clipWidth) {
+				// Read one pixel
+				uint32_t sourceColor = *sourcePixel;
+				// Write 2x2 pixels
+				*upperTargetPixel = sourceColor; upperTargetPixel += 1;
+				*upperTargetPixel = sourceColor; upperTargetPixel += 1;
+				*lowerTargetPixel = sourceColor; lowerTargetPixel += 1;
+				*lowerTargetPixel = sourceColor; lowerTargetPixel += 1;
+				// Count
+				writeLeftX += 2;
+			}
+			// Line feed
+			sourceRow.increaseBytes(source.stride);
+			upperTargetRow.increaseBytes(doubleTargetStride);
+			lowerTargetRow.increaseBytes(doubleTargetStride);
+		}
+	#else
+		blockMagnify_reference<false>(target, source, 2, 2, clipWidth, clipHeight);
+	#endif
+}
+
+static void blackEdges(ImageRgbaU8Impl& target, int excludedWidth, int excludedHeight) {
+	// Right side
+	drawSolidRectangleMemset<Color4xU8>(target, excludedWidth, 0, target.width, excludedHeight, 0);
+	// Bottom and corner
+	drawSolidRectangleMemset<Color4xU8>(target, 0, excludedHeight, target.width, target.height, 0);
+}
+
+void dsr::imageImpl_blockMagnify(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int pixelWidth, int pixelHeight) {
+	if (pixelWidth < 1) { pixelWidth = 1; }
+	if (pixelHeight < 1) { pixelHeight = 1; }
+	bool sameOrder = target.packOrder.packOrderIndex == source.packOrder.packOrderIndex;
+	// Find the part of source which fits into target with whole pixels
+	int clipWidth = roundDown(std::min(target.width, source.width * pixelWidth), pixelWidth);
+	int clipHeight = roundDown(std::min(target.height, source.height * pixelHeight), pixelHeight);
+	if (sameOrder) {
+		if (imageIs16ByteAligned(source) && imageIs16ByteAligned(target) && pixelWidth == 2 && pixelHeight == 2) {
+			blockMagnify_2x2(target, source, clipWidth, clipHeight);
+		} else {
+			blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
+		}
+	} else {
+		blockMagnify_reference<true>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
+	}
+	blackEdges(target, clipWidth, clipHeight);
+}

+ 99 - 0
Source/DFPSR/image/draw.h

@@ -0,0 +1,99 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_DRAW
+#define DFPSR_IMAGE_DRAW
+
+#include "Image.h"
+#include "ImageU8.h"
+#include "ImageU16.h"
+#include "ImageF32.h"
+#include "ImageRgbaU8.h"
+
+namespace dsr {
+
+// An internal draw API to allow having multiple external APIs without code duplication
+
+void imageImpl_draw_solidRectangle(ImageU8Impl& image, const IRect& bound, int color);
+void imageImpl_draw_solidRectangle(ImageU16Impl& image, const IRect& bound, int color);
+void imageImpl_draw_solidRectangle(ImageF32Impl& image, const IRect& bound, float color);
+void imageImpl_draw_solidRectangle(ImageRgbaU8Impl& image, const IRect& bound, const ColorRgbaI32& color);
+
+void imageImpl_draw_line(ImageU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color);
+void imageImpl_draw_line(ImageU16Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, int color);
+void imageImpl_draw_line(ImageF32Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, float color);
+void imageImpl_draw_line(ImageRgbaU8Impl& image, int32_t x1, int32_t y1, int32_t x2, int32_t y2, const ColorRgbaI32& color);
+
+// Integer formats of different size are treated as having the same scale but different ranges
+void imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageU8Impl& target, const ImageU8Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageU16Impl& target, const ImageU16Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageF32Impl& target, const ImageF32Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU8Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageU16Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageRgbaU8Impl& target, const ImageF32Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageU8Impl& target, const ImageF32Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageU8Impl& target, const ImageU16Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageU16Impl& target, const ImageU8Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageU16Impl& target, const ImageF32Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageF32Impl& target, const ImageU8Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawCopy(ImageF32Impl& target, const ImageU16Impl& source, int32_t left = 0, int32_t top = 0);
+
+void imageImpl_drawAlphaFilter(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left = 0, int32_t top = 0);
+void imageImpl_drawMaxAlpha(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left = 0, int32_t top = 0, int32_t sourceAlphaOffset = 0);
+void imageImpl_drawAlphaClip(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int32_t left = 0, int32_t top = 0, int32_t threshold = 0);
+void imageImpl_drawSilhouette(ImageRgbaU8Impl& target, const ImageU8Impl& source, const ColorRgbaI32& color, int32_t left = 0, int32_t top = 0);
+
+void imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0);
+void imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0);
+void imageImpl_drawHigher(ImageU16Impl& targetHeight, const ImageU16Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left = 0, int32_t top = 0, int32_t sourceHeightOffset = 0);
+void imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0);
+void imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0);
+void imageImpl_drawHigher(ImageF32Impl& targetHeight, const ImageF32Impl& sourceHeight, ImageRgbaU8Impl& targetA, const ImageRgbaU8Impl& sourceA,
+  ImageRgbaU8Impl& targetB, const ImageRgbaU8Impl& sourceB, int32_t left = 0, int32_t top = 0, float sourceHeightOffset = 0);
+
+// Pre-conditions:
+//     * wideTempImage should be one of the following:
+//        * A nullptr (for allocating it automatically when needed)
+//          Can be preferred when down-scaling, because the two-step resize is only used when width changes and height increases
+//        * An image of dimensions target.width x source.height and the same pack order as target
+//          Wrong dimensions or pack order for wideTempImage is equivalent to passing nullptr
+//     * target must own its padding
+//       This is automatically true for aligned images
+//       If broken, visible pixels in a parent image may change outside of the sub-image's region
+// Side-effects:
+//     * Writes a resized version of source to target, including padding
+//     * May also write to any pixels in wideTempImage, including padding
+//     * May also change the pack order of wideTempImage
+void imageImpl_resizeInPlace(ImageRgbaU8Impl& target, ImageRgbaU8Impl* wideTempImage, const ImageRgbaU8Impl& source, bool interpolate, const IRect& scaleRegion);
+void imageImpl_resizeToTarget(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, bool interpolate);
+void imageImpl_blockMagnify(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int pixelWidth, int pixelHeight);
+void imageImpl_blockMagnify_aligned(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int pixelWidth, int pixelHeight);
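+
+// Minimal usage sketch for the resize entry points above (only functions and members declared in this
+// library are used; the image variables themselves are assumptions made for illustration):
+//   ImageRgbaU8Impl wideTemp(target.width, source.height, target.packOrder.packOrderIndex);
+//   imageImpl_resizeInPlace(target, &wideTemp, source, true, imageInternal::getBound(target));
+//   imageImpl_resizeToTarget(target, source, false); // Nearest neighbor stretch over the whole target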
+
+}
+
+#endif
+

+ 83 - 0
Source/DFPSR/image/internal/imageInternal.h

@@ -0,0 +1,83 @@
+
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_INTERNAL
+#define DFPSR_IMAGE_INTERNAL
+
+#include "../Image.h"
+#include "../ImageRgbaU8.h"
+
+namespace dsr {
+namespace imageInternal {
+
+//inline int32_t getWidth(const ImageImpl &image) { return image.width; }
+inline int32_t getWidth(const ImageImpl *image) { return image ? image->width : 0; }
+//inline int32_t getHeight(const ImageImpl &image) { return image.height; }
+inline int32_t getHeight(const ImageImpl *image) { return image ? image->height : 0; }
+//inline int32_t getStride(const ImageImpl &image) { return image.stride; }
+inline int32_t getStride(const ImageImpl *image) { return image ? image->stride : 0; }
+inline int32_t getRowSize(const ImageImpl &image) { return image.width * image.pixelSize; }
+inline int32_t getRowSize(const ImageImpl *image) { return image ? getRowSize(*image) : 0; }
+inline int32_t getUsedBytes(const ImageImpl &image) { return (image.stride * (image.height - 1)) + (image.width * image.pixelSize); }
+inline int32_t getUsedBytes(const ImageImpl *image) { return image ? getUsedBytes(*image) : 0; }
+//inline int32_t getPixelSize(const ImageImpl &image) { return image.pixelSize; }
+inline int32_t getPixelSize(const ImageImpl *image) { return image ? image->pixelSize : 0; }
+//inline int32_t getStartOffset(const ImageImpl &image) { return image.startOffset; }
+inline int32_t getStartOffset(const ImageImpl *image) { return image ? image->startOffset : 0; }
+inline std::shared_ptr<Buffer> getBuffer(const ImageImpl &image) { return image.buffer; }
+inline std::shared_ptr<Buffer> getBuffer(const ImageImpl *image) { return image ? getBuffer(*image) : std::shared_ptr<Buffer>(); }
+inline IRect getBound(const ImageImpl &image) { return IRect(0, 0, image.width, image.height); }
+inline IRect getBound(const ImageImpl *image) { return image ? getBound(*image) : IRect(); }
+inline PackOrder getPackOrder(const ImageRgbaU8Impl *image) { return image ? image->packOrder : PackOrder(); }
+
+
+// Get data
+//   The pointer has access to the whole parent buffer,
+//   to allow aligning SIMD vectors outside of the used region.
+template <typename T>
+static inline const SafePointer<T> getSafeData(const ImageImpl &image, int rowIndex = 0) {
+	auto result = image.buffer->getSafeData<T>("Image buffer");
+	result.increaseBytes(image.startOffset + image.stride * rowIndex);
+	return result;
+}
+template <typename T>
+inline const SafePointer<T> getSafeData(const ImageImpl *image, int rowIndex = 0) {
+	return image ? getSafeData<T>(*image, rowIndex) : SafePointer<T>("Null image buffer");
+}
+template <typename T>
+static inline SafePointer<T> getSafeData(ImageImpl &image, int rowIndex = 0) {
+	auto result = image.buffer->getSafeData<T>("Image buffer");
+	result.increaseBytes(image.startOffset + image.stride * rowIndex);
+	return result;
+}
+template <typename T>
+inline SafePointer<T> getSafeData(ImageImpl *image, int rowIndex = 0) {
+	return image ? getSafeData<T>(*image, rowIndex) : SafePointer<T>("Null image buffer");
+}
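+
+// Typical use, as seen in draw.cpp: fetch a typed row pointer and step it by the stride in bytes.
+//   SafePointer<uint32_t> targetRow = getSafeData<uint32_t>(target, rowIndex);
+//   targetRow.increaseBytes(target.stride);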
+
+}
+}
+
+#endif
+

+ 65 - 0
Source/DFPSR/image/internal/imageTemplate.h

@@ -0,0 +1,65 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_IMAGE_TEMPLATE
+#define DFPSR_IMAGE_TEMPLATE
+
+#include "imageInternal.h"
+#include "../../math/scalar.h"
+#include "../Image.h"
+#include <limits>
+#include <cassert> // assert is used by the generated readPixel_unsafe below
+
+namespace dsr {
+
+// TODO: Remove clamped pixel operation
+// Each image type must define initializeImage instead of a constructor.
+// These macros compile instances of the template functions, because that is much safer than exposing header-defined template classes.
+#define IMAGE_DEFINITION(IMAGE_TYPE,CHANNELS,COLOR_TYPE,ELEMENT_TYPE) \
+	void IMAGE_TYPE::writePixel(IMAGE_TYPE &image, int32_t x, int32_t y, COLOR_TYPE color) { \
+		if (x >= 0 && x < image.width && y >= 0 && y < image.height) { \
+			*(COLOR_TYPE*)(image.buffer->getUnsafeData() + image.startOffset + (x * sizeof(COLOR_TYPE)) + (y * image.stride)) = color; \
+		} \
+	} \
+	void IMAGE_TYPE::writePixel_unsafe(IMAGE_TYPE &image, int32_t x, int32_t y, COLOR_TYPE color) { \
+		*(COLOR_TYPE*)(image.buffer->getUnsafeData() + image.startOffset + (x * sizeof(COLOR_TYPE)) + (y * image.stride)) = color; \
+	} \
+	COLOR_TYPE IMAGE_TYPE::readPixel_clamp(const IMAGE_TYPE &image, int32_t x, int32_t y) { \
+		if (image.width > 0 && image.height > 0) { \
+			if (x < 0) { x = 0; } \
+			if (y < 0) { y = 0; } \
+			if (x >= image.width) { x = image.width - 1; } \
+			if (y >= image.height) { y = image.height - 1; } \
+			return *(COLOR_TYPE*)(image.buffer->getUnsafeData() + image.startOffset + (x * sizeof(COLOR_TYPE)) + (y * image.stride)); \
+		} else { \
+			return COLOR_TYPE(); \
+		} \
+	} \
+	COLOR_TYPE IMAGE_TYPE::readPixel_unsafe(const IMAGE_TYPE &image, int32_t x, int32_t y) { \
+		assert(x >= 0 && x < image.width && y >= 0 && y < image.height); \
+		return *(COLOR_TYPE*)(image.buffer->getUnsafeData() + image.startOffset + (x * sizeof(COLOR_TYPE)) + (y * image.stride)); \
+	}
+
+}
+
+#endif
+
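A hypothetical instantiation of IMAGE_DEFINITION, for illustration only; the real calls live in the per-type .cpp files (for example ImageU8.cpp) and may pass different arguments, and the ImageU8Impl name is assumed here rather than taken from this commit.

// Would emit writePixel, writePixel_unsafe, readPixel_clamp and readPixel_unsafe
// for a single-channel 8-bit image whose color and element type are both uint8_t.
IMAGE_DEFINITION(ImageU8Impl, 1, uint8_t, uint8_t)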

+ 45 - 0
Source/DFPSR/image/stbImage/stbImageWrapper.cpp

@@ -0,0 +1,45 @@
+
+#define STB_IMAGE_IMPLEMENTATION
+#include "stb_image.h"
+
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include "stb_image_write.h"
+
+#include "stbImageWrapper.h"
+
+using namespace dsr;
+
+OrderedImageRgbaU8 dsr::image_stb_load_RgbaU8(const String& filename, bool mustExist) {
+	int width, height, bpp;
+	uint8_t *data = stbi_load(filename.toStdString().c_str(), &width, &height, &bpp, 4);
+	if (data == 0) {
+		if (mustExist) {
+			// TODO: Throw an optional runtime exception
+			printText("The image ", filename, " could not be loaded!\n");
+		}
+		return OrderedImageRgbaU8(); // Return null
+	}
+	// Create a padded buffer
+	OrderedImageRgbaU8 result = image_create_RgbaU8(width, height);
+	// Copy the data
+	int rowSize = width * 4;
+	int32_t targetStride = image_getStride(result);
+	const uint8_t *sourceRow = data;
+	uint8_t* targetRow = image_dangerous_getData(result);
+	for (int32_t y = 0; y < height; y++) {
+		// Copy a row without touching the padding
+		memcpy(targetRow, sourceRow, rowSize);
+		// Add stride using single byte elements
+		targetRow += targetStride;
+		sourceRow += rowSize;
+	}
+	// Free the unpadded image
+	stbi_image_free(data); // stb_image's matching deallocator (plain free() by default)
+	return result;
+}
+
+bool dsr::image_stb_save(const ImageRgbaU8 &image, const String& filename) {
+	// Remove all padding before saving to avoid crashing
+	ImageRgbaU8 unpadded = ImageRgbaU8(image_removePadding(image));
+	return stbi_write_png(filename.toStdString().c_str(), image_getWidth(unpadded), image_getHeight(unpadded), 4, image_dangerous_getData(unpadded), image_getStride(unpadded)) != 0;
+}
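The same stride-aware row copy applies when the encoded image is already in memory; a hypothetical variant of the loader, sketched here for illustration only using stbi_load_from_memory and the DFPSR calls seen above, could look like this:

// Sketch only: decode an encoded image held in memory into a padded DFPSR image.
static dsr::OrderedImageRgbaU8 image_stb_decode_RgbaU8_sketch(const uint8_t *encodedData, int encodedSize) {
	int width, height, bpp;
	uint8_t *data = stbi_load_from_memory(encodedData, encodedSize, &width, &height, &bpp, 4);
	if (data == nullptr) {
		return dsr::OrderedImageRgbaU8(); // Null image on failure
	}
	dsr::OrderedImageRgbaU8 result = dsr::image_create_RgbaU8(width, height);
	int rowSize = width * 4;                             // stb rows are tightly packed
	int32_t targetStride = dsr::image_getStride(result); // DFPSR rows may be padded
	const uint8_t *sourceRow = data;
	uint8_t *targetRow = dsr::image_dangerous_getData(result);
	for (int32_t y = 0; y < height; y++) {
		memcpy(targetRow, sourceRow, rowSize); // Copy pixels without touching the padding
		targetRow += targetStride;
		sourceRow += rowSize;
	}
	stbi_image_free(data); // Release the temporary unpadded buffer
	return result;
}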

+ 15 - 0
Source/DFPSR/image/stbImage/stbImageWrapper.h

@@ -0,0 +1,15 @@
+
+#ifndef DFPSR_API_IMAGE_STB_WRAPPER
+#define DFPSR_API_IMAGE_STB_WRAPPER
+
+#include "../../api/imageAPI.h"
+#include "../../base/text.h"
+
+namespace dsr {
+
+OrderedImageRgbaU8 image_stb_load_RgbaU8(const String& filename, bool mustExist = true);
+bool image_stb_save(const ImageRgbaU8 &image, const String& filename);
+
+}
+
+#endif

+ 6712 - 0
Source/DFPSR/image/stbImage/stb_image.h

@@ -0,0 +1,6712 @@
+/* stb_image - v2.12 - public domain image loader - http://nothings.org/stb_image.h
+                                     no warranty implied; use at your own risk
+   Do this:
+      #define STB_IMAGE_IMPLEMENTATION
+   before you include this file in *one* C or C++ file to create the implementation.
+   // i.e. it should look like this:
+   #include ...
+   #include ...
+   #include ...
+   #define STB_IMAGE_IMPLEMENTATION
+   #include "stb_image.h"
+   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
+   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
+   QUICK NOTES:
+      Primarily of interest to game developers and other people who can
+          avoid problematic images and only need the trivial interface
+      JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
+      PNG 1/2/4/8-bit-per-channel (16 bpc not supported)
+      TGA (not sure what subset, if a subset)
+      BMP non-1bpp, non-RLE
+      PSD (composited view only, no extra channels, 8/16 bit-per-channel)
+      GIF (*comp always reports as 4-channel)
+      HDR (radiance rgbE format)
+      PIC (Softimage PIC)
+      PNM (PPM and PGM binary only)
+      Animated GIF still needs a proper API, but here's one way to do it:
+          http://gist.github.com/urraka/685d9a6340b26b830d49
+      - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
+      - decode from arbitrary I/O callbacks
+      - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
+   Full documentation under "DOCUMENTATION" below.
+   Revision 2.00 release notes:
+      - Progressive JPEG is now supported.
+      - PPM and PGM binary formats are now supported, thanks to Ken Miller.
+      - x86 platforms now make use of SSE2 SIMD instructions for
+        JPEG decoding, and ARM platforms can use NEON SIMD if requested.
+        This work was done by Fabian "ryg" Giesen. SSE2 is used by
+        default, but NEON must be enabled explicitly; see docs.
+        With other JPEG optimizations included in this version, we see
+        2x speedup on a JPEG on an x86 machine, and a 1.5x speedup
+        on a JPEG on an ARM machine, relative to previous versions of this
+        library. The same results will not obtain for all JPGs and for all
+        x86/ARM machines. (Note that progressive JPEGs are significantly
+        slower to decode than regular JPEGs.) This doesn't mean that this
+        is the fastest JPEG decoder in the land; rather, it brings it
+        closer to parity with standard libraries. If you want the fastest
+        decode, look elsewhere. (See "Philosophy" section of docs below.)
+        See final bullet items below for more info on SIMD.
+      - Added STBI_MALLOC, STBI_REALLOC, and STBI_FREE macros for replacing
+        the memory allocator. Unlike other STBI libraries, these macros don't
+        support a context parameter, so if you need to pass a context in to
+        the allocator, you'll have to store it in a global or a thread-local
+        variable.
+      - Split existing STBI_NO_HDR flag into two flags, STBI_NO_HDR and
+        STBI_NO_LINEAR.
+            STBI_NO_HDR:     suppress implementation of .hdr reader format
+            STBI_NO_LINEAR:  suppress high-dynamic-range light-linear float API
+      - You can suppress implementation of any of the decoders to reduce
+        your code footprint by #defining one or more of the following
+        symbols before creating the implementation.
+            STBI_NO_JPEG
+            STBI_NO_PNG
+            STBI_NO_BMP
+            STBI_NO_PSD
+            STBI_NO_TGA
+            STBI_NO_GIF
+            STBI_NO_HDR
+            STBI_NO_PIC
+            STBI_NO_PNM   (.ppm and .pgm)
+      - You can request *only* certain decoders and suppress all other ones
+        (this will be more forward-compatible, as addition of new decoders
+        doesn't require you to disable them explicitly):
+            STBI_ONLY_JPEG
+            STBI_ONLY_PNG
+            STBI_ONLY_BMP
+            STBI_ONLY_PSD
+            STBI_ONLY_TGA
+            STBI_ONLY_GIF
+            STBI_ONLY_HDR
+            STBI_ONLY_PIC
+            STBI_ONLY_PNM   (.ppm and .pgm)
+         Note that you can define multiples of these, and you will get all
+         of them ("only x" and "only y" are interpreted to mean "only x&y").
+       - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
+         want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
+      - Compilation of all SIMD code can be suppressed with
+            #define STBI_NO_SIMD
+        It should not be necessary to disable SIMD unless you have issues
+        compiling (e.g. using an x86 compiler which doesn't support SSE
+        intrinsics or that doesn't support the method used to detect
+        SSE2 support at run-time), and even those can be reported as
+        bugs so I can refine the built-in compile-time checking to be
+        smarter.
+      - The old STBI_SIMD system which allowed installing a user-defined
+        IDCT etc. has been removed. If you need this, don't upgrade. My
+        assumption is that almost nobody was doing this, and those who
+        were will find the built-in SIMD more satisfactory anyway.
+      - RGB values computed for JPEG images are slightly different from
+        previous versions of stb_image. (This is due to using less
+        integer precision in SIMD.) The C code has been adjusted so
+        that the same RGB values will be computed regardless of whether
+        SIMD support is available, so your app should always produce
+        consistent results. But these results are slightly different from
+        previous versions. (Specifically, about 3% of available YCbCr values
+        will compute different RGB results from pre-1.49 versions by +-1;
+        most of the deviating values are one smaller in the G channel.)
+      - If you must produce consistent results with previous versions of
+        stb_image, #define STBI_JPEG_OLD and you will get the same results
+        you used to; however, you will not get the SIMD speedups for
+        the YCbCr-to-RGB conversion step (although you should still see
+        significant JPEG speedup from the other changes).
+        Please note that STBI_JPEG_OLD is a temporary feature; it will be
+        removed in future versions of the library. It is only intended for
+        near-term back-compatibility use.
+   Latest revision history:
+      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+      2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
+                         RGB-format JPEG; remove white matting in PSD;
+                         allocate large structures on the stack; 
+                         correct channel count for PNG & BMP
+      2.10  (2016-01-22) avoid warning introduced in 2.09
+      2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
+      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
+      2.07  (2015-09-13) partial animated GIF support
+                         limited 16-bit PSD support
+                         minor bugs, code cleanup, and compiler warnings
+      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
+      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
+      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
+      2.03  (2015-04-12) additional corruption checking
+                         stbi_set_flip_vertically_on_load
+                         fix NEON support; fix mingw support
+      2.02  (2015-01-19) fix incorrect assert, fix warning
+      2.01  (2015-01-17) fix various warnings
+      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
+      2.00  (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD
+                         progressive JPEG
+                         PGM/PPM support
+                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
+                         STBI_NO_*, STBI_ONLY_*
+                         GIF bugfix
+   See end of file for full revision history.
+ ============================    Contributors    =========================
+ Image formats                          Extensions, features
+    Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
+    Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
+    Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
+    Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
+    Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
+    Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
+    Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
+    urraka@github (animated gif)           Junggon Kim (PNM comments)
+                                           Daniel Gibson (16-bit TGA)
+ Optimizations & bugfixes
+    Fabian "ryg" Giesen
+    Arseny Kapoulkine
+ Bug & warning fixes
+    Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
+    Christpher Lloyd        Martin Golini      Jerry Jansson      Joseph Thomson
+    Dave Moore              Roy Eltham         Hayaki Saito       Phil Jordan
+    Won Chun                Luke Graham        Johan Duparc       Nathan Reed
+    the Horde3D community   Thomas Ruf         Ronny Chevalier    Nick Verigakis
+    Janez Zemva             John Bartholomew   Michal Cichon      svdijk@github
+    Jonathan Blow           Ken Hamada         Tero Hanninen      Baldur Karlsson
+    Laurent Gomila          Cort Stratton      Sergio Gonzalez    romigrou@github
+    Aruelien Pocheville     Thibault Reuille   Cass Everitt       Matthew Gregan
+    Ryamond Barbiero        Paul Du Bois       Engin Manap        snagar@github
+    Michaelangel007@github  Oriol Ferrer Mesia socks-the-fox
+    Blazej Dariusz Roszkowski
+LICENSE
+This software is dual-licensed to the public domain and under the following
+license: you are granted a perpetual, irrevocable license to copy, modify,
+publish, and distribute this file as you see fit.
+*/
+
+#ifndef STBI_INCLUDE_STB_IMAGE_H
+#define STBI_INCLUDE_STB_IMAGE_H
+
+// DOCUMENTATION
+//
+// Limitations:
+//    - no 16-bit-per-channel PNG
+//    - no 12-bit-per-channel JPEG
+//    - no JPEGs with arithmetic coding
+//    - no 1-bit BMP
+//    - GIF always returns *comp=4
+//
+// Basic usage (see HDR discussion below for HDR usage):
+//    int x,y,n;
+//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
+//    // ... process data if not NULL ...
+//    // ... x = width, y = height, n = # 8-bit components per pixel ...
+//    // ... replace '0' with '1'..'4' to force that many components per pixel
+//    // ... but 'n' will always be the number that it would have been if you said 0
+//    stbi_image_free(data);
+//
+// Standard parameters:
+//    int *x       -- outputs image width in pixels
+//    int *y       -- outputs image height in pixels
+//    int *comp    -- outputs # of image components in image file
+//    int req_comp -- if non-zero, # of image components requested in result
+//
+// The return value from an image loader is an 'unsigned char *' which points
+// to the pixel data, or NULL on an allocation failure or if the image is
+// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
+// with each pixel consisting of N interleaved 8-bit components; the first
+// pixel pointed to is top-left-most in the image. There is no padding between
+// image scanlines or between pixels, regardless of format. The number of
+// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
+// If req_comp is non-zero, *comp has the number of components that _would_
+// have been output otherwise. E.g. if you set req_comp to 4, you will always
+// get RGBA output, but you can check *comp to see if it's trivially opaque
+// because e.g. there were only 3 channels in the source image.
+//
+// An output image with N components has the following components interleaved
+// in this order in each pixel:
+//
+//     N=#comp     components
+//       1           grey
+//       2           grey, alpha
+//       3           red, green, blue
+//       4           red, green, blue, alpha
+//
+// If image loading fails for any reason, the return value will be NULL,
+// and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
+// can be queried for an extremely brief, end-user unfriendly explanation
+// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
+// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
+// more user-friendly ones.
+//
+// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
+//
+// ===========================================================================
+//
+// Philosophy
+//
+// stb libraries are designed with the following priorities:
+//
+//    1. easy to use
+//    2. easy to maintain
+//    3. good performance
+//
+// Sometimes I let "good performance" creep up in priority over "easy to maintain",
+// and for best performance I may provide less-easy-to-use APIs that give higher
+// performance, in addition to the easy to use ones. Nevertheless, it's important
+// to keep in mind that from the standpoint of you, a client of this library,
+// all you care about is #1 and #3, and stb libraries do not emphasize #3 above all.
+//
+// Some secondary priorities arise directly from the first two, some of which
+// make more explicit reasons why performance can't be emphasized.
+//
+//    - Portable ("ease of use")
+//    - Small footprint ("easy to maintain")
+//    - No dependencies ("ease of use")
+//
+// ===========================================================================
+//
+// I/O callbacks
+//
+// I/O callbacks allow you to read from arbitrary sources, like packaged
+// files or some other source. Data read from callbacks are processed
+// through a small internal buffer (currently 128 bytes) to try to reduce
+// overhead.
+//
+// The three functions you must define are "read" (reads some bytes of data),
+// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
+//
+// ===========================================================================
+//
+// SIMD support
+//
+// The JPEG decoder will try to automatically use SIMD kernels on x86 when
+// supported by the compiler. For ARM Neon support, you must explicitly
+// request it.
+//
+// (The old do-it-yourself SIMD API is no longer supported in the current
+// code.)
+//
+// On x86, SSE2 will automatically be used when available based on a run-time
+// test; if not, the generic C versions are used as a fall-back. On ARM targets,
+// the typical path is to have separate builds for NEON and non-NEON devices
+// (at least this is true for iOS and Android). Therefore, the NEON support is
+// toggled by a build flag: define STBI_NEON to get NEON loops.
+//
+// The output of the JPEG decoder is slightly different from versions where
+// SIMD support was introduced (that is, for versions before 1.49). The
+// difference is only +-1 in the 8-bit RGB channels, and only on a small
+// fraction of pixels. You can force the pre-1.49 behavior by defining
+// STBI_JPEG_OLD, but this will disable some of the SIMD decoding path
+// and hence cost some performance.
+//
+// If for some reason you do not want to use any of SIMD code, or if
+// you have issues compiling it, you can disable it entirely by
+// defining STBI_NO_SIMD.
+//
+// ===========================================================================
+//
+// HDR image support   (disable by defining STBI_NO_HDR)
+//
+// stb_image now supports loading HDR images in general, and currently
+// the Radiance .HDR file format, although the support is provided
+// generically. You can still load any file through the existing interface;
+// if you attempt to load an HDR file, it will be automatically remapped to
+// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
+// both of these constants can be reconfigured through this interface:
+//
+//     stbi_hdr_to_ldr_gamma(2.2f);
+//     stbi_hdr_to_ldr_scale(1.0f);
+//
+// (note, do not use _inverse_ constants; stb_image will invert them
+// appropriately).
+//
+// Additionally, there is a new, parallel interface for loading files as
+// (linear) floats to preserve the full dynamic range:
+//
+//    float *data = stbi_loadf(filename, &x, &y, &n, 0);
+//
+// If you load LDR images through this interface, those images will
+// be promoted to floating point values, run through the inverse of
+// constants corresponding to the above:
+//
+//     stbi_ldr_to_hdr_scale(1.0f);
+//     stbi_ldr_to_hdr_gamma(2.2f);
+//
+// Finally, given a filename (or an open file or memory block--see header
+// file for details) containing image data, you can query for the "most
+// appropriate" interface to use (that is, whether the image is HDR or
+// not), using:
+//
+//     stbi_is_hdr(char *filename);
+//
+// ===========================================================================
+//
+// iPhone PNG support:
+//
+// By default we convert iphone-formatted PNGs back to RGB, even though
+// they are internally encoded differently. You can disable this conversion
+// by calling stbi_convert_iphone_png_to_rgb(0), in which case
+// you will always just get the native iphone "format" through (which
+// is BGR stored in RGB).
+//
+// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
+// pixel to remove any premultiplied alpha *only* if the image file explicitly
+// says there's premultiplied data (currently only happens in iPhone images,
+// and only if iPhone convert-to-rgb processing is on).
+//
+
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif // STBI_NO_STDIO
+
+#define STBI_VERSION 1
+
+enum
+{
+   STBI_default = 0, // only used for req_comp
+
+   STBI_grey       = 1,
+   STBI_grey_alpha = 2,
+   STBI_rgb        = 3,
+   STBI_rgb_alpha  = 4
+};
+
+typedef unsigned char stbi_uc;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef STB_IMAGE_STATIC
+#define STBIDEF static
+#else
+#define STBIDEF extern
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// PRIMARY API - works on images of any type
+//
+
+//
+// load image by filename, open file, or memory buffer
+//
+
+typedef struct
+{
+   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
+   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
+   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
+} stbi_io_callbacks;
+
+STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *comp, int req_comp);
+STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *comp, int req_comp);
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *comp, int req_comp);
+
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
+// for stbi_load_from_file, file pointer is left pointing immediately after image
+#endif
+
+#ifndef STBI_NO_LINEAR
+   STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *comp, int req_comp);
+   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
+   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
+
+   #ifndef STBI_NO_STDIO
+   STBIDEF float *stbi_loadf_from_file  (FILE *f,                int *x, int *y, int *comp, int req_comp);
+   #endif
+#endif
+
+#ifndef STBI_NO_HDR
+   STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
+   STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_LINEAR
+   STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
+   STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
+#endif // STBI_NO_LINEAR
+
+// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
+STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
+STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
+#ifndef STBI_NO_STDIO
+STBIDEF int      stbi_is_hdr          (char const *filename);
+STBIDEF int      stbi_is_hdr_from_file(FILE *f);
+#endif // STBI_NO_STDIO
+
+
+// get a VERY brief reason for failure
+// NOT THREADSAFE
+STBIDEF const char *stbi_failure_reason  (void);
+
+// free the loaded image -- this is just free()
+STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
+
+// get image dimensions & components without fully decoding
+STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
+STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
+
+#ifndef STBI_NO_STDIO
+STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
+STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
+
+#endif
+
+
+
+// for image formats that explicitly notate that they have premultiplied alpha,
+// we just return the colors as stored in the file. set this flag to force
+// unpremultiplication. results are undefined if the unpremultiplication overflows.
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
+
+// indicate whether we should process iphone images back to canonical format,
+// or just pass them through "as-is"
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
+
+// flip the image vertically, so the first pixel in the output array is the bottom left
+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
+
+// ZLIB client - used by PNG, available for other purposes
+
+STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
+STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
+STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
+STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
+
+STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
+STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+//
+//
+////   end header file   /////////////////////////////////////////////////////
+#endif // STBI_INCLUDE_STB_IMAGE_H
+
+#ifdef STB_IMAGE_IMPLEMENTATION
+
+#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
+  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
+  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
+  || defined(STBI_ONLY_ZLIB)
+   #ifndef STBI_ONLY_JPEG
+   #define STBI_NO_JPEG
+   #endif
+   #ifndef STBI_ONLY_PNG
+   #define STBI_NO_PNG
+   #endif
+   #ifndef STBI_ONLY_BMP
+   #define STBI_NO_BMP
+   #endif
+   #ifndef STBI_ONLY_PSD
+   #define STBI_NO_PSD
+   #endif
+   #ifndef STBI_ONLY_TGA
+   #define STBI_NO_TGA
+   #endif
+   #ifndef STBI_ONLY_GIF
+   #define STBI_NO_GIF
+   #endif
+   #ifndef STBI_ONLY_HDR
+   #define STBI_NO_HDR
+   #endif
+   #ifndef STBI_ONLY_PIC
+   #define STBI_NO_PIC
+   #endif
+   #ifndef STBI_ONLY_PNM
+   #define STBI_NO_PNM
+   #endif
+#endif
+
+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
+#define STBI_NO_ZLIB
+#endif
+
+
+#include <stdarg.h>
+#include <stddef.h> // ptrdiff_t on osx
+#include <stdlib.h>
+#include <string.h>
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#include <math.h>  // ldexp
+#endif
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifndef STBI_ASSERT
+#include <assert.h>
+#define STBI_ASSERT(x) assert(x)
+#endif
+
+
+#ifndef _MSC_VER
+   #ifdef __cplusplus
+   #define stbi_inline inline
+   #else
+   #define stbi_inline
+   #endif
+#else
+   #define stbi_inline __forceinline
+#endif
+
+
+#ifdef _MSC_VER
+typedef unsigned short stbi__uint16;
+typedef   signed short stbi__int16;
+typedef unsigned int   stbi__uint32;
+typedef   signed int   stbi__int32;
+#else
+#include <stdint.h>
+typedef uint16_t stbi__uint16;
+typedef int16_t  stbi__int16;
+typedef uint32_t stbi__uint32;
+typedef int32_t  stbi__int32;
+#endif
+
+// should produce compiler error if size is wrong
+typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
+
+#ifdef _MSC_VER
+#define STBI_NOTUSED(v)  (void)(v)
+#else
+#define STBI_NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#ifdef _MSC_VER
+#define STBI_HAS_LROTL
+#endif
+
+#ifdef STBI_HAS_LROTL
+   #define stbi_lrot(x,y)  _lrotl(x,y)
+#else
+   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
+#endif
+
+#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
+// ok
+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
+// ok
+#else
+#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
+#endif
+
+#ifndef STBI_MALLOC
+#define STBI_MALLOC(sz)           malloc(sz)
+#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
+#define STBI_FREE(p)              free(p)
+#endif
+
+#ifndef STBI_REALLOC_SIZED
+#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
+#endif
+
+// x86/x64 detection
+#if defined(__x86_64__) || defined(_M_X64)
+#define STBI__X64_TARGET
+#elif defined(__i386) || defined(_M_IX86)
+#define STBI__X86_TARGET
+#endif
+
+#if defined(__GNUC__) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
+// NOTE: not clear whether we actually need this for the 64-bit path
+// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
+// (but compiling with -msse2 allows the compiler to use SSE2 everywhere;
+// this is just broken and gcc are jerks for not fixing it properly
+// http://www.virtualdub.org/blog/pivot/entry.php?id=363 )
+#define STBI_NO_SIMD
+#endif
+
+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
+//
+// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
+// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
+// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
+// simultaneously enabling "-mstackrealign".
+//
+// See https://github.com/nothings/stb/issues/81 for more information.
+//
+// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
+// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
+#define STBI_NO_SIMD
+#endif
+
+#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
+#define STBI_SSE2
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1400  // not VC6
+#include <intrin.h> // __cpuid
+static int stbi__cpuid3(void)
+{
+   int info[4];
+   __cpuid(info,1);
+   return info[3];
+}
+#else
+static int stbi__cpuid3(void)
+{
+   int res;
+   __asm {
+      mov  eax,1
+      cpuid
+      mov  res,edx
+   }
+   return res;
+}
+#endif
+
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+
+static int stbi__sse2_available()
+{
+   int info3 = stbi__cpuid3();
+   return ((info3 >> 26) & 1) != 0;
+}
+#else // assume GCC-style if not VC++
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
+static int stbi__sse2_available()
+{
+#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later
+   // GCC 4.8+ has a nice way to do this
+   return __builtin_cpu_supports("sse2");
+#else
+   // portable way to do this, preferably without using GCC inline ASM?
+   // just bail for now.
+   return 0;
+#endif
+}
+#endif
+#endif
+
+// ARM NEON
+#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
+#undef STBI_NEON
+#endif
+
+#ifdef STBI_NEON
+#include <arm_neon.h>
+// assume GCC or Clang on ARM targets
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+#endif
+
+#ifndef STBI_SIMD_ALIGN
+#define STBI_SIMD_ALIGN(type, name) type name
+#endif
+
+///////////////////////////////////////////////
+//
+//  stbi__context struct and start_xxx functions
+
+// stbi__context structure is our basic context used by all images, so it
+// contains all the IO context, plus some basic image information
+typedef struct
+{
+   stbi__uint32 img_x, img_y;
+   int img_n, img_out_n;
+
+   stbi_io_callbacks io;
+   void *io_user_data;
+
+   int read_from_callbacks;
+   int buflen;
+   stbi_uc buffer_start[128];
+
+   stbi_uc *img_buffer, *img_buffer_end;
+   stbi_uc *img_buffer_original, *img_buffer_original_end;
+} stbi__context;
+
+
+static void stbi__refill_buffer(stbi__context *s);
+
+// initialize a memory-decode context
+static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
+{
+   s->io.read = NULL;
+   s->read_from_callbacks = 0;
+   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
+   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
+}
+
+// initialize a callback-based context
+static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
+{
+   s->io = *c;
+   s->io_user_data = user;
+   s->buflen = sizeof(s->buffer_start);
+   s->read_from_callbacks = 1;
+   s->img_buffer_original = s->buffer_start;
+   stbi__refill_buffer(s);
+   s->img_buffer_original_end = s->img_buffer_end;
+}
+
+#ifndef STBI_NO_STDIO
+
+static int stbi__stdio_read(void *user, char *data, int size)
+{
+   return (int) fread(data,1,size,(FILE*) user);
+}
+
+static void stbi__stdio_skip(void *user, int n)
+{
+   fseek((FILE*) user, n, SEEK_CUR);
+}
+
+static int stbi__stdio_eof(void *user)
+{
+   return feof((FILE*) user);
+}
+
+static stbi_io_callbacks stbi__stdio_callbacks =
+{
+   stbi__stdio_read,
+   stbi__stdio_skip,
+   stbi__stdio_eof,
+};
+
+static void stbi__start_file(stbi__context *s, FILE *f)
+{
+   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
+}
+
+//static void stop_file(stbi__context *s) { }
+
+#endif // !STBI_NO_STDIO
+
+static void stbi__rewind(stbi__context *s)
+{
+   // conceptually rewind SHOULD rewind to the beginning of the stream,
+   // but we just rewind to the beginning of the initial buffer, because
+   // we only use it after doing 'test', which only ever looks at at most 92 bytes
+   s->img_buffer = s->img_buffer_original;
+   s->img_buffer_end = s->img_buffer_original_end;
+}
+
+#ifndef STBI_NO_JPEG
+static int      stbi__jpeg_test(stbi__context *s);
+static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PNG
+static int      stbi__png_test(stbi__context *s);
+static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_BMP
+static int      stbi__bmp_test(stbi__context *s);
+static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_TGA
+static int      stbi__tga_test(stbi__context *s);
+static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PSD
+static int      stbi__psd_test(stbi__context *s);
+static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_HDR
+static int      stbi__hdr_test(stbi__context *s);
+static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PIC
+static int      stbi__pic_test(stbi__context *s);
+static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_GIF
+static int      stbi__gif_test(stbi__context *s);
+static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PNM
+static int      stbi__pnm_test(stbi__context *s);
+static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+// this is not threadsafe
+static const char *stbi__g_failure_reason;
+
+STBIDEF const char *stbi_failure_reason(void)
+{
+   return stbi__g_failure_reason;
+}
+
+static int stbi__err(const char *str)
+{
+   stbi__g_failure_reason = str;
+   return 0;
+}
+
+static void *stbi__malloc(size_t size)
+{
+    return STBI_MALLOC(size);
+}
+
+// stbi__err - error
+// stbi__errpf - error returning pointer to float
+// stbi__errpuc - error returning pointer to unsigned char
+
+#ifdef STBI_NO_FAILURE_STRINGS
+   #define stbi__err(x,y)  0
+#elif defined(STBI_FAILURE_USERMSG)
+   #define stbi__err(x,y)  stbi__err(y)
+#else
+   #define stbi__err(x,y)  stbi__err(x)
+#endif
+
+#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
+#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
+
+STBIDEF void stbi_image_free(void *retval_from_stbi_load)
+{
+   STBI_FREE(retval_from_stbi_load);
+}
+
+#ifndef STBI_NO_LINEAR
+static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
+#endif
+
+#ifndef STBI_NO_HDR
+static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
+#endif
+
+static int stbi__vertically_flip_on_load = 0;
+
+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
+{
+    stbi__vertically_flip_on_load = flag_true_if_should_flip;
+}
+
+static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   #ifndef STBI_NO_JPEG
+   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp);
+   #endif
+   #ifndef STBI_NO_PNG
+   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp);
+   #endif
+   #ifndef STBI_NO_BMP
+   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp);
+   #endif
+   #ifndef STBI_NO_GIF
+   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp);
+   #endif
+   #ifndef STBI_NO_PSD
+   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp);
+   #endif
+   #ifndef STBI_NO_PIC
+   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp);
+   #endif
+   #ifndef STBI_NO_PNM
+   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp);
+   #endif
+
+   #ifndef STBI_NO_HDR
+   if (stbi__hdr_test(s)) {
+      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp);
+      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
+   }
+   #endif
+
+   #ifndef STBI_NO_TGA
+   // test tga last because it's a crappy test!
+   if (stbi__tga_test(s))
+      return stbi__tga_load(s,x,y,comp,req_comp);
+   #endif
+
+   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
+}
+
+static unsigned char *stbi__load_flip(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   unsigned char *result = stbi__load_main(s, x, y, comp, req_comp);
+
+   if (stbi__vertically_flip_on_load && result != NULL) {
+      int w = *x, h = *y;
+      int depth = req_comp ? req_comp : *comp;
+      int row,col,z;
+      stbi_uc temp;
+
+      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
+      for (row = 0; row < (h>>1); row++) {
+         for (col = 0; col < w; col++) {
+            for (z = 0; z < depth; z++) {
+               temp = result[(row * w + col) * depth + z];
+               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
+               result[((h - row - 1) * w + col) * depth + z] = temp;
+            }
+         }
+      }
+   }
+
+   return result;
+}
+
+#ifndef STBI_NO_HDR
+static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
+{
+   if (stbi__vertically_flip_on_load && result != NULL) {
+      int w = *x, h = *y;
+      int depth = req_comp ? req_comp : *comp;
+      int row,col,z;
+      float temp;
+
+      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
+      for (row = 0; row < (h>>1); row++) {
+         for (col = 0; col < w; col++) {
+            for (z = 0; z < depth; z++) {
+               temp = result[(row * w + col) * depth + z];
+               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
+               result[((h - row - 1) * w + col) * depth + z] = temp;
+            }
+         }
+      }
+   }
+}
+#endif
+
+#ifndef STBI_NO_STDIO
+
+static FILE *stbi__fopen(char const *filename, char const *mode)
+{
+   FILE *f;
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+   if (0 != fopen_s(&f, filename, mode))
+      f=0;
+#else
+   f = fopen(filename, mode);
+#endif
+   return f;
+}
+
+
+STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
+{
+   FILE *f = stbi__fopen(filename, "rb");
+   unsigned char *result;
+   if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
+   result = stbi_load_from_file(f,x,y,comp,req_comp);
+   fclose(f);
+   return result;
+}
+
+STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
+{
+   unsigned char *result;
+   stbi__context s;
+   stbi__start_file(&s,f);
+   result = stbi__load_flip(&s,x,y,comp,req_comp);
+   if (result) {
+      // need to 'unget' all the characters in the IO buffer
+      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
+   }
+   return result;
+}
+#endif //!STBI_NO_STDIO
+
+STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__load_flip(&s,x,y,comp,req_comp);
+}
+
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
+   return stbi__load_flip(&s,x,y,comp,req_comp);
+}
+
+#ifndef STBI_NO_LINEAR
+static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   unsigned char *data;
+   #ifndef STBI_NO_HDR
+   if (stbi__hdr_test(s)) {
+      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp);
+      if (hdr_data)
+         stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
+      return hdr_data;
+   }
+   #endif
+   data = stbi__load_flip(s, x, y, comp, req_comp);
+   if (data)
+      return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
+   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
+}
+
+STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__loadf_main(&s,x,y,comp,req_comp);
+}
+
+STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
+   return stbi__loadf_main(&s,x,y,comp,req_comp);
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
+{
+   float *result;
+   FILE *f = stbi__fopen(filename, "rb");
+   if (!f) return stbi__errpf("can't fopen", "Unable to open file");
+   result = stbi_loadf_from_file(f,x,y,comp,req_comp);
+   fclose(f);
+   return result;
+}
+
+STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__context s;
+   stbi__start_file(&s,f);
+   return stbi__loadf_main(&s,x,y,comp,req_comp);
+}
+#endif // !STBI_NO_STDIO
+
+#endif // !STBI_NO_LINEAR
+
+// these is-hdr-or-not queries are defined independently of whether STBI_NO_LINEAR is
+// defined, for API simplicity; if STBI_NO_HDR is defined, they always
+// report false!
+
+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
+{
+   #ifndef STBI_NO_HDR
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__hdr_test(&s);
+   #else
+   STBI_NOTUSED(buffer);
+   STBI_NOTUSED(len);
+   return 0;
+   #endif
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF int      stbi_is_hdr          (char const *filename)
+{
+   FILE *f = stbi__fopen(filename, "rb");
+   int result=0;
+   if (f) {
+      result = stbi_is_hdr_from_file(f);
+      fclose(f);
+   }
+   return result;
+}
+
+STBIDEF int      stbi_is_hdr_from_file(FILE *f)
+{
+   #ifndef STBI_NO_HDR
+   stbi__context s;
+   stbi__start_file(&s,f);
+   return stbi__hdr_test(&s);
+   #else
+   STBI_NOTUSED(f);
+   return 0;
+   #endif
+}
+#endif // !STBI_NO_STDIO
+
+STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
+{
+   #ifndef STBI_NO_HDR
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
+   return stbi__hdr_test(&s);
+   #else
+   STBI_NOTUSED(clbk);
+   STBI_NOTUSED(user);
+   return 0;
+   #endif
+}
+
+#ifndef STBI_NO_LINEAR
+static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
+
+STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
+STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
+#endif
+
+static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
+
+STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
+STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Common code used by all image loaders
+//
+
+enum
+{
+   STBI__SCAN_load=0,
+   STBI__SCAN_type,
+   STBI__SCAN_header
+};
+
+static void stbi__refill_buffer(stbi__context *s)
+{
+   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
+   if (n == 0) {
+      // at end of file, treat same as if from memory, but need to handle case
+      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
+      s->read_from_callbacks = 0;
+      s->img_buffer = s->buffer_start;
+      s->img_buffer_end = s->buffer_start+1;
+      *s->img_buffer = 0;
+   } else {
+      s->img_buffer = s->buffer_start;
+      s->img_buffer_end = s->buffer_start + n;
+   }
+}
+
+stbi_inline static stbi_uc stbi__get8(stbi__context *s)
+{
+   if (s->img_buffer < s->img_buffer_end)
+      return *s->img_buffer++;
+   if (s->read_from_callbacks) {
+      stbi__refill_buffer(s);
+      return *s->img_buffer++;
+   }
+   return 0;
+}
+
+stbi_inline static int stbi__at_eof(stbi__context *s)
+{
+   if (s->io.read) {
+      if (!(s->io.eof)(s->io_user_data)) return 0;
+      // if feof() is true, check if buffer = end
+      // special case: we've only got the special 0 character at the end
+      if (s->read_from_callbacks == 0) return 1;
+   }
+
+   return s->img_buffer >= s->img_buffer_end;
+}
+
+static void stbi__skip(stbi__context *s, int n)
+{
+   if (n < 0) {
+      s->img_buffer = s->img_buffer_end;
+      return;
+   }
+   if (s->io.read) {
+      int blen = (int) (s->img_buffer_end - s->img_buffer);
+      if (blen < n) {
+         s->img_buffer = s->img_buffer_end;
+         (s->io.skip)(s->io_user_data, n - blen);
+         return;
+      }
+   }
+   s->img_buffer += n;
+}
+
+static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
+{
+   if (s->io.read) {
+      int blen = (int) (s->img_buffer_end - s->img_buffer);
+      if (blen < n) {
+         int res, count;
+
+         memcpy(buffer, s->img_buffer, blen);
+
+         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
+         res = (count == (n-blen));
+         s->img_buffer = s->img_buffer_end;
+         return res;
+      }
+   }
+
+   if (s->img_buffer+n <= s->img_buffer_end) {
+      memcpy(buffer, s->img_buffer, n);
+      s->img_buffer += n;
+      return 1;
+   } else
+      return 0;
+}
+
+static int stbi__get16be(stbi__context *s)
+{
+   int z = stbi__get8(s);
+   return (z << 8) + stbi__get8(s);
+}
+
+static stbi__uint32 stbi__get32be(stbi__context *s)
+{
+   stbi__uint32 z = stbi__get16be(s);
+   return (z << 16) + stbi__get16be(s);
+}
+
+#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
+// nothing
+#else
+static int stbi__get16le(stbi__context *s)
+{
+   int z = stbi__get8(s);
+   return z + (stbi__get8(s) << 8);
+}
+#endif
+
+#ifndef STBI_NO_BMP
+static stbi__uint32 stbi__get32le(stbi__context *s)
+{
+   stbi__uint32 z = stbi__get16le(s);
+   return z + (stbi__get16le(s) << 16);
+}
+#endif
+
+#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  generic converter from built-in img_n to req_comp
+//    individual types do this automatically as much as possible (e.g. jpeg
+//    does all cases internally since it needs to colorspace convert anyway,
+//    and it never has alpha, so very few cases ). png can automatically
+//    interleave an alpha=255 channel, but falls back to this for other cases
+//
+//  assume data buffer is malloced, so malloc a new one and free that one
+//  only failure mode is malloc failing
+
+static stbi_uc stbi__compute_y(int r, int g, int b)
+{
+   return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
+}
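+// (The weights sum to 256, so the >> 8 normalizes; 77/256, 150/256 and 29/256 approximate
+//  the Rec. 601 luma coefficients 0.299 R + 0.587 G + 0.114 B in 8-bit fixed point.)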
+
+static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
+{
+   int i,j;
+   unsigned char *good;
+
+   if (req_comp == img_n) return data;
+   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+   good = (unsigned char *) stbi__malloc(req_comp * x * y);
+   if (good == NULL) {
+      STBI_FREE(data);
+      return stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   for (j=0; j < (int) y; ++j) {
+      unsigned char *src  = data + j * x * img_n   ;
+      unsigned char *dest = good + j * x * req_comp;
+
+      #define COMBO(a,b)  ((a)*8+(b))
+      #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      // convert source image with img_n components to one with req_comp components;
+      // avoid switch per pixel, so use switch per scanline and massive macros
+      switch (COMBO(img_n, req_comp)) {
+         CASE(1,2) dest[0]=src[0], dest[1]=255; break;
+         CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
+         CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
+         CASE(2,1) dest[0]=src[0]; break;
+         CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
+         CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
+         CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
+         CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
+         CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
+         CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
+         CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
+         CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
+         default: STBI_ASSERT(0);
+      }
+      #undef CASE
+   }
+
+   STBI_FREE(data);
+   return good;
+}
+
+#ifndef STBI_NO_LINEAR
+static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
+{
+   int i,k,n;
+   float *output = (float *) stbi__malloc(x * y * comp * sizeof(float));
+   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
+   // compute number of non-alpha components
+   if (comp & 1) n = comp; else n = comp-1;
+   for (i=0; i < x*y; ++i) {
+      for (k=0; k < n; ++k) {
+         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
+      }
+      if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
+   }
+   STBI_FREE(data);
+   return output;
+}
+#endif
+
+#ifndef STBI_NO_HDR
+#define stbi__float2int(x)   ((int) (x))
+static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
+{
+   int i,k,n;
+   stbi_uc *output = (stbi_uc *) stbi__malloc(x * y * comp);
+   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
+   // compute number of non-alpha components
+   if (comp & 1) n = comp; else n = comp-1;
+   for (i=0; i < x*y; ++i) {
+      for (k=0; k < n; ++k) {
+         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
+         if (z < 0) z = 0;
+         if (z > 255) z = 255;
+         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
+      }
+      if (k < comp) {
+         float z = data[i*comp+k] * 255 + 0.5f;
+         if (z < 0) z = 0;
+         if (z > 255) z = 255;
+         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
+      }
+   }
+   STBI_FREE(data);
+   return output;
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  "baseline" JPEG/JFIF decoder
+//
+//    simple implementation
+//      - doesn't support delayed output of y-dimension
+//      - simple interface (only one output format: 8-bit interleaved RGB)
+//      - doesn't try to recover corrupt jpegs
+//      - doesn't allow partial loading, loading multiple at once
+//      - still fast on x86 (copying globals into locals doesn't help x86)
+//      - allocates lots of intermediate memory (full size of all components)
+//        - non-interleaved case requires this anyway
+//        - allows good upsampling (see next)
+//    high-quality
+//      - upsampled channels are bilinearly interpolated, even across blocks
+//      - quality integer IDCT derived from IJG's 'slow'
+//    performance
+//      - fast huffman; reasonable integer IDCT
+//      - some SIMD kernels for common paths on targets with SSE2/NEON
+//      - uses a lot of intermediate memory, could cache poorly
+
+#ifndef STBI_NO_JPEG
+
+// huffman decoding acceleration
+#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
+
+typedef struct
+{
+   stbi_uc  fast[1 << FAST_BITS];
+   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
+   stbi__uint16 code[256];
+   stbi_uc  values[256];
+   stbi_uc  size[257];
+   unsigned int maxcode[18];
+   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
+} stbi__huffman;
+
+typedef struct
+{
+   stbi__context *s;
+   stbi__huffman huff_dc[4];
+   stbi__huffman huff_ac[4];
+   stbi_uc dequant[4][64];
+   stbi__int16 fast_ac[4][1 << FAST_BITS];
+
+// sizes for components, interleaved MCUs
+   int img_h_max, img_v_max;
+   int img_mcu_x, img_mcu_y;
+   int img_mcu_w, img_mcu_h;
+
+// definition of jpeg image component
+   struct
+   {
+      int id;
+      int h,v;
+      int tq;
+      int hd,ha;
+      int dc_pred;
+
+      int x,y,w2,h2;
+      stbi_uc *data;
+      void *raw_data, *raw_coeff;
+      stbi_uc *linebuf;
+      short   *coeff;   // progressive only
+      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
+   } img_comp[4];
+
+   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
+   int            code_bits;   // number of valid bits
+   unsigned char  marker;      // marker seen while filling entropy buffer
+   int            nomore;      // flag if we saw a marker so must stop
+
+   int            progressive;
+   int            spec_start;
+   int            spec_end;
+   int            succ_high;
+   int            succ_low;
+   int            eob_run;
+   int            rgb;
+
+   int scan_n, order[4];
+   int restart_interval, todo;
+
+// kernels
+   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
+   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
+   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
+} stbi__jpeg;
+
+static int stbi__build_huffman(stbi__huffman *h, int *count)
+{
+   int i,j,k=0,code;
+   // build size list for each symbol (from JPEG spec)
+   for (i=0; i < 16; ++i)
+      for (j=0; j < count[i]; ++j)
+         h->size[k++] = (stbi_uc) (i+1);
+   h->size[k] = 0;
+
+   // compute actual symbols (from jpeg spec)
+   code = 0;
+   k = 0;
+   for(j=1; j <= 16; ++j) {
+      // compute delta to add to code to compute symbol id
+      h->delta[j] = k - code;
+      if (h->size[k] == j) {
+         while (h->size[k] == j)
+            h->code[k++] = (stbi__uint16) (code++);
+         if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
+      }
+      // compute largest code + 1 for this size, preshifted as needed later
+      h->maxcode[j] = code << (16-j);
+      code <<= 1;
+   }
+   h->maxcode[j] = 0xffffffff;
+
+   // build non-spec acceleration table; 255 is flag for not-accelerated
+   memset(h->fast, 255, 1 << FAST_BITS);
+   for (i=0; i < k; ++i) {
+      int s = h->size[i];
+      if (s <= FAST_BITS) {
+         int c = h->code[i] << (FAST_BITS-s);
+         int m = 1 << (FAST_BITS-s);
+         for (j=0; j < m; ++j) {
+            h->fast[c+j] = (stbi_uc) i;
+         }
+      }
+   }
+   return 1;
+}
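+
+// (stbi__jpeg_huff_decode below turns a length-k code directly into a symbol
+// index as code + delta[k], so no per-length first-code table is needed at decode time)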
+
+// build a table that decodes both magnitude and value of small ACs in
+// one go.
+static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
+{
+   int i;
+   for (i=0; i < (1 << FAST_BITS); ++i) {
+      stbi_uc fast = h->fast[i];
+      fast_ac[i] = 0;
+      if (fast < 255) {
+         int rs = h->values[fast];
+         int run = (rs >> 4) & 15;
+         int magbits = rs & 15;
+         int len = h->size[fast];
+
+         if (magbits && len + magbits <= FAST_BITS) {
+            // magnitude code followed by receive_extend code
+            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
+            int m = 1 << (magbits - 1);
+            if (k < m) k += (-1 << magbits) + 1;
+            // if the result is small enough, we can fit it in fast_ac table
+            if (k >= -128 && k <= 127)
+               fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
+         }
+      }
+   }
+}
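+
+// each nonzero fast_ac entry packs a complete run/value pair:
+//    bits 0..3  = total bits consumed (huffman length + magnitude bits)
+//    bits 4..7  = run of zero coefficients preceding the value
+//    bits 8..15 = the coefficient itself (-128..127, recovered with an arithmetic >> 8)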
+
+static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
+{
+   do {
+      int b = j->nomore ? 0 : stbi__get8(j->s);
+      if (b == 0xff) {
+         int c = stbi__get8(j->s);
+         if (c != 0) {
+            j->marker = (unsigned char) c;
+            j->nomore = 1;
+            return;
+         }
+      }
+      j->code_buffer |= b << (24 - j->code_bits);
+      j->code_bits += 8;
+   } while (j->code_bits <= 24);
+}
+
+// (1 << n) - 1
+static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
+
+// decode a jpeg huffman value from the bitstream
+stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
+{
+   unsigned int temp;
+   int c,k;
+
+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+
+   // look at the top FAST_BITS and determine what symbol ID it is,
+   // if the code is <= FAST_BITS
+   c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+   k = h->fast[c];
+   if (k < 255) {
+      int s = h->size[k];
+      if (s > j->code_bits)
+         return -1;
+      j->code_buffer <<= s;
+      j->code_bits -= s;
+      return h->values[k];
+   }
+
+   // naive test is to shift the code_buffer down so k bits are
+   // valid, then test against maxcode. To speed this up, we've
+   // preshifted maxcode left so that it has (16-k) 0s at the
+   // end; in other words, regardless of the number of bits, it
+   // wants to be compared against something shifted to have 16 bits;
+   // that way we don't need to shift inside the loop.
+   temp = j->code_buffer >> 16;
+   for (k=FAST_BITS+1 ; ; ++k)
+      if (temp < h->maxcode[k])
+         break;
+   if (k == 17) {
+      // error! code not found
+      j->code_bits -= 16;
+      return -1;
+   }
+
+   if (k > j->code_bits)
+      return -1;
+
+   // convert the huffman code to the symbol id
+   c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
+   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
+
+   // convert the id to a symbol
+   j->code_bits -= k;
+   j->code_buffer <<= k;
+   return h->values[c];
+}
+
+// bias[n] = (-1<<n) + 1
+static int const stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
+
+// combined JPEG 'receive' and JPEG 'extend', since baseline
+// always extends everything it receives.
+stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
+{
+   unsigned int k;
+   int sgn;
+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+
+   sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
+   k = stbi_lrot(j->code_buffer, n);
+   STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
+   j->code_buffer = k & ~stbi__bmask[n];
+   k &= stbi__bmask[n];
+   j->code_bits -= n;
+   return k + (stbi__jbias[n] & ~sgn);
+}
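+
+// e.g. for n=3, raw bit patterns 0..3 (leading bit 0) extend to -7..-4 because
+// stbi__jbias[3] == -7 is added, while 4..7 come back unchanged; this matches
+// the EXTEND procedure from the JPEG spec.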
+
+// get some unsigned bits
+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
+{
+   unsigned int k;
+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+   k = stbi_lrot(j->code_buffer, n);
+   j->code_buffer = k & ~stbi__bmask[n];
+   k &= stbi__bmask[n];
+   j->code_bits -= n;
+   return k;
+}
+
+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
+{
+   unsigned int k;
+   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
+   k = j->code_buffer;
+   j->code_buffer <<= 1;
+   --j->code_bits;
+   return k & 0x80000000;
+}
+
+// given a value that's at position X in the zigzag stream,
+// where does it appear in the 8x8 matrix coded as row-major?
+static stbi_uc stbi__jpeg_dezigzag[64+15] =
+{
+    0,  1,  8, 16,  9,  2,  3, 10,
+   17, 24, 32, 25, 18, 11,  4,  5,
+   12, 19, 26, 33, 40, 48, 41, 34,
+   27, 20, 13,  6,  7, 14, 21, 28,
+   35, 42, 49, 56, 57, 50, 43, 36,
+   29, 22, 15, 23, 30, 37, 44, 51,
+   58, 59, 52, 45, 38, 31, 39, 46,
+   53, 60, 61, 54, 47, 55, 62, 63,
+   // let corrupt input sample past end
+   63, 63, 63, 63, 63, 63, 63, 63,
+   63, 63, 63, 63, 63, 63, 63
+};
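+
+// (for example, zigzag position 2 lands at row-major index 8, i.e. row 1, column 0)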
+
+// decode one 64-entry block--
+static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
+{
+   int diff,dc,k;
+   int t;
+
+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+   t = stbi__jpeg_huff_decode(j, hdc);
+   if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+
+   // 0 all the ac values now so we can do it 32-bits at a time
+   memset(data,0,64*sizeof(data[0]));
+
+   diff = t ? stbi__extend_receive(j, t) : 0;
+   dc = j->img_comp[b].dc_pred + diff;
+   j->img_comp[b].dc_pred = dc;
+   data[0] = (short) (dc * dequant[0]);
+
+   // decode AC components, see JPEG spec
+   k = 1;
+   do {
+      unsigned int zig;
+      int c,r,s;
+      if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+      r = fac[c];
+      if (r) { // fast-AC path
+         k += (r >> 4) & 15; // run
+         s = r & 15; // combined length
+         j->code_buffer <<= s;
+         j->code_bits -= s;
+         // decode into unzigzag'd location
+         zig = stbi__jpeg_dezigzag[k++];
+         data[zig] = (short) ((r >> 8) * dequant[zig]);
+      } else {
+         int rs = stbi__jpeg_huff_decode(j, hac);
+         if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+         s = rs & 15;
+         r = rs >> 4;
+         if (s == 0) {
+            if (rs != 0xf0) break; // end block
+            k += 16;
+         } else {
+            k += r;
+            // decode into unzigzag'd location
+            zig = stbi__jpeg_dezigzag[k++];
+            data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
+         }
+      }
+   } while (k < 64);
+   return 1;
+}
+
+static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
+{
+   int diff,dc;
+   int t;
+   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+
+   if (j->succ_high == 0) {
+      // first scan for DC coefficient, must be first
+      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
+      t = stbi__jpeg_huff_decode(j, hdc);
+      diff = t ? stbi__extend_receive(j, t) : 0;
+
+      dc = j->img_comp[b].dc_pred + diff;
+      j->img_comp[b].dc_pred = dc;
+      data[0] = (short) (dc << j->succ_low);
+   } else {
+      // refinement scan for DC coefficient
+      if (stbi__jpeg_get_bit(j))
+         data[0] += (short) (1 << j->succ_low);
+   }
+   return 1;
+}
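+
+// succ_low is the successive-approximation shift: the first DC scan delivers the
+// high-order bits of each coefficient, and every refinement scan adds one more bit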
+
+// @OPTIMIZE: store non-zigzagged during the decode passes,
+// and only de-zigzag when dequantizing
+static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
+{
+   int k;
+   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+   if (j->succ_high == 0) {
+      int shift = j->succ_low;
+
+      if (j->eob_run) {
+         --j->eob_run;
+         return 1;
+      }
+
+      k = j->spec_start;
+      do {
+         unsigned int zig;
+         int c,r,s;
+         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+         r = fac[c];
+         if (r) { // fast-AC path
+            k += (r >> 4) & 15; // run
+            s = r & 15; // combined length
+            j->code_buffer <<= s;
+            j->code_bits -= s;
+            zig = stbi__jpeg_dezigzag[k++];
+            data[zig] = (short) ((r >> 8) << shift);
+         } else {
+            int rs = stbi__jpeg_huff_decode(j, hac);
+            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+            s = rs & 15;
+            r = rs >> 4;
+            if (s == 0) {
+               if (r < 15) {
+                  j->eob_run = (1 << r);
+                  if (r)
+                     j->eob_run += stbi__jpeg_get_bits(j, r);
+                  --j->eob_run;
+                  break;
+               }
+               k += 16;
+            } else {
+               k += r;
+               zig = stbi__jpeg_dezigzag[k++];
+               data[zig] = (short) (stbi__extend_receive(j,s) << shift);
+            }
+         }
+      } while (k <= j->spec_end);
+   } else {
+      // refinement scan for these AC coefficients
+
+      short bit = (short) (1 << j->succ_low);
+
+      if (j->eob_run) {
+         --j->eob_run;
+         for (k = j->spec_start; k <= j->spec_end; ++k) {
+            short *p = &data[stbi__jpeg_dezigzag[k]];
+            if (*p != 0)
+               if (stbi__jpeg_get_bit(j))
+                  if ((*p & bit)==0) {
+                     if (*p > 0)
+                        *p += bit;
+                     else
+                        *p -= bit;
+                  }
+         }
+      } else {
+         k = j->spec_start;
+         do {
+            int r,s;
+            int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
+            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+            s = rs & 15;
+            r = rs >> 4;
+            if (s == 0) {
+               if (r < 15) {
+                  j->eob_run = (1 << r) - 1;
+                  if (r)
+                     j->eob_run += stbi__jpeg_get_bits(j, r);
+                  r = 64; // force end of block
+               } else {
+                  // r=15 s=0 should write 16 0s, so we just do
+                  // a run of 15 0s and then write s (which is 0),
+                  // so we don't have to do anything special here
+               }
+            } else {
+               if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
+               // sign bit
+               if (stbi__jpeg_get_bit(j))
+                  s = bit;
+               else
+                  s = -bit;
+            }
+
+            // advance by r
+            while (k <= j->spec_end) {
+               short *p = &data[stbi__jpeg_dezigzag[k++]];
+               if (*p != 0) {
+                  if (stbi__jpeg_get_bit(j))
+                     if ((*p & bit)==0) {
+                        if (*p > 0)
+                           *p += bit;
+                        else
+                           *p -= bit;
+                     }
+               } else {
+                  if (r == 0) {
+                     *p = (short) s;
+                     break;
+                  }
+                  --r;
+               }
+            }
+         } while (k <= j->spec_end);
+      }
+   }
+   return 1;
+}
+
+// clamp a value into 0..255; inputs are nominally in range but can over/undershoot
+stbi_inline static stbi_uc stbi__clamp(int x)
+{
+   // trick to use a single test to catch both cases
+   if ((unsigned int) x > 255) {
+      if (x < 0) return 0;
+      if (x > 255) return 255;
+   }
+   return (stbi_uc) x;
+}
+
+#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
+#define stbi__fsh(x)  ((x) << 12)
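+// both place values on the same 12-bit fixed-point scale,
+// e.g. stbi__f2f(0.5411961f) == 2217 and stbi__fsh(1) == 4096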
+
+// derived from jidctint -- DCT_ISLOW
+#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
+   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
+   p2 = s2;                                    \
+   p3 = s6;                                    \
+   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
+   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
+   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
+   p2 = s0;                                    \
+   p3 = s4;                                    \
+   t0 = stbi__fsh(p2+p3);                      \
+   t1 = stbi__fsh(p2-p3);                      \
+   x0 = t0+t3;                                 \
+   x3 = t0-t3;                                 \
+   x1 = t1+t2;                                 \
+   x2 = t1-t2;                                 \
+   t0 = s7;                                    \
+   t1 = s5;                                    \
+   t2 = s3;                                    \
+   t3 = s1;                                    \
+   p3 = t0+t2;                                 \
+   p4 = t1+t3;                                 \
+   p1 = t0+t3;                                 \
+   p2 = t1+t2;                                 \
+   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
+   t0 = t0*stbi__f2f( 0.298631336f);           \
+   t1 = t1*stbi__f2f( 2.053119869f);           \
+   t2 = t2*stbi__f2f( 3.072711026f);           \
+   t3 = t3*stbi__f2f( 1.501321110f);           \
+   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
+   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
+   p3 = p3*stbi__f2f(-1.961570560f);           \
+   p4 = p4*stbi__f2f(-0.390180644f);           \
+   t3 += p1+p4;                                \
+   t2 += p2+p3;                                \
+   t1 += p2+p4;                                \
+   t0 += p1+p3;
+
+static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
+{
+   int i,val[64],*v=val;
+   stbi_uc *o;
+   short *d = data;
+
+   // columns
+   for (i=0; i < 8; ++i,++d, ++v) {
+      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
+      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
+           && d[40]==0 && d[48]==0 && d[56]==0) {
+         //    no shortcut                 0     seconds
+         //    (1|2|3|4|5|6|7)==0          0     seconds
+         //    all separate               -0.047 seconds
+         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
+         int dcterm = d[0] << 2;
+         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
+      } else {
+         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
+         // constants scaled things up by 1<<12; let's bring them back
+         // down, but keep 2 extra bits of precision
+         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
+         v[ 0] = (x0+t3) >> 10;
+         v[56] = (x0-t3) >> 10;
+         v[ 8] = (x1+t2) >> 10;
+         v[48] = (x1-t2) >> 10;
+         v[16] = (x2+t1) >> 10;
+         v[40] = (x2-t1) >> 10;
+         v[24] = (x3+t0) >> 10;
+         v[32] = (x3-t0) >> 10;
+      }
+   }
+
+   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
+      // no fast case since the first 1D IDCT spread components out
+      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
+      // constants scaled things up by 1<<12, plus we had 1<<2 from first
+      // loop, plus horizontal and vertical each scale by sqrt(8) so together
+      // we've got an extra 1<<3, so 1<<17 total we need to remove.
+      // so we want to round that, which means adding 0.5 * 1<<17,
+      // aka 65536. Also, we'll end up with -128 to 127 that we want
+      // to encode as 0..255 by adding 128, so we'll add that before the shift
+      x0 += 65536 + (128<<17);
+      x1 += 65536 + (128<<17);
+      x2 += 65536 + (128<<17);
+      x3 += 65536 + (128<<17);
+      // tried computing the shifts into temps, or'ing the temps to see
+      // if any were out of range, but that was slower
+      o[0] = stbi__clamp((x0+t3) >> 17);
+      o[7] = stbi__clamp((x0-t3) >> 17);
+      o[1] = stbi__clamp((x1+t2) >> 17);
+      o[6] = stbi__clamp((x1-t2) >> 17);
+      o[2] = stbi__clamp((x2+t1) >> 17);
+      o[5] = stbi__clamp((x2-t1) >> 17);
+      o[3] = stbi__clamp((x3+t0) >> 17);
+      o[4] = stbi__clamp((x3-t0) >> 17);
+   }
+}
+
+#ifdef STBI_SSE2
+// sse2 integer IDCT. not the fastest possible implementation but it
+// produces bit-identical results to the generic C version so it's
+// fully "transparent".
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
+{
+   // This is constructed to match our regular (generic) integer IDCT exactly.
+   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
+   __m128i tmp;
+
+   // dot product constant: even elems=x, odd elems=y
+   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
+
+   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
+   // out(1) = c1[even]*x + c1[odd]*y
+   #define dct_rot(out0,out1, x,y,c0,c1) \
+      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
+      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
+      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
+      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
+      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
+      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
+
+   // out = in << 12  (in 16-bit, out 32-bit)
+   #define dct_widen(out, in) \
+      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
+      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
+
+   // wide add
+   #define dct_wadd(out, a, b) \
+      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
+      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
+
+   // wide sub
+   #define dct_wsub(out, a, b) \
+      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
+      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
+
+   // butterfly a/b, add bias, then shift by "s" and pack
+   #define dct_bfly32o(out0, out1, a,b,bias,s) \
+      { \
+         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
+         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
+         dct_wadd(sum, abiased, b); \
+         dct_wsub(dif, abiased, b); \
+         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
+         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
+      }
+
+   // 8-bit interleave step (for transposes)
+   #define dct_interleave8(a, b) \
+      tmp = a; \
+      a = _mm_unpacklo_epi8(a, b); \
+      b = _mm_unpackhi_epi8(tmp, b)
+
+   // 16-bit interleave step (for transposes)
+   #define dct_interleave16(a, b) \
+      tmp = a; \
+      a = _mm_unpacklo_epi16(a, b); \
+      b = _mm_unpackhi_epi16(tmp, b)
+
+   #define dct_pass(bias,shift) \
+      { \
+         /* even part */ \
+         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
+         __m128i sum04 = _mm_add_epi16(row0, row4); \
+         __m128i dif04 = _mm_sub_epi16(row0, row4); \
+         dct_widen(t0e, sum04); \
+         dct_widen(t1e, dif04); \
+         dct_wadd(x0, t0e, t3e); \
+         dct_wsub(x3, t0e, t3e); \
+         dct_wadd(x1, t1e, t2e); \
+         dct_wsub(x2, t1e, t2e); \
+         /* odd part */ \
+         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
+         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
+         __m128i sum17 = _mm_add_epi16(row1, row7); \
+         __m128i sum35 = _mm_add_epi16(row3, row5); \
+         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
+         dct_wadd(x4, y0o, y4o); \
+         dct_wadd(x5, y1o, y5o); \
+         dct_wadd(x6, y2o, y5o); \
+         dct_wadd(x7, y3o, y4o); \
+         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
+         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
+         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
+         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
+      }
+
+   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
+   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
+   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
+   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
+   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
+   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
+   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
+   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
+
+   // rounding biases in column/row passes, see stbi__idct_block for explanation.
+   __m128i bias_0 = _mm_set1_epi32(512);
+   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
+
+   // load
+   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
+   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
+   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
+   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
+   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
+   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
+   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
+   row7 = _mm_load_si128((const __m128i *) (data + 7*8));
+
+   // column pass
+   dct_pass(bias_0, 10);
+
+   {
+      // 16bit 8x8 transpose pass 1
+      dct_interleave16(row0, row4);
+      dct_interleave16(row1, row5);
+      dct_interleave16(row2, row6);
+      dct_interleave16(row3, row7);
+
+      // transpose pass 2
+      dct_interleave16(row0, row2);
+      dct_interleave16(row1, row3);
+      dct_interleave16(row4, row6);
+      dct_interleave16(row5, row7);
+
+      // transpose pass 3
+      dct_interleave16(row0, row1);
+      dct_interleave16(row2, row3);
+      dct_interleave16(row4, row5);
+      dct_interleave16(row6, row7);
+   }
+
+   // row pass
+   dct_pass(bias_1, 17);
+
+   {
+      // pack
+      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
+      __m128i p1 = _mm_packus_epi16(row2, row3);
+      __m128i p2 = _mm_packus_epi16(row4, row5);
+      __m128i p3 = _mm_packus_epi16(row6, row7);
+
+      // 8bit 8x8 transpose pass 1
+      dct_interleave8(p0, p2); // a0e0a1e1...
+      dct_interleave8(p1, p3); // c0g0c1g1...
+
+      // transpose pass 2
+      dct_interleave8(p0, p1); // a0c0e0g0...
+      dct_interleave8(p2, p3); // b0d0f0h0...
+
+      // transpose pass 3
+      dct_interleave8(p0, p2); // a0b0c0d0...
+      dct_interleave8(p1, p3); // a4b4c4d4...
+
+      // store
+      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
+   }
+
+#undef dct_const
+#undef dct_rot
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_interleave8
+#undef dct_interleave16
+#undef dct_pass
+}
+
+#endif // STBI_SSE2
+
+#ifdef STBI_NEON
+
+// NEON integer IDCT. should produce bit-identical
+// results to the generic C version.
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
+{
+   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
+
+   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
+   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
+   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
+   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
+   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
+   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
+   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
+   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
+   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
+   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
+   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
+   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
+
+#define dct_long_mul(out, inq, coeff) \
+   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
+   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
+
+#define dct_long_mac(out, acc, inq, coeff) \
+   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
+   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
+
+#define dct_widen(out, inq) \
+   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
+   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
+
+// wide add
+#define dct_wadd(out, a, b) \
+   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
+   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
+
+// wide sub
+#define dct_wsub(out, a, b) \
+   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
+   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
+
+// butterfly a/b, then shift using "shiftop" by "s" and pack
+#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
+   { \
+      dct_wadd(sum, a, b); \
+      dct_wsub(dif, a, b); \
+      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
+      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
+   }
+
+#define dct_pass(shiftop, shift) \
+   { \
+      /* even part */ \
+      int16x8_t sum26 = vaddq_s16(row2, row6); \
+      dct_long_mul(p1e, sum26, rot0_0); \
+      dct_long_mac(t2e, p1e, row6, rot0_1); \
+      dct_long_mac(t3e, p1e, row2, rot0_2); \
+      int16x8_t sum04 = vaddq_s16(row0, row4); \
+      int16x8_t dif04 = vsubq_s16(row0, row4); \
+      dct_widen(t0e, sum04); \
+      dct_widen(t1e, dif04); \
+      dct_wadd(x0, t0e, t3e); \
+      dct_wsub(x3, t0e, t3e); \
+      dct_wadd(x1, t1e, t2e); \
+      dct_wsub(x2, t1e, t2e); \
+      /* odd part */ \
+      int16x8_t sum15 = vaddq_s16(row1, row5); \
+      int16x8_t sum17 = vaddq_s16(row1, row7); \
+      int16x8_t sum35 = vaddq_s16(row3, row5); \
+      int16x8_t sum37 = vaddq_s16(row3, row7); \
+      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
+      dct_long_mul(p5o, sumodd, rot1_0); \
+      dct_long_mac(p1o, p5o, sum17, rot1_1); \
+      dct_long_mac(p2o, p5o, sum35, rot1_2); \
+      dct_long_mul(p3o, sum37, rot2_0); \
+      dct_long_mul(p4o, sum15, rot2_1); \
+      dct_wadd(sump13o, p1o, p3o); \
+      dct_wadd(sump24o, p2o, p4o); \
+      dct_wadd(sump23o, p2o, p3o); \
+      dct_wadd(sump14o, p1o, p4o); \
+      dct_long_mac(x4, sump13o, row7, rot3_0); \
+      dct_long_mac(x5, sump24o, row5, rot3_1); \
+      dct_long_mac(x6, sump23o, row3, rot3_2); \
+      dct_long_mac(x7, sump14o, row1, rot3_3); \
+      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
+      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
+      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
+      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
+   }
+
+   // load
+   row0 = vld1q_s16(data + 0*8);
+   row1 = vld1q_s16(data + 1*8);
+   row2 = vld1q_s16(data + 2*8);
+   row3 = vld1q_s16(data + 3*8);
+   row4 = vld1q_s16(data + 4*8);
+   row5 = vld1q_s16(data + 5*8);
+   row6 = vld1q_s16(data + 6*8);
+   row7 = vld1q_s16(data + 7*8);
+
+   // add DC bias
+   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
+
+   // column pass
+   dct_pass(vrshrn_n_s32, 10);
+
+   // 16bit 8x8 transpose
+   {
+// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
+// whether compilers actually get this is another story, sadly.
+#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
+#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
+
+      // pass 1
+      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
+      dct_trn16(row2, row3);
+      dct_trn16(row4, row5);
+      dct_trn16(row6, row7);
+
+      // pass 2
+      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
+      dct_trn32(row1, row3);
+      dct_trn32(row4, row6);
+      dct_trn32(row5, row7);
+
+      // pass 3
+      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
+      dct_trn64(row1, row5);
+      dct_trn64(row2, row6);
+      dct_trn64(row3, row7);
+
+#undef dct_trn16
+#undef dct_trn32
+#undef dct_trn64
+   }
+
+   // row pass
+   // vrshrn_n_s32 only supports shifts up to 16, we need
+   // 17. so do a non-rounding shift of 16 first then follow
+   // up with a rounding shift by 1.
+   dct_pass(vshrn_n_s32, 16);
+
+   {
+      // pack and round
+      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
+      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
+      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
+      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
+      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
+      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
+      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
+      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
+
+      // again, these can translate into one instruction, but often don't.
+#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
+#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
+
+      // sadly can't use interleaved stores here since we only write
+      // 8 bytes to each scan line!
+
+      // 8x8 8-bit transpose pass 1
+      dct_trn8_8(p0, p1);
+      dct_trn8_8(p2, p3);
+      dct_trn8_8(p4, p5);
+      dct_trn8_8(p6, p7);
+
+      // pass 2
+      dct_trn8_16(p0, p2);
+      dct_trn8_16(p1, p3);
+      dct_trn8_16(p4, p6);
+      dct_trn8_16(p5, p7);
+
+      // pass 3
+      dct_trn8_32(p0, p4);
+      dct_trn8_32(p1, p5);
+      dct_trn8_32(p2, p6);
+      dct_trn8_32(p3, p7);
+
+      // store
+      vst1_u8(out, p0); out += out_stride;
+      vst1_u8(out, p1); out += out_stride;
+      vst1_u8(out, p2); out += out_stride;
+      vst1_u8(out, p3); out += out_stride;
+      vst1_u8(out, p4); out += out_stride;
+      vst1_u8(out, p5); out += out_stride;
+      vst1_u8(out, p6); out += out_stride;
+      vst1_u8(out, p7);
+
+#undef dct_trn8_8
+#undef dct_trn8_16
+#undef dct_trn8_32
+   }
+
+#undef dct_long_mul
+#undef dct_long_mac
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_pass
+}
+
+#endif // STBI_NEON
+
+#define STBI__MARKER_none  0xff
+// if there's a pending marker from the entropy stream, return that
+// otherwise, fetch from the stream and get a marker. if there's no
+// marker, return 0xff, which is never a valid marker value
+static stbi_uc stbi__get_marker(stbi__jpeg *j)
+{
+   stbi_uc x;
+   if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
+   x = stbi__get8(j->s);
+   if (x != 0xff) return STBI__MARKER_none;
+   while (x == 0xff)
+      x = stbi__get8(j->s);
+   return x;
+}
+
+// in each scan, we'll have scan_n components, and the order
+// of the components is specified by order[]
+#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
+
+// after a restart interval, stbi__jpeg_reset the entropy decoder and
+// the dc prediction
+static void stbi__jpeg_reset(stbi__jpeg *j)
+{
+   j->code_bits = 0;
+   j->code_buffer = 0;
+   j->nomore = 0;
+   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
+   j->marker = STBI__MARKER_none;
+   j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
+   j->eob_run = 0;
+   // no more than 1<<31 MCUs if no restart_interval? that's plenty safe,
+   // since we don't even allow 1<<30 pixels
+}
+
+static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
+{
+   stbi__jpeg_reset(z);
+   if (!z->progressive) {
+      if (z->scan_n == 1) {
+         int i,j;
+         STBI_SIMD_ALIGN(short, data[64]);
+         int n = z->order[0];
+         // non-interleaved data, we just need to process one block at a time,
+         // in trivial scanline order
+         // number of blocks to do just depends on how many actual "pixels" this
+         // component has, independent of interleaved MCU blocking and such
+         int w = (z->img_comp[n].x+7) >> 3;
+         int h = (z->img_comp[n].y+7) >> 3;
+         for (j=0; j < h; ++j) {
+            for (i=0; i < w; ++i) {
+               int ha = z->img_comp[n].ha;
+               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
+               // every data block is an MCU, so count down the restart interval
+               if (--z->todo <= 0) {
+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+                  // if it's NOT a restart, then just bail, so we get corrupt data
+                  // rather than no data
+                  if (!STBI__RESTART(z->marker)) return 1;
+                  stbi__jpeg_reset(z);
+               }
+            }
+         }
+         return 1;
+      } else { // interleaved
+         int i,j,k,x,y;
+         STBI_SIMD_ALIGN(short, data[64]);
+         for (j=0; j < z->img_mcu_y; ++j) {
+            for (i=0; i < z->img_mcu_x; ++i) {
+               // scan an interleaved mcu... process scan_n components in order
+               for (k=0; k < z->scan_n; ++k) {
+                  int n = z->order[k];
+                  // scan out an mcu's worth of this component; that's just determined
+                  // by the basic H and V specified for the component
+                  for (y=0; y < z->img_comp[n].v; ++y) {
+                     for (x=0; x < z->img_comp[n].h; ++x) {
+                        int x2 = (i*z->img_comp[n].h + x)*8;
+                        int y2 = (j*z->img_comp[n].v + y)*8;
+                        int ha = z->img_comp[n].ha;
+                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
+                     }
+                  }
+               }
+               // after all interleaved components, that's an interleaved MCU,
+               // so now count down the restart interval
+               if (--z->todo <= 0) {
+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+                  if (!STBI__RESTART(z->marker)) return 1;
+                  stbi__jpeg_reset(z);
+               }
+            }
+         }
+         return 1;
+      }
+   } else {
+      if (z->scan_n == 1) {
+         int i,j;
+         int n = z->order[0];
+         // non-interleaved data, we just need to process one block at a time,
+         // in trivial scanline order
+         // number of blocks to do just depends on how many actual "pixels" this
+         // component has, independent of interleaved MCU blocking and such
+         int w = (z->img_comp[n].x+7) >> 3;
+         int h = (z->img_comp[n].y+7) >> 3;
+         for (j=0; j < h; ++j) {
+            for (i=0; i < w; ++i) {
+               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
+               if (z->spec_start == 0) {
+                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+                     return 0;
+               } else {
+                  int ha = z->img_comp[n].ha;
+                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
+                     return 0;
+               }
+               // every data block is an MCU, so count down the restart interval
+               if (--z->todo <= 0) {
+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+                  if (!STBI__RESTART(z->marker)) return 1;
+                  stbi__jpeg_reset(z);
+               }
+            }
+         }
+         return 1;
+      } else { // interleaved
+         int i,j,k,x,y;
+         for (j=0; j < z->img_mcu_y; ++j) {
+            for (i=0; i < z->img_mcu_x; ++i) {
+               // scan an interleaved mcu... process scan_n components in order
+               for (k=0; k < z->scan_n; ++k) {
+                  int n = z->order[k];
+                  // scan out an mcu's worth of this component; that's just determined
+                  // by the basic H and V specified for the component
+                  for (y=0; y < z->img_comp[n].v; ++y) {
+                     for (x=0; x < z->img_comp[n].h; ++x) {
+                        int x2 = (i*z->img_comp[n].h + x);
+                        int y2 = (j*z->img_comp[n].v + y);
+                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
+                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+                           return 0;
+                     }
+                  }
+               }
+               // after all interleaved components, that's an interleaved MCU,
+               // so now count down the restart interval
+               if (--z->todo <= 0) {
+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+                  if (!STBI__RESTART(z->marker)) return 1;
+                  stbi__jpeg_reset(z);
+               }
+            }
+         }
+         return 1;
+      }
+   }
+}
+
+static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant)
+{
+   int i;
+   for (i=0; i < 64; ++i)
+      data[i] *= dequant[i];
+}
+
+static void stbi__jpeg_finish(stbi__jpeg *z)
+{
+   if (z->progressive) {
+      // dequantize and idct the data
+      int i,j,n;
+      for (n=0; n < z->s->img_n; ++n) {
+         int w = (z->img_comp[n].x+7) >> 3;
+         int h = (z->img_comp[n].y+7) >> 3;
+         for (j=0; j < h; ++j) {
+            for (i=0; i < w; ++i) {
+               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
+               stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
+               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
+            }
+         }
+      }
+   }
+}
+
+static int stbi__process_marker(stbi__jpeg *z, int m)
+{
+   int L;
+   switch (m) {
+      case STBI__MARKER_none: // no marker found
+         return stbi__err("expected marker","Corrupt JPEG");
+
+      case 0xDD: // DRI - specify restart interval
+         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
+         z->restart_interval = stbi__get16be(z->s);
+         return 1;
+
+      case 0xDB: // DQT - define quantization table
+         L = stbi__get16be(z->s)-2;
+         while (L > 0) {
+            int q = stbi__get8(z->s);
+            int p = q >> 4;
+            int t = q & 15,i;
+            if (p != 0) return stbi__err("bad DQT type","Corrupt JPEG");
+            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
+            for (i=0; i < 64; ++i)
+               z->dequant[t][stbi__jpeg_dezigzag[i]] = stbi__get8(z->s);
+            L -= 65;
+         }
+         return L==0;
+
+      case 0xC4: // DHT - define huffman table
+         L = stbi__get16be(z->s)-2;
+         while (L > 0) {
+            stbi_uc *v;
+            int sizes[16],i,n=0;
+            int q = stbi__get8(z->s);
+            int tc = q >> 4;
+            int th = q & 15;
+            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
+            for (i=0; i < 16; ++i) {
+               sizes[i] = stbi__get8(z->s);
+               n += sizes[i];
+            }
+            L -= 17;
+            if (tc == 0) {
+               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
+               v = z->huff_dc[th].values;
+            } else {
+               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
+               v = z->huff_ac[th].values;
+            }
+            for (i=0; i < n; ++i)
+               v[i] = stbi__get8(z->s);
+            if (tc != 0)
+               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
+            L -= n;
+         }
+         return L==0;
+   }
+   // check for comment block or APP blocks
+   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
+      stbi__skip(z->s, stbi__get16be(z->s)-2);
+      return 1;
+   }
+   return 0;
+}
+
+// after we see SOS
+static int stbi__process_scan_header(stbi__jpeg *z)
+{
+   int i;
+   int Ls = stbi__get16be(z->s);
+   z->scan_n = stbi__get8(z->s);
+   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
+   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
+   for (i=0; i < z->scan_n; ++i) {
+      int id = stbi__get8(z->s), which;
+      int q = stbi__get8(z->s);
+      for (which = 0; which < z->s->img_n; ++which)
+         if (z->img_comp[which].id == id)
+            break;
+      if (which == z->s->img_n) return 0; // no match
+      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
+      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
+      z->order[i] = which;
+   }
+
+   {
+      int aa;
+      z->spec_start = stbi__get8(z->s);
+      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
+      aa = stbi__get8(z->s);
+      z->succ_high = (aa >> 4);
+      z->succ_low  = (aa & 15);
+      if (z->progressive) {
+         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
+            return stbi__err("bad SOS", "Corrupt JPEG");
+      } else {
+         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
+         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
+         z->spec_end = 63;
+      }
+   }
+
+   return 1;
+}
+
+static int stbi__process_frame_header(stbi__jpeg *z, int scan)
+{
+   stbi__context *s = z->s;
+   int Lf,p,i,q, h_max=1,v_max=1,c;
+   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
+   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
+   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
+   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
+   c = stbi__get8(s);
+   if (c != 3 && c != 1) return stbi__err("bad component count","Corrupt JPEG");    // JFIF requires
+   s->img_n = c;
+   for (i=0; i < c; ++i) {
+      z->img_comp[i].data = NULL;
+      z->img_comp[i].linebuf = NULL;
+   }
+
+   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
+
+   z->rgb = 0;
+   for (i=0; i < s->img_n; ++i) {
+      static unsigned char rgb[3] = { 'R', 'G', 'B' };
+      z->img_comp[i].id = stbi__get8(s);
+      if (z->img_comp[i].id != i+1)   // JFIF requires
+         if (z->img_comp[i].id != i) {  // some version of jpegtran outputs non-JFIF-compliant files!
+            // some encoders output this (see http://fileformats.archiveteam.org/wiki/JPEG#Color_format)
+            if (z->img_comp[i].id != rgb[i])
+               return stbi__err("bad component ID","Corrupt JPEG");
+            ++z->rgb;
+         }
+      q = stbi__get8(s);
+      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
+      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
+      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
+   }
+
+   if (scan != STBI__SCAN_load) return 1;
+
+   if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
+
+   for (i=0; i < s->img_n; ++i) {
+      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
+      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
+   }
+
+   // compute interleaved mcu info
+   z->img_h_max = h_max;
+   z->img_v_max = v_max;
+   z->img_mcu_w = h_max * 8;
+   z->img_mcu_h = v_max * 8;
+   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
+   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
+
+   for (i=0; i < s->img_n; ++i) {
+      // number of effective pixels (e.g. for non-interleaved MCU)
+      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
+      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
+      // to simplify generation, we'll allocate enough memory to decode
+      // the bogus oversized data from using interleaved MCUs and their
+      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
+      // discard the extra data until colorspace conversion
+      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
+      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
+      z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
+
+      if (z->img_comp[i].raw_data == NULL) {
+         for(--i; i >= 0; --i) {
+            STBI_FREE(z->img_comp[i].raw_data);
+            z->img_comp[i].raw_data = NULL;
+         }
+         return stbi__err("outofmem", "Out of memory");
+      }
+      // align blocks for idct using mmx/sse
+      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
+      z->img_comp[i].linebuf = NULL;
+      if (z->progressive) {
+         z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3;
+         z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3;
+         z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15);
+         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
+      } else {
+         z->img_comp[i].coeff = 0;
+         z->img_comp[i].raw_coeff = 0;
+      }
+   }
+
+   return 1;
+}
+
+// use comparisons since in some cases we handle more than one case (e.g. SOF)
+#define stbi__DNL(x)         ((x) == 0xdc)
+#define stbi__SOI(x)         ((x) == 0xd8)
+#define stbi__EOI(x)         ((x) == 0xd9)
+#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
+#define stbi__SOS(x)         ((x) == 0xda)
+
+#define stbi__SOF_progressive(x)   ((x) == 0xc2)
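+// (0xc0 = baseline, 0xc1 = extended sequential, 0xc2 = progressive)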
+
+static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
+{
+   int m;
+   z->marker = STBI__MARKER_none; // initialize cached marker to empty
+   m = stbi__get_marker(z);
+   if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
+   if (scan == STBI__SCAN_type) return 1;
+   m = stbi__get_marker(z);
+   while (!stbi__SOF(m)) {
+      if (!stbi__process_marker(z,m)) return 0;
+      m = stbi__get_marker(z);
+      while (m == STBI__MARKER_none) {
+         // some files have extra padding after their blocks, so ok, we'll scan
+         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
+         m = stbi__get_marker(z);
+      }
+   }
+   z->progressive = stbi__SOF_progressive(m);
+   if (!stbi__process_frame_header(z, scan)) return 0;
+   return 1;
+}
+
+// decode image to YCbCr format
+static int stbi__decode_jpeg_image(stbi__jpeg *j)
+{
+   int m;
+   for (m = 0; m < 4; m++) {
+      j->img_comp[m].raw_data = NULL;
+      j->img_comp[m].raw_coeff = NULL;
+   }
+   j->restart_interval = 0;
+   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
+   m = stbi__get_marker(j);
+   while (!stbi__EOI(m)) {
+      if (stbi__SOS(m)) {
+         if (!stbi__process_scan_header(j)) return 0;
+         if (!stbi__parse_entropy_coded_data(j)) return 0;
+         if (j->marker == STBI__MARKER_none ) {
+            // handle 0s at the end of image data from IP Kamera 9060
+            while (!stbi__at_eof(j->s)) {
+               int x = stbi__get8(j->s);
+               if (x == 255) {
+                  j->marker = stbi__get8(j->s);
+                  break;
+               } else if (x != 0) {
+                  return stbi__err("junk before marker", "Corrupt JPEG");
+               }
+            }
+            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
+         }
+      } else {
+         if (!stbi__process_marker(j, m)) return 0;
+      }
+      m = stbi__get_marker(j);
+   }
+   if (j->progressive)
+      stbi__jpeg_finish(j);
+   return 1;
+}
+
+// static jfif-centered resampling (across block boundaries)
+
+typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
+                                    int w, int hs);
+
+#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
+
+static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   STBI_NOTUSED(out);
+   STBI_NOTUSED(in_far);
+   STBI_NOTUSED(w);
+   STBI_NOTUSED(hs);
+   return in_near;
+}
+
+static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   // need to generate two samples vertically for every one in input
+   int i;
+   STBI_NOTUSED(hs);
+   for (i=0; i < w; ++i)
+      out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
+   return out;
+}
+
+static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   // need to generate two samples horizontally for every one in input
+   int i;
+   stbi_uc *input = in_near;
+
+   if (w == 1) {
+      // if only one sample, can't do any interpolation
+      out[0] = out[1] = input[0];
+      return out;
+   }
+
+   out[0] = input[0];
+   out[1] = stbi__div4(input[0]*3 + input[1] + 2);
+   for (i=1; i < w-1; ++i) {
+      int n = 3*input[i]+2;
+      out[i*2+0] = stbi__div4(n+input[i-1]);
+      out[i*2+1] = stbi__div4(n+input[i+1]);
+   }
+   out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
+   out[i*2+1] = input[w-1];
+
+   STBI_NOTUSED(in_far);
+   STBI_NOTUSED(hs);
+
+   return out;
+}
+
+#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
+
+static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   // need to generate 2x2 samples for every one in input
+   int i,t0,t1;
+   if (w == 1) {
+      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
+      return out;
+   }
+
+   t1 = 3*in_near[0] + in_far[0];
+   out[0] = stbi__div4(t1+2);
+   for (i=1; i < w; ++i) {
+      t0 = t1;
+      t1 = 3*in_near[i]+in_far[i];
+      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
+      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
+   }
+   out[w*2-1] = stbi__div4(t1+2);
+
+   STBI_NOTUSED(hs);
+
+   return out;
+}
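+
+// expanding both passes, each output pixel is a weighted average of its four nearest
+// source samples with weights 9/16, 3/16, 3/16 and 1/16; the +8 gives round-to-nearest
+// before the final >> 4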
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   // need to generate 2x2 samples for every one in input
+   int i=0,t0,t1;
+
+   if (w == 1) {
+      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
+      return out;
+   }
+
+   t1 = 3*in_near[0] + in_far[0];
+   // process groups of 8 pixels for as long as we can.
+   // note we can't handle the last pixel in a row in this loop
+   // because we need to handle the filter boundary conditions.
+   for (; i < ((w-1) & ~7); i += 8) {
+#if defined(STBI_SSE2)
+      // load and perform the vertical filtering pass
+      // this uses 3*x + y = 4*x + (y - x)
+      __m128i zero  = _mm_setzero_si128();
+      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
+      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
+      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
+      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
+      __m128i diff  = _mm_sub_epi16(farw, nearw);
+      __m128i nears = _mm_slli_epi16(nearw, 2);
+      __m128i curr  = _mm_add_epi16(nears, diff); // current row
+
+      // horizontal filter works the same based on shifted versions of the current
+      // row. "prev" is current row shifted right by 1 pixel; we need to
+      // insert the previous pixel value (from t1).
+      // "next" is current row shifted left by 1 pixel, with first pixel
+      // of next block of 8 pixels added in.
+      __m128i prv0 = _mm_slli_si128(curr, 2);
+      __m128i nxt0 = _mm_srli_si128(curr, 2);
+      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
+      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
+
+      // horizontal filter, polyphase implementation since it's convenient:
+      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
+      // note the shared term.
+      __m128i bias  = _mm_set1_epi16(8);
+      __m128i curs = _mm_slli_epi16(curr, 2);
+      __m128i prvd = _mm_sub_epi16(prev, curr);
+      __m128i nxtd = _mm_sub_epi16(next, curr);
+      __m128i curb = _mm_add_epi16(curs, bias);
+      __m128i even = _mm_add_epi16(prvd, curb);
+      __m128i odd  = _mm_add_epi16(nxtd, curb);
+
+      // interleave even and odd pixels, then undo scaling.
+      __m128i int0 = _mm_unpacklo_epi16(even, odd);
+      __m128i int1 = _mm_unpackhi_epi16(even, odd);
+      __m128i de0  = _mm_srli_epi16(int0, 4);
+      __m128i de1  = _mm_srli_epi16(int1, 4);
+
+      // pack and write output
+      __m128i outv = _mm_packus_epi16(de0, de1);
+      _mm_storeu_si128((__m128i *) (out + i*2), outv);
+#elif defined(STBI_NEON)
+      // load and perform the vertical filtering pass
+      // this uses 3*x + y = 4*x + (y - x)
+      uint8x8_t farb  = vld1_u8(in_far + i);
+      uint8x8_t nearb = vld1_u8(in_near + i);
+      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
+      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
+      int16x8_t curr  = vaddq_s16(nears, diff); // current row
+
+      // horizontal filter works the same based on shifted versions of current
+      // row. "prev" is current row shifted right by 1 pixel; we need to
+      // insert the previous pixel value (from t1).
+      // "next" is current row shifted left by 1 pixel, with first pixel
+      // of next block of 8 pixels added in.
+      int16x8_t prv0 = vextq_s16(curr, curr, 7);
+      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
+      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
+      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
+
+      // horizontal filter, polyphase implementation since it's convenient:
+      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
+      // note the shared term.
+      int16x8_t curs = vshlq_n_s16(curr, 2);
+      int16x8_t prvd = vsubq_s16(prev, curr);
+      int16x8_t nxtd = vsubq_s16(next, curr);
+      int16x8_t even = vaddq_s16(curs, prvd);
+      int16x8_t odd  = vaddq_s16(curs, nxtd);
+
+      // undo scaling and round, then store with even/odd phases interleaved
+      uint8x8x2_t o;
+      o.val[0] = vqrshrun_n_s16(even, 4);
+      o.val[1] = vqrshrun_n_s16(odd,  4);
+      vst2_u8(out + i*2, o);
+#endif
+
+      // "previous" value for next iter
+      t1 = 3*in_near[i+7] + in_far[i+7];
+   }
+
+   t0 = t1;
+   t1 = 3*in_near[i] + in_far[i];
+   out[i*2] = stbi__div16(3*t1 + t0 + 8);
+
+   for (++i; i < w; ++i) {
+      t0 = t1;
+      t1 = 3*in_near[i]+in_far[i];
+      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
+      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
+   }
+   out[w*2-1] = stbi__div4(t1+2);
+
+   STBI_NOTUSED(hs);
+
+   return out;
+}
+#endif
+
+static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   // resample with nearest-neighbor
+   int i,j;
+   STBI_NOTUSED(in_far);
+   for (i=0; i < w; ++i)
+      for (j=0; j < hs; ++j)
+         out[i*hs+j] = in_near[i];
+   return out;
+}
+
+#ifdef STBI_JPEG_OLD
+// this is the same YCbCr-to-RGB calculation that stb_image has used
+// historically before the algorithm changes in 1.49
+#define float2fixed(x)  ((int) ((x) * 65536 + 0.5))
+static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
+{
+   int i;
+   for (i=0; i < count; ++i) {
+      int y_fixed = (y[i] << 16) + 32768; // rounding
+      int r,g,b;
+      int cr = pcr[i] - 128;
+      int cb = pcb[i] - 128;
+      r = y_fixed + cr*float2fixed(1.40200f);
+      g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
+      b = y_fixed                            + cb*float2fixed(1.77200f);
+      r >>= 16;
+      g >>= 16;
+      b >>= 16;
+      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
+      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
+      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
+      out[0] = (stbi_uc)r;
+      out[1] = (stbi_uc)g;
+      out[2] = (stbi_uc)b;
+      out[3] = 255;
+      out += step;
+   }
+}
+#else
+// this is a reduced-precision calculation of YCbCr-to-RGB introduced
+// to make sure the code produces the same results in both SIMD and scalar
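+// the coefficients are rounded to 12 fractional bits (the same precision used
+// by the 16-bit SIMD multiplies) and then promoted to a 20-bit fixed-point
+// scale; (1<<19) is the 0.5 rounding term removed by the final >>20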
+#define float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
+static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
+{
+   int i;
+   for (i=0; i < count; ++i) {
+      int y_fixed = (y[i] << 20) + (1<<19); // rounding
+      int r,g,b;
+      int cr = pcr[i] - 128;
+      int cb = pcb[i] - 128;
+      r = y_fixed +  cr* float2fixed(1.40200f);
+      g = y_fixed + (cr*-float2fixed(0.71414f)) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
+      b = y_fixed                               +   cb* float2fixed(1.77200f);
+      r >>= 20;
+      g >>= 20;
+      b >>= 20;
+      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
+      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
+      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
+      out[0] = (stbi_uc)r;
+      out[1] = (stbi_uc)g;
+      out[2] = (stbi_uc)b;
+      out[3] = 255;
+      out += step;
+   }
+}
+#endif
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
+{
+   int i = 0;
+
+#ifdef STBI_SSE2
+   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
+   // it's useful in practice (you wouldn't use it for textures, for example).
+   // so just accelerate step == 4 case.
+   if (step == 4) {
+      // this is a fairly straightforward implementation and not super-optimized.
+      __m128i signflip  = _mm_set1_epi8(-0x80);
+      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
+      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
+      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
+      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
+      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
+      __m128i xw = _mm_set1_epi16(255); // alpha channel
+
+      for (; i+7 < count; i += 8) {
+         // load
+         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
+         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
+         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
+         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
+         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
+
+         // unpack to short (and left-shift cr, cb by 8)
+         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
+         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
+         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
+
+         // color transform
+         __m128i yws = _mm_srli_epi16(yw, 4);
+         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
+         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
+         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
+         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
+         __m128i rws = _mm_add_epi16(cr0, yws);
+         __m128i gwt = _mm_add_epi16(cb0, yws);
+         __m128i bws = _mm_add_epi16(yws, cb1);
+         __m128i gws = _mm_add_epi16(gwt, cr1);
+
+         // descale
+         __m128i rw = _mm_srai_epi16(rws, 4);
+         __m128i bw = _mm_srai_epi16(bws, 4);
+         __m128i gw = _mm_srai_epi16(gws, 4);
+
+         // back to byte, set up for transpose
+         __m128i brb = _mm_packus_epi16(rw, bw);
+         __m128i gxb = _mm_packus_epi16(gw, xw);
+
+         // transpose to interleave channels
+         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
+         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
+         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
+         __m128i o1 = _mm_unpackhi_epi16(t0, t1);
+
+         // store
+         _mm_storeu_si128((__m128i *) (out + 0), o0);
+         _mm_storeu_si128((__m128i *) (out + 16), o1);
+         out += 32;
+      }
+   }
+#endif
+
+#ifdef STBI_NEON
+   // in this version, step=3 support would be easy to add. but is there demand?
+   if (step == 4) {
+      // this is a fairly straightforward implementation and not super-optimized.
+      uint8x8_t signflip = vdup_n_u8(0x80);
+      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
+      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
+      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
+      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));
+
+      for (; i+7 < count; i += 8) {
+         // load
+         uint8x8_t y_bytes  = vld1_u8(y + i);
+         uint8x8_t cr_bytes = vld1_u8(pcr + i);
+         uint8x8_t cb_bytes = vld1_u8(pcb + i);
+         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
+         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
+
+         // expand to s16
+         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
+         int16x8_t crw = vshll_n_s8(cr_biased, 7);
+         int16x8_t cbw = vshll_n_s8(cb_biased, 7);
+
+         // color transform
+         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
+         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
+         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
+         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
+         int16x8_t rws = vaddq_s16(yws, cr0);
+         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
+         int16x8_t bws = vaddq_s16(yws, cb1);
+
+         // undo scaling, round, convert to byte
+         uint8x8x4_t o;
+         o.val[0] = vqrshrun_n_s16(rws, 4);
+         o.val[1] = vqrshrun_n_s16(gws, 4);
+         o.val[2] = vqrshrun_n_s16(bws, 4);
+         o.val[3] = vdup_n_u8(255);
+
+         // store, interleaving r/g/b/a
+         vst4_u8(out, o);
+         out += 8*4;
+      }
+   }
+#endif
+
+   for (; i < count; ++i) {
+      int y_fixed = (y[i] << 20) + (1<<19); // rounding
+      int r,g,b;
+      int cr = pcr[i] - 128;
+      int cb = pcb[i] - 128;
+      r = y_fixed + cr* float2fixed(1.40200f);
+      g = y_fixed + cr*-float2fixed(0.71414f) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
+      b = y_fixed                             +   cb* float2fixed(1.77200f);
+      r >>= 20;
+      g >>= 20;
+      b >>= 20;
+      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
+      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
+      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
+      out[0] = (stbi_uc)r;
+      out[1] = (stbi_uc)g;
+      out[2] = (stbi_uc)b;
+      out[3] = 255;
+      out += step;
+   }
+}
+#endif
+
+// set up the kernels
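+// SSE2 availability is detected at run time via stbi__sse2_available();
+// NEON builds always use the SIMD kernels.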
+static void stbi__setup_jpeg(stbi__jpeg *j)
+{
+   j->idct_block_kernel = stbi__idct_block;
+   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
+   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
+
+#ifdef STBI_SSE2
+   if (stbi__sse2_available()) {
+      j->idct_block_kernel = stbi__idct_simd;
+      #ifndef STBI_JPEG_OLD
+      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+      #endif
+      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+   }
+#endif
+
+#ifdef STBI_NEON
+   j->idct_block_kernel = stbi__idct_simd;
+   #ifndef STBI_JPEG_OLD
+   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+   #endif
+   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+#endif
+}
+
+// clean up the temporary component buffers
+static void stbi__cleanup_jpeg(stbi__jpeg *j)
+{
+   int i;
+   for (i=0; i < j->s->img_n; ++i) {
+      if (j->img_comp[i].raw_data) {
+         STBI_FREE(j->img_comp[i].raw_data);
+         j->img_comp[i].raw_data = NULL;
+         j->img_comp[i].data = NULL;
+      }
+      if (j->img_comp[i].raw_coeff) {
+         STBI_FREE(j->img_comp[i].raw_coeff);
+         j->img_comp[i].raw_coeff = 0;
+         j->img_comp[i].coeff = 0;
+      }
+      if (j->img_comp[i].linebuf) {
+         STBI_FREE(j->img_comp[i].linebuf);
+         j->img_comp[i].linebuf = NULL;
+      }
+   }
+}
+
+typedef struct
+{
+   resample_row_func resample;
+   stbi_uc *line0,*line1;
+   int hs,vs;   // expansion factor in each axis
+   int w_lores; // horizontal pixels pre-expansion
+   int ystep;   // how far through vertical expansion we are
+   int ypos;    // which pre-expansion row we're on
+} stbi__resample;
+
+static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
+{
+   int n, decode_n;
+   z->s->img_n = 0; // make stbi__cleanup_jpeg safe
+
+   // validate req_comp
+   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+
+   // load a jpeg image from whichever source, but leave in YCbCr format
+   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
+
+   // determine actual number of components to generate
+   n = req_comp ? req_comp : z->s->img_n;
+
+   if (z->s->img_n == 3 && n < 3)
+      decode_n = 1;
+   else
+      decode_n = z->s->img_n;
+
+   // resample and color-convert
+   {
+      int k;
+      unsigned int i,j;
+      stbi_uc *output;
+      stbi_uc *coutput[4];
+
+      stbi__resample res_comp[4];
+
+      for (k=0; k < decode_n; ++k) {
+         stbi__resample *r = &res_comp[k];
+
+         // allocate line buffer big enough for upsampling off the edges
+         // with upsample factor of 4
+         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
+         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+         r->hs      = z->img_h_max / z->img_comp[k].h;
+         r->vs      = z->img_v_max / z->img_comp[k].v;
+         r->ystep   = r->vs >> 1;
+         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
+         r->ypos    = 0;
+         r->line0   = r->line1 = z->img_comp[k].data;
+
+         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
+         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
+         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
+         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
+         else                               r->resample = stbi__resample_row_generic;
+      }
+
+      // can't error after this, so this is safe
+      output = (stbi_uc *) stbi__malloc(n * z->s->img_x * z->s->img_y + 1);
+      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+      // now go ahead and resample
+      for (j=0; j < z->s->img_y; ++j) {
+         stbi_uc *out = output + n * z->s->img_x * j;
+         for (k=0; k < decode_n; ++k) {
+            stbi__resample *r = &res_comp[k];
+            int y_bot = r->ystep >= (r->vs >> 1);
+            coutput[k] = r->resample(z->img_comp[k].linebuf,
+                                     y_bot ? r->line1 : r->line0,
+                                     y_bot ? r->line0 : r->line1,
+                                     r->w_lores, r->hs);
+            if (++r->ystep >= r->vs) {
+               r->ystep = 0;
+               r->line0 = r->line1;
+               if (++r->ypos < z->img_comp[k].y)
+                  r->line1 += z->img_comp[k].w2;
+            }
+         }
+         if (n >= 3) {
+            stbi_uc *y = coutput[0];
+            if (z->s->img_n == 3) {
+               if (z->rgb == 3) {
+                  for (i=0; i < z->s->img_x; ++i) {
+                     out[0] = y[i];
+                     out[1] = coutput[1][i];
+                     out[2] = coutput[2][i];
+                     out[3] = 255;
+                     out += n;
+                  }
+               } else {
+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+               }
+            } else
+               for (i=0; i < z->s->img_x; ++i) {
+                  out[0] = out[1] = out[2] = y[i];
+                  out[3] = 255; // not used if n==3
+                  out += n;
+               }
+         } else {
+            stbi_uc *y = coutput[0];
+            if (n == 1)
+               for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
+            else
+               for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
+         }
+      }
+      stbi__cleanup_jpeg(z);
+      *out_x = z->s->img_x;
+      *out_y = z->s->img_y;
+      if (comp) *comp  = z->s->img_n; // report original components, not output
+      return output;
+   }
+}
+
+static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   unsigned char* result;
+   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
+   j->s = s;
+   stbi__setup_jpeg(j);
+   result = load_jpeg_image(j, x,y,comp,req_comp);
+   STBI_FREE(j);
+   return result;
+}
+
+static int stbi__jpeg_test(stbi__context *s)
+{
+   int r;
+   stbi__jpeg j;
+   j.s = s;
+   stbi__setup_jpeg(&j);
+   r = stbi__decode_jpeg_header(&j, STBI__SCAN_type);
+   stbi__rewind(s);
+   return r;
+}
+
+static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
+{
+   if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
+      stbi__rewind( j->s );
+      return 0;
+            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
+   if (x) *x = j->s->img_x;
+   if (y) *y = j->s->img_y;
+   if (comp) *comp = j->s->img_n;
+   return 1;
+}
+
+static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int result;
+   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
+   j->s = s;
+   result = stbi__jpeg_info_raw(j, x, y, comp);
+   STBI_FREE(j);
+   return result;
+}
+#endif
+
+// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
+//    simple implementation
+//      - all input must be provided in an upfront buffer
+//      - all output is written to a single output buffer (can malloc/realloc)
+//    performance
+//      - fast huffman
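+//    usage sketch (error handling omitted; the input buffer and its length
+//    are whatever the caller has, and the result is freed with free() unless
+//    STBI_FREE was redefined):
+//
+//       int outlen;
+//       char *raw = stbi_zlib_decode_malloc(compressed, compressed_len, &outlen);
+//       if (raw) { /* outlen decoded bytes at raw */ free(raw); }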
+
+#ifndef STBI_NO_ZLIB
+
+// fast-way is faster to check than jpeg huffman, but slow way is slower
+#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
+#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
+
+// zlib-style huffman encoding
+// (jpeg packs from left, zlib from right, so can't share code)
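+// fast[] maps the next STBI__ZFAST_BITS input bits straight to
+// (code length << 9) | symbol; a zero entry means the code is longer than
+// STBI__ZFAST_BITS and must take the slow path.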
+typedef struct
+{
+   stbi__uint16 fast[1 << STBI__ZFAST_BITS];
+   stbi__uint16 firstcode[16];
+   int maxcode[17];
+   stbi__uint16 firstsymbol[16];
+   stbi_uc  size[288];
+   stbi__uint16 value[288];
+} stbi__zhuffman;
+
+stbi_inline static int stbi__bitreverse16(int n)
+{
+  n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
+  n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
+  n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
+  n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
+  return n;
+}
+
+stbi_inline static int stbi__bit_reverse(int v, int bits)
+{
+   STBI_ASSERT(bits <= 16);
+   // to bit reverse n bits, reverse 16 and shift
+   // e.g. 11 bits, bit reverse and shift away 5
+   return stbi__bitreverse16(v) >> (16-bits);
+}
+
+static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
+{
+   int i,k=0;
+   int code, next_code[16], sizes[17];
+
+   // DEFLATE spec for generating codes
+   memset(sizes, 0, sizeof(sizes));
+   memset(z->fast, 0, sizeof(z->fast));
+   for (i=0; i < num; ++i)
+      ++sizes[sizelist[i]];
+   sizes[0] = 0;
+   for (i=1; i < 16; ++i)
+      if (sizes[i] > (1 << i))
+         return stbi__err("bad sizes", "Corrupt PNG");
+   code = 0;
+   for (i=1; i < 16; ++i) {
+      next_code[i] = code;
+      z->firstcode[i] = (stbi__uint16) code;
+      z->firstsymbol[i] = (stbi__uint16) k;
+      code = (code + sizes[i]);
+      if (sizes[i])
+         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
+      z->maxcode[i] = code << (16-i); // preshift for inner loop
+      code <<= 1;
+      k += sizes[i];
+   }
+   z->maxcode[16] = 0x10000; // sentinel
+   for (i=0; i < num; ++i) {
+      int s = sizelist[i];
+      if (s) {
+         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
+         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
+         z->size [c] = (stbi_uc     ) s;
+         z->value[c] = (stbi__uint16) i;
+         if (s <= STBI__ZFAST_BITS) {
+            int j = stbi__bit_reverse(next_code[s],s);
+            while (j < (1 << STBI__ZFAST_BITS)) {
+               z->fast[j] = fastv;
+               j += (1 << s);
+            }
+         }
+         ++next_code[s];
+      }
+   }
+   return 1;
+}
+
+// zlib-from-memory implementation for PNG reading
+//    because PNG allows splitting the zlib stream arbitrarily,
+//    and it's annoying structurally to have PNG call ZLIB call PNG,
+//    we require PNG read all the IDATs and combine them into a single
+//    memory buffer
+
+typedef struct
+{
+   stbi_uc *zbuffer, *zbuffer_end;
+   int num_bits;
+   stbi__uint32 code_buffer;
+
+   char *zout;
+   char *zout_start;
+   char *zout_end;
+   int   z_expandable;
+
+   stbi__zhuffman z_length, z_distance;
+} stbi__zbuf;
+
+stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
+{
+   if (z->zbuffer >= z->zbuffer_end) return 0;
+   return *z->zbuffer++;
+}
+
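+// top up the little-endian bit buffer until it holds at least 25 valid bits;
+// zlib packs huffman codes starting from the least significant bit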
+static void stbi__fill_bits(stbi__zbuf *z)
+{
+   do {
+      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
+      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
+      z->num_bits += 8;
+   } while (z->num_bits <= 24);
+}
+
+stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
+{
+   unsigned int k;
+   if (z->num_bits < n) stbi__fill_bits(z);
+   k = z->code_buffer & ((1 << n) - 1);
+   z->code_buffer >>= n;
+   z->num_bits -= n;
+   return k;
+}
+
+static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
+{
+   int b,s,k;
+   // not resolved by fast table, so compute it the slow way
+   // use jpeg approach, which requires MSbits at top
+   k = stbi__bit_reverse(a->code_buffer, 16);
+   for (s=STBI__ZFAST_BITS+1; ; ++s)
+      if (k < z->maxcode[s])
+         break;
+   if (s == 16) return -1; // invalid code!
+   // code size is s, so:
+   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
+   STBI_ASSERT(z->size[b] == s);
+   a->code_buffer >>= s;
+   a->num_bits -= s;
+   return z->value[b];
+}
+
+stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
+{
+   int b,s;
+   if (a->num_bits < 16) stbi__fill_bits(a);
+   b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
+   if (b) {
+      s = b >> 9;
+      a->code_buffer >>= s;
+      a->num_bits -= s;
+      return b & 511;
+   }
+   return stbi__zhuffman_decode_slowpath(a, z);
+}
+
+static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
+{
+   char *q;
+   int cur, limit, old_limit;
+   z->zout = zout;
+   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
+   cur   = (int) (z->zout     - z->zout_start);
+   limit = old_limit = (int) (z->zout_end - z->zout_start);
+   while (cur + n > limit)
+      limit *= 2;
+   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
+   STBI_NOTUSED(old_limit);
+   if (q == NULL) return stbi__err("outofmem", "Out of memory");
+   z->zout_start = q;
+   z->zout       = q + cur;
+   z->zout_end   = q + limit;
+   return 1;
+}
+
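+// base values and extra-bit counts for length and distance codes,
+// straight from the DEFLATE spec (RFC 1951, section 3.2.5)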
+static int stbi__zlength_base[31] = {
+   3,4,5,6,7,8,9,10,11,13,
+   15,17,19,23,27,31,35,43,51,59,
+   67,83,99,115,131,163,195,227,258,0,0 };
+
+static int stbi__zlength_extra[31]=
+{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
+
+static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
+257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
+
+static int stbi__zdist_extra[32] =
+{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+static int stbi__parse_huffman_block(stbi__zbuf *a)
+{
+   char *zout = a->zout;
+   for(;;) {
+      int z = stbi__zhuffman_decode(a, &a->z_length);
+      if (z < 256) {
+         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
+         if (zout >= a->zout_end) {
+            if (!stbi__zexpand(a, zout, 1)) return 0;
+            zout = a->zout;
+         }
+         *zout++ = (char) z;
+      } else {
+         stbi_uc *p;
+         int len,dist;
+         if (z == 256) {
+            a->zout = zout;
+            return 1;
+         }
+         z -= 257;
+         len = stbi__zlength_base[z];
+         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
+         z = stbi__zhuffman_decode(a, &a->z_distance);
+         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
+         dist = stbi__zdist_base[z];
+         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
+         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
+         if (zout + len > a->zout_end) {
+            if (!stbi__zexpand(a, zout, len)) return 0;
+            zout = a->zout;
+         }
+         p = (stbi_uc *) (zout - dist);
+         if (dist == 1) { // run of one byte; common in images.
+            stbi_uc v = *p;
+            if (len) { do *zout++ = v; while (--len); }
+         } else {
+            if (len) { do *zout++ = *p++; while (--len); }
+         }
+      }
+   }
+}
+
+static int stbi__compute_huffman_codes(stbi__zbuf *a)
+{
+   static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
+   stbi__zhuffman z_codelength;
+   stbi_uc lencodes[286+32+137];//padding for maximum single op
+   stbi_uc codelength_sizes[19];
+   int i,n;
+
+   int hlit  = stbi__zreceive(a,5) + 257;
+   int hdist = stbi__zreceive(a,5) + 1;
+   int hclen = stbi__zreceive(a,4) + 4;
+
+   memset(codelength_sizes, 0, sizeof(codelength_sizes));
+   for (i=0; i < hclen; ++i) {
+      int s = stbi__zreceive(a,3);
+      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
+   }
+   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
+
+   n = 0;
+   while (n < hlit + hdist) {
+      int c = stbi__zhuffman_decode(a, &z_codelength);
+      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
+      if (c < 16)
+         lencodes[n++] = (stbi_uc) c;
+      else if (c == 16) {
+         c = stbi__zreceive(a,2)+3;
+         memset(lencodes+n, lencodes[n-1], c);
+         n += c;
+      } else if (c == 17) {
+         c = stbi__zreceive(a,3)+3;
+         memset(lencodes+n, 0, c);
+         n += c;
+      } else {
+         STBI_ASSERT(c == 18);
+         c = stbi__zreceive(a,7)+11;
+         memset(lencodes+n, 0, c);
+         n += c;
+      }
+   }
+   if (n != hlit+hdist) return stbi__err("bad codelengths","Corrupt PNG");
+   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
+   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
+   return 1;
+}
+
+static int stbi__parse_uncompressed_block(stbi__zbuf *a)
+{
+   stbi_uc header[4];
+   int len,nlen,k;
+   if (a->num_bits & 7)
+      stbi__zreceive(a, a->num_bits & 7); // discard
+   // drain the bit-packed data into header
+   k = 0;
+   while (a->num_bits > 0) {
+      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
+      a->code_buffer >>= 8;
+      a->num_bits -= 8;
+   }
+   STBI_ASSERT(a->num_bits == 0);
+   // now fill header the normal way
+   while (k < 4)
+      header[k++] = stbi__zget8(a);
+   len  = header[1] * 256 + header[0];
+   nlen = header[3] * 256 + header[2];
+   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
+   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
+   if (a->zout + len > a->zout_end)
+      if (!stbi__zexpand(a, a->zout, len)) return 0;
+   memcpy(a->zout, a->zbuffer, len);
+   a->zbuffer += len;
+   a->zout += len;
+   return 1;
+}
+
+static int stbi__parse_zlib_header(stbi__zbuf *a)
+{
+   int cmf   = stbi__zget8(a);
+   int cm    = cmf & 15;
+   /* int cinfo = cmf >> 4; */
+   int flg   = stbi__zget8(a);
+   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
+   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
+   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
+   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
+   return 1;
+}
+
+// @TODO: should statically initialize these for optimal thread safety
+static stbi_uc stbi__zdefault_length[288], stbi__zdefault_distance[32];
+static void stbi__init_zdefaults(void)
+{
+   int i;   // use <= to match clearly with spec
+   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
+   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
+   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
+   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
+
+   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
+}
+
+static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
+{
+   int final, type;
+   if (parse_header)
+      if (!stbi__parse_zlib_header(a)) return 0;
+   a->num_bits = 0;
+   a->code_buffer = 0;
+   do {
+      final = stbi__zreceive(a,1);
+      type = stbi__zreceive(a,2);
+      if (type == 0) {
+         if (!stbi__parse_uncompressed_block(a)) return 0;
+      } else if (type == 3) {
+         return 0;
+      } else {
+         if (type == 1) {
+            // use fixed code lengths
+            if (!stbi__zdefault_distance[31]) stbi__init_zdefaults();
+            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
+            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
+         } else {
+            if (!stbi__compute_huffman_codes(a)) return 0;
+         }
+         if (!stbi__parse_huffman_block(a)) return 0;
+      }
+   } while (!final);
+   return 1;
+}
+
+static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
+{
+   a->zout_start = obuf;
+   a->zout       = obuf;
+   a->zout_end   = obuf + olen;
+   a->z_expandable = exp;
+
+   return stbi__parse_zlib(a, parse_header);
+}
+
+STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
+{
+   stbi__zbuf a;
+   char *p = (char *) stbi__malloc(initial_size);
+   if (p == NULL) return NULL;
+   a.zbuffer = (stbi_uc *) buffer;
+   a.zbuffer_end = (stbi_uc *) buffer + len;
+   if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
+      if (outlen) *outlen = (int) (a.zout - a.zout_start);
+      return a.zout_start;
+   } else {
+      STBI_FREE(a.zout_start);
+      return NULL;
+   }
+}
+
+STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
+{
+   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
+}
+
+STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
+{
+   stbi__zbuf a;
+   char *p = (char *) stbi__malloc(initial_size);
+   if (p == NULL) return NULL;
+   a.zbuffer = (stbi_uc *) buffer;
+   a.zbuffer_end = (stbi_uc *) buffer + len;
+   if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
+      if (outlen) *outlen = (int) (a.zout - a.zout_start);
+      return a.zout_start;
+   } else {
+      STBI_FREE(a.zout_start);
+      return NULL;
+   }
+}
+
+STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
+{
+   stbi__zbuf a;
+   a.zbuffer = (stbi_uc *) ibuffer;
+   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
+   if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
+      return (int) (a.zout - a.zout_start);
+   else
+      return -1;
+}
+
+STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
+{
+   stbi__zbuf a;
+   char *p = (char *) stbi__malloc(16384);
+   if (p == NULL) return NULL;
+   a.zbuffer = (stbi_uc *) buffer;
+   a.zbuffer_end = (stbi_uc *) buffer+len;
+   if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
+      if (outlen) *outlen = (int) (a.zout - a.zout_start);
+      return a.zout_start;
+   } else {
+      STBI_FREE(a.zout_start);
+      return NULL;
+   }
+}
+
+STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
+{
+   stbi__zbuf a;
+   a.zbuffer = (stbi_uc *) ibuffer;
+   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
+   if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
+      return (int) (a.zout - a.zout_start);
+   else
+      return -1;
+}
+#endif
+
+// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
+//    simple implementation
+//      - only 8-bit samples
+//      - no CRC checking
+//      - allocates lots of intermediate memory
+//        - avoids problem of streaming data between subsystems
+//        - avoids explicit window management
+//    performance
+//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
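+//    usage sketch through the public API (any file name works; error
+//    handling omitted):
+//
+//       int w, h, n;
+//       unsigned char *pixels = stbi_load("image.png", &w, &h, &n, 4); // force RGBA
+//       if (pixels) { /* w*h RGBA pixels */ stbi_image_free(pixels); }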
+
+#ifndef STBI_NO_PNG
+typedef struct
+{
+   stbi__uint32 length;
+   stbi__uint32 type;
+} stbi__pngchunk;
+
+static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
+{
+   stbi__pngchunk c;
+   c.length = stbi__get32be(s);
+   c.type   = stbi__get32be(s);
+   return c;
+}
+
+static int stbi__check_png_header(stbi__context *s)
+{
+   static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
+   int i;
+   for (i=0; i < 8; ++i)
+      if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
+   return 1;
+}
+
+typedef struct
+{
+   stbi__context *s;
+   stbi_uc *idata, *expanded, *out;
+   int depth;
+} stbi__png;
+
+
+enum {
+   STBI__F_none=0,
+   STBI__F_sub=1,
+   STBI__F_up=2,
+   STBI__F_avg=3,
+   STBI__F_paeth=4,
+   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
+   STBI__F_avg_first,
+   STBI__F_paeth_first
+};
+
+static stbi_uc first_row_filter[5] =
+{
+   STBI__F_none,
+   STBI__F_sub,
+   STBI__F_none,
+   STBI__F_avg_first,
+   STBI__F_paeth_first
+};
+
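+// Paeth predictor from the PNG spec: predict a + b - c and return whichever
+// of a (left), b (above) or c (upper-left) is closest to that prediction.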
+static int stbi__paeth(int a, int b, int c)
+{
+   int p = a + b - c;
+   int pa = abs(p-a);
+   int pb = abs(p-b);
+   int pc = abs(p-c);
+   if (pa <= pb && pa <= pc) return a;
+   if (pb <= pc) return b;
+   return c;
+}
+
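+// multipliers that replicate 1/2/4-bit samples up to 8 bits, so the maximum
+// value of each depth maps exactly to 255 (e.g. 4-bit v becomes v * 0x11)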
+static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
+
+// create the png data from post-deflated data
+static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
+{
+   int bytes = (depth == 16? 2 : 1);
+   stbi__context *s = a->s;
+   stbi__uint32 i,j,stride = x*out_n*bytes;
+   stbi__uint32 img_len, img_width_bytes;
+   int k;
+   int img_n = s->img_n; // copy it into a local for later
+
+   int output_bytes = out_n*bytes;
+   int filter_bytes = img_n*bytes;
+   int width = x;
+
+   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
+   a->out = (stbi_uc *) stbi__malloc(x * y * output_bytes); // extra bytes to write off the end into
+   if (!a->out) return stbi__err("outofmem", "Out of memory");
+
+   img_width_bytes = (((img_n * x * depth) + 7) >> 3);
+   img_len = (img_width_bytes + 1) * y;
+   if (s->img_x == x && s->img_y == y) {
+      if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG");
+   } else { // interlaced:
+      if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
+   }
+
+   for (j=0; j < y; ++j) {
+      stbi_uc *cur = a->out + stride*j;
+      stbi_uc *prior = cur - stride;
+      int filter = *raw++;
+
+      if (filter > 4)
+         return stbi__err("invalid filter","Corrupt PNG");
+
+      if (depth < 8) {
+         STBI_ASSERT(img_width_bytes <= x);
+         cur += x*out_n - img_width_bytes; // store output to the rightmost img_width_bytes bytes, so we can decode in place
+         filter_bytes = 1;
+         width = img_width_bytes;
+      }
+
+      // if first row, use special filter that doesn't sample previous row
+      if (j == 0) filter = first_row_filter[filter];
+
+      // handle first byte explicitly
+      for (k=0; k < filter_bytes; ++k) {
+         switch (filter) {
+            case STBI__F_none       : cur[k] = raw[k]; break;
+            case STBI__F_sub        : cur[k] = raw[k]; break;
+            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
+            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
+            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
+            case STBI__F_avg_first  : cur[k] = raw[k]; break;
+            case STBI__F_paeth_first: cur[k] = raw[k]; break;
+         }
+      }
+
+      if (depth == 8) {
+         if (img_n != out_n)
+            cur[img_n] = 255; // first pixel
+         raw += img_n;
+         cur += out_n;
+         prior += out_n;
+      } else if (depth == 16) {
+         if (img_n != out_n) {
+            cur[filter_bytes]   = 255; // first pixel top byte
+            cur[filter_bytes+1] = 255; // first pixel bottom byte
+         }
+         raw += filter_bytes;
+         cur += output_bytes;
+         prior += output_bytes;
+      } else {
+         raw += 1;
+         cur += 1;
+         prior += 1;
+      }
+
+      // this is a little gross, so that we don't switch per-pixel or per-component
+      if (depth < 8 || img_n == out_n) {
+         int nk = (width - 1)*filter_bytes;
+         #define CASE(f) \
+             case f:     \
+                for (k=0; k < nk; ++k)
+         switch (filter) {
+            // "none" filter turns into a memcpy here; make that explicit.
+            case STBI__F_none:         memcpy(cur, raw, nk); break;
+            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
+            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
+            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
+            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
+            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
+            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
+         }
+         #undef CASE
+         raw += nk;
+      } else {
+         STBI_ASSERT(img_n+1 == out_n);
+         #define CASE(f) \
+             case f:     \
+                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
+                   for (k=0; k < filter_bytes; ++k)
+         switch (filter) {
+            CASE(STBI__F_none)         cur[k] = raw[k]; break;
+            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); break;
+            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
+            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); break;
+            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); break;
+            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); break;
+            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); break;
+         }
+         #undef CASE
+
+         // the loop above sets the high byte of the pixels' alpha, but for
+         // 16 bit png files we also need the low byte set. we'll do that here.
+         if (depth == 16) {
+            cur = a->out + stride*j; // start at the beginning of the row again
+            for (i=0; i < x; ++i,cur+=output_bytes) {
+               cur[filter_bytes+1] = 255;
+            }
+         }
+      }
+   }
+
+   // we make a separate pass to expand bits to pixels; for performance,
+   // this could run two scanlines behind the above code, so it won't
+// interfere with filtering but will still be in the cache.
+   if (depth < 8) {
+      for (j=0; j < y; ++j) {
+         stbi_uc *cur = a->out + stride*j;
+         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
+         // unpack 1/2/4-bit into an 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
+         // png guarantees byte alignment; if width is not a multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
+         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+
+         // note that the final byte might overshoot and write more data than desired.
+         // we can allocate enough data that this never writes out of memory, but it
+         // could also overwrite the next scanline. can it overwrite non-empty data
+         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
+         // so we need to explicitly clamp the final ones
+
+         if (depth == 4) {
+            for (k=x*img_n; k >= 2; k-=2, ++in) {
+               *cur++ = scale * ((*in >> 4)       );
+               *cur++ = scale * ((*in     ) & 0x0f);
+            }
+            if (k > 0) *cur++ = scale * ((*in >> 4)       );
+         } else if (depth == 2) {
+            for (k=x*img_n; k >= 4; k-=4, ++in) {
+               *cur++ = scale * ((*in >> 6)       );
+               *cur++ = scale * ((*in >> 4) & 0x03);
+               *cur++ = scale * ((*in >> 2) & 0x03);
+               *cur++ = scale * ((*in     ) & 0x03);
+            }
+            if (k > 0) *cur++ = scale * ((*in >> 6)       );
+            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
+            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
+         } else if (depth == 1) {
+            for (k=x*img_n; k >= 8; k-=8, ++in) {
+               *cur++ = scale * ((*in >> 7)       );
+               *cur++ = scale * ((*in >> 6) & 0x01);
+               *cur++ = scale * ((*in >> 5) & 0x01);
+               *cur++ = scale * ((*in >> 4) & 0x01);
+               *cur++ = scale * ((*in >> 3) & 0x01);
+               *cur++ = scale * ((*in >> 2) & 0x01);
+               *cur++ = scale * ((*in >> 1) & 0x01);
+               *cur++ = scale * ((*in     ) & 0x01);
+            }
+            if (k > 0) *cur++ = scale * ((*in >> 7)       );
+            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
+            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
+            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
+            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
+            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
+            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
+         }
+         if (img_n != out_n) {
+            int q;
+            // insert alpha = 255
+            cur = a->out + stride*j;
+            if (img_n == 1) {
+               for (q=x-1; q >= 0; --q) {
+                  cur[q*2+1] = 255;
+                  cur[q*2+0] = cur[q];
+               }
+            } else {
+               STBI_ASSERT(img_n == 3);
+               for (q=x-1; q >= 0; --q) {
+                  cur[q*4+3] = 255;
+                  cur[q*4+2] = cur[q*3+2];
+                  cur[q*4+1] = cur[q*3+1];
+                  cur[q*4+0] = cur[q*3+0];
+               }
+            }
+         }
+      }
+   } else if (depth == 16) {
+      // force the image data from big-endian to platform-native.
+      // this is done in a separate pass due to the decoding relying
+      // on the data being untouched, but could probably be done
+      // per-line during decode if care is taken.
+      stbi_uc *cur = a->out;
+      stbi__uint16 *cur16 = (stbi__uint16*)cur;
+
+      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
+         *cur16 = (cur[0] << 8) | cur[1];
+      }
+   }
+
+   return 1;
+}
+
+static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
+{
+   stbi_uc *final;
+   int p;
+   if (!interlaced)
+      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
+
+   // de-interlacing
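+   // PNG Adam7: seven passes, each with its own pixel origin and spacing per axis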
+   final = (stbi_uc *) stbi__malloc(a->s->img_x * a->s->img_y * out_n);
+   if (!final) return stbi__err("outofmem", "Out of memory");
+   for (p=0; p < 7; ++p) {
+      int xorig[] = { 0,4,0,2,0,1,0 };
+      int yorig[] = { 0,0,4,0,2,0,1 };
+      int xspc[]  = { 8,8,4,4,2,2,1 };
+      int yspc[]  = { 8,8,8,4,4,2,2 };
+      int i,j,x,y;
+      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
+      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
+      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
+      if (x && y) {
+         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
+         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
+            STBI_FREE(final);
+            return 0;
+         }
+         for (j=0; j < y; ++j) {
+            for (i=0; i < x; ++i) {
+               int out_y = j*yspc[p]+yorig[p];
+               int out_x = i*xspc[p]+xorig[p];
+               memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n,
+                      a->out + (j*x+i)*out_n, out_n);
+            }
+         }
+         STBI_FREE(a->out);
+         image_data += img_len;
+         image_data_len -= img_len;
+      }
+   }
+   a->out = final;
+
+   return 1;
+}
+
+static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
+{
+   stbi__context *s = z->s;
+   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
+   stbi_uc *p = z->out;
+
+   // compute color-based transparency, assuming we've
+   // already got 255 as the alpha value in the output
+   STBI_ASSERT(out_n == 2 || out_n == 4);
+
+   if (out_n == 2) {
+      for (i=0; i < pixel_count; ++i) {
+         p[1] = (p[0] == tc[0] ? 0 : 255);
+         p += 2;
+      }
+   } else {
+      for (i=0; i < pixel_count; ++i) {
+         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
+            p[3] = 0;
+         p += 4;
+      }
+   }
+   return 1;
+}
+
+static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
+{
+   stbi__context *s = z->s;
+   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
+   stbi__uint16 *p = (stbi__uint16*) z->out;
+
+   // compute color-based transparency, assuming we've
+   // already got 65535 as the alpha value in the output
+   STBI_ASSERT(out_n == 2 || out_n == 4);
+
+   if (out_n == 2) {
+      for (i = 0; i < pixel_count; ++i) {
+         p[1] = (p[0] == tc[0] ? 0 : 65535);
+         p += 2;
+      }
+   } else {
+      for (i = 0; i < pixel_count; ++i) {
+         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
+            p[3] = 0;
+         p += 4;
+      }
+   }
+   return 1;
+}
+
+static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
+{
+   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
+   stbi_uc *p, *temp_out, *orig = a->out;
+
+   p = (stbi_uc *) stbi__malloc(pixel_count * pal_img_n);
+   if (p == NULL) return stbi__err("outofmem", "Out of memory");
+
+   // between here and STBI_FREE(a->out) below, exiting would leak
+   temp_out = p;
+
+   if (pal_img_n == 3) {
+      for (i=0; i < pixel_count; ++i) {
+         int n = orig[i]*4;
+         p[0] = palette[n  ];
+         p[1] = palette[n+1];
+         p[2] = palette[n+2];
+         p += 3;
+      }
+   } else {
+      for (i=0; i < pixel_count; ++i) {
+         int n = orig[i]*4;
+         p[0] = palette[n  ];
+         p[1] = palette[n+1];
+         p[2] = palette[n+2];
+         p[3] = palette[n+3];
+         p += 4;
+      }
+   }
+   STBI_FREE(a->out);
+   a->out = temp_out;
+
+   STBI_NOTUSED(len);
+
+   return 1;
+}
+
+static int stbi__reduce_png(stbi__png *p)
+{
+   int i;
+   int img_len = p->s->img_x * p->s->img_y * p->s->img_out_n;
+   stbi_uc *reduced;
+   stbi__uint16 *orig = (stbi__uint16*)p->out;
+
+   if (p->depth != 16) return 1; // don't need to do anything if not 16-bit data
+
+   reduced = (stbi_uc *)stbi__malloc(img_len);
+   if (reduced == NULL) return stbi__err("outofmem", "Out of memory");
+
+   for (i = 0; i < img_len; ++i) reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // the top byte of each 16-bit sample is a decent approximation of 16->8 bit scaling
+
+   p->out = reduced;
+   STBI_FREE(orig);
+
+   return 1;
+}
+
+static int stbi__unpremultiply_on_load = 0;
+static int stbi__de_iphone_flag = 0;
+
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
+{
+   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
+}
+
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
+{
+   stbi__de_iphone_flag = flag_true_if_should_convert;
+}
+
+static void stbi__de_iphone(stbi__png *z)
+{
+   stbi__context *s = z->s;
+   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
+   stbi_uc *p = z->out;
+
+   if (s->img_out_n == 3) {  // convert bgr to rgb
+      for (i=0; i < pixel_count; ++i) {
+         stbi_uc t = p[0];
+         p[0] = p[2];
+         p[2] = t;
+         p += 3;
+      }
+   } else {
+      STBI_ASSERT(s->img_out_n == 4);
+      if (stbi__unpremultiply_on_load) {
+         // convert bgr to rgb and unpremultiply
+         for (i=0; i < pixel_count; ++i) {
+            stbi_uc a = p[3];
+            stbi_uc t = p[0];
+            if (a) {
+               p[0] = p[2] * 255 / a;
+               p[1] = p[1] * 255 / a;
+               p[2] =  t   * 255 / a;
+            } else {
+               p[0] = p[2];
+               p[2] = t;
+            }
+            p += 4;
+         }
+      } else {
+         // convert bgr to rgb
+         for (i=0; i < pixel_count; ++i) {
+            stbi_uc t = p[0];
+            p[0] = p[2];
+            p[2] = t;
+            p += 4;
+         }
+      }
+   }
+}
+
+#define STBI__PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
+
+static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
+{
+   stbi_uc palette[1024], pal_img_n=0;
+   stbi_uc has_trans=0, tc[3];
+   stbi__uint16 tc16[3];
+   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
+   int first=1,k,interlace=0, color=0, is_iphone=0;
+   stbi__context *s = z->s;
+
+   z->expanded = NULL;
+   z->idata = NULL;
+   z->out = NULL;
+
+   if (!stbi__check_png_header(s)) return 0;
+
+   if (scan == STBI__SCAN_type) return 1;
+
+   for (;;) {
+      stbi__pngchunk c = stbi__get_chunk_header(s);
+      switch (c.type) {
+         case STBI__PNG_TYPE('C','g','B','I'):
+            is_iphone = 1;
+            stbi__skip(s, c.length);
+            break;
+         case STBI__PNG_TYPE('I','H','D','R'): {
+            int comp,filter;
+            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
+            first = 0;
+            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
+            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
+            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
+            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
+            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
+			if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
+            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
+            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
+            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
+            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
+            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
+            if (!pal_img_n) {
+               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
+               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
+               if (scan == STBI__SCAN_header) return 1;
+            } else {
+               // if paletted, then pal_n is our final components, and
+               // img_n is # components to decompress/filter.
+               s->img_n = 1;
+               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
+               // if SCAN_header, have to scan to see if we have a tRNS
+            }
+            break;
+         }
+
+         case STBI__PNG_TYPE('P','L','T','E'):  {
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
+            pal_len = c.length / 3;
+            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
+            for (i=0; i < pal_len; ++i) {
+               palette[i*4+0] = stbi__get8(s);
+               palette[i*4+1] = stbi__get8(s);
+               palette[i*4+2] = stbi__get8(s);
+               palette[i*4+3] = 255;
+            }
+            break;
+         }
+
+         case STBI__PNG_TYPE('t','R','N','S'): {
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
+            if (pal_img_n) {
+               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
+               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
+               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
+               pal_img_n = 4;
+               for (i=0; i < c.length; ++i)
+                  palette[i*4+3] = stbi__get8(s);
+            } else {
+               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
+               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
+               has_trans = 1;
+               if (z->depth == 16) {
+                  for (k = 0; k < s->img_n; ++k) tc16[k] = stbi__get16be(s); // copy the values as-is
+               } else {
+                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
+               }
+            }
+            break;
+         }
+
+         case STBI__PNG_TYPE('I','D','A','T'): {
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
+            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
+            if ((int)(ioff + c.length) < (int)ioff) return 0;
+            if (ioff + c.length > idata_limit) {
+               stbi__uint32 idata_limit_old = idata_limit;
+               stbi_uc *p;
+               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
+               while (ioff + c.length > idata_limit)
+                  idata_limit *= 2;
+               STBI_NOTUSED(idata_limit_old);
+               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
+               z->idata = p;
+            }
+            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
+            ioff += c.length;
+            break;
+         }
+
+         case STBI__PNG_TYPE('I','E','N','D'): {
+            stbi__uint32 raw_len, bpl;
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if (scan != STBI__SCAN_load) return 1;
+            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
+            // initial guess for decoded data size to avoid unnecessary reallocs
+            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
+            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
+            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
+            if (z->expanded == NULL) return 0; // zlib should set error
+            STBI_FREE(z->idata); z->idata = NULL;
+            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
+               s->img_out_n = s->img_n+1;
+            else
+               s->img_out_n = s->img_n;
+            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
+            if (has_trans) {
+               if (z->depth == 16) {
+                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
+               } else {
+                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
+               }
+            }
+            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
+               stbi__de_iphone(z);
+            if (pal_img_n) {
+               // pal_img_n == 3 or 4
+               s->img_n = pal_img_n; // record the actual colors we had
+               s->img_out_n = pal_img_n;
+               if (req_comp >= 3) s->img_out_n = req_comp;
+               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
+                  return 0;
+            }
+            STBI_FREE(z->expanded); z->expanded = NULL;
+            return 1;
+         }
+
+         default:
+            // if critical, fail
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if ((c.type & (1 << 29)) == 0) {
+               #ifndef STBI_NO_FAILURE_STRINGS
+               // not threadsafe
+               static char invalid_chunk[] = "XXXX PNG chunk not known";
+               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
+               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
+               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
+               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
+               #endif
+               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
+            }
+            stbi__skip(s, c.length);
+            break;
+      }
+      // end of PNG chunk, read and skip CRC
+      stbi__get32be(s);
+   }
+}
+
+static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp)
+{
+   unsigned char *result=NULL;
+   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
+      if (p->depth == 16) {
+         if (!stbi__reduce_png(p)) {
+            return result;
+         }
+      }
+      result = p->out;
+      p->out = NULL;
+      if (req_comp && req_comp != p->s->img_out_n) {
+         result = stbi__convert_format(result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+         p->s->img_out_n = req_comp;
+         if (result == NULL) return result;
+      }
+      *x = p->s->img_x;
+      *y = p->s->img_y;
+      if (n) *n = p->s->img_n;
+   }
+   STBI_FREE(p->out);      p->out      = NULL;
+   STBI_FREE(p->expanded); p->expanded = NULL;
+   STBI_FREE(p->idata);    p->idata    = NULL;
+
+   return result;
+}
+
+static unsigned char *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi__png p;
+   p.s = s;
+   return stbi__do_png(&p, x,y,comp,req_comp);
+}
+
+static int stbi__png_test(stbi__context *s)
+{
+   int r;
+   r = stbi__check_png_header(s);
+   stbi__rewind(s);
+   return r;
+}
+
+static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
+{
+   if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
+      stbi__rewind( p->s );
+      return 0;
+   }
+   if (x) *x = p->s->img_x;
+   if (y) *y = p->s->img_y;
+   if (comp) *comp = p->s->img_n;
+   return 1;
+}
+
+static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   stbi__png p;
+   p.s = s;
+   return stbi__png_info_raw(&p, x, y, comp);
+}
+#endif
+
+// Microsoft/Windows BMP image
+
+#ifndef STBI_NO_BMP
+static int stbi__bmp_test_raw(stbi__context *s)
+{
+   int r;
+   int sz;
+   if (stbi__get8(s) != 'B') return 0;
+   if (stbi__get8(s) != 'M') return 0;
+   stbi__get32le(s); // discard filesize
+   stbi__get16le(s); // discard reserved
+   stbi__get16le(s); // discard reserved
+   stbi__get32le(s); // discard data offset
+   sz = stbi__get32le(s);
+   r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
+   return r;
+}
+
+static int stbi__bmp_test(stbi__context *s)
+{
+   int r = stbi__bmp_test_raw(s);
+   stbi__rewind(s);
+   return r;
+}
+
+
+// returns 0..31 for the highest set bit
+static int stbi__high_bit(unsigned int z)
+{
+   int n=0;
+   if (z == 0) return -1;
+   if (z >= 0x10000) n += 16, z >>= 16;
+   if (z >= 0x00100) n +=  8, z >>=  8;
+   if (z >= 0x00010) n +=  4, z >>=  4;
+   if (z >= 0x00004) n +=  2, z >>=  2;
+   if (z >= 0x00002) n +=  1, z >>=  1;
+   return n;
+}
+
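+// counts the set bits in a 32-bit word (parallel popcount); used below to find how many
+// bits each BMP channel mask occupies so the masked value can be rescaled to 8 bits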
+static int stbi__bitcount(unsigned int a)
+{
+   a = (a & 0x55555555) + ((a >>  1) & 0x55555555); // max 2
+   a = (a & 0x33333333) + ((a >>  2) & 0x33333333); // max 4
+   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
+   a = (a + (a >> 8)); // max 16 per 8 bits
+   a = (a + (a >> 16)); // max 32 per 8 bits
+   return a & 0xff;
+}
+
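+// extracts a masked BMP channel value and expands it from 'bits' bits to the full 0..255
+// range: the value is shifted so its highest bit lands at bit 7, then the bit pattern is
+// replicated into the lower bits (e.g. a 5-bit maximum of 31 becomes 255, not 248)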
+static int stbi__shiftsigned(int v, int shift, int bits)
+{
+   int result;
+   int z=0;
+
+   if (shift < 0) v <<= -shift;
+   else v >>= shift;
+   result = v;
+
+   z = bits;
+   while (z < 8) {
+      result += v >> z;
+      z += bits;
+   }
+   return result;
+}
+
+typedef struct
+{
+   int bpp, offset, hsz;
+   unsigned int mr,mg,mb,ma, all_a;
+} stbi__bmp_data;
+
+static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
+{
+   int hsz;
+   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
+   stbi__get32le(s); // discard filesize
+   stbi__get16le(s); // discard reserved
+   stbi__get16le(s); // discard reserved
+   info->offset = stbi__get32le(s);
+   info->hsz = hsz = stbi__get32le(s);
+   info->mr = info->mg = info->mb = info->ma = 0;
+   
+   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
+   if (hsz == 12) {
+      s->img_x = stbi__get16le(s);
+      s->img_y = stbi__get16le(s);
+   } else {
+      s->img_x = stbi__get32le(s);
+      s->img_y = stbi__get32le(s);
+   }
+   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
+   info->bpp = stbi__get16le(s);
+   if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
+   if (hsz != 12) {
+      int compress = stbi__get32le(s);
+      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+      stbi__get32le(s); // discard sizeof
+      stbi__get32le(s); // discard hres
+      stbi__get32le(s); // discard vres
+      stbi__get32le(s); // discard colorsused
+      stbi__get32le(s); // discard max important
+      if (hsz == 40 || hsz == 56) {
+         if (hsz == 56) {
+            stbi__get32le(s);
+            stbi__get32le(s);
+            stbi__get32le(s);
+            stbi__get32le(s);
+         }
+         if (info->bpp == 16 || info->bpp == 32) {
+            if (compress == 0) {
+               if (info->bpp == 32) {
+                  info->mr = 0xffu << 16;
+                  info->mg = 0xffu <<  8;
+                  info->mb = 0xffu <<  0;
+                  info->ma = 0xffu << 24;
+                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
+               } else {
+                  info->mr = 31u << 10;
+                  info->mg = 31u <<  5;
+                  info->mb = 31u <<  0;
+               }
+            } else if (compress == 3) {
+               info->mr = stbi__get32le(s);
+               info->mg = stbi__get32le(s);
+               info->mb = stbi__get32le(s);
+               // not documented, but generated by photoshop and handled by mspaint
+               if (info->mr == info->mg && info->mg == info->mb) {
+                  // ?!?!?
+                  return stbi__errpuc("bad BMP", "bad BMP");
+               }
+            } else
+               return stbi__errpuc("bad BMP", "bad BMP");
+         }
+      } else {
+         int i;
+         if (hsz != 108 && hsz != 124)
+            return stbi__errpuc("bad BMP", "bad BMP");
+         info->mr = stbi__get32le(s);
+         info->mg = stbi__get32le(s);
+         info->mb = stbi__get32le(s);
+         info->ma = stbi__get32le(s);
+         stbi__get32le(s); // discard color space
+         for (i=0; i < 12; ++i)
+            stbi__get32le(s); // discard color space parameters
+         if (hsz == 124) {
+            stbi__get32le(s); // discard rendering intent
+            stbi__get32le(s); // discard offset of profile data
+            stbi__get32le(s); // discard size of profile data
+            stbi__get32le(s); // discard reserved
+         }
+      }
+   }
+   return (void *) 1;
+}
+
+
+static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi_uc *out;
+   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
+   stbi_uc pal[256][4];
+   int psize=0,i,j,width;
+   int flip_vertically, pad, target;
+   stbi__bmp_data info;
+
+   info.all_a = 255;   
+   if (stbi__bmp_parse_header(s, &info) == NULL)
+      return NULL; // error code already set
+
+   flip_vertically = ((int) s->img_y) > 0;
+   s->img_y = abs((int) s->img_y);
+
+   mr = info.mr;
+   mg = info.mg;
+   mb = info.mb;
+   ma = info.ma;
+   all_a = info.all_a;
+
+   if (info.hsz == 12) {
+      if (info.bpp < 24)
+         psize = (info.offset - 14 - 24) / 3;
+   } else {
+      if (info.bpp < 16)
+         psize = (info.offset - 14 - info.hsz) >> 2;
+   }
+
+   s->img_n = ma ? 4 : 3;
+   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
+      target = req_comp;
+   else
+      target = s->img_n; // if they want monochrome, we'll post-convert
+
+   out = (stbi_uc *) stbi__malloc(target * s->img_x * s->img_y);
+   if (!out) return stbi__errpuc("outofmem", "Out of memory");
+   if (info.bpp < 16) {
+      int z=0;
+      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
+      for (i=0; i < psize; ++i) {
+         pal[i][2] = stbi__get8(s);
+         pal[i][1] = stbi__get8(s);
+         pal[i][0] = stbi__get8(s);
+         if (info.hsz != 12) stbi__get8(s);
+         pal[i][3] = 255;
+      }
+      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
+      if (info.bpp == 4) width = (s->img_x + 1) >> 1;
+      else if (info.bpp == 8) width = s->img_x;
+      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
+      pad = (-width)&3;
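+      // each row of palette indices is padded to a multiple of 4 bytes; 4-bit images pack
+      // two pixel indices per byte, high nibble first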
+      for (j=0; j < (int) s->img_y; ++j) {
+         for (i=0; i < (int) s->img_x; i += 2) {
+            int v=stbi__get8(s),v2=0;
+            if (info.bpp == 4) {
+               v2 = v & 15;
+               v >>= 4;
+            }
+            out[z++] = pal[v][0];
+            out[z++] = pal[v][1];
+            out[z++] = pal[v][2];
+            if (target == 4) out[z++] = 255;
+            if (i+1 == (int) s->img_x) break;
+            v = (info.bpp == 8) ? stbi__get8(s) : v2;
+            out[z++] = pal[v][0];
+            out[z++] = pal[v][1];
+            out[z++] = pal[v][2];
+            if (target == 4) out[z++] = 255;
+         }
+         stbi__skip(s, pad);
+      }
+   } else {
+      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
+      int z = 0;
+      int easy=0;
+      stbi__skip(s, info.offset - 14 - info.hsz);
+      if (info.bpp == 24) width = 3 * s->img_x;
+      else if (info.bpp == 16) width = 2*s->img_x;
+      else /* bpp = 32 and pad = 0 */ width=0;
+      pad = (-width) & 3;
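+      // fast paths: easy == 1 is packed 24-bit BGR, easy == 2 is 32-bit BGRA with the
+      // standard masks; anything else goes through the generic bit-mask extraction below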
+      if (info.bpp == 24) {
+         easy = 1;
+      } else if (info.bpp == 32) {
+         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
+            easy = 2;
+      }
+      if (!easy) {
+         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
+         // right shift amt to put high bit in position #7
+         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
+         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
+         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
+         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
+      }
+      for (j=0; j < (int) s->img_y; ++j) {
+         if (easy) {
+            for (i=0; i < (int) s->img_x; ++i) {
+               unsigned char a;
+               out[z+2] = stbi__get8(s);
+               out[z+1] = stbi__get8(s);
+               out[z+0] = stbi__get8(s);
+               z += 3;
+               a = (easy == 2 ? stbi__get8(s) : 255);
+               all_a |= a;
+               if (target == 4) out[z++] = a;
+            }
+         } else {
+            int bpp = info.bpp;
+            for (i=0; i < (int) s->img_x; ++i) {
+               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
+               int a;
+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
+               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
+               all_a |= a;
+               if (target == 4) out[z++] = STBI__BYTECAST(a);
+            }
+         }
+         stbi__skip(s, pad);
+      }
+   }
+   
+   // if alpha channel is all 0s, replace with all 255s
+   if (target == 4 && all_a == 0)
+      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
+         out[i] = 255;
+
+   if (flip_vertically) {
+      stbi_uc t;
+      for (j=0; j < (int) s->img_y>>1; ++j) {
+         stbi_uc *p1 = out +      j     *s->img_x*target;
+         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
+         for (i=0; i < (int) s->img_x*target; ++i) {
+            t = p1[i], p1[i] = p2[i], p2[i] = t;
+         }
+      }
+   }
+
+   if (req_comp && req_comp != target) {
+      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
+      if (out == NULL) return out; // stbi__convert_format frees input on failure
+   }
+
+   *x = s->img_x;
+   *y = s->img_y;
+   if (comp) *comp = s->img_n;
+   return out;
+}
+#endif
+
+// Targa Truevision - TGA
+// by Jonathan Dummer
+#ifndef STBI_NO_TGA
+// returns STBI_rgb or whatever, 0 on error
+static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
+{
+   // only RGB or RGBA (incl. 16bit) or grey allowed
+   if(is_rgb16) *is_rgb16 = 0;
+   switch(bits_per_pixel) {
+      case 8:  return STBI_grey;
+      case 16: if(is_grey) return STBI_grey_alpha;
+            // else: fall-through
+      case 15: if(is_rgb16) *is_rgb16 = 1;
+            return STBI_rgb;
+      case 24: // fall-through
+      case 32: return bits_per_pixel/8;
+      default: return 0;
+   }
+}
+
+static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
+{
+    int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
+    int sz, tga_colormap_type;
+    stbi__get8(s);                   // discard Offset
+    tga_colormap_type = stbi__get8(s); // colormap type
+    if( tga_colormap_type > 1 ) {
+        stbi__rewind(s);
+        return 0;      // only RGB or indexed allowed
+    }
+    tga_image_type = stbi__get8(s); // image type
+    if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
+        if (tga_image_type != 1 && tga_image_type != 9) {
+            stbi__rewind(s);
+            return 0;
+        }
+        stbi__skip(s,4);       // skip index of first colormap entry and number of entries
+        sz = stbi__get8(s);    //   check bits per palette color entry
+        if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
+            stbi__rewind(s);
+            return 0;
+        }
+        stbi__skip(s,4);       // skip image x and y origin
+        tga_colormap_bpp = sz;
+    } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
+        if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
+            stbi__rewind(s);
+            return 0; // only RGB or grey allowed, +/- RLE
+        }
+        stbi__skip(s,9); // skip colormap specification and image x/y origin
+        tga_colormap_bpp = 0;
+    }
+    tga_w = stbi__get16le(s);
+    if( tga_w < 1 ) {
+        stbi__rewind(s);
+        return 0;   // test width
+    }
+    tga_h = stbi__get16le(s);
+    if( tga_h < 1 ) {
+        stbi__rewind(s);
+        return 0;   // test height
+    }
+    tga_bits_per_pixel = stbi__get8(s); // bits per pixel
+    stbi__get8(s); // ignore alpha bits
+    if (tga_colormap_bpp != 0) {
+        if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
+            // when using a colormap, tga_bits_per_pixel is the size of the indexes
+            // I don't think anything but 8 or 16bit indexes makes sense
+            stbi__rewind(s);
+            return 0;
+        }
+        tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
+    } else {
+        tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
+    }
+    if(!tga_comp) {
+      stbi__rewind(s);
+      return 0;
+    }
+    if (x) *x = tga_w;
+    if (y) *y = tga_h;
+    if (comp) *comp = tga_comp;
+    return 1;                   // seems to have passed everything
+}
+
+static int stbi__tga_test(stbi__context *s)
+{
+   int res = 0;
+   int sz, tga_color_type;
+   stbi__get8(s);      //   discard Offset
+   tga_color_type = stbi__get8(s);   //   color type
+   if ( tga_color_type > 1 ) goto errorEnd;   //   only RGB or indexed allowed
+   sz = stbi__get8(s);   //   image type
+   if ( tga_color_type == 1 ) { // colormapped (paletted) image
+      if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
+      stbi__skip(s,4);       // skip index of first colormap entry and number of entries
+      sz = stbi__get8(s);    //   check bits per palette color entry
+      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
+      stbi__skip(s,4);       // skip image x and y origin
+   } else { // "normal" image w/o colormap
+      if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
+      stbi__skip(s,9); // skip colormap specification and image x/y origin
+   }
+   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test width
+   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test height
+   sz = stbi__get8(s);   //   bits per pixel
+   if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
+   if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
+
+   res = 1; // if we got this far, everything's good and we can return 1 instead of 0
+
+errorEnd:
+   stbi__rewind(s);
+   return res;
+}
+
+// read 16bit value and convert to 24bit RGB
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
+{
+   stbi__uint16 px = stbi__get16le(s);
+   stbi__uint16 fiveBitMask = 31;
+   // we have 3 channels with 5bits each
+   int r = (px >> 10) & fiveBitMask;
+   int g = (px >> 5) & fiveBitMask;
+   int b = px & fiveBitMask;
+   // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
+   out[0] = (r * 255)/31;
+   out[1] = (g * 255)/31;
+   out[2] = (b * 255)/31;
+
+   // some people claim that the most significant bit might be used for alpha
+   // (possibly if an alpha-bit is set in the "image descriptor byte")
+   // but that only made 16bit test images completely translucent..
+   // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
+}
+
+static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   //   read in the TGA header stuff
+   int tga_offset = stbi__get8(s);
+   int tga_indexed = stbi__get8(s);
+   int tga_image_type = stbi__get8(s);
+   int tga_is_RLE = 0;
+   int tga_palette_start = stbi__get16le(s);
+   int tga_palette_len = stbi__get16le(s);
+   int tga_palette_bits = stbi__get8(s);
+   int tga_x_origin = stbi__get16le(s);
+   int tga_y_origin = stbi__get16le(s);
+   int tga_width = stbi__get16le(s);
+   int tga_height = stbi__get16le(s);
+   int tga_bits_per_pixel = stbi__get8(s);
+   int tga_comp, tga_rgb16=0;
+   int tga_inverted = stbi__get8(s);
+   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
+   //   image data
+   unsigned char *tga_data;
+   unsigned char *tga_palette = NULL;
+   int i, j;
+   unsigned char raw_data[4];
+   int RLE_count = 0;
+   int RLE_repeating = 0;
+   int read_next_pixel = 1;
+
+   //   do a tiny bit of processing
+   if ( tga_image_type >= 8 )
+   {
+      tga_image_type -= 8;
+      tga_is_RLE = 1;
+   }
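+   // bit 5 of the image descriptor byte set means rows are stored top-to-bottom;
+   // when it is clear the file is bottom-up and the rows are flipped so the output is top-to-bottom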
+   tga_inverted = 1 - ((tga_inverted >> 5) & 1);
+
+   //   If I'm paletted, then I'll use the number of bits from the palette
+   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
+   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
+
+   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
+      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
+
+   //   tga info
+   *x = tga_width;
+   *y = tga_height;
+   if (comp) *comp = tga_comp;
+
+   tga_data = (unsigned char*)stbi__malloc( (size_t)tga_width * tga_height * tga_comp );
+   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
+
+   // skip to the data's starting position (offset usually = 0)
+   stbi__skip(s, tga_offset );
+
+   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
+      for (i=0; i < tga_height; ++i) {
+         int row = tga_inverted ? tga_height -i - 1 : i;
+         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
+         stbi__getn(s, tga_row, tga_width * tga_comp);
+      }
+   } else  {
+      //   do I need to load a palette?
+      if ( tga_indexed)
+      {
+         //   any data to skip? (offset usually = 0)
+         stbi__skip(s, tga_palette_start );
+         //   load the palette
+         tga_palette = (unsigned char*)stbi__malloc( tga_palette_len * tga_comp );
+         if (!tga_palette) {
+            STBI_FREE(tga_data);
+            return stbi__errpuc("outofmem", "Out of memory");
+         }
+         if (tga_rgb16) {
+            stbi_uc *pal_entry = tga_palette;
+            STBI_ASSERT(tga_comp == STBI_rgb);
+            for (i=0; i < tga_palette_len; ++i) {
+               stbi__tga_read_rgb16(s, pal_entry);
+               pal_entry += tga_comp;
+            }
+         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
+               STBI_FREE(tga_data);
+               STBI_FREE(tga_palette);
+               return stbi__errpuc("bad palette", "Corrupt TGA");
+         }
+      }
+      //   load the data
+      for (i=0; i < tga_width * tga_height; ++i)
+      {
+         //   if I'm in RLE mode, do I need to get a RLE packet?
+         if ( tga_is_RLE )
+         {
+            if ( RLE_count == 0 )
+            {
+               //   yep, get the next byte as a RLE command
+               int RLE_cmd = stbi__get8(s);
+               RLE_count = 1 + (RLE_cmd & 127);
+               RLE_repeating = RLE_cmd >> 7;
+               read_next_pixel = 1;
+            } else if ( !RLE_repeating )
+            {
+               read_next_pixel = 1;
+            }
+         } else
+         {
+            read_next_pixel = 1;
+         }
+         //   OK, if I need to read a pixel, do it now
+         if ( read_next_pixel )
+         {
+            //   load however much data we did have
+            if ( tga_indexed )
+            {
+               // read in index, then perform the lookup
+               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
+               if ( pal_idx >= tga_palette_len ) {
+                  // invalid index
+                  pal_idx = 0;
+               }
+               pal_idx *= tga_comp;
+               for (j = 0; j < tga_comp; ++j) {
+                  raw_data[j] = tga_palette[pal_idx+j];
+               }
+            } else if(tga_rgb16) {
+               STBI_ASSERT(tga_comp == STBI_rgb);
+               stbi__tga_read_rgb16(s, raw_data);
+            } else {
+               //   read in the data raw
+               for (j = 0; j < tga_comp; ++j) {
+                  raw_data[j] = stbi__get8(s);
+               }
+            }
+            //   clear the reading flag for the next pixel
+            read_next_pixel = 0;
+         } // end of reading a pixel
+
+         // copy data
+         for (j = 0; j < tga_comp; ++j)
+           tga_data[i*tga_comp+j] = raw_data[j];
+
+         //   in case we're in RLE mode, keep counting down
+         --RLE_count;
+      }
+      //   do I need to invert the image?
+      if ( tga_inverted )
+      {
+         for (j = 0; j*2 < tga_height; ++j)
+         {
+            int index1 = j * tga_width * tga_comp;
+            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
+            for (i = tga_width * tga_comp; i > 0; --i)
+            {
+               unsigned char temp = tga_data[index1];
+               tga_data[index1] = tga_data[index2];
+               tga_data[index2] = temp;
+               ++index1;
+               ++index2;
+            }
+         }
+      }
+      //   clear my palette, if I had one
+      if ( tga_palette != NULL )
+      {
+         STBI_FREE( tga_palette );
+      }
+   }
+
+   // swap RGB - if the source data was RGB16, it already is in the right order
+   if (tga_comp >= 3 && !tga_rgb16)
+   {
+      unsigned char* tga_pixel = tga_data;
+      for (i=0; i < tga_width * tga_height; ++i)
+      {
+         unsigned char temp = tga_pixel[0];
+         tga_pixel[0] = tga_pixel[2];
+         tga_pixel[2] = temp;
+         tga_pixel += tga_comp;
+      }
+   }
+
+   // convert to target component count
+   if (req_comp && req_comp != tga_comp)
+      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
+
+   //   the things I do to get rid of an error message, and yet keep
+   //   Microsoft's C compilers happy... [8^(
+   tga_palette_start = tga_palette_len = tga_palette_bits =
+         tga_x_origin = tga_y_origin = 0;
+   //   OK, done
+   return tga_data;
+}
+#endif
+
+// *************************************************************************************************
+// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
+
+#ifndef STBI_NO_PSD
+static int stbi__psd_test(stbi__context *s)
+{
+   int r = (stbi__get32be(s) == 0x38425053);
+   stbi__rewind(s);
+   return r;
+}
+
+static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   int   pixelCount;
+   int channelCount, compression;
+   int channel, i, count, len;
+   int bitdepth;
+   int w,h;
+   stbi_uc *out;
+
+   // Check identifier
+   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
+      return stbi__errpuc("not PSD", "Corrupt PSD image");
+
+   // Check file type version.
+   if (stbi__get16be(s) != 1)
+      return stbi__errpuc("wrong version", "Unsupported version of PSD image");
+
+   // Skip 6 reserved bytes.
+   stbi__skip(s, 6 );
+
+   // Read the number of channels (R, G, B, A, etc).
+   channelCount = stbi__get16be(s);
+   if (channelCount < 0 || channelCount > 16)
+      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
+
+   // Read the rows and columns of the image.
+   h = stbi__get32be(s);
+   w = stbi__get32be(s);
+
+   // Make sure the depth is 8 bits.
+   bitdepth = stbi__get16be(s);
+   if (bitdepth != 8 && bitdepth != 16)
+      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
+
+   // Make sure the color mode is RGB.
+   // Valid options are:
+   //   0: Bitmap
+   //   1: Grayscale
+   //   2: Indexed color
+   //   3: RGB color
+   //   4: CMYK color
+   //   7: Multichannel
+   //   8: Duotone
+   //   9: Lab color
+   if (stbi__get16be(s) != 3)
+      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
+
+   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
+   stbi__skip(s,stbi__get32be(s) );
+
+   // Skip the image resources.  (resolution, pen tool paths, etc)
+   stbi__skip(s, stbi__get32be(s) );
+
+   // Skip the reserved data.
+   stbi__skip(s, stbi__get32be(s) );
+
+   // Find out if the data is compressed.
+   // Known values:
+   //   0: no compression
+   //   1: RLE compressed
+   compression = stbi__get16be(s);
+   if (compression > 1)
+      return stbi__errpuc("bad compression", "PSD has an unknown compression format");
+
+   // Create the destination image.
+   out = (stbi_uc *) stbi__malloc(4 * w*h);
+   if (!out) return stbi__errpuc("outofmem", "Out of memory");
+   pixelCount = w*h;
+
+   // Initialize the data to zero.
+   //memset( out, 0, pixelCount * 4 );
+
+   // Finally, the image data.
+   if (compression) {
+      // RLE as used by .PSD and .TIFF
+      // Loop until you get the number of unpacked bytes you are expecting:
+      //     Read the next source byte into n.
+      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
+      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
+      //     Else if n is 128, noop.
+      // Endloop
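+      // For example, the byte sequence 02 41 42 43 decodes to "ABC" (copy n+1 = 3 literal
+      // bytes), and FE 58 decodes to "XXX" (repeat the next byte -n+1 = 3 times).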
+
+      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
+      // which we're going to just skip.
+      stbi__skip(s, h * channelCount * 2 );
+
+      // Read the RLE data by channel.
+      for (channel = 0; channel < 4; channel++) {
+         stbi_uc *p;
+
+         p = out+channel;
+         if (channel >= channelCount) {
+            // Fill this channel with default data.
+            for (i = 0; i < pixelCount; i++, p += 4)
+               *p = (channel == 3 ? 255 : 0);
+         } else {
+            // Read the RLE data.
+            count = 0;
+            while (count < pixelCount) {
+               len = stbi__get8(s);
+               if (len == 128) {
+                  // No-op.
+               } else if (len < 128) {
+                  // Copy next len+1 bytes literally.
+                  len++;
+                  count += len;
+                  while (len) {
+                     *p = stbi__get8(s);
+                     p += 4;
+                     len--;
+                  }
+               } else if (len > 128) {
+                  stbi_uc   val;
+                  // Next -len+1 bytes in the dest are replicated from next source byte.
+                  // (Interpret len as a negative 8-bit int.)
+                  len ^= 0x0FF;
+                  len += 2;
+                  val = stbi__get8(s);
+                  count += len;
+                  while (len) {
+                     *p = val;
+                     p += 4;
+                     len--;
+                  }
+               }
+            }
+         }
+      }
+
+   } else {
+      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
+      // where each channel consists of an 8-bit value for each pixel in the image.
+
+      // Read the data by channel.
+      for (channel = 0; channel < 4; channel++) {
+         stbi_uc *p;
+
+         p = out + channel;
+         if (channel >= channelCount) {
+            // Fill this channel with default data.
+            stbi_uc val = channel == 3 ? 255 : 0;
+            for (i = 0; i < pixelCount; i++, p += 4)
+               *p = val;
+         } else {
+            // Read the data.
+            if (bitdepth == 16) {
+               for (i = 0; i < pixelCount; i++, p += 4)
+                  *p = (stbi_uc) (stbi__get16be(s) >> 8);
+            } else {
+               for (i = 0; i < pixelCount; i++, p += 4)
+                  *p = stbi__get8(s);
+            }
+         }
+      }
+   }
+
+   if (channelCount >= 4) {
+      for (i=0; i < w*h; ++i) {
+         unsigned char *pixel = out + 4*i;
+         if (pixel[3] != 0 && pixel[3] != 255) {
+            // remove weird white matte from PSD
+            float a = pixel[3] / 255.0f;
+            float ra = 1.0f / a;
+            float inv_a = 255.0f * (1 - ra);
+            pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
+            pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
+            pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
+         }
+      }
+   }
+
+   if (req_comp && req_comp != 4) {
+      out = stbi__convert_format(out, 4, req_comp, w, h);
+      if (out == NULL) return out; // stbi__convert_format frees input on failure
+   }
+
+   if (comp) *comp = 4;
+   *y = h;
+   *x = w;
+
+   return out;
+}
+#endif
+
+// *************************************************************************************************
+// Softimage PIC loader
+// by Tom Seddon
+//
+// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
+// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
+
+#ifndef STBI_NO_PIC
+static int stbi__pic_is4(stbi__context *s,const char *str)
+{
+   int i;
+   for (i=0; i<4; ++i)
+      if (stbi__get8(s) != (stbi_uc)str[i])
+         return 0;
+
+   return 1;
+}
+
+static int stbi__pic_test_core(stbi__context *s)
+{
+   int i;
+
+   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
+      return 0;
+
+   for(i=0;i<84;++i)
+      stbi__get8(s);
+
+   if (!stbi__pic_is4(s,"PICT"))
+      return 0;
+
+   return 1;
+}
+
+typedef struct
+{
+   stbi_uc size,type,channel;
+} stbi__pic_packet;
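+// Each PIC image carries a chain of packet descriptors: a chained flag byte, then bits per
+// channel (size, must be 8 here), a compression type (0 = uncompressed, 1 = pure RLE,
+// 2 = mixed RLE) and a channel bitmask where 0x80/0x40/0x20/0x10 select red/green/blue/alpha.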
+
+static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
+{
+   int mask=0x80, i;
+
+   for (i=0; i<4; ++i, mask>>=1) {
+      if (channel & mask) {
+         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
+         dest[i]=stbi__get8(s);
+      }
+   }
+
+   return dest;
+}
+
+static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
+{
+   int mask=0x80,i;
+
+   for (i=0;i<4; ++i, mask>>=1)
+      if (channel&mask)
+         dest[i]=src[i];
+}
+
+static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
+{
+   int act_comp=0,num_packets=0,y,chained;
+   stbi__pic_packet packets[10];
+
+   // this will (should...) cater for even some bizarre stuff like having data
+   // for the same channel in multiple packets.
+   do {
+      stbi__pic_packet *packet;
+
+      if (num_packets==sizeof(packets)/sizeof(packets[0]))
+         return stbi__errpuc("bad format","too many packets");
+
+      packet = &packets[num_packets++];
+
+      chained = stbi__get8(s);
+      packet->size    = stbi__get8(s);
+      packet->type    = stbi__get8(s);
+      packet->channel = stbi__get8(s);
+
+      act_comp |= packet->channel;
+
+      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
+      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
+   } while (chained);
+
+   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+
+   for(y=0; y<height; ++y) {
+      int packet_idx;
+
+      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
+         stbi__pic_packet *packet = &packets[packet_idx];
+         stbi_uc *dest = result+y*width*4;
+
+         switch (packet->type) {
+            default:
+               return stbi__errpuc("bad format","packet has bad compression type");
+
+            case 0: {//uncompressed
+               int x;
+
+               for(x=0;x<width;++x, dest+=4)
+                  if (!stbi__readval(s,packet->channel,dest))
+                     return 0;
+               break;
+            }
+
+            case 1://Pure RLE
+               {
+                  int left=width, i;
+
+                  while (left>0) {
+                     stbi_uc count,value[4];
+
+                     count=stbi__get8(s);
+                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");
+
+                     if (count > left)
+                        count = (stbi_uc) left;
+
+                     if (!stbi__readval(s,packet->channel,value))  return 0;
+
+                     for(i=0; i<count; ++i,dest+=4)
+                        stbi__copyval(packet->channel,dest,value);
+                     left -= count;
+                  }
+               }
+               break;
+
+            case 2: {//Mixed RLE
+               int left=width;
+               while (left>0) {
+                  int count = stbi__get8(s), i;
+                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");
+
+                  if (count >= 128) { // Repeated
+                     stbi_uc value[4];
+
+                     if (count==128)
+                        count = stbi__get16be(s);
+                     else
+                        count -= 127;
+                     if (count > left)
+                        return stbi__errpuc("bad file","scanline overrun");
+
+                     if (!stbi__readval(s,packet->channel,value))
+                        return 0;
+
+                     for(i=0;i<count;++i, dest += 4)
+                        stbi__copyval(packet->channel,dest,value);
+                  } else { // Raw
+                     ++count;
+                     if (count>left) return stbi__errpuc("bad file","scanline overrun");
+
+                     for(i=0;i<count;++i, dest+=4)
+                        if (!stbi__readval(s,packet->channel,dest))
+                           return 0;
+                  }
+                  left-=count;
+               }
+               break;
+            }
+         }
+      }
+   }
+
+   return result;
+}
+
+static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp)
+{
+   stbi_uc *result;
+   int i, x,y;
+
+   for (i=0; i<92; ++i)
+      stbi__get8(s);
+
+   x = stbi__get16be(s);
+   y = stbi__get16be(s);
+   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
+   if ((1 << 28) / x < y) return stbi__errpuc("too large", "Image too large to decode");
+
+   stbi__get32be(s); //skip `ratio'
+   stbi__get16be(s); //skip `fields'
+   stbi__get16be(s); //skip `pad'
+
+   // intermediate buffer is RGBA
+   result = (stbi_uc *) stbi__malloc(x*y*4);
+   if (!result) return stbi__errpuc("outofmem", "Out of memory");
+   memset(result, 0xff, x*y*4);
+
+   if (!stbi__pic_load_core(s,x,y,comp, result)) {
+      STBI_FREE(result);
+      result=0;
+   }
+   *px = x;
+   *py = y;
+   if (req_comp == 0) req_comp = *comp;
+   result=stbi__convert_format(result,4,req_comp,x,y);
+
+   return result;
+}
+
+static int stbi__pic_test(stbi__context *s)
+{
+   int r = stbi__pic_test_core(s);
+   stbi__rewind(s);
+   return r;
+}
+#endif
+
+// *************************************************************************************************
+// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
+
+#ifndef STBI_NO_GIF
+typedef struct
+{
+   stbi__int16 prefix;
+   stbi_uc first;
+   stbi_uc suffix;
+} stbi__gif_lzw;
+
+typedef struct
+{
+   int w,h;
+   stbi_uc *out, *old_out;             // output buffer (always 4 components)
+   int flags, bgindex, ratio, transparent, eflags, delay;
+   stbi_uc  pal[256][4];
+   stbi_uc lpal[256][4];
+   stbi__gif_lzw codes[4096];
+   stbi_uc *color_table;
+   int parse, step;
+   int lflags;
+   int start_x, start_y;
+   int max_x, max_y;
+   int cur_x, cur_y;
+   int line_size;
+} stbi__gif;
+
+static int stbi__gif_test_raw(stbi__context *s)
+{
+   int sz;
+   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
+   sz = stbi__get8(s);
+   if (sz != '9' && sz != '7') return 0;
+   if (stbi__get8(s) != 'a') return 0;
+   return 1;
+}
+
+static int stbi__gif_test(stbi__context *s)
+{
+   int r = stbi__gif_test_raw(s);
+   stbi__rewind(s);
+   return r;
+}
+
+static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
+{
+   int i;
+   for (i=0; i < num_entries; ++i) {
+      pal[i][2] = stbi__get8(s);
+      pal[i][1] = stbi__get8(s);
+      pal[i][0] = stbi__get8(s);
+      pal[i][3] = transp == i ? 0 : 255;
+   }
+}
+
+static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
+{
+   stbi_uc version;
+   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
+      return stbi__err("not GIF", "Corrupt GIF");
+
+   version = stbi__get8(s);
+   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
+   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");
+
+   stbi__g_failure_reason = "";
+   g->w = stbi__get16le(s);
+   g->h = stbi__get16le(s);
+   g->flags = stbi__get8(s);
+   g->bgindex = stbi__get8(s);
+   g->ratio = stbi__get8(s);
+   g->transparent = -1;
+
+   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
+
+   if (is_info) return 1;
+
+   if (g->flags & 0x80)
+      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
+
+   return 1;
+}
+
+static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
+{
+   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
+   if (!stbi__gif_header(s, g, comp, 1)) {
+      STBI_FREE(g);
+      stbi__rewind( s );
+      return 0;
+   }
+   if (x) *x = g->w;
+   if (y) *y = g->h;
+   STBI_FREE(g);
+   return 1;
+}
+
+static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
+{
+   stbi_uc *p, *c;
+
+   // recurse to decode the prefixes, since the linked-list is backwards,
+   // and working backwards through an interleaved image would be nasty
+   if (g->codes[code].prefix >= 0)
+      stbi__out_gif_code(g, g->codes[code].prefix);
+
+   if (g->cur_y >= g->max_y) return;
+
+   p = &g->out[g->cur_x + g->cur_y];
+   c = &g->color_table[g->codes[code].suffix * 4];
+
+   if (c[3] >= 128) {
+      p[0] = c[2];
+      p[1] = c[1];
+      p[2] = c[0];
+      p[3] = c[3];
+   }
+   g->cur_x += 4;
+
+   if (g->cur_x >= g->max_x) {
+      g->cur_x = g->start_x;
+      g->cur_y += g->step;
+
+      while (g->cur_y >= g->max_y && g->parse > 0) {
+         g->step = (1 << g->parse) * g->line_size;
+         g->cur_y = g->start_y + (g->step >> 1);
+         --g->parse;
+      }
+   }
+}
+
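+// GIF LZW decompressor: variable-width codes (lzw_cs+1 up to 12 bits) are read
+// least-significant-bit first from the data sub-blocks; every emitted code appends one
+// byte to a previously seen string, stored as a backwards prefix chain and written out
+// by stbi__out_gif_code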
+static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
+{
+   stbi_uc lzw_cs;
+   stbi__int32 len, init_code;
+   stbi__uint32 first;
+   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
+   stbi__gif_lzw *p;
+
+   lzw_cs = stbi__get8(s);
+   if (lzw_cs > 12) return NULL;
+   clear = 1 << lzw_cs;
+   first = 1;
+   codesize = lzw_cs + 1;
+   codemask = (1 << codesize) - 1;
+   bits = 0;
+   valid_bits = 0;
+   for (init_code = 0; init_code < clear; init_code++) {
+      g->codes[init_code].prefix = -1;
+      g->codes[init_code].first = (stbi_uc) init_code;
+      g->codes[init_code].suffix = (stbi_uc) init_code;
+   }
+
+   // support no starting clear code
+   avail = clear+2;
+   oldcode = -1;
+
+   len = 0;
+   for(;;) {
+      if (valid_bits < codesize) {
+         if (len == 0) {
+            len = stbi__get8(s); // start new block
+            if (len == 0)
+               return g->out;
+         }
+         --len;
+         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
+         valid_bits += 8;
+      } else {
+         stbi__int32 code = bits & codemask;
+         bits >>= codesize;
+         valid_bits -= codesize;
+         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
+         if (code == clear) {  // clear code
+            codesize = lzw_cs + 1;
+            codemask = (1 << codesize) - 1;
+            avail = clear + 2;
+            oldcode = -1;
+            first = 0;
+         } else if (code == clear + 1) { // end of stream code
+            stbi__skip(s, len);
+            while ((len = stbi__get8(s)) > 0)
+               stbi__skip(s,len);
+            return g->out;
+         } else if (code <= avail) {
+            if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
+
+            if (oldcode >= 0) {
+               p = &g->codes[avail++];
+               if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
+               p->prefix = (stbi__int16) oldcode;
+               p->first = g->codes[oldcode].first;
+               p->suffix = (code == avail) ? p->first : g->codes[code].first;
+            } else if (code == avail)
+               return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+
+            stbi__out_gif_code(g, (stbi__uint16) code);
+
+            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
+               codesize++;
+               codemask = (1 << codesize) - 1;
+            }
+
+            oldcode = code;
+         } else {
+            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+         }
+      }
+   }
+}
+
+static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
+{
+   int x, y;
+   stbi_uc *c = g->pal[g->bgindex];
+   for (y = y0; y < y1; y += 4 * g->w) {
+      for (x = x0; x < x1; x += 4) {
+         stbi_uc *p  = &g->out[y + x];
+         p[0] = c[2];
+         p[1] = c[1];
+         p[2] = c[0];
+         p[3] = 0;
+      }
+   }
+}
+
+// this function is designed to support animated gifs, although stb_image doesn't support it
+static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
+{
+   int i;
+   stbi_uc *prev_out = 0;
+
+   if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
+      return 0; // stbi__g_failure_reason set by stbi__gif_header
+
+   prev_out = g->out;
+   g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
+   if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
+
+   switch ((g->eflags & 0x1C) >> 2) {
+      case 0: // unspecified (also always used on 1st frame)
+         stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
+         break;
+      case 1: // do not dispose
+         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
+         g->old_out = prev_out;
+         break;
+      case 2: // dispose to background
+         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
+         stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
+         break;
+      case 3: // dispose to previous
+         if (g->old_out) {
+            for (i = g->start_y; i < g->max_y; i += 4 * g->w)
+               memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
+         }
+         break;
+   }
+
+   for (;;) {
+      switch (stbi__get8(s)) {
+         case 0x2C: /* Image Descriptor */
+         {
+            int prev_trans = -1;
+            stbi__int32 x, y, w, h;
+            stbi_uc *o;
+
+            x = stbi__get16le(s);
+            y = stbi__get16le(s);
+            w = stbi__get16le(s);
+            h = stbi__get16le(s);
+            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
+               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
+
+            g->line_size = g->w * 4;
+            g->start_x = x * 4;
+            g->start_y = y * g->line_size;
+            g->max_x   = g->start_x + w * 4;
+            g->max_y   = g->start_y + h * g->line_size;
+            g->cur_x   = g->start_x;
+            g->cur_y   = g->start_y;
+
+            g->lflags = stbi__get8(s);
+
+            if (g->lflags & 0x40) {
+               g->step = 8 * g->line_size; // first interlaced spacing
+               g->parse = 3;
+            } else {
+               g->step = g->line_size;
+               g->parse = 0;
+            }
+
+            if (g->lflags & 0x80) {
+               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
+               g->color_table = (stbi_uc *) g->lpal;
+            } else if (g->flags & 0x80) {
+               if (g->transparent >= 0 && (g->eflags & 0x01)) {
+                  prev_trans = g->pal[g->transparent][3];
+                  g->pal[g->transparent][3] = 0;
+               }
+               g->color_table = (stbi_uc *) g->pal;
+            } else
+               return stbi__errpuc("missing color table", "Corrupt GIF");
+
+            o = stbi__process_gif_raster(s, g);
+            if (o == NULL) return NULL;
+
+            if (prev_trans != -1)
+               g->pal[g->transparent][3] = (stbi_uc) prev_trans;
+
+            return o;
+         }
+
+         case 0x21: // Comment Extension.
+         {
+            int len;
+            if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
+               len = stbi__get8(s);
+               if (len == 4) {
+                  g->eflags = stbi__get8(s);
+                  g->delay = stbi__get16le(s);
+                  g->transparent = stbi__get8(s);
+               } else {
+                  stbi__skip(s, len);
+                  break;
+               }
+            }
+            while ((len = stbi__get8(s)) != 0)
+               stbi__skip(s, len);
+            break;
+         }
+
+         case 0x3B: // gif stream termination code
+            return (stbi_uc *) s; // using '1' causes warning on some compilers
+
+         default:
+            return stbi__errpuc("unknown code", "Corrupt GIF");
+      }
+   }
+
+   STBI_NOTUSED(req_comp);
+}
+
+static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi_uc *u = 0;
+   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
+   if (!g) return stbi__errpuc("outofmem", "Out of memory");
+   memset(g, 0, sizeof(*g));
+
+   u = stbi__gif_load_next(s, g, comp, req_comp);
+   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
+   if (u) {
+      *x = g->w;
+      *y = g->h;
+      if (req_comp && req_comp != 4)
+         u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
+   }
+   else if (g->out)
+      STBI_FREE(g->out);
+   STBI_FREE(g);
+   return u;
+}
+
+static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   return stbi__gif_info_raw(s,x,y,comp);
+}
+#endif
+
+// *************************************************************************************************
+// Radiance RGBE HDR loader
+// originally by Nicolas Schulz
+#ifndef STBI_NO_HDR
+static int stbi__hdr_test_core(stbi__context *s)
+{
+   const char *signature = "#?RADIANCE\n";
+   int i;
+   for (i=0; signature[i]; ++i)
+      if (stbi__get8(s) != signature[i])
+         return 0;
+   return 1;
+}
+
+static int stbi__hdr_test(stbi__context* s)
+{
+   int r = stbi__hdr_test_core(s);
+   stbi__rewind(s);
+   return r;
+}
+
+#define STBI__HDR_BUFLEN  1024
+static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
+{
+   int len=0;
+   char c = '\0';
+
+   c = (char) stbi__get8(z);
+
+   while (!stbi__at_eof(z) && c != '\n') {
+      buffer[len++] = c;
+      if (len == STBI__HDR_BUFLEN-1) {
+         // flush to end of line
+         while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
+            ;
+         break;
+      }
+      c = (char) stbi__get8(z);
+   }
+
+   buffer[len] = 0;
+   return buffer;
+}
+
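+// converts one RGBE pixel (shared-exponent Radiance format) to linear floats:
+// component = mantissa_byte * 2^(exponent_byte - 128) / 256, i.e. ldexp(1, E - (128 + 8))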
+static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
+{
+   if ( input[3] != 0 ) {
+      float f1;
+      // Exponent
+      f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
+      if (req_comp <= 2)
+         output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
+      else {
+         output[0] = input[0] * f1;
+         output[1] = input[1] * f1;
+         output[2] = input[2] * f1;
+      }
+      if (req_comp == 2) output[1] = 1;
+      if (req_comp == 4) output[3] = 1;
+   } else {
+      switch (req_comp) {
+         case 4: output[3] = 1; /* fallthrough */
+         case 3: output[0] = output[1] = output[2] = 0;
+                 break;
+         case 2: output[1] = 1; /* fallthrough */
+         case 1: output[0] = 0;
+                 break;
+      }
+   }
+}
+
+static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   char buffer[STBI__HDR_BUFLEN];
+   char *token;
+   int valid = 0;
+   int width, height;
+   stbi_uc *scanline;
+   float *hdr_data;
+   int len;
+   unsigned char count, value;
+   int i, j, k, c1,c2, z;
+
+
+   // Check identifier
+   if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
+      return stbi__errpf("not HDR", "Corrupt HDR image");
+
+   // Parse header
+   for(;;) {
+      token = stbi__hdr_gettoken(s,buffer);
+      if (token[0] == 0) break;
+      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
+   }
+
+   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");
+
+   // Parse width and height
+   // can't use sscanf() if we're not using stdio!
+   token = stbi__hdr_gettoken(s,buffer);
+   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+   token += 3;
+   height = (int) strtol(token, &token, 10);
+   while (*token == ' ') ++token;
+   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+   token += 3;
+   width = (int) strtol(token, NULL, 10);
+
+   *x = width;
+   *y = height;
+
+   if (comp) *comp = 3;
+   if (req_comp == 0) req_comp = 3;
+
+   // Read data
+   hdr_data = (float *) stbi__malloc(height * width * req_comp * sizeof(float));
+   if (!hdr_data) return stbi__errpf("outofmem", "Out of memory");
+
+   // Load image data
+   // image data is stored as some number of scanlines
+   if ( width < 8 || width >= 32768) {
+      // Read flat data
+      for (j=0; j < height; ++j) {
+         for (i=0; i < width; ++i) {
+            stbi_uc rgbe[4];
+           main_decode_loop:
+            stbi__getn(s, rgbe, 4);
+            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
+         }
+      }
+   } else {
+      // Read RLE-encoded data
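+      // each new-style RLE scanline begins with the bytes 2, 2 and the scanline width as a
+      // big-endian 16-bit value, followed by four per-channel streams of run/dump packets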
+      scanline = NULL;
+
+      for (j = 0; j < height; ++j) {
+         c1 = stbi__get8(s);
+         c2 = stbi__get8(s);
+         len = stbi__get8(s);
+         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
+            // not run-length encoded, so we have to actually use THIS data as a decoded
+            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
+            stbi_uc rgbe[4];
+            rgbe[0] = (stbi_uc) c1;
+            rgbe[1] = (stbi_uc) c2;
+            rgbe[2] = (stbi_uc) len;
+            rgbe[3] = (stbi_uc) stbi__get8(s);
+            stbi__hdr_convert(hdr_data, rgbe, req_comp);
+            i = 1;
+            j = 0;
+            STBI_FREE(scanline);
+            goto main_decode_loop; // yes, this makes no sense
+         }
+         len <<= 8;
+         len |= stbi__get8(s);
+         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
+         if (scanline == NULL) scanline = (stbi_uc *) stbi__malloc(width * 4);
+
+         for (k = 0; k < 4; ++k) {
+            i = 0;
+            while (i < width) {
+               count = stbi__get8(s);
+               if (count > 128) {
+                  // Run
+                  value = stbi__get8(s);
+                  count -= 128;
+                  for (z = 0; z < count; ++z)
+                     scanline[i++ * 4 + k] = value;
+               } else {
+                  // Dump
+                  for (z = 0; z < count; ++z)
+                     scanline[i++ * 4 + k] = stbi__get8(s);
+               }
+            }
+         }
+         for (i=0; i < width; ++i)
+            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
+      }
+      STBI_FREE(scanline);
+   }
+
+   return hdr_data;
+}
+
+static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   char buffer[STBI__HDR_BUFLEN];
+   char *token;
+   int valid = 0;
+
+   if (stbi__hdr_test(s) == 0) {
+       stbi__rewind( s );
+       return 0;
+   }
+
+   for(;;) {
+      token = stbi__hdr_gettoken(s,buffer);
+      if (token[0] == 0) break;
+      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
+   }
+
+   if (!valid) {
+       stbi__rewind( s );
+       return 0;
+   }
+   token = stbi__hdr_gettoken(s,buffer);
+   if (strncmp(token, "-Y ", 3)) {
+       stbi__rewind( s );
+       return 0;
+   }
+   token += 3;
+   *y = (int) strtol(token, &token, 10);
+   while (*token == ' ') ++token;
+   if (strncmp(token, "+X ", 3)) {
+       stbi__rewind( s );
+       return 0;
+   }
+   token += 3;
+   *x = (int) strtol(token, NULL, 10);
+   *comp = 3;
+   return 1;
+}
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_BMP
+static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   void *p;
+   stbi__bmp_data info;
+
+   info.all_a = 255;   
+   p = stbi__bmp_parse_header(s, &info);
+   stbi__rewind( s );
+   if (p == NULL)
+      return 0;
+   *x = s->img_x;
+   *y = s->img_y;
+   *comp = info.ma ? 4 : 3;
+   return 1;
+}
+#endif
+
+#ifndef STBI_NO_PSD
+static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int channelCount;
+   if (stbi__get32be(s) != 0x38425053) {
+       stbi__rewind( s );
+       return 0;
+   }
+   if (stbi__get16be(s) != 1) {
+       stbi__rewind( s );
+       return 0;
+   }
+   stbi__skip(s, 6);
+   channelCount = stbi__get16be(s);
+   if (channelCount < 0 || channelCount > 16) {
+       stbi__rewind( s );
+       return 0;
+   }
+   *y = stbi__get32be(s);
+   *x = stbi__get32be(s);
+   if (stbi__get16be(s) != 8) {
+       stbi__rewind( s );
+       return 0;
+   }
+   if (stbi__get16be(s) != 3) {
+       stbi__rewind( s );
+       return 0;
+   }
+   *comp = 4;
+   return 1;
+}
+#endif
+
+#ifndef STBI_NO_PIC
+static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int act_comp=0,num_packets=0,chained;
+   stbi__pic_packet packets[10];
+
+   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
+      stbi__rewind(s);
+      return 0;
+   }
+
+   stbi__skip(s, 88);
+
+   *x = stbi__get16be(s);
+   *y = stbi__get16be(s);
+   if (stbi__at_eof(s)) {
+      stbi__rewind( s);
+      return 0;
+   }
+   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
+      stbi__rewind( s );
+      return 0;
+   }
+
+   stbi__skip(s, 8);
+
+   do {
+      stbi__pic_packet *packet;
+
+      if (num_packets==sizeof(packets)/sizeof(packets[0]))
+         return 0;
+
+      packet = &packets[num_packets++];
+      chained = stbi__get8(s);
+      packet->size    = stbi__get8(s);
+      packet->type    = stbi__get8(s);
+      packet->channel = stbi__get8(s);
+      act_comp |= packet->channel;
+
+      if (stbi__at_eof(s)) {
+          stbi__rewind( s );
+          return 0;
+      }
+      if (packet->size != 8) {
+          stbi__rewind( s );
+          return 0;
+      }
+   } while (chained);
+
+   *comp = (act_comp & 0x10 ? 4 : 3);
+
+   return 1;
+}
+#endif
+
+// *************************************************************************************************
+// Portable Gray Map and Portable Pixel Map loader
+// by Ken Miller
+//
+// PGM: http://netpbm.sourceforge.net/doc/pgm.html
+// PPM: http://netpbm.sourceforge.net/doc/ppm.html
+//
+// Known limitations:
+//    Does not support comments in the header section
+//    Does not support ASCII image data (formats P2 and P3)
+//    Does not support 16-bit-per-channel
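For orientation, the binary header parsed below is just a magic tag, two ASCII dimensions and a maximum sample value, followed by raw pixel bytes. A hypothetical 2x2 P6 file can be spelled out as a string literal (the pixel values are only illustrative):

   // "P6" = binary PPM ("P5" would be a single-channel PGM), then width,
   // height and maxval as ASCII separated by whitespace, then w*h*3 raw bytes.
   static const unsigned char tiny_ppm[] =
      "P6\n"
      "2 2\n"
      "255\n"
      "\xFF\x00\x00" "\x00\xFF\x00"
      "\x00\x00\xFF" "\xFF\xFF\xFF";

The first sizeof(tiny_ppm) - 1 bytes (excluding the terminating NUL) form a complete file that stbi_info_from_memory or stbi_load_from_memory can consume.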
+
+#ifndef STBI_NO_PNM
+
+static int      stbi__pnm_test(stbi__context *s)
+{
+   char p, t;
+   p = (char) stbi__get8(s);
+   t = (char) stbi__get8(s);
+   if (p != 'P' || (t != '5' && t != '6')) {
+       stbi__rewind( s );
+       return 0;
+   }
+   return 1;
+}
+
+static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+   stbi_uc *out;
+   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
+      return 0;
+   *x = s->img_x;
+   *y = s->img_y;
+   *comp = s->img_n;
+
+   out = (stbi_uc *) stbi__malloc(s->img_n * s->img_x * s->img_y);
+   if (!out) return stbi__errpuc("outofmem", "Out of memory");
+   stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
+
+   if (req_comp && req_comp != s->img_n) {
+      out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
+      if (out == NULL) return out; // stbi__convert_format frees input on failure
+   }
+   return out;
+}
+
+static int      stbi__pnm_isspace(char c)
+{
+   return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
+}
+
+static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
+{
+   for (;;) {
+      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
+         *c = (char) stbi__get8(s);
+
+      if (stbi__at_eof(s) || *c != '#')
+         break;
+
+      while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
+         *c = (char) stbi__get8(s);
+   }
+}
+
+static int      stbi__pnm_isdigit(char c)
+{
+   return c >= '0' && c <= '9';
+}
+
+static int      stbi__pnm_getinteger(stbi__context *s, char *c)
+{
+   int value = 0;
+
+   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
+      value = value*10 + (*c - '0');
+      *c = (char) stbi__get8(s);
+   }
+
+   return value;
+}
+
+static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int maxv;
+   char c, p, t;
+
+   stbi__rewind( s );
+
+   // Get identifier
+   p = (char) stbi__get8(s);
+   t = (char) stbi__get8(s);
+   if (p != 'P' || (t != '5' && t != '6')) {
+       stbi__rewind( s );
+       return 0;
+   }
+
+   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm
+
+   c = (char) stbi__get8(s);
+   stbi__pnm_skip_whitespace(s, &c);
+
+   *x = stbi__pnm_getinteger(s, &c); // read width
+   stbi__pnm_skip_whitespace(s, &c);
+
+   *y = stbi__pnm_getinteger(s, &c); // read height
+   stbi__pnm_skip_whitespace(s, &c);
+
+   maxv = stbi__pnm_getinteger(s, &c);  // read max value
+
+   if (maxv > 255)
+      return stbi__err("max value > 255", "PPM image not 8-bit");
+   else
+      return 1;
+}
+#endif
+
+static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
+{
+   #ifndef STBI_NO_JPEG
+   if (stbi__jpeg_info(s, x, y, comp)) return 1;
+   #endif
+
+   #ifndef STBI_NO_PNG
+   if (stbi__png_info(s, x, y, comp))  return 1;
+   #endif
+
+   #ifndef STBI_NO_GIF
+   if (stbi__gif_info(s, x, y, comp))  return 1;
+   #endif
+
+   #ifndef STBI_NO_BMP
+   if (stbi__bmp_info(s, x, y, comp))  return 1;
+   #endif
+
+   #ifndef STBI_NO_PSD
+   if (stbi__psd_info(s, x, y, comp))  return 1;
+   #endif
+
+   #ifndef STBI_NO_PIC
+   if (stbi__pic_info(s, x, y, comp))  return 1;
+   #endif
+
+   #ifndef STBI_NO_PNM
+   if (stbi__pnm_info(s, x, y, comp))  return 1;
+   #endif
+
+   #ifndef STBI_NO_HDR
+   if (stbi__hdr_info(s, x, y, comp))  return 1;
+   #endif
+
+   // test tga last because it's a crappy test!
+   #ifndef STBI_NO_TGA
+   if (stbi__tga_info(s, x, y, comp))
+       return 1;
+   #endif
+   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
+{
+    FILE *f = stbi__fopen(filename, "rb");
+    int result;
+    if (!f) return stbi__err("can't fopen", "Unable to open file");
+    result = stbi_info_from_file(f, x, y, comp);
+    fclose(f);
+    return result;
+}
+
+STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
+{
+   int r;
+   stbi__context s;
+   long pos = ftell(f);
+   stbi__start_file(&s, f);
+   r = stbi__info_main(&s,x,y,comp);
+   fseek(f,pos,SEEK_SET);
+   return r;
+}
+#endif // !STBI_NO_STDIO
+
+STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__info_main(&s,x,y,comp);
+}
+
+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
+   return stbi__info_main(&s,x,y,comp);
+}
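A minimal usage sketch of these entry points (the helper and the file path are hypothetical): stbi_info reports dimensions and channel count without decoding any pixels, which is what the per-format *_info probes above exist for.

   static int probe_dimensions_sketch(const char *path)
   {
      int w = 0, h = 0, channels = 0;
      // Returns 1 on success; stbi_failure_reason() describes a failure.
      if (!stbi_info(path, &w, &h, &channels))
         return 0;
      return w > 0 && h > 0 && channels > 0;
   }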
+
+#endif // STB_IMAGE_IMPLEMENTATION
+
+/*
+   revision history:
+      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+      2.11  (2016-04-02) allocate large structures on the stack
+                         remove white matting for transparent PSD
+                         fix reported channel count for PNG & BMP
+                         re-enable SSE2 in non-gcc 64-bit
+                         support RGB-formatted JPEG
+                         read 16-bit PNGs (only as 8-bit)
+      2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
+      2.09  (2016-01-16) allow comments in PNM files
+                         16-bit-per-pixel TGA (not bit-per-component)
+                         info() for TGA could break due to .hdr handling
+                         info() for BMP now shares code instead of a sloppy parse
+                         can use STBI_REALLOC_SIZED if allocator doesn't support realloc
+                         code cleanup
+      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
+      2.07  (2015-09-13) fix compiler warnings
+                         partial animated GIF support
+                         limited 16-bpc PSD support
+                         #ifdef unused functions
+                         bug with < 92 byte PIC,PNM,HDR,TGA
+      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
+      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
+      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
+      2.03  (2015-04-12) extra corruption checking (mmozeiko)
+                         stbi_set_flip_vertically_on_load (nguillemot)
+                         fix NEON support; fix mingw support
+      2.02  (2015-01-19) fix incorrect assert, fix warning
+      2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
+      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
+      2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
+                         progressive JPEG (stb)
+                         PGM/PPM support (Ken Miller)
+                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
+                         GIF bugfix -- seemingly never worked
+                         STBI_NO_*, STBI_ONLY_*
+      1.48  (2014-12-14) fix incorrectly-named assert()
+      1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
+                         optimize PNG (ryg)
+                         fix bug in interlaced PNG with user-specified channel count (stb)
+      1.46  (2014-08-26)
+              fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
+      1.45  (2014-08-16)
+              fix MSVC-ARM internal compiler error by wrapping malloc
+      1.44  (2014-08-07)
+              various warning fixes from Ronny Chevalier
+      1.43  (2014-07-15)
+              fix MSVC-only compiler problem in code changed in 1.42
+      1.42  (2014-07-09)
+              don't define _CRT_SECURE_NO_WARNINGS (affects user code)
+              fixes to stbi__cleanup_jpeg path
+              added STBI_ASSERT to avoid requiring assert.h
+      1.41  (2014-06-25)
+              fix search&replace from 1.36 that messed up comments/error messages
+      1.40  (2014-06-22)
+              fix gcc struct-initialization warning
+      1.39  (2014-06-15)
+              fix to TGA optimization when req_comp != number of components in TGA;
+              fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
+              add support for BMP version 5 (more ignored fields)
+      1.38  (2014-06-06)
+              suppress MSVC warnings on integer casts truncating values
+              fix accidental rename of 'skip' field of I/O
+      1.37  (2014-06-04)
+              remove duplicate typedef
+      1.36  (2014-06-03)
+              convert to header file single-file library
+              if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
+      1.35  (2014-05-27)
+              various warnings
+              fix broken STBI_SIMD path
+              fix bug where stbi_load_from_file no longer left file pointer in correct place
+              fix broken non-easy path for 32-bit BMP (possibly never used)
+              TGA optimization by Arseny Kapoulkine
+      1.34  (unknown)
+              use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
+      1.33  (2011-07-14)
+              make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
+      1.32  (2011-07-13)
+              support for "info" function for all supported filetypes (SpartanJ)
+      1.31  (2011-06-20)
+              a few more leak fixes, bug in PNG handling (SpartanJ)
+      1.30  (2011-06-11)
+              added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
+              removed deprecated format-specific test/load functions
+              removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
+              error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
+              fix inefficiency in decoding 32-bit BMP (David Woo)
+      1.29  (2010-08-16)
+              various warning fixes from Aurelien Pocheville
+      1.28  (2010-08-01)
+              fix bug in GIF palette transparency (SpartanJ)
+      1.27  (2010-08-01)
+              cast-to-stbi_uc to fix warnings
+      1.26  (2010-07-24)
+              fix bug in file buffering for PNG reported by SpartanJ
+      1.25  (2010-07-17)
+              refix trans_data warning (Won Chun)
+      1.24  (2010-07-12)
+              perf improvements reading from files on platforms with lock-heavy fgetc()
+              minor perf improvements for jpeg
+              deprecated type-specific functions so we'll get feedback if they're needed
+              attempt to fix trans_data warning (Won Chun)
+      1.23    fixed bug in iPhone support
+      1.22  (2010-07-10)
+              removed image *writing* support
+              stbi_info support from Jetro Lauha
+              GIF support from Jean-Marc Lienher
+              iPhone PNG-extensions from James Brown
+              warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez Žemva)
+      1.21    fix use of 'stbi_uc' in header (reported by jon blow)
+      1.20    added support for Softimage PIC, by Tom Seddon
+      1.19    bug in interlaced PNG corruption check (found by ryg)
+      1.18  (2008-08-02)
+              fix a threading bug (local mutable static)
+      1.17    support interlaced PNG
+      1.16    major bugfix - stbi__convert_format converted one too many pixels
+      1.15    initialize some fields for thread safety
+      1.14    fix threadsafe conversion bug
+              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+      1.13    threadsafe
+      1.12    const qualifiers in the API
+      1.11    Support installable IDCT, colorspace conversion routines
+      1.10    Fixes for 64-bit (don't use "unsigned long")
+              optimized upsampling by Fabian "ryg" Giesen
+      1.09    Fix format-conversion for PSD code (bad global variables!)
+      1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
+      1.07    attempt to fix C++ warning/errors again
+      1.06    attempt to fix C++ warning/errors again
+      1.05    fix TGA loading to return correct *comp and use good luminance calc
+      1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
+      1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
+      1.02    support for (subset of) HDR files, float interface for preferred access to them
+      1.01    fix bug: possible bug in handling right-side up bmps... not sure
+              fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
+      1.00    interface to zlib that skips zlib header
+      0.99    correct handling of alpha in palette
+      0.98    TGA loader by lonesock; dynamically add loaders (untested)
+      0.97    jpeg errors on too large a file; also catch another malloc failure
+      0.96    fix detection of invalid v value - particleman@mollyrocket forum
+      0.95    during header scan, seek to markers in case of padding
+      0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
+      0.93    handle jpegtran output; verbose errors
+      0.92    read 4,8,16,24,32-bit BMP files of several formats
+      0.91    output 24-bit Windows 3.0 BMP files
+      0.90    fix a few more warnings; bump version number to approach 1.0
+      0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
+      0.60    fix compiling as c++
+      0.59    fix warnings: merge Dave Moore's -Wall fixes
+      0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
+      0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
+      0.56    fix bug: zlib uncompressed mode len vs. nlen
+      0.55    fix bug: restart_interval not initialized to 0
+      0.54    allow NULL for 'int *comp'
+      0.53    fix bug in png 3->4; speedup png decoding
+      0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
+      0.51    obey req_comp requests, 1-component jpegs return as 1-component,
+              on 'test' only check type, not whether we support this variant
+      0.50  (2006-11-19)
+              first released version
+*/

+ 1458 - 0
Source/DFPSR/image/stbImage/stb_image_write.h

@@ -0,0 +1,1458 @@
+/* stb_image_write - v1.07 - public domain - http://nothings.org/stb/stb_image_write.h
+   writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015
+                                     no warranty implied; use at your own risk
+
+   Before #including,
+
+       #define STB_IMAGE_WRITE_IMPLEMENTATION
+
+   in the file that you want to have the implementation.
+
+   Will probably not work correctly with strict-aliasing optimizations.
+
+ABOUT:
+
+   This header file is a library for writing images to C stdio. It could be
+   adapted to write to memory or a general streaming interface; let me know.
+
+   The PNG output is not optimal; it is 20-50% larger than the file
+   written by a decent optimizing implementation. This library is designed
+   for source code compactness and simplicity, not optimal image file size
+   or run-time performance.
+
+BUILDING:
+
+   You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h.
+   You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace
+   malloc,realloc,free.
+   You can define STBIW_MEMMOVE() to replace memmove()
+
+USAGE:
+
+   There are five functions, one for each supported image file format:
+
+     int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes);
+     int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
+     int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
+     int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
+     int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality);
+
+   There are also five equivalent functions that use an arbitrary write function. You are
+   expected to open/close your file-equivalent before and after calling these:
+
+     int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data, int stride_in_bytes);
+     int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);
+     int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);
+     int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
+     int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality);
+
+   where the callback is:
+      void stbi_write_func(void *context, void *data, int size);
+
+   You can define STBI_WRITE_NO_STDIO to disable the file variant of these
+   functions, so the library will not use stdio.h at all. However, this will
+   also disable HDR writing, because it requires stdio for formatted output.
+
+   Each function returns 0 on failure and non-0 on success.
+
+   The functions create an image file defined by the parameters. The image
+   is a rectangle of pixels stored from left-to-right, top-to-bottom.
+   Each pixel contains 'comp' channels of data stored interleaved with 8-bits
+   per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is
+   monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall.
+   The *data pointer points to the first byte of the top-left-most pixel.
+   For PNG, "stride_in_bytes" is the distance in bytes from the first byte of
+   a row of pixels to the first byte of the next row of pixels.
+
+   PNG creates output files with the same number of components as the input.
+   The BMP format expands Y to RGB in the file format and does not
+   output alpha.
+
+   PNG supports writing rectangles of data even when the bytes storing rows of
+   data are not consecutive in memory (e.g. sub-rectangles of a larger image),
+   by supplying the stride between the beginning of adjacent rows. The other
+   formats do not. (Thus you cannot write a native-format BMP through the BMP
+   writer, both because it is in BGR order and because it may have padding
+   at the end of the line.)
+
+   HDR expects linear float data. Since the format is always 32-bit rgb(e)
+   data, alpha (if provided) is discarded, and for monochrome data it is
+   replicated across all three channels.
+
+   TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed
+   data, set the global variable 'stbi_write_tga_with_rle' to 0.
+   
+   JPEG ignores alpha channels in input data; quality is between 1 and 100.
+   Higher quality looks better but results in a bigger image.
+   Only baseline JPEG is written (no progressive JPEG).
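A minimal sketch of the stdio path described above, assuming the implementation has been compiled into one translation unit; the pixel buffer, sizes and file names are placeholders. Passing w * 4 as stride_in_bytes means the RGBA rows are tightly packed.

     static int save_examples_sketch(const unsigned char *rgba, int w, int h)
     {
        int ok = 1;
        ok &= stbi_write_png("out.png", w, h, 4, rgba, w * 4);
        stbi_write_tga_with_rle = 0;                         // optional: write uncompressed TGA
        ok &= stbi_write_tga("out.tga", w, h, 4, rgba);
        ok &= stbi_write_jpg("out.jpg", w, h, 4, rgba, 90);  // quality 1..100
        return ok;                                           // non-zero only if all writes succeeded
     }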
+
+CREDITS:
+
+   PNG/BMP/TGA
+      Sean Barrett
+   HDR
+      Baldur Karlsson
+   TGA monochrome:
+      Jean-Sebastien Guay
+   misc enhancements:
+      Tim Kelsey
+   TGA RLE
+      Alan Hickman
+   initial file IO callback implementation
+      Emmanuel Julien
+   JPEG
+      Jon Olick (original jo_jpeg.cpp code)
+      Daniel Gibson
+   bugfixes:
+      github:Chribba
+      Guillaume Chereau
+      github:jry2
+      github:romigrou
+      Sergio Gonzalez
+      Jonas Karlsson
+      Filip Wasil
+      Thatcher Ulrich
+      github:poppolopoppo
+      Patrick Boettcher
+      
+LICENSE
+
+  See end of file for license information.
+
+*/
+
+#ifndef INCLUDE_STB_IMAGE_WRITE_H
+#define INCLUDE_STB_IMAGE_WRITE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef STB_IMAGE_WRITE_STATIC
+#define STBIWDEF static
+#else
+#define STBIWDEF extern
+extern int stbi_write_tga_with_rle;
+#endif
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void  *data, int stride_in_bytes);
+STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void  *data);
+STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void  *data);
+STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
+STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void  *data, int quality);
+#endif
+
+typedef void stbi_write_func(void *context, void *data, int size);
+
+STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data, int stride_in_bytes);
+STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);
+STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);
+STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
+STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void  *data, int quality);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif//INCLUDE_STB_IMAGE_WRITE_H
+
+#ifdef STB_IMAGE_WRITE_IMPLEMENTATION
+
+#ifdef _WIN32
+   #ifndef _CRT_SECURE_NO_WARNINGS
+   #define _CRT_SECURE_NO_WARNINGS
+   #endif
+   #ifndef _CRT_NONSTDC_NO_DEPRECATE
+   #define _CRT_NONSTDC_NO_DEPRECATE
+   #endif
+#endif
+
+#ifndef STBI_WRITE_NO_STDIO
+#include <stdio.h>
+#endif // STBI_WRITE_NO_STDIO
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED))
+// ok
+#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED)
+// ok
+#else
+#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."
+#endif
+
+#ifndef STBIW_MALLOC
+#define STBIW_MALLOC(sz)        malloc(sz)
+#define STBIW_REALLOC(p,newsz)  realloc(p,newsz)
+#define STBIW_FREE(p)           free(p)
+#endif
+
+#ifndef STBIW_REALLOC_SIZED
+#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz)
+#endif
+
+
+#ifndef STBIW_MEMMOVE
+#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz)
+#endif
+
+
+#ifndef STBIW_ASSERT
+#include <assert.h>
+#define STBIW_ASSERT(x) assert(x)
+#endif
+
+#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff)
+
+typedef struct
+{
+   stbi_write_func *func;
+   void *context;
+} stbi__write_context;
+
+// initialize a callback-based context
+static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context)
+{
+   s->func    = c;
+   s->context = context;
+}
+
+#ifndef STBI_WRITE_NO_STDIO
+
+static void stbi__stdio_write(void *context, void *data, int size)
+{
+   fwrite(data,1,size,(FILE*) context);
+}
+
+static int stbi__start_write_file(stbi__write_context *s, const char *filename)
+{
+   FILE *f = fopen(filename, "wb");
+   stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f);
+   return f != NULL;
+}
+
+static void stbi__end_write_file(stbi__write_context *s)
+{
+   fclose((FILE *)s->context);
+}
+
+#endif // !STBI_WRITE_NO_STDIO
+
+typedef unsigned int stbiw_uint32;
+typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1];
+
+#ifdef STB_IMAGE_WRITE_STATIC
+static int stbi_write_tga_with_rle = 1;
+#else
+int stbi_write_tga_with_rle = 1;
+#endif
+
+static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v)
+{
+   while (*fmt) {
+      switch (*fmt++) {
+         case ' ': break;
+         case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int));
+                     s->func(s->context,&x,1);
+                     break; }
+         case '2': { int x = va_arg(v,int);
+                     unsigned char b[2];
+                     b[0] = STBIW_UCHAR(x);
+                     b[1] = STBIW_UCHAR(x>>8);
+                     s->func(s->context,b,2);
+                     break; }
+         case '4': { stbiw_uint32 x = va_arg(v,int);
+                     unsigned char b[4];
+                     b[0]=STBIW_UCHAR(x);
+                     b[1]=STBIW_UCHAR(x>>8);
+                     b[2]=STBIW_UCHAR(x>>16);
+                     b[3]=STBIW_UCHAR(x>>24);
+                     s->func(s->context,b,4);
+                     break; }
+         default:
+            STBIW_ASSERT(0);
+            return;
+      }
+   }
+}
+
+static void stbiw__writef(stbi__write_context *s, const char *fmt, ...)
+{
+   va_list v;
+   va_start(v, fmt);
+   stbiw__writefv(s, fmt, v);
+   va_end(v);
+}
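The format string interpreted by stbiw__writefv above is a tiny serializer: '1' emits one byte, '2' a 16-bit little-endian value, '4' a 32-bit little-endian value, and spaces are only visual grouping. A hypothetical call (the helper and its arguments are placeholders) makes the pattern concrete:

   // Writes 'B' and 'M' as single bytes, a 32-bit little-endian value, then
   // two 16-bit zero fields -- the same style as the BMP header call further down.
   static void writef_sketch(stbi__write_context *s, int file_size)
   {
      stbiw__writef(s, "11 4 22", 'B', 'M', file_size, 0, 0);
   }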
+
+static void stbiw__putc(stbi__write_context *s, unsigned char c)
+{
+   s->func(s->context, &c, 1);
+}
+
+static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c)
+{
+   unsigned char arr[3];
+   arr[0] = a, arr[1] = b, arr[2] = c;
+   s->func(s->context, arr, 3);
+}
+
+static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d)
+{
+   unsigned char bg[3] = { 255, 0, 255}, px[3];
+   int k;
+
+   if (write_alpha < 0)
+      s->func(s->context, &d[comp - 1], 1);
+
+   switch (comp) {
+      case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case
+      case 1:
+         if (expand_mono)
+            stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp
+         else
+            s->func(s->context, d, 1);  // monochrome TGA
+         break;
+      case 4:
+         if (!write_alpha) {
+            // composite against pink background
+            for (k = 0; k < 3; ++k)
+               px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;
+            stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]);
+            break;
+         }
+         /* FALLTHROUGH */
+      case 3:
+         stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]);
+         break;
+   }
+   if (write_alpha > 0)
+      s->func(s->context, &d[comp - 1], 1);
+}
+
+static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono)
+{
+   stbiw_uint32 zero = 0;
+   int i,j, j_end;
+
+   if (y <= 0)
+      return;
+
+   if (vdir < 0)
+      j_end = -1, j = y-1;
+   else
+      j_end =  y, j = 0;
+
+   for (; j != j_end; j += vdir) {
+      for (i=0; i < x; ++i) {
+         unsigned char *d = (unsigned char *) data + (j*x+i)*comp;
+         stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d);
+      }
+      s->func(s->context, &zero, scanline_pad);
+   }
+}
+
+static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...)
+{
+   if (y < 0 || x < 0) {
+      return 0;
+   } else {
+      va_list v;
+      va_start(v, fmt);
+      stbiw__writefv(s, fmt, v);
+      va_end(v);
+      stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono);
+      return 1;
+   }
+}
+
+static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data)
+{
+   int pad = (-x*3) & 3;
+   return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad,
+           "11 4 22 4" "4 44 22 444444",
+           'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40,  // file header
+            40, x,y, 1,24, 0,0,0,0,0,0);             // bitmap header
+}
+
+STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
+{
+   stbi__write_context s;
+   stbi__start_write_callbacks(&s, func, context);
+   return stbi_write_bmp_core(&s, x, y, comp, data);
+}
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data)
+{
+   stbi__write_context s;
+   if (stbi__start_write_file(&s,filename)) {
+      int r = stbi_write_bmp_core(&s, x, y, comp, data);
+      stbi__end_write_file(&s);
+      return r;
+   } else
+      return 0;
+}
+#endif //!STBI_WRITE_NO_STDIO
+
+static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data)
+{
+   int has_alpha = (comp == 2 || comp == 4);
+   int colorbytes = has_alpha ? comp-1 : comp;
+   int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3
+
+   if (y < 0 || x < 0)
+      return 0;
+
+   if (!stbi_write_tga_with_rle) {
+      return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0,
+         "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8);
+   } else {
+      int i,j,k;
+
+      stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8);
+
+      for (j = y - 1; j >= 0; --j) {
+          unsigned char *row = (unsigned char *) data + j * x * comp;
+         int len;
+
+         for (i = 0; i < x; i += len) {
+            unsigned char *begin = row + i * comp;
+            int diff = 1;
+            len = 1;
+
+            if (i < x - 1) {
+               ++len;
+               diff = memcmp(begin, row + (i + 1) * comp, comp);
+               if (diff) {
+                  const unsigned char *prev = begin;
+                  for (k = i + 2; k < x && len < 128; ++k) {
+                     if (memcmp(prev, row + k * comp, comp)) {
+                        prev += comp;
+                        ++len;
+                     } else {
+                        --len;
+                        break;
+                     }
+                  }
+               } else {
+                  for (k = i + 2; k < x && len < 128; ++k) {
+                     if (!memcmp(begin, row + k * comp, comp)) {
+                        ++len;
+                     } else {
+                        break;
+                     }
+                  }
+               }
+            }
+
+            if (diff) {
+               unsigned char header = STBIW_UCHAR(len - 1);
+               s->func(s->context, &header, 1);
+               for (k = 0; k < len; ++k) {
+                  stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);
+               }
+            } else {
+               unsigned char header = STBIW_UCHAR(len - 129);
+               s->func(s->context, &header, 1);
+               stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin);
+            }
+         }
+      }
+   }
+   return 1;
+}
+
+STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
+{
+   stbi__write_context s;
+   stbi__start_write_callbacks(&s, func, context);
+   return stbi_write_tga_core(&s, x, y, comp, (void *) data);
+}
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data)
+{
+   stbi__write_context s;
+   if (stbi__start_write_file(&s,filename)) {
+      int r = stbi_write_tga_core(&s, x, y, comp, (void *) data);
+      stbi__end_write_file(&s);
+      return r;
+   } else
+      return 0;
+}
+#endif
+
+// *************************************************************************************************
+// Radiance RGBE HDR writer
+// by Baldur Karlsson
+
+#define stbiw__max(a, b)  ((a) > (b) ? (a) : (b))
+
+void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
+{
+   int exponent;
+   float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2]));
+
+   if (maxcomp < 1e-32f) {
+      rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;
+   } else {
+      float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp;
+
+      rgbe[0] = (unsigned char)(linear[0] * normalize);
+      rgbe[1] = (unsigned char)(linear[1] * normalize);
+      rgbe[2] = (unsigned char)(linear[2] * normalize);
+      rgbe[3] = (unsigned char)(exponent + 128);
+   }
+}
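As a sketch of what the encoding above targets: a stored byte v with shared exponent byte E decodes back to v * 2^(E-128) / 256, so linear (1.0, 0.5, 0.25) becomes rgbe (128, 64, 32, 129) and round-trips to the same values. The helper below is hypothetical and only mirrors the decode formula used by the HDR loader's stbi__hdr_convert (ldexpf comes from math.h, already included above).

   static void rgbe_to_linear_sketch(const unsigned char rgbe[4], float out[3])
   {
      if (rgbe[3] == 0) {
         out[0] = out[1] = out[2] = 0.0f;   // the encoder writes all zeros for tiny values
      } else {
         float scale = ldexpf(1.0f, (int) rgbe[3] - (128 + 8));   // 2^(E-128) / 256
         out[0] = rgbe[0] * scale;
         out[1] = rgbe[1] * scale;
         out[2] = rgbe[2] * scale;
      }
   }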
+
+void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte)
+{
+   unsigned char lengthbyte = STBIW_UCHAR(length+128);
+   STBIW_ASSERT(length+128 <= 255);
+   s->func(s->context, &lengthbyte, 1);
+   s->func(s->context, &databyte, 1);
+}
+
+void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data)
+{
+   unsigned char lengthbyte = STBIW_UCHAR(length);
+   STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code
+   s->func(s->context, &lengthbyte, 1);
+   s->func(s->context, data, length);
+}
+
+void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline)
+{
+   unsigned char scanlineheader[4] = { 2, 2, 0, 0 };
+   unsigned char rgbe[4];
+   float linear[3];
+   int x;
+
+   scanlineheader[2] = (width&0xff00)>>8;
+   scanlineheader[3] = (width&0x00ff);
+
+   /* skip RLE for images too small or large */
+   if (width < 8 || width >= 32768) {
+      for (x=0; x < width; x++) {
+         switch (ncomp) {
+            case 4: /* fallthrough */
+            case 3: linear[2] = scanline[x*ncomp + 2];
+                    linear[1] = scanline[x*ncomp + 1];
+                    linear[0] = scanline[x*ncomp + 0];
+                    break;
+            default:
+                    linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
+                    break;
+         }
+         stbiw__linear_to_rgbe(rgbe, linear);
+         s->func(s->context, rgbe, 4);
+      }
+   } else {
+      int c,r;
+      /* encode into scratch buffer */
+      for (x=0; x < width; x++) {
+         switch(ncomp) {
+            case 4: /* fallthrough */
+            case 3: linear[2] = scanline[x*ncomp + 2];
+                    linear[1] = scanline[x*ncomp + 1];
+                    linear[0] = scanline[x*ncomp + 0];
+                    break;
+            default:
+                    linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
+                    break;
+         }
+         stbiw__linear_to_rgbe(rgbe, linear);
+         scratch[x + width*0] = rgbe[0];
+         scratch[x + width*1] = rgbe[1];
+         scratch[x + width*2] = rgbe[2];
+         scratch[x + width*3] = rgbe[3];
+      }
+
+      s->func(s->context, scanlineheader, 4);
+
+      /* RLE each component separately */
+      for (c=0; c < 4; c++) {
+         unsigned char *comp = &scratch[width*c];
+
+         x = 0;
+         while (x < width) {
+            // find first run
+            r = x;
+            while (r+2 < width) {
+               if (comp[r] == comp[r+1] && comp[r] == comp[r+2])
+                  break;
+               ++r;
+            }
+            if (r+2 >= width)
+               r = width;
+            // dump up to first run
+            while (x < r) {
+               int len = r-x;
+               if (len > 128) len = 128;
+               stbiw__write_dump_data(s, len, &comp[x]);
+               x += len;
+            }
+            // if there's a run, output it
+            if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd
+               // find next byte after run
+               while (r < width && comp[r] == comp[x])
+                  ++r;
+               // output run up to r
+               while (x < r) {
+                  int len = r-x;
+                  if (len > 127) len = 127;
+                  stbiw__write_run_data(s, len, comp[x]);
+                  x += len;
+               }
+            }
+         }
+      }
+   }
+}
+
+static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data)
+{
+   if (y <= 0 || x <= 0 || data == NULL)
+      return 0;
+   else {
+      // Each component is stored separately. Allocate scratch space for full output scanline.
+      unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4);
+      int i, len;
+      char buffer[128];
+      char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";
+      s->func(s->context, header, sizeof(header)-1);
+
+      len = sprintf(buffer, "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);
+      s->func(s->context, buffer, len);
+
+      for(i=0; i < y; i++)
+         stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*i*x);
+      STBIW_FREE(scratch);
+      return 1;
+   }
+}
+
+STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data)
+{
+   stbi__write_context s;
+   stbi__start_write_callbacks(&s, func, context);
+   return stbi_write_hdr_core(&s, x, y, comp, (float *) data);
+}
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data)
+{
+   stbi__write_context s;
+   if (stbi__start_write_file(&s,filename)) {
+      int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data);
+      stbi__end_write_file(&s);
+      return r;
+   } else
+      return 0;
+}
+#endif // STBI_WRITE_NO_STDIO
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// PNG writer
+//
+
+// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size()
+#define stbiw__sbraw(a) ((int *) (a) - 2)
+#define stbiw__sbm(a)   stbiw__sbraw(a)[0]
+#define stbiw__sbn(a)   stbiw__sbraw(a)[1]
+
+#define stbiw__sbneedgrow(a,n)  ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a))
+#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0)
+#define stbiw__sbgrow(a,n)  stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a)))
+
+#define stbiw__sbpush(a, v)      (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v))
+#define stbiw__sbcount(a)        ((a) ? stbiw__sbn(a) : 0)
+#define stbiw__sbfree(a)         ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0)
+
+static void *stbiw__sbgrowf(void **arr, int increment, int itemsize)
+{
+   int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1;
+   void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2);
+   STBIW_ASSERT(p);
+   if (p) {
+      if (!*arr) ((int *) p)[1] = 0;
+      *arr = (void *) ((int *) p + 2);
+      stbiw__sbm(*arr) = m;
+   }
+   return *arr;
+}
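A purely illustrative sketch of the stretchy-buffer macros above: the array pointer itself is the handle, and two ints holding capacity and count live immediately in front of element 0.

   static void stretchy_buffer_sketch(void)
   {
      int *values = NULL;                    // an empty buffer is just a NULL pointer
      stbiw__sbpush(values, 42);             // grows the allocation on demand, then appends
      stbiw__sbpush(values, 7);
      STBIW_ASSERT(stbiw__sbcount(values) == 2);
      stbiw__sbfree(values);                 // releases the block behind the pointer
   }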
+
+static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount)
+{
+   while (*bitcount >= 8) {
+      stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));
+      *bitbuffer >>= 8;
+      *bitcount -= 8;
+   }
+   return data;
+}
+
+static int stbiw__zlib_bitrev(int code, int codebits)
+{
+   int res=0;
+   while (codebits--) {
+      res = (res << 1) | (code & 1);
+      code >>= 1;
+   }
+   return res;
+}
+
+static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit)
+{
+   int i;
+   for (i=0; i < limit && i < 258; ++i)
+      if (a[i] != b[i]) break;
+   return i;
+}
+
+static unsigned int stbiw__zhash(unsigned char *data)
+{
+   stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16);
+   hash ^= hash << 3;
+   hash += hash >> 5;
+   hash ^= hash << 4;
+   hash += hash >> 17;
+   hash ^= hash << 25;
+   hash += hash >> 6;
+   return hash;
+}
+
+#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))
+#define stbiw__zlib_add(code,codebits) \
+      (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())
+#define stbiw__zlib_huffa(b,c)  stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c)
+// default huffman tables
+#define stbiw__zlib_huff1(n)  stbiw__zlib_huffa(0x30 + (n), 8)
+#define stbiw__zlib_huff2(n)  stbiw__zlib_huffa(0x190 + (n)-144, 9)
+#define stbiw__zlib_huff3(n)  stbiw__zlib_huffa(0 + (n)-256,7)
+#define stbiw__zlib_huff4(n)  stbiw__zlib_huffa(0xc0 + (n)-280,8)
+#define stbiw__zlib_huff(n)  ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))
+#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n))
+
+#define stbiw__ZHASH   16384
+
+unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality)
+{
+   static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 };
+   static unsigned char  lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,  5,  5,  5,  0 };
+   static unsigned short distc[]   = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 };
+   static unsigned char  disteb[]  = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
+   unsigned int bitbuf=0;
+   int i,j, bitcount=0;
+   unsigned char *out = NULL;
+   unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**));
+   if (quality < 5) quality = 5;
+
+   stbiw__sbpush(out, 0x78);   // DEFLATE 32K window
+   stbiw__sbpush(out, 0x5e);   // FLEVEL = 1
+   stbiw__zlib_add(1,1);  // BFINAL = 1
+   stbiw__zlib_add(1,2);  // BTYPE = 1 -- fixed huffman
+
+   for (i=0; i < stbiw__ZHASH; ++i)
+      hash_table[i] = NULL;
+
+   i=0;
+   while (i < data_len-3) {
+      // hash next 3 bytes of data to be compressed
+      int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3;
+      unsigned char *bestloc = 0;
+      unsigned char **hlist = hash_table[h];
+      int n = stbiw__sbcount(hlist);
+      for (j=0; j < n; ++j) {
+         if (hlist[j]-data > i-32768) { // if entry lies within window
+            int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i);
+            if (d >= best) best=d,bestloc=hlist[j];
+         }
+      }
+      // when hash table entry is too long, delete half the entries
+      if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) {
+         STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality);
+         stbiw__sbn(hash_table[h]) = quality;
+      }
+      stbiw__sbpush(hash_table[h],data+i);
+
+      if (bestloc) {
+         // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal
+         h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1);
+         hlist = hash_table[h];
+         n = stbiw__sbcount(hlist);
+         for (j=0; j < n; ++j) {
+            if (hlist[j]-data > i-32767) {
+               int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1);
+               if (e > best) { // if next match is better, bail on current match
+                  bestloc = NULL;
+                  break;
+               }
+            }
+         }
+      }
+
+      if (bestloc) {
+         int d = (int) (data+i - bestloc); // distance back
+         STBIW_ASSERT(d <= 32767 && best <= 258);
+         for (j=0; best > lengthc[j+1]-1; ++j);
+         stbiw__zlib_huff(j+257);
+         if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]);
+         for (j=0; d > distc[j+1]-1; ++j);
+         stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5);
+         if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]);
+         i += best;
+      } else {
+         stbiw__zlib_huffb(data[i]);
+         ++i;
+      }
+   }
+   // write out final bytes
+   for (;i < data_len; ++i)
+      stbiw__zlib_huffb(data[i]);
+   stbiw__zlib_huff(256); // end of block
+   // pad with 0 bits to byte boundary
+   while (bitcount)
+      stbiw__zlib_add(0,1);
+
+   for (i=0; i < stbiw__ZHASH; ++i)
+      (void) stbiw__sbfree(hash_table[i]);
+   STBIW_FREE(hash_table);
+
+   {
+      // compute adler32 on input
+      unsigned int s1=1, s2=0;
+      int blocklen = (int) (data_len % 5552);
+      j=0;
+      while (j < data_len) {
+         for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1;
+         s1 %= 65521, s2 %= 65521;
+         j += blocklen;
+         blocklen = 5552;
+      }
+      stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));
+      stbiw__sbpush(out, STBIW_UCHAR(s2));
+      stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));
+      stbiw__sbpush(out, STBIW_UCHAR(s1));
+   }
+   *out_len = stbiw__sbn(out);
+   // make returned pointer freeable
+   STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);
+   return (unsigned char *) stbiw__sbraw(out);
+}
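A brief usage sketch of the compressor above (the input bytes are placeholders): it returns a freshly allocated, complete zlib stream -- 2-byte header, fixed-Huffman DEFLATE data and adler32 trailer -- which the caller releases with STBIW_FREE, exactly as the PNG writer below does.

   static void zlib_compress_sketch(void)
   {
      unsigned char raw[256] = {0};          // hypothetical input buffer
      int zlen = 0;
      unsigned char *z = stbi_zlib_compress(raw, (int) sizeof(raw), &zlen, 8);
      if (z) {
         // z[0..zlen-1] is a valid zlib stream; higher quality trades speed for size.
         STBIW_FREE(z);
      }
   }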
+
+static unsigned int stbiw__crc32(unsigned char *buffer, int len)
+{
+   static unsigned int crc_table[256] =
+   {
+      0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
+      0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
+      0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
+      0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
+      0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
+      0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
+      0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
+      0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
+      0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
+      0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
+      0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
+      0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
+      0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
+      0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
+      0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
+      0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
+      0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
+      0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
+      0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
+      0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
+      0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
+      0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
+      0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
+      0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
+      0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
+      0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
+      0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
+      0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
+      0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
+      0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
+      0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
+      0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
+   };
+
+   unsigned int crc = ~0u;
+   int i;
+   for (i=0; i < len; ++i)
+      crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)];
+   return ~crc;
+}
+
+#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4)
+#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v));
+#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3])
+
+static void stbiw__wpcrc(unsigned char **data, int len)
+{
+   unsigned int crc = stbiw__crc32(*data - len - 4, len+4);
+   stbiw__wp32(*data, crc);
+}
+
+static unsigned char stbiw__paeth(int a, int b, int c)
+{
+   int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c);
+   if (pa <= pb && pa <= pc) return STBIW_UCHAR(a);
+   if (pb <= pc) return STBIW_UCHAR(b);
+   return STBIW_UCHAR(c);
+}
+
+// @OPTIMIZE: provide an option that always forces left-predict or paeth predict
+unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len)
+{
+   int ctype[5] = { -1, 0, 4, 2, 6 };
+   unsigned char sig[8] = { 137,80,78,71,13,10,26,10 };
+   unsigned char *out,*o, *filt, *zlib;
+   signed char *line_buffer;
+   int i,j,k,p,zlen;
+
+   if (stride_bytes == 0)
+      stride_bytes = x * n;
+
+   filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0;
+   line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; }
+   for (j=0; j < y; ++j) {
+      static int mapping[] = { 0,1,2,3,4 };
+      static int firstmap[] = { 0,1,0,5,6 };
+      int *mymap = (j != 0) ? mapping : firstmap;
+      int best = 0, bestval = 0x7fffffff;
+      for (p=0; p < 2; ++p) {
+         for (k= p?best:0; k < 5; ++k) { // @TODO: clarity: rewrite this to go 0..5, and 'continue' the unwanted ones during 2nd pass
+            int type = mymap[k],est=0;
+            unsigned char *z = pixels + stride_bytes*j;
+            for (i=0; i < n; ++i)
+               switch (type) {
+                  case 0: line_buffer[i] = z[i]; break;
+                  case 1: line_buffer[i] = z[i]; break;
+                  case 2: line_buffer[i] = z[i] - z[i-stride_bytes]; break;
+                  case 3: line_buffer[i] = z[i] - (z[i-stride_bytes]>>1); break;
+                  case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-stride_bytes],0)); break;
+                  case 5: line_buffer[i] = z[i]; break;
+                  case 6: line_buffer[i] = z[i]; break;
+               }
+            for (i=n; i < x*n; ++i) {
+               switch (type) {
+                  case 0: line_buffer[i] = z[i]; break;
+                  case 1: line_buffer[i] = z[i] - z[i-n]; break;
+                  case 2: line_buffer[i] = z[i] - z[i-stride_bytes]; break;
+                  case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-stride_bytes])>>1); break;
+                  case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-stride_bytes], z[i-stride_bytes-n]); break;
+                  case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break;
+                  case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break;
+               }
+            }
+            if (p) break;
+            for (i=0; i < x*n; ++i)
+               est += abs((signed char) line_buffer[i]);
+            if (est < bestval) { bestval = est; best = k; }
+         }
+      }
+      // when we get here, best contains the filter type, and line_buffer contains the data
+      filt[j*(x*n+1)] = (unsigned char) best;
+      STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n);
+   }
+   STBIW_FREE(line_buffer);
+   zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, 8); // increase 8 to get smaller but use more memory
+   STBIW_FREE(filt);
+   if (!zlib) return 0;
+
+   // each tag requires 12 bytes of overhead
+   out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12);
+   if (!out) return 0;
+   *out_len = 8 + 12+13 + 12+zlen + 12;
+
+   o=out;
+   STBIW_MEMMOVE(o,sig,8); o+= 8;
+   stbiw__wp32(o, 13); // header length
+   stbiw__wptag(o, "IHDR");
+   stbiw__wp32(o, x);
+   stbiw__wp32(o, y);
+   *o++ = 8;
+   *o++ = STBIW_UCHAR(ctype[n]);
+   *o++ = 0;
+   *o++ = 0;
+   *o++ = 0;
+   stbiw__wpcrc(&o,13);
+
+   stbiw__wp32(o, zlen);
+   stbiw__wptag(o, "IDAT");
+   STBIW_MEMMOVE(o, zlib, zlen);
+   o += zlen;
+   STBIW_FREE(zlib);
+   stbiw__wpcrc(&o, zlen);
+
+   stbiw__wp32(o,0);
+   stbiw__wptag(o, "IEND");
+   stbiw__wpcrc(&o,0);
+
+   STBIW_ASSERT(o == out + *out_len);
+
+   return out;
+}
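A sketch of calling the in-memory variant directly (the pixel buffer is a placeholder): passing 0 for stride_bytes means the rows are tightly packed at x*n bytes, and the returned buffer is a complete PNG file image (signature, IHDR, IDAT, IEND) of *out_len bytes.

   static int png_to_mem_sketch(const unsigned char *rgba, int w, int h)
   {
      int png_len = 0;
      unsigned char *png = stbi_write_png_to_mem((unsigned char *) rgba, 0, w, h, 4, &png_len);
      if (!png) return 0;
      // png[0..png_len-1] can be handed to any I/O path; free it when done.
      STBIW_FREE(png);
      return 1;
   }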
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes)
+{
+   FILE *f;
+   int len;
+   unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len);
+   if (png == NULL) return 0;
+   f = fopen(filename, "wb");
+   if (!f) { STBIW_FREE(png); return 0; }
+   fwrite(png, 1, len, f);
+   fclose(f);
+   STBIW_FREE(png);
+   return 1;
+}
+#endif
+
+STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes)
+{
+   int len;
+   unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len);
+   if (png == NULL) return 0;
+   func(context, png, len);
+   STBIW_FREE(png);
+   return 1;
+}
+
+
+/* ***************************************************************************
+ *
+ * JPEG writer
+ *
+ * This is based on Jon Olick's jo_jpeg.cpp:
+ * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html
+ */
+
+static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18,
+      24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 };
+
+static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) {
+   int bitBuf = *bitBufP, bitCnt = *bitCntP;
+   bitCnt += bs[1];
+   bitBuf |= bs[0] << (24 - bitCnt);
+   while(bitCnt >= 8) {
+      unsigned char c = (bitBuf >> 16) & 255;
+      stbiw__putc(s, c);
+      if(c == 255) {
+         stbiw__putc(s, 0);
+      }
+      bitBuf <<= 8;
+      bitCnt -= 8;
+   }
+   *bitBufP = bitBuf;
+   *bitCntP = bitCnt;
+}
+
+static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) {
+   float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p;
+   float z1, z2, z3, z4, z5, z11, z13;
+
+   float tmp0 = d0 + d7;
+   float tmp7 = d0 - d7;
+   float tmp1 = d1 + d6;
+   float tmp6 = d1 - d6;
+   float tmp2 = d2 + d5;
+   float tmp5 = d2 - d5;
+   float tmp3 = d3 + d4;
+   float tmp4 = d3 - d4;
+
+   // Even part
+   float tmp10 = tmp0 + tmp3;   // phase 2
+   float tmp13 = tmp0 - tmp3;
+   float tmp11 = tmp1 + tmp2;
+   float tmp12 = tmp1 - tmp2;
+
+   d0 = tmp10 + tmp11;       // phase 3
+   d4 = tmp10 - tmp11;
+
+   z1 = (tmp12 + tmp13) * 0.707106781f; // c4
+   d2 = tmp13 + z1;       // phase 5
+   d6 = tmp13 - z1;
+
+   // Odd part
+   tmp10 = tmp4 + tmp5;       // phase 2
+   tmp11 = tmp5 + tmp6;
+   tmp12 = tmp6 + tmp7;
+
+   // The rotator is modified from fig 4-8 to avoid extra negations.
+   z5 = (tmp10 - tmp12) * 0.382683433f; // c6
+   z2 = tmp10 * 0.541196100f + z5; // c2-c6
+   z4 = tmp12 * 1.306562965f + z5; // c2+c6
+   z3 = tmp11 * 0.707106781f; // c4
+
+   z11 = tmp7 + z3;      // phase 5
+   z13 = tmp7 - z3;
+
+   *d5p = z13 + z2;         // phase 6
+   *d3p = z13 - z2;
+   *d1p = z11 + z4;
+   *d7p = z11 - z4;
+
+   *d0p = d0;  *d2p = d2;  *d4p = d4;  *d6p = d6;
+}
+
+static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {
+   int tmp1 = val < 0 ? -val : val;
+   val = val < 0 ? val-1 : val;
+   bits[1] = 1;
+   while(tmp1 >>= 1) {
+      ++bits[1];
+   }
+   bits[0] = val & ((1<<bits[1])-1);
+}
+
+static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt, float *CDU, float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) {
+   const unsigned short EOB[2] = { HTAC[0x00][0], HTAC[0x00][1] };
+   const unsigned short M16zeroes[2] = { HTAC[0xF0][0], HTAC[0xF0][1] };
+   int dataOff, i, diff, end0pos;
+   int DU[64];
+
+   // DCT rows
+   for(dataOff=0; dataOff<64; dataOff+=8) {
+      stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+1], &CDU[dataOff+2], &CDU[dataOff+3], &CDU[dataOff+4], &CDU[dataOff+5], &CDU[dataOff+6], &CDU[dataOff+7]);
+   }
+   // DCT columns
+   for(dataOff=0; dataOff<8; ++dataOff) {
+      stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+8], &CDU[dataOff+16], &CDU[dataOff+24], &CDU[dataOff+32], &CDU[dataOff+40], &CDU[dataOff+48], &CDU[dataOff+56]);
+   }
+   // Quantize/descale/zigzag the coefficients
+   for(i=0; i<64; ++i) {
+      float v = CDU[i]*fdtbl[i];
+      // DU[stbiw__jpg_ZigZag[i]] = (int)(v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f));
+      // ceilf() and floorf() are C99, not C89, but I /think/ they're not needed here anyway?
+      DU[stbiw__jpg_ZigZag[i]] = (int)(v < 0 ? v - 0.5f : v + 0.5f);
+   }
+
+   // Encode DC
+   diff = DU[0] - DC;
+   if (diff == 0) {
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]);
+   } else {
+      unsigned short bits[2];
+      stbiw__jpg_calcBits(diff, bits);
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]);
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
+   }
+   // Encode ACs
+   end0pos = 63;
+   for(; (end0pos>0)&&(DU[end0pos]==0); --end0pos) {
+   }
+   // end0pos = index of the last non-zero coefficient (first non-zero element when scanning in reverse)
+   if(end0pos == 0) {
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
+      return DU[0];
+   }
+   for(i = 1; i <= end0pos; ++i) {
+      int startpos = i;
+      int nrzeroes;
+      unsigned short bits[2];
+      for (; DU[i]==0 && i<=end0pos; ++i) {
+      }
+      nrzeroes = i-startpos;
+      if ( nrzeroes >= 16 ) {
+         int lng = nrzeroes>>4;
+         int nrmarker;
+         for (nrmarker=1; nrmarker <= lng; ++nrmarker)
+            stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes);
+         nrzeroes &= 15;
+      }
+      stbiw__jpg_calcBits(DU[i], bits);
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]);
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
+   }
+   if(end0pos != 63) {
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
+   }
+   return DU[0];
+}
+
+static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) {
+   // Constants that don't pollute global namespace
+   static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0};
+   static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
+   static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d};
+   static const unsigned char std_ac_luminance_values[] = {
+      0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,
+      0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,
+      0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,
+      0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
+      0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,
+      0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,
+      0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
+   };
+   static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0};
+   static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
+   static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77};
+   static const unsigned char std_ac_chrominance_values[] = {
+      0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,
+      0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,
+      0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,
+      0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
+      0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,
+      0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,
+      0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
+   };
+   // Huffman tables
+   static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}};
+   static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}};
+   static const unsigned short YAC_HT[256][2] = {
+      {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
+   };
+   static const unsigned short UVAC_HT[256][2] = {
+      {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
+   };
+   static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22,
+                             37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99};
+   static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99,
+                              99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99};
+   static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 
+                                 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f };
+
+   int row, col, i, k;
+   float fdtbl_Y[64], fdtbl_UV[64];
+   unsigned char YTable[64], UVTable[64];
+
+   if(!data || !width || !height || comp > 4 || comp < 1) {
+      return 0;
+   }
+
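+   // Map quality 1..100 onto the libjpeg-style scaling factor applied to the base quantization tables below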
+   quality = quality ? quality : 90;
+   quality = quality < 1 ? 1 : quality > 100 ? 100 : quality;
+   quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
+
+   for(i = 0; i < 64; ++i) {
+      int uvti, yti = (YQT[i]*quality+50)/100;
+      YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti);
+      uvti = (UVQT[i]*quality+50)/100;
+      UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);
+   }
+
+   for(row = 0, k = 0; row < 8; ++row) {
+      for(col = 0; col < 8; ++col, ++k) {
+         fdtbl_Y[k]  = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
+         fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
+      }
+   }
+
+   // Write Headers
+   {
+      static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 };
+      static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 };
+      const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width),
+                                      3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 };
+      s->func(s->context, (void*)head0, sizeof(head0));
+      s->func(s->context, (void*)YTable, sizeof(YTable));
+      stbiw__putc(s, 1);
+      s->func(s->context, UVTable, sizeof(UVTable));
+      s->func(s->context, (void*)head1, sizeof(head1));
+      s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1);
+      s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));
+      stbiw__putc(s, 0x10); // HTYACinfo
+      s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1);
+      s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values));
+      stbiw__putc(s, 1); // HTUDCinfo
+      s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1);
+      s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values));
+      stbiw__putc(s, 0x11); // HTUACinfo
+      s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1);
+      s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values));
+      s->func(s->context, (void*)head2, sizeof(head2));
+   }
+
+   // Encode 8x8 macroblocks
+   {
+      static const unsigned short fillBits[] = {0x7F, 7};
+      const unsigned char *imageData = (const unsigned char *)data;
+      int DCY=0, DCU=0, DCV=0;
+      int bitBuf=0, bitCnt=0;
+      // comp == 2 is grey+alpha (alpha is ignored)
+      int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0;
+      int x, y, pos;
+      for(y = 0; y < height; y += 8) {
+         for(x = 0; x < width; x += 8) {
+            float YDU[64], UDU[64], VDU[64];
+            for(row = y, pos = 0; row < y+8; ++row) {
+               for(col = x; col < x+8; ++col, ++pos) {
+                  int p = row*width*comp + col*comp;
+                  float r, g, b;
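+                  // blocks that extend past the image edge re-use the last in-bounds row/column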
+                  if(row >= height) {
+                     p -= width*comp*(row+1 - height);
+                  }
+                  if(col >= width) {
+                     p -= comp*(col+1 - width);
+                  }
+
+                  r = imageData[p+0];
+                  g = imageData[p+ofsG];
+                  b = imageData[p+ofsB];
+                  YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128;
+                  UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b;
+                  VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b;
+               }
+            }
+
+            DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT);
+            DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);
+            DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);
+         }
+      }
+
+      // Do the bit alignment of the EOI marker
+      stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits);
+   }
+
+   // EOI
+   stbiw__putc(s, 0xFF);
+   stbiw__putc(s, 0xD9);
+
+   return 1;
+}
+
+STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality)
+{
+   stbi__write_context s;
+   stbi__start_write_callbacks(&s, func, context);
+   return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality);
+}
+
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality)
+{
+   stbi__write_context s;
+   if (stbi__start_write_file(&s,filename)) {
+      int r = stbi_write_jpg_core(&s, x, y, comp, data, quality);
+      stbi__end_write_file(&s);
+      return r;
+   } else
+      return 0;
+}
+#endif
+
+#endif // STB_IMAGE_WRITE_IMPLEMENTATION
+
+/* Revision history
+      1.07  (2017-07-24)
+             doc fix
+      1.06 (2017-07-23)
+             writing JPEG (using Jon Olick's code)
+      1.05   ???
+      1.04 (2017-03-03)
+             monochrome BMP expansion
+      1.03   ???
+      1.02 (2016-04-02)
+             avoid allocating large structures on the stack
+      1.01 (2016-01-16)
+             STBIW_REALLOC_SIZED: support allocators with no realloc support
+             avoid race-condition in crc initialization
+             minor compile issues
+      1.00 (2015-09-14)
+             installable file IO function
+      0.99 (2015-09-13)
+             warning fixes; TGA rle support
+      0.98 (2015-04-08)
+             added STBIW_MALLOC, STBIW_ASSERT etc
+      0.97 (2015-01-18)
+             fixed HDR asserts, rewrote HDR rle logic
+      0.96 (2015-01-17)
+             add HDR output
+             fix monochrome BMP
+      0.95 (2014-08-17)
+             add monochrome TGA output
+      0.94 (2014-05-31)
+             rename private functions to avoid conflicts with stb_image.h
+      0.93 (2014-05-27)
+             warning fixes
+      0.92 (2010-08-01)
+             casts to unsigned char to fix warnings
+      0.91 (2010-07-17)
+             first public release
+      0.90   first internal release
+*/
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of 
+this software and associated documentation files (the "Software"), to deal in 
+the Software without restriction, including without limitation the rights to 
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+of the Software, and to permit persons to whom the Software is furnished to do 
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all 
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 
+software, either in source code form or as a compiled binary, for any purpose, 
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this 
+software dedicate any and all copyright interest in the software to the public 
+domain. We make this dedication for the benefit of the public at large and to 
+the detriment of our heirs and successors. We intend this dedication to be an 
+overt act of relinquishment in perpetuity of all present and future rights to 
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/

+ 31 - 0
Source/DFPSR/includeFramework.h

@@ -0,0 +1,31 @@
+
+// Header for including the most commonly needed parts of the framework
+
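+// Typical usage (sketch, assuming the Source folder is on the include path):
+//   #include "DFPSR/includeFramework.h"
+//   using namespace dsr;
+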
+#ifndef DFPSR_INCLUDED_FRAMEWORK
+#define DFPSR_INCLUDED_FRAMEWORK
+
+	// Needed to use the APIs (exposed value types that will never change)
+	#include "math/includeMath.h"
+	#include "base/text.h"
+
+	// Additional functionality for convenience (not to be used in any API)
+	#include "collection/includeCollection.h" // Safer and easier to use than std collections
+
+	// 2D API
+	#include "api/imageAPI.h" // Creating images and modifying pixels
+	#include "api/drawAPI.h" // Efficient drawing on images
+	// 3D API
+	#include "api/modelAPI.h" // Polygon models for 3D rendering
+	// GUI API
+	#include "api/guiAPI.h" // Handling windows, interfaces and components
+	#include "api/mediaMachineAPI.h" // A machine for running image functions
+	// Convenient API
+	#include "api/timeAPI.h" // Methods for time and delays
+	#include "api/configAPI.h" // Making it easy to load your application's settings from configuration files
+
+	// TODO: Create more APIs
+	#include "gui/VisualTheme.h" // Place in the gui API
+	#include "gui/Font.h" // Place in the gui API
+
+#endif
+

+ 481 - 0
Source/DFPSR/machine/VirtualMachine.cpp

@@ -0,0 +1,481 @@
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "VirtualMachine.h"
+#include "../api/timeAPI.h"
+
+using namespace dsr;
+
+VirtualMachine::VirtualMachine(const ReadableString& code, const std::shared_ptr<PlanarMemory>& memory,
+  const InsSig* machineInstructions, int32_t machineInstructionCount,
+  const VMTypeDef* machineTypes, int32_t machineTypeCount)
+: memory(memory), machineInstructions(machineInstructions), machineInstructionCount(machineInstructionCount),
+  machineTypes(machineTypes), machineTypeCount(machineTypeCount) {
+	#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+		printText("Starting media machine.\n");
+	#endif
+	this->methods.pushConstruct(U"<init>", 0, this->machineTypeCount);
+	List<ReadableString> lines = code.split(U'\n');
+	#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+		printText("Reading assembly.\n");
+	#endif
+	for (int l = 0; l < lines.length(); l++) {
+		ReadableString currentLine = lines[l];
+		// If the line has a comment, then skip everything from #
+		int commentIndex = currentLine.findFirst(U'#');
+		if (commentIndex > -1) {
+			currentLine = currentLine.before(commentIndex);
+		}
+		currentLine = string_removeOuterWhiteSpace(currentLine);
+		int colonIndex = currentLine.findFirst(U':');
+		if (colonIndex > -1) {
+			ReadableString command = string_removeOuterWhiteSpace(currentLine.before(colonIndex));
+			ReadableString argumentLine = currentLine.after(colonIndex);
+			List<ReadableString> arguments = argumentLine.split(U',');
+			this->interpretMachineWord(command, arguments);
+		} else if (currentLine.length() > 0) {
+			throwError("Unexpected line \"", currentLine, "\".\n");
+		}
+	}
+	// Calling "<init>" to execute global commands
+	#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+		printText("Initializing global machine state.\n");
+	#endif
+	this->executeMethod(0);
+}
+
+int VirtualMachine::findMethod(const ReadableString& name) {
+	for (int i = 0; i < this->methods.length(); i++) {
+		if (string_caseInsensitiveMatch(this->methods[i].name, name)) {
+			return i;
+		}
+	}
+	return -1;
+}
+
+Variable* VirtualMachine::getResource(const ReadableString& name, int methodIndex) {
+	Variable* result = this->methods[methodIndex].getLocal(name);
+	if (result) {
+		// If found, take the local variable
+		return result;
+	} else if (methodIndex > 0) {
+		// If not found in the current method, look for global variables in the global initialization method
+		return getResource(name, 0);
+	} else {
+		return nullptr;
+	}
+}
+
+void VirtualMachine::addMachineWord(MachineOperation operation, const List<VMA>& args) {
+	this->machineWords.pushConstruct(operation, args);
+	this->methods[this->methods.length() - 1].instructionCount++;
+}
+
+void VirtualMachine::addMachineWord(MachineOperation operation) {
+	this->machineWords.pushConstruct(operation);
+	this->methods[this->methods.length() - 1].instructionCount++;
+}
+
+void VirtualMachine::interpretCommand(const ReadableString& operation, const List<VMA>& resolvedArguments) {
+	// Compare the input with overloads
+	for (int s = 0; s < machineInstructionCount; s++) {
+		if (machineInstructions[s].matches(operation, resolvedArguments)) {
+			this->addMachineWord(machineInstructions[s].operation, resolvedArguments);
+			return;
+		}
+	}
+	// TODO: Allow asking the specific machine type what the given types are called.
+	String message = string_combine(U"\nError! ", operation, U" does not match any overload for the given arguments:\n");
+	for (int s = 0; s < machineInstructionCount; s++) {
+		const InsSig* signature = &machineInstructions[s];
+		if (string_caseInsensitiveMatch(signature->name, operation)) {
+			string_append(message, "  * ", signature->name, "(");
+			for (int a = 0; a < signature->arguments.length(); a++) {
+				if (a > 0) {
+					string_append(message, ", ");
+				}
+				const ArgSig* argument = &signature->arguments[a];
+				string_append(message, argument->name);
+			}
+			string_append(message, ")\n");
+		}
+	}
+	throwError(message);
+}
+
+// TODO: Inline into declareVariable
+Variable* VirtualMachine::declareVariable_aux(const VMTypeDef& typeDef, int methodIndex, AccessType access, const ReadableString& name, bool initialize, const ReadableString& defaultValueText) {
+	// Make commonly used data more readable
+	bool global = methodIndex == 0;
+	Method* currentMethod = &this->methods[methodIndex];
+
+	// Assert correctness
+	if (global && (access == AccessType::Input || access == AccessType::Output)) {
+		throwError("Cannot declare inputs or outputs globally!\n");
+	}
+
+	// Count how many variables the method has of each type
+	currentMethod->count[typeDef.dataType]++;
+	this->methods[methodIndex].unifiedLocalIndices[typeDef.dataType].push(this->methods[methodIndex].locals.length());
+	// Count inputs for calling the method
+	if (access == AccessType::Input) {
+		if (this->methods[methodIndex].declaredNonInput) {
+			throwError("Cannot declare input \"", name, "\" after a non-input has been declared. Declare inputs, outputs and locals in order.\n");
+		}
+		this->methods[methodIndex].inputCount++;
+	} else if (access == AccessType::Output) {
+		if (this->methods[methodIndex].declaredLocals) {
+			throwError("Cannot declare output \"", name, "\" after a local has been declared. Declare inputs, outputs and locals in order.\n");
+		}
+		this->methods[methodIndex].outputCount++;
+		this->methods[methodIndex].declaredNonInput = true;
+	} else if (access == AccessType::Hidden) {
+		this->methods[methodIndex].declaredLocals = true;
+		this->methods[methodIndex].declaredNonInput = true;
+	}
+	// Declare the variable so that code may find the type and index by name
+	int typeLocalIndex = currentMethod->count[typeDef.dataType] - 1;
+	int globalIndex = typeLocalToGlobalIndex(global, typeLocalIndex);
+	this->methods[methodIndex].locals.pushConstruct(name, access, &typeDef, typeLocalIndex, global);
+	if (initialize && access != AccessType::Input) {
+		// Generate instructions for assigning the variable's initial value
+		typeDef.initializer(*this, globalIndex, defaultValueText);
+	}
+	return &this->methods[methodIndex].locals.last();
+}
+
+Variable* VirtualMachine::declareVariable(int methodIndex, AccessType access, const ReadableString& typeName, const ReadableString& name, bool initialize, const ReadableString& defaultValueText) {
+	if (this->getResource(name, methodIndex)) {
+		throwError("A resource named \"", name, "\" already exists! Be aware that resource names are case insensitive.\n");
+		return nullptr;
+	} else {
+		// Loop over type definitions to find a match
+		const VMTypeDef* typeDef = getMachineType(typeName);
+		if (typeDef) {
+			if (defaultValueText.length() > 0 && !typeDef->allowDefaultValue) {
+				throwError("The variable \"", name, "\" doesn't have an immediate constructor for \"", typeName, "\".\n");
+			}
+			return this->declareVariable_aux(*typeDef, methodIndex, access, name, initialize, defaultValueText);
+		} else {
+			throwError("Cannot declare variable of unknown type \"", typeName, "\"!\n");
+			return nullptr;
+		}
+	}
+}
+
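+// Resolves one textual argument into a VMA:
+//   * A numeric literal (optionally negative) becomes an immediate FixedPoint value.
+//   * "name<Type>" declares a hidden in-place temporary of the given type and references it.
+//   * Any other token is looked up by name as an existing local or global resource.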
+VMA VirtualMachine::VMAfromText(int methodIndex, const ReadableString& content) {
+	DsrChar first = content[0];
+	DsrChar second = content[1];
+	if (first == U'-' && second >= U'0' && second <= U'9') {
+		return VMA(FixedPoint::fromText(content));
+	} else if (first >= U'0' && first <= U'9') {
+		return VMA(FixedPoint::fromText(content));
+	} else {
+		int leftIndex = content.findFirst(U'<');
+		int rightIndex = content.findLast(U'>');
+		if (leftIndex > -1 && rightIndex > -1) {
+			ReadableString name = string_removeOuterWhiteSpace(content.before(leftIndex));
+			ReadableString typeName = string_removeOuterWhiteSpace(content.inclusiveRange(leftIndex + 1, rightIndex - 1));
+			ReadableString remainder = string_removeOuterWhiteSpace(content.after(rightIndex));
+			if (remainder.length() > 0) {
+				throwError("No code allowed after > for in-place temp declarations!\n");
+			}
+			Variable* resource = this->declareVariable(methodIndex, AccessType::Hidden, typeName, name, false, U"");
+			if (resource) {
+				return VMA(resource->typeDescription->dataType, resource->getGlobalIndex());
+			} else {
+				throwError("The resource \"", name, "\" could not be declared as \"", typeName, "\"!\n");
+				return VMA(FixedPoint());
+			}
+		} else if (leftIndex > -1) {
+			throwError("Using < without > for in-place temp allocation.\n");
+			return VMA(FixedPoint());
+		} else if (rightIndex > -1) {
+			throwError("Using > without < for in-place temp allocation.\n");
+			return VMA(FixedPoint());
+		} else {
+			Variable* resource = getResource(content, methodIndex);
+			if (resource) {
+				return VMA(resource->typeDescription->dataType, resource->getGlobalIndex());
+			} else {
+				throwError("The resource \"", content, "\" could not be found! Make sure that it's declared before being used.\n");
+				return VMA(FixedPoint());
+			}
+		}
+	}
+}
+
+static ReadableString getArg(const List<ReadableString>& arguments, int32_t index) {
+	if (index < 0 || index >= arguments.length()) {
+		return U"";
+	} else {
+		return string_removeOuterWhiteSpace(arguments[index]);
+	}
+}
+void VirtualMachine::addReturnInstruction() {
+	addMachineWord([](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+		if (memory.callStack.length() > 0) {
+			// Return to caller
+			#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+				printText("Returning from \"", machine.methods[memory.current.methodIndex].name, "\" to caller \"", machine.methods[memory.callStack.last().methodIndex].name, "\"\n");
+				machine.debugPrintMemory();
+			#endif
+			memory.current = memory.callStack.last();
+			memory.callStack.pop();
+			memory.current.programCounter++;
+		} else {
+			#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+				printText("Returning from \"", machine.methods[memory.current.methodIndex].name, "\"\n");
+			#endif
+			// Leave the virtual machine
+			memory.current.programCounter = -1;
+		}
+	});
+}
+void VirtualMachine::addCallInstructions(const List<ReadableString>& arguments) {
+	if (arguments.length() < 1) {
+		throwError("Cannot make a call without the name of a method!\n");
+	}
+	// TODO: Allow calling methods that aren't defined yet.
+	int currentMethodIndex = this->methods.length() - 1;
+	int calledMethodIndex = findMethod(string_removeOuterWhiteSpace(arguments[0]));
+	if (calledMethodIndex < 0) {
+		throwError("Cannot call \"", string_removeOuterWhiteSpace(arguments[0]), "\", because no method with that name has been declared yet!\n");
+	}
+	// Check the total number of arguments
+	Method* calledMethod = &this->methods[calledMethodIndex];
+	if (arguments.length() - 1 != calledMethod->outputCount + calledMethod->inputCount) {
+		throwError("Wrong argument count to \"", calledMethod->name, "\"! Call arguments should start with the method to call, continue with output references and end with inputs.\n");
+	}
+	// Split assembler arguments into separate input and output arguments for machine instructions
+	List<VMA> inputArguments;
+	List<VMA> outputArguments;
+	inputArguments.push(VMA(FixedPoint::fromMantissa(calledMethodIndex)));
+	outputArguments.push(VMA(FixedPoint::fromMantissa(calledMethodIndex)));
+	int outputCount = 0;
+	for (int a = 1; a < arguments.length(); a++) {
+		ReadableString content = string_removeOuterWhiteSpace(arguments[a]);
+		if (content.length() > 0) {
+			if (outputCount < calledMethod->outputCount) {
+				outputArguments.push(this->VMAfromText(currentMethodIndex, getArg(arguments, a)));
+				outputCount++;
+			} else {
+				inputArguments.push(this->VMAfromText(currentMethodIndex, getArg(arguments, a)));
+			}
+		}
+	}
+	// Check types
+	for (int a = 1; a < outputArguments.length(); a++) {
+		// Output
+		Variable* variable = &calledMethod->locals[a - 1 + calledMethod->inputCount];
+		if (outputArguments[a].argType != ArgumentType::Reference) {
+			throwError("Output argument for \"", variable->name, "\" in \"", calledMethod->name, "\" must be a reference to allow writing its result!\n");
+		} else if (outputArguments[a].dataType != variable->typeDescription->dataType) {
+			throwError("Output argument for \"", variable->name, "\" in \"", calledMethod->name, "\" must have the type \"", variable->typeDescription->name, "\"!\n");
+		}
+	}
+	for (int a = 1; a < inputArguments.length(); a++) {
+		// Input
+		Variable* variable = &calledMethod->locals[a - 1];
+		if (inputArguments[a].dataType != variable->typeDescription->dataType) {
+			throwError("Input argument for \"", variable->name, "\" in \"", calledMethod->name, "\" must have the type \"", variable->typeDescription->name, "\"!\n");
+		}
+	}
+	addMachineWord([](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+		// Get the method to call
+		int calledMethodIndex = args[0].value.getMantissa();
+		int oldMethodIndex = memory.current.methodIndex;
+		Method* calledMethod = &machine.methods[calledMethodIndex];
+		#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+			printText("Calling \"", calledMethod->name, "\".\n");
+		#endif
+		// Calculate new frame pointers
+		int32_t newFramePointer[MAX_TYPE_COUNT] = {};
+		int32_t newStackPointer[MAX_TYPE_COUNT] = {};
+		for (int t = 0; t < MAX_TYPE_COUNT; t++) {
+			newFramePointer[t] = memory.current.stackPointer[t];
+			newStackPointer[t] = memory.current.stackPointer[t] + machine.methods[oldMethodIndex].count[t];
+		}
+		// Assign inputs
+		for (int a = 1; a < args.length(); a++) {
+			Variable* target = &calledMethod->locals[a - 1];
+			DataType typeIndex = target->typeDescription->dataType;
+			int targetStackIndex = target->getStackIndex(newFramePointer[typeIndex]);
+			memory.store(targetStackIndex, args[a], memory.current.framePointer[typeIndex], typeIndex);
+		}
+		// Jump into the method
+		memory.callStack.push(memory.current);
+		memory.current.methodIndex = calledMethodIndex;
+		memory.current.programCounter = machine.methods[calledMethodIndex].startAddress;
+		for (int t = 0; t < MAX_TYPE_COUNT; t++) {
+			memory.current.framePointer[t] = newFramePointer[t];
+			memory.current.stackPointer[t] = newStackPointer[t];
+		}
+	}, inputArguments);
+	// Get results from the method
+	addMachineWord([](VirtualMachine& machine, PlanarMemory& memory, const List<VMA>& args) {
+		int calledMethodIndex = args[0].value.getMantissa();
+		Method* calledMethod = &machine.methods[calledMethodIndex];
+		#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+			printText("Writing results after call to \"", calledMethod->name, "\":\n");
+		#endif
+		// Assign outputs
+		for (int a = 1; a < args.length(); a++) {
+			Variable* source = &calledMethod->locals[a - 1 + calledMethod->inputCount];
+			DataType typeIndex = source->typeDescription->dataType;
+			int sourceStackIndex = source->getStackIndex(memory.current.stackPointer[typeIndex]);
+			memory.load(sourceStackIndex, args[a], memory.current.framePointer[typeIndex], typeIndex);
+			#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+				printText("  ");
+				machine.debugArgument(VMA(typeIndex, source->getGlobalIndex()), calledMethodIndex, memory.current.stackPointer, false);
+				printText(" -> ");
+				machine.debugArgument(args[a], memory.current.methodIndex, memory.current.framePointer, false);
+				printText("\n");
+			#endif
+		}
+		// TODO: Decrease reference counts for images by zeroing memory above the new stack-pointer
+		//       Avoiding temporary memory leaks and making sure that no cloning is needed for operations that clone if needed
+		//       Planar memory will receive a new memset operation for a range of stack indices for a given type
+		memory.current.programCounter++;
+		#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+			machine.debugPrintMemory();
+		#endif
+	}, outputArguments);
+}
+
+void VirtualMachine::interpretMachineWord(const ReadableString& command, const List<ReadableString>& arguments) {
+	#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+		printText("interpretMachineWord @", this->machineWords.length(), " ", command, "(");
+		for (int a = 0; a < arguments.length(); a++) {
+			if (a > 0) { printText(", "); }
+			printText(getArg(arguments, a));
+		}
+		printText(")\n");
+	#endif
+	if (string_caseInsensitiveMatch(command, U"Begin")) {
+		if (this->methods.length() == 1) {
+			// When more than one function exists, the init method must end with a return instruction
+			//   Otherwise it would start executing instructions in another method and crash
+			this->addReturnInstruction();
+		}
+		this->methods.pushConstruct(getArg(arguments, 0), this->machineWords.length(), this->machineTypeCount);
+	} else if (string_caseInsensitiveMatch(command, U"Temp")) {
+		for (int a = 1; a < arguments.length(); a++) {
+			this->declareVariable(methods.length() - 1, AccessType::Hidden, getArg(arguments, 0), getArg(arguments, a), false, U"");
+		}
+	} else if (string_caseInsensitiveMatch(command, U"Hidden")) {
+		this->declareVariable(methods.length() - 1, AccessType::Hidden, getArg(arguments, 0), getArg(arguments, 1), true, getArg(arguments, 2));
+	} else if (string_caseInsensitiveMatch(command, U"Input")) {
+		this->declareVariable(methods.length() - 1, AccessType::Input, getArg(arguments, 0), getArg(arguments, 1), true, getArg(arguments, 2));
+	} else if (string_caseInsensitiveMatch(command, U"Output")) {
+		this->declareVariable(methods.length() - 1, AccessType::Output, getArg(arguments, 0), getArg(arguments, 1), true, getArg(arguments, 2));
+	} else if (string_caseInsensitiveMatch(command, U"End")) {
+		this->addReturnInstruction();
+	} else if (string_caseInsensitiveMatch(command, U"Call")) {
+		this->addCallInstructions(arguments);
+	} else {
+		int methodIndex = this->methods.length() - 1;
+		List<VMA> resolvedArguments;
+		for (int a = 0; a < arguments.length(); a++) {
+			ReadableString content = string_removeOuterWhiteSpace(arguments[a]);
+			if (content.length() > 0) {
+				resolvedArguments.push(this->VMAfromText(methodIndex, getArg(arguments, a)));
+			}
+		}
+		this->interpretCommand(command, resolvedArguments);
+	}
+}
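+
+// Assembly line format sketch, based on the commands handled above:
+//   command: argument1, argument2, ...   # comment until end of line
+// For example (type and instruction names depend on the hosting machine):
+//   Begin: Brighten              # declare a new method
+//   Input: FixedPoint, offset    # hypothetical scalar type name
+//   ...instruction lines matching registered InsSig overloads...
+//   End:                         # append a return instruction
+// A call lists the called method first, then output references, then inputs:
+//   Call: Brighten, ...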
+
+void VirtualMachine::executeMethod(int methodIndex) {
+	Method* rootMethod = &this->methods[methodIndex];
+
+	#ifdef VIRTUAL_MACHINE_PROFILE
+		if (rootMethod->instructionCount < 1) {
+			// TODO: Assert that each method ends with a return or jump instruction after compiling
+			printText("Cannot call \"", rootMethod->name, "\", because it doesn't have any instructions.\n");
+			return;
+		}
+	#endif
+
+	// Create a new current state
+	this->memory->current.methodIndex = methodIndex;
+	this->memory->current.programCounter = rootMethod->startAddress;
+	for (int t = 0; t < this->machineTypeCount; t++) {
+		int framePointer = this->methods[0].count[t];
+		this->memory->current.framePointer[t] = framePointer;
+		this->memory->current.stackPointer[t] = framePointer + this->methods[methodIndex].count[t];
+	}
+
+	#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+		this->debugPrintMemory();
+	#endif
+	#ifdef VIRTUAL_MACHINE_PROFILE
+		printText("Calling \"", rootMethod->name, "\":\n");
+		double startTime = time_getSeconds();
+	#endif
+
+	// Execute until the program counter is out of bound (-1)
+	while (true) {
+		int32_t pc = this->memory->current.programCounter;
+		if (pc < 0 || pc >= this->machineWords.length()) {
+			// Return statements will set the program counter to -1 if there are no more callers saved in the stack
+			if (pc != -1) {
+				throwError("Unexpected program counter! @", pc, " outside of 0..", (this->machineWords.length() - 1), "\n");
+			}
+			break;
+		}
+		MachineWord* word = &this->machineWords[pc];
+		#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+			const InsSig* signature = getMachineInstructionFromFunction(word->operation);
+			if (signature) {
+				printText("Executing @", pc, " ", signature->name, "(");
+				for (int a = signature->targetCount; a < word->args.length(); a++) {
+					if (a > signature->targetCount) {
+						printText(", ");
+					}
+					debugArgument(word->args[a], this->memory->current.methodIndex, this->memory->current.framePointer, false);
+				}
+				printText(")");
+			}
+			word->operation(*this, *(this->memory.get()), word->args);
+			if (signature) {
+				if (signature->targetCount > 0) {
+					printText(" -> ");
+					for (int a = 0; a < signature->targetCount; a++) {
+						if (a > 0) {
+							printText(", ");
+						}
+						debugArgument(word->args[a], this->memory->current.methodIndex, this->memory->current.framePointer, true);
+					}
+				}
+			}
+			printText("\n");
+		#else
+			word->operation(*this, *(this->memory.get()), word->args);
+		#endif
+	}
+	#ifdef VIRTUAL_MACHINE_PROFILE
+		double endTime = time_getSeconds();
+		printText("Done calling \"", rootMethod->name, "\" after ", (endTime - startTime) * 1000000.0, " microseconds.\n");
+		#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+			printText(" (debug prints are active)\n");
+		#endif
+	#endif
+}

+ 424 - 0
Source/DFPSR/machine/VirtualMachine.h

@@ -0,0 +1,424 @@
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_VIRTUAL_MACHINE
+#define DFPSR_VIRTUAL_MACHINE
+
+#include <stdint.h>
+#include "../math/FixedPoint.h"
+#include "../collection/Array.h"
+#include "../collection/List.h"
+
+// Flags
+//#define VIRTUAL_MACHINE_PROFILE // Enable profiling
+//#define VIRTUAL_MACHINE_DEBUG_PRINT // Enable debug printing (will affect profiling)
+//#define VIRTUAL_MACHINE_DEBUG_FULL_CONTENT // Allow debug printing to show the full content of images
+
+namespace dsr {
+
+#define MAX_TYPE_COUNT 4
+
+// Forward declarations
+struct VirtualMachine;
+struct VMTypeDef;
+
+enum class AccessType {
+	Any,
+	Hidden,
+	Input,
+	Output
+};
+static ReadableString getName(AccessType access) {
+	switch(access) {
+		case AccessType::Any:    return U"Any";
+		case AccessType::Hidden: return U"Hidden";
+		case AccessType::Input:  return U"Input";
+		case AccessType::Output: return U"Output";
+		default:                 return U"?";
+	}
+}
+
+// Types used in machine instructions
+enum class ArgumentType {
+	Unused,
+	Immediate,
+	Reference
+};
+
+// Types
+// TODO: Make the use of FixedPoint optional in VirtualMachine
+using DataType = int32_t;
+static const DataType DataType_FixedPoint = 0;
+
+struct Variable {
+	String name;
+	AccessType access;
+	const VMTypeDef* typeDescription;
+	int32_t typeLocalIndex; // The zero-based local index among the members of the same type in the method
+	bool global; // A flag that generates negative global indices for referring to global variables in method zero
+	Variable(const String& name, AccessType access, const VMTypeDef* typeDescription, int32_t typeLocalIndex, bool global)
+	: name(name), access(access), typeDescription(typeDescription), typeLocalIndex(typeLocalIndex), global(global) {}
+	int32_t getGlobalIndex() {
+		int32_t result = this->global ? (-this->typeLocalIndex - 1) : this->typeLocalIndex;
+		return result;
+	}
+	int32_t getStackIndex(int32_t framePointer) {
+		int32_t result = this->global ? this->typeLocalIndex : this->typeLocalIndex + framePointer;
+		return result;
+	}
+};
+
+// Virtual Machine Argument
+struct VMA {
+	const ArgumentType argType = ArgumentType::Unused;
+	const DataType dataType;
+	const FixedPoint value;
+	explicit VMA(FixedPoint value)
+	: argType(ArgumentType::Immediate), dataType(DataType_FixedPoint), value(value) {}
+	VMA(DataType dataType, int32_t globalIndex)
+	: argType(ArgumentType::Reference), dataType(dataType), value(FixedPoint::fromMantissa(globalIndex)) {}
+};
+
+struct ArgSig {
+	ReadableString name;
+	bool byValue;
+	// TODO: Replace with pointers to type definitions (const VMTypeDef*)
+	DataType dataType;
+	ArgSig(const ReadableString& name, bool byValue, DataType dataType)
+	: name(name), byValue(byValue), dataType(dataType) {}
+	bool matches(ArgumentType argType, DataType dataType) const {
+		if (this->byValue && this->dataType == DataType_FixedPoint) {
+			return dataType == this->dataType && (argType == ArgumentType::Immediate || argType == ArgumentType::Reference);
+		} else {
+			return dataType == this->dataType && argType == ArgumentType::Reference;
+		}
+	}
+};
+
+template <typename T>
+struct MemoryPlane {
+	Array<T> stack;
+	explicit MemoryPlane(int32_t size) : stack(size, T()) {}
+	T& accessByStackIndex(int32_t stackIndex) {
+		return this->stack[stackIndex];
+	}
+	T& accessByGlobalIndex(int32_t globalIndex, int32_t framePointer) {
+		int32_t stackIndex = globalIndex < 0 ? -(globalIndex + 1) : framePointer + globalIndex;
+		return this->stack[stackIndex];
+	}
+	T& getRef(const VMA& arg, int32_t framePointer) {
+		assert(arg.argType == ArgumentType::Reference);
+		return this->accessByGlobalIndex(arg.value.getMantissa(), framePointer);
+	}
+};
+
+struct CallState {
+	int32_t methodIndex = 0;
+	int32_t programCounter = 0;
+	int32_t stackPointer[MAX_TYPE_COUNT] = {};
+	int32_t framePointer[MAX_TYPE_COUNT] = {};
+};
+
+// A planar memory system with one stack and frame pointer for each type of memory.
+//   This is possible because the virtual machine only operates on types known in compile-time.
+//   The planar stack system:
+//     * Removes the need to manually initialize and align classes in generic memory.
+	//     * Encapsulates any effects of endianness or signed integer representations in the physical hardware.
+//       Because there cannot be accidental reintepretation when the type is known in compile-time.
+class PlanarMemory {
+public:
+	CallState current;
+	List<CallState> callStack;
+	virtual ~PlanarMemory() {}
+	// Store in memory
+	virtual void store(int targetStackIndex, const VMA& sourceArg, int sourceFramePointer, DataType type) = 0;
+	// Load from memory
+	virtual void load(int sourceStackIndex, const VMA& targetArg, int targetFramePointer, DataType type) = 0;
+};
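+
+// A minimal backend sketch, assuming a machine that only stores FixedPoint values in a fixed-size stack:
+//   class NumberMemory : public PlanarMemory {
+//   public:
+//   	MemoryPlane<FixedPoint> numbers = MemoryPlane<FixedPoint>(1024);
+//   	void store(int targetStackIndex, const VMA& sourceArg, int sourceFramePointer, DataType type) override {
+//   		this->numbers.accessByStackIndex(targetStackIndex) = (sourceArg.argType == ArgumentType::Immediate)
+//   		  ? sourceArg.value : this->numbers.getRef(sourceArg, sourceFramePointer);
+//   	}
+//   	void load(int sourceStackIndex, const VMA& targetArg, int targetFramePointer, DataType type) override {
+//   		this->numbers.getRef(targetArg, targetFramePointer) = this->numbers.accessByStackIndex(sourceStackIndex);
+//   	}
+//   };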
+
+// Lambdas without capture are used to create function pointers without objects
+inline void MachineOperationTemplate(VirtualMachine& machine, PlanarMemory&, const List<VMA>& args) {}
+using MachineOperation = decltype(&MachineOperationTemplate);
+
+struct MachineWord {
+	MachineOperation operation;
+	List<VMA> args;
+	MachineWord(MachineOperation operation, const List<VMA>& args)
+	: operation(operation), args(args) {}
+	explicit MachineWord(MachineOperation operation)
+	: operation(operation) {}
+};
+
+struct InsSig {
+public:
+	ReadableString name;
+	int targetCount; // Number of first arguments to present as results
+	List<ArgSig> arguments;
+	MachineOperation operation;
+	InsSig(const ReadableString& name, int targetCount, MachineOperation operation)
+	: name(name), targetCount(targetCount), operation(operation) {}
+private:
+	void addArguments() {}
+	template <typename... ARGS>
+	void addArguments(const ArgSig& head, ARGS... tail) {
+		this->arguments.push(head);
+		this->addArguments(tail...);
+	}
+public:
+	template <typename... ARGS>
+	static InsSig create(const ReadableString& name, int targetCount, MachineOperation operation, ARGS... args) {
+		InsSig result = InsSig(name, targetCount, operation);
+		result.addArguments(args...);
+		return result;
+	}
+	bool matches(const ReadableString& name, List<VMA> resolvedArguments) const {
+		if (resolvedArguments.length() != this->arguments.length()) {
+			return false;
+		} else if (!string_caseInsensitiveMatch(this->name, name)) {
+			return false;
+		} else {
+			for (int i = 0; i < this->arguments.length(); i++) {
+				if (!this->arguments[i].matches(resolvedArguments[i].argType, resolvedArguments[i].dataType)) {
+					return false;
+				}
+			}
+			return true;
+		}
+	}
+};
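+
+// Registration sketch for a hypothetical "Add" instruction where the first argument receives the result:
+//   InsSig::create(U"Add", 1, addOperation,
+//     ArgSig(U"target", false, DataType_FixedPoint),
+//     ArgSig(U"left", true, DataType_FixedPoint),
+//     ArgSig(U"right", true, DataType_FixedPoint));
+// Here addOperation is assumed to be a capture-free lambda or function of type MachineOperation.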
+
+// Types
+inline void initializeTemplate(VirtualMachine& machine, int globalIndex, const ReadableString& defaultValue) {}
+using VMT_Initializer = decltype(&initializeTemplate);
+inline void debugPrintTemplate(PlanarMemory& memory, Variable& variable, int globalIndex, int32_t* framePointer, bool fullContent) {}
+using VMT_DebugPrinter = decltype(&debugPrintTemplate);
+struct VMTypeDef {
+	ReadableString name;
+	DataType dataType;
+	bool allowDefaultValue;
+	VMT_Initializer initializer;
+	VMT_DebugPrinter debugPrinter;
+	VMTypeDef(const ReadableString& name, DataType dataType, bool allowDefaultValue, VMT_Initializer initializer, VMT_DebugPrinter debugPrinter)
+	: name(name), dataType(dataType), allowDefaultValue(allowDefaultValue), initializer(initializer), debugPrinter(debugPrinter) {}
+};
+
+struct Method {
+	String name;
+
+	// Global instruction space
+	const int32_t startAddress; // Index to machineWords
+	int32_t instructionCount = 0; // Number of machine words (safer than return statements in case of memory corruption)
+
+	// Unified local space
+	int32_t inputCount = 0; // Number of inputs declared at the start of locals
+	int32_t outputCount = 0; // Number of outputs declared directly after the inputs
+
+	// TODO: Merge into a state
+	bool declaredNonInput = false; // Set to true once a non-input has been declared
+	bool declaredLocals = false; // Set to true once a local has been declared
+	List<Variable> locals; // locals[0..inputCount-1] are the inputs, while locals[inputCount..inputCount+outputCount-1] are the outputs
+
+	// Type-specific spaces
+	int32_t count[MAX_TYPE_COUNT] = {};
+	// Look-up table from a combination of type and type-local indices to unified-local indices
+	List<int32_t> unifiedLocalIndices[MAX_TYPE_COUNT];
+
+	Method(const String& name, int32_t startAddress, int32_t machineTypeCount) : name(name), startAddress(startAddress) {
+		// Increase MAX_TYPE_COUNT if it's not enough
+		assert(machineTypeCount <= MAX_TYPE_COUNT);
+	}
+	Variable* getLocal(const ReadableString& name) {
+		for (int i = 0; i < this->locals.length(); i++) {
+			if (string_caseInsensitiveMatch(this->locals[i].name, name)) {
+				return &this->locals[i];
+			}
+		}
+		return nullptr;
+	}
+};
+
+// A virtual machine for efficient media processing.
+struct VirtualMachine {
+	// Methods
+	List<Method> methods;
+	// Memory
+	std::shared_ptr<PlanarMemory> memory;
+	// Instruction types
+	const InsSig* machineInstructions; int32_t machineInstructionCount;
+	const InsSig* getMachineInstructionFromFunction(MachineOperation functionPointer) {
+		for (int s = 0; s < this->machineInstructionCount; s++) {
+			if (this->machineInstructions[s].operation == functionPointer) {
+				return &this->machineInstructions[s];
+			}
+		}
+		return nullptr;
+	}
+	// Instruction instances
+	List<MachineWord> machineWords;
+	// Types
+	const VMTypeDef* machineTypes; int32_t machineTypeCount;
+	const VMTypeDef* getMachineType(const ReadableString& name) {
+		for (int s = 0; s < this->machineTypeCount; s++) {
+			if (string_caseInsensitiveMatch(this->machineTypes[s].name, name)) {
+				return &this->machineTypes[s];
+			}
+		}
+		return nullptr;
+	}
+	const VMTypeDef* getMachineType(DataType dataType) {
+		for (int s = 0; s < this->machineTypeCount; s++) {
+			if (this->machineTypes[s].dataType == dataType) {
+				return &this->machineTypes[s];
+			}
+		}
+		return nullptr;
+	}
+	// Constructor
+	VirtualMachine(const ReadableString& code, const std::shared_ptr<PlanarMemory>& memory,
+	  const InsSig* machineInstructions, int32_t machineInstructionCount,
+	  const VMTypeDef* machineTypes, int32_t machineTypeCount);
+
+	int findMethod(const ReadableString& name);
+	Variable* getResource(const ReadableString& name, int methodIndex);
+	/*
+	Indices
+		Global index: (Identifier) The value stored in the mantissas of machine instructions to refer to variables
+			These are translated into stack indices for run-time lookups
+			Useful for storing at compile-time when there's no stack nor frame-pointer for mapping to any real memory address
+			Relative to the frame-pointer, so it cannot access anything other than globals (using negative indices) and locals (using natural indices)
+		Stack index: (Pointer) The absolute index of a variable at run-time
+			Indices to the type's own stack in the machine
+			A frame pointer is needed to create them, but the memory of calling methods can be accessed using stack indices
+		Type local index: (Frame-pointer offset) The local index of a variable among others of the same type
+			Quick at finding a stack index for the type's own stack
+			Useful to store in variables and convert into global and stack indices
+			For compile-time generation and run-time variable access
+		Unified local index: (Variable) The index of a variable's debug information
+			Indices to unifiedLocalIndices in methods
+			Can be used to find the name of the variable for debugging
+			Unlike the type local index, the unified index knows the type
+	*/
+	static int globalToTypeLocalIndex(int globalIndex) {
+		return globalIndex < 0 ? -(globalIndex + 1) : globalIndex;
+	}
+	static int typeLocalToGlobalIndex(bool isGlobal, int typeLocalIndex) {
+		return isGlobal ? -(typeLocalIndex + 1) : typeLocalIndex;
+	}
+
+	void addMachineWord(MachineOperation operation, const List<VMA>& args);
+	void addMachineWord(MachineOperation operation);
+	void addReturnInstruction();
+	void addCallInstructions(const List<ReadableString>& arguments);
+	void interpretCommand(const ReadableString& operation, const List<VMA>& resolvedArguments);
+	Variable* declareVariable_aux(const VMTypeDef& typeDef, int methodIndex, AccessType access, const ReadableString& name, bool initialize, const ReadableString& defaultValueText);
+	Variable* declareVariable(int methodIndex, AccessType access, const ReadableString& type, const ReadableString& name, bool initialize, const ReadableString& defaultValueText);
+	VMA VMAfromText(int methodIndex, const ReadableString& content);
+	void interpretMachineWord(const ReadableString& command, const List<ReadableString>& arguments);
+
+	// Run-time debug printing
+	#ifdef VIRTUAL_MACHINE_DEBUG_PRINT
+		Variable* getDebugInfo(DataType dataType, int globalIndex, int methodIndex) {
+			if (globalIndex < 0) { methodIndex = 0; } // Go to the global method if it's a global index
+			Method* method = &this->methods[methodIndex];
+			int typeLocalIndex = globalToTypeLocalIndex(globalIndex);
+			int unifiedLocalIndex = method->unifiedLocalIndices[dataType][typeLocalIndex];
+			return &(method->locals[unifiedLocalIndex]);
+		}
+		void debugArgument(const VMA& data, int methodIndex, int32_t* framePointer, bool fullContent) {
+			if (data.argType == ArgumentType::Immediate) {
+				printText(data.value);
+			} else {
+				int globalIndex = data.value.getMantissa();
+				Variable* variable = getDebugInfo(data.dataType, globalIndex, methodIndex);
+				const VMTypeDef* typeDefinition = getMachineType(data.dataType);
+				#ifndef VIRTUAL_MACHINE_DEBUG_FULL_CONTENT
+					fullContent = false;
+				#endif
+				if (typeDefinition) {
+					typeDefinition->debugPrinter(*(this->memory.get()), *variable, globalIndex, framePointer, fullContent);
+					if (globalIndex < 0) {
+						printText(" @gi(", globalIndex, ")");
+					} else {
+						printText(" @gi(", globalIndex, ")+fp(", framePointer[typeDefinition->dataType], ")");
+					}
+				} else {
+					printText("?");
+				}
+			}
+		}
+		void debugPrintVariables(int methodIndex, int32_t* framePointer, const ReadableString& indentation) {
+			Method* method = &this->methods[methodIndex];
+			for (int i = 0; i < method->locals.length(); i++) {
+				Variable* variable = &method->locals[i];
+				printText(indentation, "* ", getName(variable->access), " ");
+				const VMTypeDef* typeDefinition = getMachineType(variable->typeDescription->dataType);
+				if (typeDefinition) {
+					typeDefinition->debugPrinter(*(this->memory.get()), *variable, variable->getGlobalIndex(), framePointer, false);
+				} else {
+					printText("?");
+				}
+				printText("\n");
+			}
+		}
+		void debugPrintMethod(int methodIndex, int32_t* framePointer, const ReadableString& indentation) {
+			printText("  ", this->methods[methodIndex].name, ":\n");
+			for (int t = 0; t < this->machineTypeCount; t++) {
+				printText("    FramePointer[", t, "] = ", framePointer[t], " Count[", t, "] = ", this->methods[methodIndex].count[t], "\n");
+			}
+			debugPrintVariables(methodIndex, framePointer, indentation);
+			printText("\n");
+		}
+		void debugPrintMemory() {
+			int methodIndex = this->memory->current.methodIndex;
+			printText("\nMemory:\n");
+			if (methodIndex > 0) {
+				int32_t globalFramePointer[MAX_TYPE_COUNT] = {};
+				debugPrintMethod(0, globalFramePointer, U"    ");
+			}
+			for (int i = 0; i < memory->callStack.length(); i++) {
+				debugPrintMethod(memory->callStack[i].methodIndex, memory->callStack[i].framePointer, U"    ");
+			}
+			debugPrintMethod(methodIndex, this->memory->current.framePointer, U"    ");
+		}
+	#endif
+	void executeMethod(int methodIndex);
+	int32_t getResourceStackIndex(const ReadableString& name, int methodIndex, DataType dataType, AccessType access = AccessType::Any) {
+		Variable* variable = getResource(name, methodIndex);
+		if (variable) {
+			if (variable->typeDescription->dataType != dataType) {
+				throwError("The machine's resource named \"", variable->name, "\" had the unexpected type \"", variable->typeDescription->name, "\"!\n");
+			} else if (access != variable->access && access != AccessType::Any) {
+				throwError("The machine's resource named \"", variable->name, "\" is not declared as \"", getName(access), "\"!\n");
+			} else {
+				return variable->getStackIndex(this->memory->current.framePointer[dataType]);
+			}
+		} else {
+			throwError("The machine cannot find any resource named \"", name, "\"!\n");
+		}
+		return -1;
+	}
+};
+
+}
+
+#endif
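
To make the index bookkeeping above concrete, here is a minimal sketch using the two static conversion helpers; the wrapper function and slot values are made up for illustration, and it assumes the machine's namespace is in scope:

// Sketch only: globals are encoded as negative global indices, locals keep their natural index.
static void indexConventionSketch() {
	// A global stored in slot 2 of its type's stack is encoded as -(2 + 1) = -3.
	int globalIndex = VirtualMachine::typeLocalToGlobalIndex(true, 2);   // -3
	int slotAgain = VirtualMachine::globalToTypeLocalIndex(globalIndex); // back to 2
	// A local stored in slot 2 keeps the value 2 in both encodings.
	int localIndex = VirtualMachine::typeLocalToGlobalIndex(false, 2);   // 2
	(void)slotAgain; (void)localIndex;
}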

+ 270 - 0
Source/DFPSR/machine/mediaFilters.cpp

@@ -0,0 +1,270 @@
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "mediaFilters.h"
+#include "../base/simd.h"
+
+using namespace dsr;
+
+template <typename T, typename U>
+static void assertSameSize(const T& imageL, const U& imageR) {
+	if (!image_exists(imageL) || !image_exists(imageR)) {
+		if (image_exists(imageL)) {
+			// Left side exists, so there's no right side
+			throwError("Media filter: Non-existing right side input image.\n");
+		} else if (image_exists(imageR)) {
+			// Right side exists, so there's no left side
+			throwError("Media filter: Non-existing left side input image.\n");
+		} else {
+			// Neither input exists
+			throwError("Media filter: Non-existing input images.\n");
+		}
+	} else if (image_getWidth(imageL) != image_getWidth(imageR)
+	       || image_getHeight(imageL) != image_getHeight(imageR)) {
+		throwError("Media filter: Taking input images of different dimensions, ", image_getWidth(imageL), "x", image_getHeight(imageL), " and ", image_getWidth(imageR), "x", image_getHeight(imageR), ".\n");
+	}
+}
+
+template <typename T>
+static void assertExisting(const T& image) {
+	if (!image_exists(image)) {
+		throwError("Media filter: Non-existing input image.\n");
+	}
+}
+
+template <typename T>
+static void removeIfShared(T& targetImage) {
+	if (image_useCount(targetImage) > 1) {
+		targetImage = AlignedImageU8();
+	}
+}
+
+template <typename T, typename U>
+static void allocateToSameSize(T& targetImage, const U& inputImage) {
+	if (!image_exists(targetImage) || image_getWidth(targetImage) != image_getWidth(inputImage) || image_getHeight(targetImage) != image_getHeight(inputImage)) {
+		if (!image_exists(inputImage)) {
+			throwError("Media filter: Cannot allocate to size of non-existing input image.\n");
+		}
+		targetImage = image_create_U8(image_getWidth(inputImage), image_getHeight(inputImage));
+	}
+}
+
+void dsr::media_filter_add(AlignedImageU8& targetImage, AlignedImageU8 imageL, AlignedImageU8 imageR) {
+	assertSameSize(imageL, imageR);
+	removeIfShared(targetImage);
+	allocateToSameSize(targetImage, imageL);
+	// TODO: Implement U8x16 in simd.h
+	//       readAligned, writeAligned, addSaturated, subtractSaturated...
+	/*for (int32_t y = 0; y < image_getHeight(targetImage); y++) {
+		SafePointer<uint8_t> targetRow = imageInternal::getSafeData<uint8_t>(targetImage, y);
+		SafePointer<uint8_t> sourceRowL = imageInternal::getSafeData<uint8_t>(imageL, y);
+		SafePointer<uint8_t> sourceRowR = imageInternal::getSafeData<uint8_t>(imageR, y);
+		for (int32_t x = 0; x < image_getWidth(targetImage); x += 16) {
+			ALIGN16 U8x16 colorL = U8x16::readAligned(sourceRowL, "media_filter_add (sourceRowL)");
+			ALIGN16 U8x16 colorR = U8x16::readAligned(sourceRowR, "media_filter_add (sourceRowR)");
+			ALIGN16 U8x16 result = U8x16::addSaturated(colorL, colorR);
+			result.writeAligned(targetRow, "media_filter_add (targetRow)");
+			targetRow += 16;
+			sourceRowL += 16;
+			sourceRowR += 16;
+		}
+	}*/
+	// Reference implementation
+	for (int32_t y = 0; y < image_getHeight(targetImage); y++) {
+		for (int32_t x = 0; x < image_getWidth(targetImage); x++) {
+			image_writePixel(targetImage, x, y, image_readPixel_clamp(imageL, x, y) + image_readPixel_clamp(imageR, x, y));
+		}
+	}
+}
+
+void dsr::media_filter_add(AlignedImageU8& targetImage, AlignedImageU8 image, FixedPoint scalar) {
+	assertExisting(image);
+	removeIfShared(targetImage);
+	allocateToSameSize(targetImage, image);
+	// Reference implementation
+	int whole = fixedPoint_round(scalar);
+	for (int32_t y = 0; y < image_getHeight(targetImage); y++) {
+		for (int32_t x = 0; x < image_getWidth(targetImage); x++) {
+			image_writePixel(targetImage, x, y, image_readPixel_clamp(image, x, y) + whole);
+		}
+	}
+}
+
+void dsr::media_filter_sub(AlignedImageU8& targetImage, AlignedImageU8 imageL, AlignedImageU8 imageR) {
+	assertSameSize(imageL, imageR);
+	removeIfShared(targetImage);
+	allocateToSameSize(targetImage, imageL);
+	// Reference implementation
+	for (int32_t y = 0; y < image_getHeight(targetImage); y++) {
+		for (int32_t x = 0; x < image_getWidth(targetImage); x++) {
+			image_writePixel(targetImage, x, y, image_readPixel_clamp(imageL, x, y) - image_readPixel_clamp(imageR, x, y));
+		}
+	}
+}
+
+void dsr::media_filter_sub(AlignedImageU8& targetImage, AlignedImageU8 image, FixedPoint scalar) {
+	assertExisting(image);
+	removeIfShared(targetImage);
+	allocateToSameSize(targetImage, image);
+	// Reference implementation
+	int whole = fixedPoint_round(scalar);
+	for (int32_t y = 0; y < image_getHeight(targetImage); y++) {
+		for (int32_t x = 0; x < image_getWidth(targetImage); x++) {
+			image_writePixel(targetImage, x, y, image_readPixel_clamp(image, x, y) - whole);
+		}
+	}
+}
+
+void dsr::media_filter_sub(AlignedImageU8& targetImage, FixedPoint scalar, AlignedImageU8 image) {
+	assertExisting(image);
+	removeIfShared(targetImage);
+	allocateToSameSize(targetImage, image);
+	// Reference implementation
+	int whole = fixedPoint_round(scalar);
+	for (int32_t y = 0; y < image_getHeight(targetImage); y++) {
+		for (int32_t x = 0; x < image_getWidth(targetImage); x++) {
+			image_writePixel(targetImage, x, y, whole - image_readPixel_clamp(image, x, y));
+		}
+	}
+}
+
+void dsr::media_filter_mul(AlignedImageU8& targetImage, AlignedImageU8 image, FixedPoint scalar) {
+	assertExisting(image);
+	removeIfShared(targetImage);
+	allocateToSameSize(targetImage, image);
+	// Reference implementation
+	int64_t mantissa = scalar.getMantissa();
+	if (mantissa < 0) { mantissa = 0; } // At least zero, because negative clamps to zero
+	if (mantissa > 16711680) { mantissa = 16711680; } // At most 255 whole units, because more makes no difference
+	for (int32_t y = 0; y < image_getHeight(targetImage); y++) {
+		for (int32_t x = 0; x < image_getWidth(targetImage); x++) {
+			image_writePixel(targetImage, x, y, ((int64_t)image_readPixel_clamp(image, x, y) * mantissa) / 65536);
+		}
+	}
+}
+
+void dsr::media_filter_mul(AlignedImageU8& targetImage, AlignedImageU8 imageL, AlignedImageU8 imageR, FixedPoint scalar) {
+	assertSameSize(imageL, imageR);
+	removeIfShared(targetImage);
+	allocateToSameSize(targetImage, imageL);
+	// Reference implementation
+	int64_t mantissa = scalar.getMantissa();
+	if (mantissa < 0) { mantissa = 0; } // At least zero, because negative clamps to zero
+	if (mantissa > 16711680) { mantissa = 16711680; } // At most 255 whole units, because more makes no difference
+	for (int32_t y = 0; y < image_getHeight(targetImage); y++) {
+		for (int32_t x = 0; x < image_getWidth(targetImage); x++) {
+			int32_t result = ((uint64_t)image_readPixel_clamp(imageL, x, y) * (uint64_t)image_readPixel_clamp(imageR, x, y) * mantissa) / 65536;
+			image_writePixel(targetImage, x, y, result);
+		}
+	}
+}
+
+void dsr::media_fade_region_linear(ImageU8& targetImage, const IRect& viewport, FixedPoint x1, FixedPoint y1, FixedPoint luma1, FixedPoint x2, FixedPoint y2, FixedPoint luma2) {
+	assertExisting(targetImage);
+	if (luma1 < 0) { luma1 = FixedPoint::zero(); }
+	if (luma1 > 255) { luma1 = FixedPoint::fromWhole(255); }
+	if (luma2 < 0) { luma2 = FixedPoint::zero(); }
+	if (luma2 > 255) { luma2 = FixedPoint::fromWhole(255); }
+	// Subtracting half a pixel in the fade line is equivalent to adding half a pixel on X and Y
+	int64_t startX = x1.getMantissa() - 32768;
+	int64_t startY = y1.getMantissa() - 32768;
+	int64_t endX = x2.getMantissa() - 32768;
+	int64_t endY = y2.getMantissa() - 32768;
+	int64_t diffX = endX - startX; // (x2 - x1) * 65536
+	int64_t diffY = endY - startY; // (y2 - y1) * 65536
+	// There is no need to compute the linear lengths nor the distance.
+	//   By both generating a squared length and using a dot product, no square root is required.
+	//   This is because length(v)² = dot(v, v)
+	int64_t squareLength = ((diffX * diffX) + (diffY * diffY)) / 65536; // length² * 65536
+	if (squareLength < 65536) { squareLength = 65536; } // Prevent overflow
+	int64_t reciprocalSquareLength = 4294967296ll / squareLength; // (1 / length²) * 65536
+	// Calculate ratios for 3 pixels using dot products
+	int64_t offsetX = -startX; // First pixel relative to x1
+	int64_t offsetY = -startY; // First pixel relative to y1
+	int64_t offsetX_right = 65536 - startX; // Right pixel relative to x1
+	int64_t offsetY_down = 65536 - startY; // Down pixel relative to y1
+	int64_t dotProduct = ((offsetX * diffX) + (offsetY * diffY)) / 65536; // dot(offset, diff) * 65536
+	int64_t dotProduct_right = ((offsetX_right * diffX) + (offsetY * diffY)) / 65536; // dot(offsetRight, diff) * 65536
+	int64_t dotProduct_down = ((offsetX * diffX) + (offsetY_down * diffY)) / 65536; // dot(offsetDown, diff) * 65536
+	int64_t startRatio = (dotProduct * reciprocalSquareLength) / 65536; // The color mix ratio at the first pixel in a scale from 0 to 65536
+	int64_t ratioDx = (dotProduct_right * reciprocalSquareLength) / 65536 - startRatio; // The color mix difference when going right
+	int64_t ratioDy = (dotProduct_down * reciprocalSquareLength) / 65536 - startRatio; // The color mix difference when going down
+	// TODO: Optimize the cases where ratioDx == 0 (memset per line) or ratioDy == 0 (memcpy from first line)
+	for (int32_t y = viewport.top(); y < viewport.bottom(); y++) {
+		int64_t ratio = startRatio;
+		for (int32_t x = viewport.left(); x < viewport.right(); x++) {
+			int64_t saturatedRatio = ratio;
+			// TODO: Reuse this code section
+			if (saturatedRatio < 0) { saturatedRatio = 0; }
+			if (saturatedRatio > 65536) { saturatedRatio = 65536; }
+			int64_t mixedColor = ((luma1.getMantissa() * (65536 - saturatedRatio)) + (luma2.getMantissa() * saturatedRatio) + 2147483648ll) / 4294967296ll;
+			if (mixedColor < 0) { mixedColor = 0; }
+			if (mixedColor > 255) { mixedColor = 255; }
+			// TODO: Write the already saturated result using safe pointers to the target image
+			image_writePixel(targetImage, x, y, mixedColor);
+			ratio += ratioDx;
+		}
+		startRatio += ratioDy;
+	}
+}
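
As a side note, the fixed-point ratio computed above corresponds to the floating-point projection of each pixel onto the fade line; it relies on length(v)² = dot(v, v) so that no square root is needed. The helper below is only a readability sketch, not part of the filter:

// Sketch: ratio = dot(pixel - p1, p2 - p1) / dot(p2 - p1, p2 - p1), clamped to [0, 1].
static float fadeRatioSketch(float px, float py, float x1, float y1, float x2, float y2) {
	float dx = x2 - x1, dy = y2 - y1;
	float squareLength = dx * dx + dy * dy;                     // length² without any square root
	if (squareLength < 0.000001f) { squareLength = 0.000001f; } // avoid division by zero
	float ratio = ((px - x1) * dx + (py - y1) * dy) / squareLength;
	if (ratio < 0.0f) { ratio = 0.0f; }
	if (ratio > 1.0f) { ratio = 1.0f; }
	return ratio; // luma = luma1 * (1 - ratio) + luma2 * ratio
}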
+
+void dsr::media_fade_linear(ImageU8& targetImage, FixedPoint x1, FixedPoint y1, FixedPoint luma1, FixedPoint x2, FixedPoint y2, FixedPoint luma2) {
+	media_fade_region_linear(targetImage, image_getBound(targetImage), x1, y1, luma1, x2, y2, luma2);
+}
+
+void dsr::media_fade_region_radial(ImageU8& targetImage, const IRect& viewport, FixedPoint centerX, FixedPoint centerY, FixedPoint innerRadius, FixedPoint innerLuma, FixedPoint outerRadius, FixedPoint outerLuma) {
+	assertExisting(targetImage);
+	if (innerLuma < 0) { innerLuma = FixedPoint::zero(); }
+	if (innerLuma > 255) { innerLuma = FixedPoint::fromWhole(255); }
+	if (outerLuma < 0) { outerLuma = FixedPoint::zero(); }
+	if (outerLuma > 255) { outerLuma = FixedPoint::fromWhole(255); }
+	// Subtracting half a pixel in the fade line is equivalent to adding half a pixel on X and Y
+	FixedPoint originX = centerX + viewport.left() - FixedPoint::half();
+	FixedPoint originY = centerY + viewport.top() - FixedPoint::half();
+	// Let outerRadius be slightly outside of innerRadius to prevent division by zero
+	if (outerRadius <= innerRadius) {
+		outerRadius = innerRadius + FixedPoint::epsilon();
+	}
+	FixedPoint reciprocalFadeLength = FixedPoint::one() / (outerRadius - innerRadius);
+	for (int32_t y = viewport.top(); y < viewport.bottom(); y++) {
+		for (int32_t x = viewport.left(); x < viewport.right(); x++) {
+			FixedPoint diffX = x - originX;
+			FixedPoint diffY = y - originY;
+			FixedPoint length = fixedPoint_squareRoot((diffX * diffX) + (diffY * diffY));
+			FixedPoint ratio = (length - innerRadius) * reciprocalFadeLength;
+			int64_t saturatedRatio = ratio.getMantissa();
+			// TODO: Reuse this code section
+			if (saturatedRatio < 0) { saturatedRatio = 0; }
+			if (saturatedRatio > 65536) { saturatedRatio = 65536; }
+			int64_t mixedColor = ((innerLuma.getMantissa() * (65536 - saturatedRatio)) + (outerLuma.getMantissa() * saturatedRatio) + 2147483648ll) / 4294967296ll;
+			if (mixedColor < 0) { mixedColor = 0; }
+			if (mixedColor > 255) { mixedColor = 255; }
+			// TODO: Write the already saturated result using safe pointers to the target image
+			image_writePixel(targetImage, x, y, mixedColor);
+		}
+	}
+}
+
+void dsr::media_fade_radial(ImageU8& targetImage, FixedPoint centerX, FixedPoint centerY, FixedPoint innerRadius, FixedPoint innerLuma, FixedPoint outerRadius, FixedPoint outerLuma) {
+	media_fade_region_radial(targetImage, image_getBound(targetImage), centerX, centerY, innerRadius, innerLuma, outerRadius, outerLuma);
+}

+ 59 - 0
Source/DFPSR/machine/mediaFilters.h

@@ -0,0 +1,59 @@
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_MEDIA_FILTERS
+#define DFPSR_MEDIA_FILTERS
+
+#include "../../DFPSR/includeFramework.h" // TODO: Replace with specific modules
+
+namespace dsr {
+
+// Aliasing between a target and an input image will increase the reference count when the input is given,
+// which makes the target count as shared and forces a new allocation for the target.
+// In other words, aliasing between input and output cannot be used to reduce the number of allocations.
+
+void media_filter_add(AlignedImageU8& targetImage, AlignedImageU8 imageL, AlignedImageU8 imageR);
+void media_filter_add(AlignedImageU8& targetImage, AlignedImageU8 image, FixedPoint scalar);
+
+void media_filter_sub(AlignedImageU8& targetImage, AlignedImageU8 imageL, AlignedImageU8 imageR);
+void media_filter_sub(AlignedImageU8& targetImage, AlignedImageU8 image, FixedPoint scalar);
+void media_filter_sub(AlignedImageU8& targetImage, FixedPoint scalar, AlignedImageU8 image);
+
+void media_filter_mul(AlignedImageU8& targetImage, AlignedImageU8 image, FixedPoint scalar);
+void media_filter_mul(AlignedImageU8& targetImage, AlignedImageU8 imageL, AlignedImageU8 imageR, FixedPoint scalar);
+
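A minimal usage sketch of the filters above (the function name and the added constant are made up); it also illustrates the aliasing note: passing the same handle as both target and input only triggers a fresh allocation, it does not save one.

// Sketch: brighten a grayscale image by a constant, letting the filter allocate the target.
static AlignedImageU8 brightenSketch(AlignedImageU8 input) {
	AlignedImageU8 result; // empty target, allocated to the input's size by the filter
	media_filter_add(result, input, FixedPoint::fromWhole(32));
	return result;
}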
+// Fill a region of the image with a linear fade
+void media_fade_region_linear(ImageU8& targetImage, const IRect& viewport, FixedPoint x1, FixedPoint y1, FixedPoint luma1, FixedPoint x2, FixedPoint y2, FixedPoint luma2);
+// Fill the whole image with a linear fade
+void media_fade_linear(ImageU8& targetImage, FixedPoint x1, FixedPoint y1, FixedPoint luma1, FixedPoint x2, FixedPoint y2, FixedPoint luma2);
+
+// Fill a region of the image with a radial fade
+// Pre-condition: innerRadius < outerRadius
+//   outerRadius will silently be reassigned to innerRadius + epsilon if the criterion isn't met
+void media_fade_region_radial(ImageU8& targetImage, const IRect& viewport, FixedPoint centerX, FixedPoint centerY, FixedPoint innerRadius, FixedPoint innerLuma, FixedPoint outerRadius, FixedPoint outerLuma);
+// Fill the whole image with a radial fade
+void media_fade_radial(ImageU8& targetImage, FixedPoint centerX, FixedPoint centerY, FixedPoint innerRadius, FixedPoint innerLuma, FixedPoint outerRadius, FixedPoint outerLuma);
+
+}
+
+#endif

+ 81 - 0
Source/DFPSR/math/FMatrix2x2.h

@@ -0,0 +1,81 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_FMATRIX2x2
+#define DFPSR_GEOMETRY_FMATRIX2x2
+
+#include <cassert>
+#include "FVector.h"
+
+namespace dsr {
+
+struct FMatrix2x2 {
+	FVector2D xAxis, yAxis;
+	FMatrix2x2() :
+	  xAxis(FVector2D(1.0f, 0.0f)),
+	  yAxis(FVector2D(0.0f, 1.0f)) {}
+	explicit FMatrix2x2(float uniformScale) :
+	  xAxis(FVector2D(uniformScale, 0.0f)),
+	  yAxis(FVector2D(0.0f, uniformScale)) {}
+	FMatrix2x2(const FVector2D &xAxis, const FVector2D &yAxis) :
+	  xAxis(xAxis),
+	  yAxis(yAxis) {}
+	// Transform a vector by multiplying it with the matrix
+	FVector2D transform(const FVector2D &p) const {
+		return FVector2D(
+		  p.x * this->xAxis.x + p.y * this->yAxis.x,
+		  p.x * this->xAxis.y + p.y * this->yAxis.y
+		);
+	}
+	// Transform a vector by multiplying it with the transpose of the matrix
+	// The transpose is the inverse for axis aligned normalized matrices
+	//   Axis aligned: Each non-self axis dot-product equals zero.
+	//   Normalized: The length of each axis equals one.
+	FVector2D transformTransposed(const FVector2D &p) const {
+		return FVector2D(
+		  p.x * this->xAxis.x + p.y * this->xAxis.y,
+		  p.x * this->yAxis.x + p.y * this->yAxis.y
+		);
+	}
+};
+
+inline FMatrix2x2 operator*(const FMatrix2x2 &m, const float &scale) {
+	return FMatrix2x2(m.xAxis * scale, m.yAxis * scale);
+}
+inline FMatrix2x2 operator*(const FMatrix2x2 &left, const FMatrix2x2 &right) {
+	return FMatrix2x2(right.transform(left.xAxis), right.transform(left.yAxis));
+}
+
+inline float determinant(const FMatrix2x2& m) {
+	return m.xAxis.x * m.yAxis.y - m.xAxis.y * m.yAxis.x;
+}
+
+// The full matrix inverse for any matrix where the determinant is not zero
+inline FMatrix2x2 inverse(const FMatrix2x2& m) {
+	return FMatrix2x2(FVector2D(m.yAxis.y, -m.xAxis.y), FVector2D(-m.yAxis.x, m.xAxis.x)) * (1.0f / determinant(m));
+}
+
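For orientation, a small sketch (not part of the header, names chosen for illustration): an orthonormal matrix such as a rotation is inverted by its transpose, which is what transformTransposed exploits, while inverse() handles the general case.

// Sketch: (0.8, 0.6) and (-0.6, 0.8) are orthogonal unit axes, so the transpose undoes the transform.
static void rotationSketch() {
	FMatrix2x2 rotation(FVector2D(0.8f, 0.6f), FVector2D(-0.6f, 0.8f));
	FVector2D p(3.0f, 4.0f);
	FVector2D q = rotation.transform(p);              // rotate p
	FVector2D back = rotation.transformTransposed(q); // p again, within float precision
	// A sheared or scaled matrix is not orthonormal, so inverse(matrix) must be used instead.
	(void)back;
}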
+}
+
+#endif
+

+ 112 - 0
Source/DFPSR/math/FMatrix3x3.h

@@ -0,0 +1,112 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_FMATRIX3x3
+#define DFPSR_GEOMETRY_FMATRIX3x3
+
+#include <cassert>
+#include "FVector.h"
+
+namespace dsr {
+
+struct FMatrix3x3 {
+	FVector3D xAxis, yAxis, zAxis;
+	FMatrix3x3() :
+	  xAxis(FVector3D(1.0f, 0.0f, 0.0f)),
+	  yAxis(FVector3D(0.0f, 1.0f, 0.0f)),
+	  zAxis(FVector3D(0.0f, 0.0f, 1.0f)) {}
+	explicit FMatrix3x3(float uniformScale) :
+	  xAxis(FVector3D(uniformScale, 0.0f, 0.0f)),
+	  yAxis(FVector3D(0.0f, uniformScale, 0.0f)),
+	  zAxis(FVector3D(0.0f, 0.0f, uniformScale)) {}
+	FMatrix3x3(const FVector3D &xAxis, const FVector3D &yAxis, const FVector3D &zAxis) :
+	  xAxis(xAxis),
+	  yAxis(yAxis),
+	  zAxis(zAxis) {}
+	static FMatrix3x3 makeAxisSystem(const FVector3D &forward, const FVector3D &up) {
+		FMatrix3x3 result;
+		FVector3D forwardNormalized = normalize(forward);
+		result.zAxis = forwardNormalized;
+		result.xAxis = normalize(crossProduct(normalize(up), forwardNormalized));
+		result.yAxis = normalize(crossProduct(forwardNormalized, result.xAxis));
+		return result;
+	}
+	// Transform a vector by multiplying it with the matrix
+	FVector3D transform(const FVector3D &p) const {
+		return FVector3D(
+		  p.x * this->xAxis.x + p.y * this->yAxis.x + p.z * this->zAxis.x,
+		  p.x * this->xAxis.y + p.y * this->yAxis.y + p.z * this->zAxis.y,
+		  p.x * this->xAxis.z + p.y * this->yAxis.z + p.z * this->zAxis.z
+		);
+	}
+	// Transform a vector by multiplying it with the transpose of the matrix
+	// The transpose is the inverse for axis aligned normalized matrices
+	//   Axis aligned: Each non-self axis dot-product equals zero.
+	//   Normalized: The length of each axis equals one.
+	FVector3D transformTransposed(const FVector3D &p) const {
+		return FVector3D(
+		  p.x * this->xAxis.x + p.y * this->xAxis.y + p.z * this->xAxis.z,
+		  p.x * this->yAxis.x + p.y * this->yAxis.y + p.z * this->yAxis.z,
+		  p.x * this->zAxis.x + p.y * this->zAxis.y + p.z * this->zAxis.z
+		);
+	}
+};
+
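As an illustration of makeAxisSystem (the chosen directions are arbitrary): the forward vector becomes the normalized zAxis and the up hint is used to derive the remaining perpendicular axes, so transform maps local directions into the new system and transformTransposed maps them back.

// Sketch: build a basis looking along +X with +Y as the up hint.
static void axisSystemSketch() {
	FMatrix3x3 basis = FMatrix3x3::makeAxisSystem(FVector3D(1.0f, 0.0f, 0.0f), FVector3D(0.0f, 1.0f, 0.0f));
	FVector3D worldForward = basis.transform(FVector3D(0.0f, 0.0f, 1.0f)); // local +Z maps to (1, 0, 0)
	FVector3D localAgain = basis.transformTransposed(worldForward);        // back to (0, 0, 1)
	(void)localAgain;
}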
+inline FMatrix3x3 operator*(const FMatrix3x3 &m, const float &scale) {
+	return FMatrix3x3(m.xAxis * scale, m.yAxis * scale, m.zAxis * scale);
+}
+inline FMatrix3x3 operator*(const FMatrix3x3 &left, const FMatrix3x3 &right) {
+	return FMatrix3x3(right.transform(left.xAxis), right.transform(left.yAxis), right.transform(left.zAxis));
+}
+
+inline float determinant(const FMatrix3x3& m) {
+	return m.xAxis.x * m.yAxis.y * m.zAxis.z
+	     + m.zAxis.x * m.xAxis.y * m.yAxis.z
+	     + m.yAxis.x * m.zAxis.y * m.xAxis.z
+	     - m.xAxis.x * m.zAxis.y * m.yAxis.z
+	     - m.yAxis.x * m.xAxis.y * m.zAxis.z
+	     - m.zAxis.x * m.yAxis.y * m.xAxis.z;
+}
+
+inline FMatrix3x3 inverseUsingInvDet(const FMatrix3x3& m, float invDet) {
+	FMatrix3x3 result;
+	result.xAxis.x = invDet * (m.yAxis.y * m.zAxis.z - m.yAxis.z * m.zAxis.y);
+	result.xAxis.y = -invDet * (m.xAxis.y * m.zAxis.z - m.xAxis.z * m.zAxis.y);
+	result.xAxis.z = invDet * (m.xAxis.y * m.yAxis.z - m.xAxis.z * m.yAxis.y);
+	result.yAxis.x = -invDet * (m.yAxis.x * m.zAxis.z - m.yAxis.z * m.zAxis.x);
+	result.yAxis.y = invDet * (m.xAxis.x * m.zAxis.z - m.xAxis.z * m.zAxis.x);
+	result.yAxis.z = -invDet * (m.xAxis.x * m.yAxis.z - m.xAxis.z * m.yAxis.x);
+	result.zAxis.x = invDet * (m.yAxis.x * m.zAxis.y - m.yAxis.y * m.zAxis.x);
+	result.zAxis.y = -invDet * (m.xAxis.x * m.zAxis.y - m.xAxis.y * m.zAxis.x);
+	result.zAxis.z = invDet * (m.xAxis.x * m.yAxis.y - m.xAxis.y * m.yAxis.x);
+	return result;
+}
+
+inline FMatrix3x3 inverse(const FMatrix3x3& m) {
+	return inverseUsingInvDet(m, 1.0f / determinant(m));
+}
+
+}
+
+#endif
+

+ 56 - 0
Source/DFPSR/math/FPlane3D.h

@@ -0,0 +1,56 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_FPLANE3D
+#define DFPSR_GEOMETRY_FPLANE3D
+
+#include <math.h>
+#include "FVector.h"
+
+namespace dsr {
+
+struct FPlane3D {
+	FVector3D normal; // The plane's normal facing out
+	float offset; // The plane's translation along the normal
+	FPlane3D() : normal(FVector3D()), offset(0.0f) {}
+	FPlane3D(const FVector3D &normal, float offset) : normal(normalize(normal)), offset(offset) {}
+	// Get the closest distance between the point and the plane
+	// A negative distance is returned if the point is inside
+	float signedDistance(const FVector3D &point) const {
+		return dotProduct(this->normal, point) - this->offset;
+	}
+	bool inside(const FVector3D &point) const {
+		return this->signedDistance(point) <= 0.0f;
+	}
+	// Returns a point on the plane intersecting the line starting at point along direction
+	// Returns +-INF or NaN when there's no point of intersection
+	FVector3D rayIntersect(const FVector3D &point, const FVector3D &direction) const {
+		// Solve dot(normal, point + direction * distance) == offset, matching the signedDistance convention
+		float distance = (this->offset - dotProduct(this->normal, point)) / dotProduct(this->normal, direction);
+		return point + (direction * distance);
+	}
+};
+
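A brief sketch of the plane operations (values are arbitrary, and the ray result assumes the same sign convention as signedDistance, where a point on the plane satisfies dot(normal, point) == offset):

// Sketch: the plane y == 2 with an upward normal.
static void planeSketch() {
	FPlane3D ground(FVector3D(0.0f, 1.0f, 0.0f), 2.0f);
	float above = ground.signedDistance(FVector3D(0.0f, 5.0f, 0.0f)); // 3.0, the point is outside
	bool below = ground.inside(FVector3D(0.0f, 1.0f, 0.0f));          // true, signed distance -1.0
	FVector3D hit = ground.rayIntersect(FVector3D(0.0f, 5.0f, 0.0f), FVector3D(0.0f, -1.0f, 0.0f)); // (0, 2, 0)
	(void)above; (void)below; (void)hit;
}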
+}
+
+#endif
+

+ 133 - 0
Source/DFPSR/math/FVector.h

@@ -0,0 +1,133 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_FVECTOR
+#define DFPSR_GEOMETRY_FVECTOR
+
+#include "vectorMethods.h"
+
+namespace dsr {
+
+struct FVector2D {
+	VECTOR_BODY_2D(FVector2D, float, 0.0f);
+};
+struct FVector3D {
+	VECTOR_BODY_3D(FVector3D, float, 0.0f);
+};
+struct FVector4D {
+	VECTOR_BODY_4D(FVector4D, float, 0.0f);
+};
+
+OPERATORS_2D(FVector2D, float);
+OPERATORS_3D(FVector3D, float);
+OPERATORS_4D(FVector4D, float);
+SIGNED_OPERATORS_2D(FVector2D, float);
+SIGNED_OPERATORS_3D(FVector3D, float);
+SIGNED_OPERATORS_4D(FVector4D, float);
+SERIALIZATION_2D(FVector2D);
+SERIALIZATION_3D(FVector3D);
+SERIALIZATION_4D(FVector4D);
+
+inline bool operator==(const FVector2D &left, const FVector2D &right) {
+	return fabs(left.x - right.x) < 0.0001f && fabs(left.y - right.y) < 0.0001f;
+}
+inline bool operator==(const FVector3D &left, const FVector3D &right) {
+	return fabs(left.x - right.x) < 0.0001f && fabs(left.y - right.y) < 0.0001f && fabs(left.z - right.z) < 0.0001f;
+}
+inline bool operator==(const FVector4D &left, const FVector4D &right) {
+	return fabs(left.x - right.x) < 0.0001f && fabs(left.y - right.y) < 0.0001f && fabs(left.z - right.z) < 0.0001f && fabs(left.w - right.w) < 0.0001f;
+}
+
+inline bool operator!=(const FVector2D &left, const FVector2D &right) {
+	return !(left == right);
+}
+inline bool operator!=(const FVector3D &left, const FVector3D &right) {
+	return !(left == right);
+}
+inline bool operator!=(const FVector4D &left, const FVector4D &right) {
+	return !(left == right);
+}
+
+inline float dotProduct(const FVector2D &a, const FVector2D &b) {
+	return (a.x * b.x) + (a.y * b.y);
+}
+inline float dotProduct(const FVector3D &a, const FVector3D &b) {
+	return (a.x * b.x) + (a.y * b.y) + (a.z * b.z);
+}
+inline float dotProduct(const FVector4D &a, const FVector4D &b) {
+	return (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + (a.w * b.w);
+}
+
+inline float squareLength(const FVector2D &v) {
+	return v.x * v.x + v.y * v.y;
+}
+inline float squareLength(const FVector3D &v) {
+	return v.x * v.x + v.y * v.y + v.z * v.z;
+}
+inline float squareLength(const FVector4D &v) {
+	return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
+}
+
+inline float length(const FVector2D &v) {
+	return sqrtf(squareLength(v));
+}
+inline float length(const FVector3D &v) {
+	return sqrtf(squareLength(v));
+}
+inline float length(const FVector4D &v) {
+	return sqrtf(squareLength(v));
+}
+
+inline FVector3D crossProduct(const FVector3D &a, const FVector3D &b) {
+	return FVector3D(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
+}
+
+inline FVector2D normalize(const FVector2D &v) {
+	float l = length(v);
+	if (l == 0.0f) {
+		return FVector2D(0.0f, 1.0f);
+	} else {
+		return v / l;
+	}
+}
+inline FVector3D normalize(const FVector3D &v) {
+	float l = length(v);
+	if (l == 0.0f) {
+		return FVector3D(0.0f, 0.0f, 1.0f);
+	} else {
+		return v / l;
+	}
+}
+inline FVector4D normalize(const FVector4D &v) {
+	float l = length(v);
+	if (l == 0.0f) {
+		return FVector4D(0.0f, 0.0f, 0.0f, 1.0f);
+	} else {
+		return v / l;
+	}
+}
+
+}
+
+#endif
+

+ 387 - 0
Source/DFPSR/math/FixedPoint.cpp

@@ -0,0 +1,387 @@
+
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "FixedPoint.h"
+#include <cmath> // Only use the methods guaranteed to be exact, unless an approximation is requested
+
+using namespace dsr;
+
+/* This sum of 0.9999999999999999999 explains why including the 20th decimal would cause overflow when rounding to closest.
+16602069666338596454 + 1660206966633859645 // 0.9 + 0.09
+18262276632972456099 + 166020696663385965 // 0.99 + 0.009
+18428297329635842064 + 16602069666338596 // 0.999 + 0.0009
+18444899399302180660 + 1660206966633860 // 0.9999 + 0.00009
+18446559606268814520 + 166020696663386 // 0.99999 + 0.000009
+18446725626965477906 + 16602069666339 // 0.999999 + 0.0000009
+18446742229035144245 + 1660206966634 // 0.9999999 + 0.00000009
+18446743889242110879 + 166020696663 // 0.99999999 + 0.000000009
+18446744055262807542 + 16602069666 // 0.999999999 + 0.0000000009
+18446744071864877208 + 1660206967 // 0.9999999999 + 0.00000000009
+18446744073525084175 + 166020697 // 0.99999999999 + 0.000000000009
+18446744073691104872 + 16602070 // 0.999999999999 + 0.0000000000009
+18446744073707706942 + 1660207 // 0.9999999999999 + 0.00000000000009
+18446744073709367149 + 166021 // 0.99999999999999 + 0.000000000000009
+18446744073709533170 + 16602 // 0.999999999999999 + 0.0000000000000009
+18446744073709549772 + 1660 // 0.9999999999999999 + 0.00000000000000009
+18446744073709551432 + 166 // 0.99999999999999999 + 0.000000000000000009
+18446744073709551598 + 17 // 0.999999999999999999 + 0.0000000000000000009
+18446744073709551615     // 0.9999999999999999999
+18446744073709551616    // 1.0
+*/
+
+// Including the 20th decimal would cause overflow when rounding to closest.
+const int maxDecimals = 19;
+// Each group of 9 values contains the digit fractions for a certain location
+static const uint64_t decimalFractions64[maxDecimals * 9] = {
+	// Calculated using the Wolfram expression "round(18446744073709551616 * 1 / 10)" et cetera...
+	 1844674407370955162ull, // 2^64 * 0.1
+	 3689348814741910323ull, // 2^64 * 0.2
+	 5534023222112865485ull, // 2^64 * 0.3
+	 7378697629483820646ull, // 2^64 * 0.4
+	 9223372036854775808ull, // 2^64 * 0.5
+	11068046444225730970ull, // 2^64 * 0.6
+	12912720851596686131ull, // 2^64 * 0.7
+	14757395258967641293ull, // 2^64 * 0.8
+	16602069666338596454ull, // 2^64 * 0.9
+	 184467440737095516ull, // 2^64 * 0.01
+	 368934881474191032ull, // 2^64 * 0.02
+	 553402322211286548ull, // 2^64 * 0.03
+	 737869762948382065ull, // 2^64 * 0.04
+	 922337203685477581ull, // 2^64 * 0.05
+	1106804644422573097ull, // 2^64 * 0.06
+	1291272085159668613ull, // 2^64 * 0.07
+	1475739525896764129ull, // 2^64 * 0.08
+	1660206966633859645ull, // 2^64 * 0.09
+	 18446744073709552ull, // 2^64 * 0.001
+	 36893488147419103ull, // 2^64 * 0.002
+	 55340232221128655ull, // 2^64 * 0.003
+	 73786976294838206ull, // 2^64 * 0.004
+	 92233720368547758ull, // 2^64 * 0.005
+	110680464442257310ull, // 2^64 * 0.006
+	129127208515966861ull, // 2^64 * 0.007
+	147573952589676413ull, // 2^64 * 0.008
+	166020696663385965ull, // 2^64 * 0.009
+	 1844674407370955ull, // 2^64 * 0.0001
+	 3689348814741910ull, // 2^64 * 0.0002
+	 5534023222112865ull, // 2^64 * 0.0003
+	 7378697629483821ull, // 2^64 * 0.0004
+	 9223372036854776ull, // 2^64 * 0.0005
+	11068046444225731ull, // 2^64 * 0.0006
+	12912720851596686ull, // 2^64 * 0.0007
+	14757395258967641ull, // 2^64 * 0.0008
+	16602069666338596ull, // 2^64 * 0.0009
+	 184467440737096ull, // 2^64 * 0.00001
+	 368934881474191ull, // 2^64 * 0.00002
+	 553402322211287ull, // 2^64 * 0.00003
+	 737869762948382ull, // 2^64 * 0.00004
+	 922337203685478ull, // 2^64 * 0.00005
+	1106804644422573ull, // 2^64 * 0.00006
+	1291272085159669ull, // 2^64 * 0.00007
+	1475739525896764ull, // 2^64 * 0.00008
+	1660206966633860ull, // 2^64 * 0.00009
+	 18446744073710ull, // 2^64 * 0.000001
+	 36893488147419ull, // 2^64 * 0.000002
+	 55340232221129ull, // 2^64 * 0.000003
+	 73786976294838ull, // 2^64 * 0.000004
+	 92233720368548ull, // 2^64 * 0.000005
+	110680464442257ull, // 2^64 * 0.000006
+	129127208515967ull, // 2^64 * 0.000007
+	147573952589676ull, // 2^64 * 0.000008
+	166020696663386ull, // 2^64 * 0.000009
+	 1844674407371ull, // 2^64 * 0.0000001
+	 3689348814742ull, // 2^64 * 0.0000002
+	 5534023222113ull, // 2^64 * 0.0000003
+	 7378697629484ull, // 2^64 * 0.0000004
+	 9223372036855ull, // 2^64 * 0.0000005
+	11068046444226ull, // 2^64 * 0.0000006
+	12912720851597ull, // 2^64 * 0.0000007
+	14757395258968ull, // 2^64 * 0.0000008
+	16602069666339ull, // 2^64 * 0.0000009
+	 184467440737ull, // 2^64 * 0.00000001
+	 368934881474ull, // 2^64 * 0.00000002
+	 553402322211ull, // 2^64 * 0.00000003
+	 737869762948ull, // 2^64 * 0.00000004
+	 922337203685ull, // 2^64 * 0.00000005
+	1106804644423ull, // 2^64 * 0.00000006
+	1291272085160ull, // 2^64 * 0.00000007
+	1475739525897ull, // 2^64 * 0.00000008
+	1660206966634ull, // 2^64 * 0.00000009
+	 18446744074ull, // 2^64 * 0.000000001
+	 36893488147ull, // 2^64 * 0.000000002
+	 55340232221ull, // 2^64 * 0.000000003
+	 73786976295ull, // 2^64 * 0.000000004
+	 92233720369ull, // 2^64 * 0.000000005
+	110680464442ull, // 2^64 * 0.000000006
+	129127208516ull, // 2^64 * 0.000000007
+	147573952590ull, // 2^64 * 0.000000008
+	166020696663ull, // 2^64 * 0.000000009
+	 1844674407ull, // 2^64 * 0.0000000001
+	 3689348815ull, // 2^64 * 0.0000000002
+	 5534023222ull, // 2^64 * 0.0000000003
+	 7378697629ull, // 2^64 * 0.0000000004
+	 9223372037ull, // 2^64 * 0.0000000005
+	11068046444ull, // 2^64 * 0.0000000006
+	12912720852ull, // 2^64 * 0.0000000007
+	14757395259ull, // 2^64 * 0.0000000008
+	16602069666ull, // 2^64 * 0.0000000009
+	 184467441ull, // 2^64 * 0.00000000001
+	 368934881ull, // 2^64 * 0.00000000002
+	 553402322ull, // 2^64 * 0.00000000003
+	 737869763ull, // 2^64 * 0.00000000004
+	 922337204ull, // 2^64 * 0.00000000005
+	1106804644ull, // 2^64 * 0.00000000006
+	1291272085ull, // 2^64 * 0.00000000007
+	1475739526ull, // 2^64 * 0.00000000008
+	1660206967ull, // 2^64 * 0.00000000009
+	 18446744ull, // 2^64 * 0.000000000001
+	 36893488ull, // 2^64 * 0.000000000002
+	 55340232ull, // 2^64 * 0.000000000003
+	 73786976ull, // 2^64 * 0.000000000004
+	 92233720ull, // 2^64 * 0.000000000005
+	110680464ull, // 2^64 * 0.000000000006
+	129127209ull, // 2^64 * 0.000000000007
+	147573953ull, // 2^64 * 0.000000000008
+	166020697ull, // 2^64 * 0.000000000009
+	 1844674ull, // 2^64 * 0.0000000000001
+	 3689349ull, // 2^64 * 0.0000000000002
+	 5534023ull, // 2^64 * 0.0000000000003
+	 7378698ull, // 2^64 * 0.0000000000004
+	 9223372ull, // 2^64 * 0.0000000000005
+	11068046ull, // 2^64 * 0.0000000000006
+	12912721ull, // 2^64 * 0.0000000000007
+	14757395ull, // 2^64 * 0.0000000000008
+	16602070ull, // 2^64 * 0.0000000000009
+	 184467ull, // 2^64 * 0.00000000000001
+	 368935ull, // 2^64 * 0.00000000000002
+	 553402ull, // 2^64 * 0.00000000000003
+	 737870ull, // 2^64 * 0.00000000000004
+	 922337ull, // 2^64 * 0.00000000000005
+	1106805ull, // 2^64 * 0.00000000000006
+	1291272ull, // 2^64 * 0.00000000000007
+	1475740ull, // 2^64 * 0.00000000000008
+	1660207ull, // 2^64 * 0.00000000000009
+	 18447ull, // 2^64 * 0.000000000000001
+	 36893ull, // 2^64 * 0.000000000000002
+	 55340ull, // 2^64 * 0.000000000000003
+	 73787ull, // 2^64 * 0.000000000000004
+	 92234ull, // 2^64 * 0.000000000000005
+	110680ull, // 2^64 * 0.000000000000006
+	129127ull, // 2^64 * 0.000000000000007
+	147574ull, // 2^64 * 0.000000000000008
+	166021ull, // 2^64 * 0.000000000000009
+	 1845ull, // 2^64 * 0.0000000000000001
+	 3689ull, // 2^64 * 0.0000000000000002
+	 5534ull, // 2^64 * 0.0000000000000003
+	 7379ull, // 2^64 * 0.0000000000000004
+	 9223ull, // 2^64 * 0.0000000000000005
+	11068ull, // 2^64 * 0.0000000000000006
+	12913ull, // 2^64 * 0.0000000000000007
+	14757ull, // 2^64 * 0.0000000000000008
+	16602ull, // 2^64 * 0.0000000000000009
+	 184ull, // 2^64 * 0.00000000000000001
+	 369ull, // 2^64 * 0.00000000000000002
+	 553ull, // 2^64 * 0.00000000000000003
+	 738ull, // 2^64 * 0.00000000000000004
+	 922ull, // 2^64 * 0.00000000000000005
+	1107ull, // 2^64 * 0.00000000000000006
+	1291ull, // 2^64 * 0.00000000000000007
+	1476ull, // 2^64 * 0.00000000000000008
+	1660ull, // 2^64 * 0.00000000000000009
+	 18ull, // 2^64 * 0.000000000000000001
+	 37ull, // 2^64 * 0.000000000000000002
+	 55ull, // 2^64 * 0.000000000000000003
+	 74ull, // 2^64 * 0.000000000000000004
+	 92ull, // 2^64 * 0.000000000000000005
+	111ull, // 2^64 * 0.000000000000000006
+	129ull, // 2^64 * 0.000000000000000007
+	148ull, // 2^64 * 0.000000000000000008
+	166ull, // 2^64 * 0.000000000000000009
+	 2ull, // 2^64 * 0.0000000000000000001
+	 4ull, // 2^64 * 0.0000000000000000002
+	 6ull, // 2^64 * 0.0000000000000000003
+	 7ull, // 2^64 * 0.0000000000000000004
+	 9ull, // 2^64 * 0.0000000000000000005
+	11ull, // 2^64 * 0.0000000000000000006
+	13ull, // 2^64 * 0.0000000000000000007
+	15ull, // 2^64 * 0.0000000000000000008
+	17ull, // 2^64 * 0.0000000000000000009
+};
+// decimalPosition 0 returns the fractions for the first decimal (0.1 to 0.9 for digits 1 to 9)
+// decimalPosition 1 returns the fractions for the second decimal (0.01 to 0.09), et cetera
+static const uint64_t getDecimalFraction64(int decimalPosition, int digit) {
+	if (decimalPosition < 0 || decimalPosition >= maxDecimals || digit < 1 || digit > 9) {
+		return 0;
+	} else {
+		return decimalFractions64[(decimalPosition * 9) + (digit - 1)];
+	}
+}
+
+FixedPoint::FixedPoint() : mantissa(0) {}
+
+FixedPoint::FixedPoint(int64_t newMantissa) {
+	clampForInt32(newMantissa);
+	this->mantissa = newMantissa;
+}
+
+FixedPoint FixedPoint::fromWhole(int64_t wholeInteger) {
+	clampForSaturatedWhole(wholeInteger);
+	return FixedPoint(wholeInteger * 65536); // Does this need to saturate again?
+}
+
+FixedPoint FixedPoint::fromMantissa(int64_t mantissa) {
+	return FixedPoint(mantissa);
+}
+
+FixedPoint FixedPoint::fromText(const ReadableString& text) {
+	ReadableString content = string_removeOuterWhiteSpace(text);
+	bool isSigned = content.findFirst(U'-') > -1; // A single leading minus sign is assumed
+	int decimal = content.findFirst(U'.');
+	int colon = content.findFirst(U':');
+	int64_t result = 0;
+	if (decimal > -1 && colon == -1) {
+		// Floating-point decimal
+		// TODO: Give warnings for incorrect whole integers
+		int64_t wholeInteger = string_parseInteger(content.before(decimal));
+		ReadableString decimals = content.after(decimal);
+		uint64_t fraction = 0; // Extra high precision for accumulation
+		for (int i = 0; i < decimals.length(); i++) {
+			DsrChar digit = decimals[i];
+			if (digit >= U'1' && digit <= U'9') {
+				fraction += getDecimalFraction64(i, digit - U'0');
+			} // else if (digit != U'0') // TODO: Give warnings for any non-digit characters.
+		}
+		// Truncate the fraction down to 32-bits before safely rounding to closest 16-bit fraction
+		int64_t signedFraction = ((fraction >> 32) + 32768) >> 16; // Convert to closest 16-bit fraction
+		if (isSigned) { signedFraction = -signedFraction; }
+		result = (wholeInteger * 65536) + signedFraction; // Does this need to saturate again?
+	} else if (decimal == -1 && colon > -1) {
+		// Whole integer and 16-bit fraction
+		// TODO: Give warnings for incorrect integers
+		int64_t wholeInteger = string_parseInteger(content.before(colon));
+		int64_t fraction = string_parseInteger(content.after(colon));
+		clampForSaturatedWhole(wholeInteger);
+		if (isSigned) { fraction = -fraction; }
+		result = (wholeInteger * 65536) + fraction;
+	} else if (decimal == -1 && colon == -1) {
+		// Whole
+		int64_t wholeInteger = string_parseInteger(content);
+		clampForSaturatedWhole(wholeInteger);
+		result = wholeInteger * 65536; // Does this need to saturate again?
+	} // TODO: Give a warning if both . and : are used!
+	return FixedPoint(result);
+}
+
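A small sketch of the three accepted notations parsed above (the chosen literals are arbitrary examples):

// Sketch: decimal, whole:fraction and plain whole notations all map to 16.16 mantissas.
static void fromTextSketch() {
	FixedPoint a = FixedPoint::fromText(U"2.5");     // mantissa 2 * 65536 + 32768
	FixedPoint b = FixedPoint::fromText(U"2:32768"); // whole:fraction notation, same value as a
	FixedPoint c = FixedPoint::fromText(U"-3");      // mantissa -3 * 65536
	(void)a; (void)b; (void)c;
}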
+FixedPoint FixedPoint::zero() {
+	return FixedPoint(0);
+}
+FixedPoint FixedPoint::epsilon() {
+	return FixedPoint(1);
+}
+FixedPoint FixedPoint::half() {
+	return FixedPoint(32768);
+}
+FixedPoint FixedPoint::one() {
+	return FixedPoint(65536);
+}
+
+int32_t dsr::fixedPoint_round(const FixedPoint& value) {
+	int64_t mantissa = value.getMantissa();
+	int32_t offset = mantissa >= 0 ? 32768 : -32768;
+	return (mantissa + offset) / 65536;
+}
+
+double dsr::fixedPoint_approximate(const FixedPoint& value) {
+	return ((double)value.getMantissa()) * (1.0 / 65536.0);
+}
+
+String& dsr::string_toStreamIndented(String& target, const FixedPoint& value, const ReadableString& indentation) {
+	// TODO: Make own fixed-point serialization which cannot resort to scientific notation
+	string_append(target, indentation, fixedPoint_approximate(value));
+	return target;
+}
+
+FixedPoint dsr::fixedPoint_min(const FixedPoint &left, const FixedPoint &right) {
+	int64_t result = left.getMantissa();
+	int64_t other = right.getMantissa();
+	if (other < result) result = other;
+	return FixedPoint(result);
+}
+
+FixedPoint dsr::fixedPoint_max(const FixedPoint &left, const FixedPoint &right) {
+	int64_t result = left.getMantissa();
+	int64_t other = right.getMantissa();
+	if (other > result) result = other;
+	return FixedPoint(result);
+}
+
+FixedPoint dsr::fixedPoint_divide(const FixedPoint &left, const FixedPoint &right) {
+	int64_t mantissa = 0;
+	if (right.getMantissa() == 0) {
+		if (left.getMantissa() > 0) {
+			mantissa = 2147483647; // Saturate from positive infinity
+		} else if (left.getMantissa() < 0) {
+			mantissa = -2147483648; // Saturate from negative infinity
+		}
+	} else {
+		mantissa = (left.getMantissa() * 65536) / right.getMantissa();
+	}
+	return FixedPoint(mantissa);
+}
+FixedPoint dsr::fixedPoint_divide(const FixedPoint &left, int64_t right) {
+	int64_t mantissa = 0;
+	if (right == 0) {
+		if (left.getMantissa() > 0) {
+			mantissa = 2147483647; // Saturate from positive infinity
+		} else if (left.getMantissa() < 0) {
+			mantissa = -2147483648; // Saturate from negative infinity
+		}
+	} else {
+		mantissa = left.getMantissa() / right;
+	}
+	return FixedPoint(mantissa);
+}
+
+// 48-bit to 24-bit unsigned integer square root.
+//   Returns the root of square rounded down.
+static uint64_t integer_squareRoot_U48(uint64_t square) {
+	// Even though a double is used, the C++ standard guarantees exact results.
+	// Source: https://en.cppreference.com/w/cpp/numeric/math/sqrt
+	//   "std::sqrt is required by the IEEE standard to be exact.
+	//    The only other operations required to be exact are the arithmetic
+	//    operators and the function std::fma. After rounding to the return
+	//    type (using default rounding mode), the result of std::sqrt is
+	//    indistinguishable from the infinitely precise result.
+	//    In other words, the error is less than 0.5 ulp."
+	return (uint64_t)(std::sqrt((double)square));
+}
+
+FixedPoint dsr::fixedPoint_squareRoot(const FixedPoint& value) {
+	int64_t mantissa = value.getMantissa();
+	if (mantissa <= 0) {
+		// The real part of 0 + i * sqrt(value) is always zero
+		return FixedPoint(0);
+	} else {
+		return FixedPoint(integer_squareRoot_U48(((uint64_t)mantissa) << 16));
+	}
+}

+ 184 - 0
Source/DFPSR/math/FixedPoint.h

@@ -0,0 +1,184 @@
+
+// zlib open source license
+//
+// Copyright (c) 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_FIXED_POINT
+#define DFPSR_FIXED_POINT
+
+#include "../base/text.h"
+
+namespace dsr {
+
+// One extra unit in early clamping allows fractions to extend the range further
+// int16_t goes from -32768 to +32767, but with additional fractions one can get close to the -32769 to 32768 range
+inline void clampForSaturatedWhole(int64_t& value) {
+	if (value > 32768) { value = 32768; }
+	if (value < -32769) { value = -32769; }
+}
+inline void clampForInt32(int64_t& value) {
+	if (value > 2147483647) { value = 2147483647; }
+	if (value < -2147483648) { value = -2147483648; }
+}
+
+// A deterministic saturated fixed point number for graphics and virtual machines.
+//   Uses 16-bits for whole signed integers and 16-bits for the remaining 1/65536 fractions.
+//   The fromMantissa constructor can be used to store 32-bit indices directly in the mantissa.
+//     If used as a value, the index is taken as 1/65536 fractions.
+//     Retrieve correctly using getMantissa.
+//   Default initialized to zero for convenience.
+struct FixedPoint {
+private:
+	int32_t mantissa = 0;
+public:
+	FixedPoint();
+	// TODO: Can comparisons use an implicit conversion from whole integers to reduce complexity?
+	explicit FixedPoint(int64_t newMantissa);
+	static FixedPoint fromWhole(int64_t wholeInteger);
+	static FixedPoint fromMantissa(int64_t mantissa);
+	static FixedPoint zero();
+	static FixedPoint epsilon();
+	static FixedPoint half();
+	static FixedPoint one();
+	static FixedPoint fromText(const ReadableString& content);
+	inline int64_t getMantissa() const {
+		return (int64_t)this->mantissa;
+	}
+};
+
+String& string_toStreamIndented(String& target, const FixedPoint& value, const ReadableString& indentation);
+
+// Addition and subtraction is faster against its own type, by being in the same scale
+inline FixedPoint operator+(const FixedPoint &left, const FixedPoint &right) {
+	return FixedPoint(left.getMantissa() + right.getMantissa());
+}
+inline FixedPoint operator+(const FixedPoint &left, int32_t right) {
+	return FixedPoint(left.getMantissa() + (right * 65536));
+}
+inline FixedPoint operator+(int32_t left, const FixedPoint &right) {
+	return FixedPoint((left * 65536) + right.getMantissa());
+}
+inline FixedPoint operator-(const FixedPoint &left, const FixedPoint &right) {
+	return FixedPoint(left.getMantissa() - right.getMantissa());
+}
+inline FixedPoint operator-(const FixedPoint &left, int32_t right) {
+	return FixedPoint(left.getMantissa() - (right * 65536));
+}
+inline FixedPoint operator-(int32_t left, const FixedPoint &right) {
+	return FixedPoint((left * 65536) - right.getMantissa());
+}
+
+// Multiplication is faster against whole integers, by not having to reduce the result
+inline FixedPoint operator*(const FixedPoint &left, const FixedPoint &right) {
+	return FixedPoint((left.getMantissa() * right.getMantissa()) / 65536);
+}
+inline FixedPoint operator*(const FixedPoint &left, int64_t right) {
+	clampForSaturatedWhole(right);
+	return FixedPoint(left.getMantissa() * right);
+}
+inline FixedPoint operator*(int64_t left, const FixedPoint &right) {
+	clampForSaturatedWhole(left);
+	return FixedPoint(left * right.getMantissa());
+}
+
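To spell out the scale bookkeeping behind the operators above (a sketch with arbitrary values): two 16.16 mantissas multiply to a doubled scale, so one division by 65536 restores the format, whereas a whole-integer factor leaves the scale untouched.

// Sketch: mantissas carry a factor of 65536, so FixedPoint * FixedPoint divides it out once.
static void scaleSketch() {
	FixedPoint a = FixedPoint::fromText(U"1.5"); // mantissa  98304
	FixedPoint b = FixedPoint::fromText(U"2.0"); // mantissa 131072
	FixedPoint c = a * b;                        // (98304 * 131072) / 65536 = 196608, which is 3.0
	FixedPoint d = a * 4;                        // 98304 * 4 = 393216, which is 6.0, no reduction needed
	(void)c; (void)d;
}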
+int32_t fixedPoint_round(const FixedPoint& value);
+double fixedPoint_approximate(const FixedPoint& value);
+
+FixedPoint fixedPoint_min(const FixedPoint &left, const FixedPoint &right);
+FixedPoint fixedPoint_max(const FixedPoint &left, const FixedPoint &right);
+FixedPoint fixedPoint_divide(const FixedPoint &left, const FixedPoint &right);
+FixedPoint fixedPoint_divide(const FixedPoint &left, int64_t right);
+inline FixedPoint operator/(const FixedPoint &left, const FixedPoint &right) {
+	return fixedPoint_divide(left, right);
+}
+inline FixedPoint operator/(const FixedPoint &left, int64_t right) {
+	return fixedPoint_divide(left, right);
+}
+inline FixedPoint operator/(int64_t left, const FixedPoint &right) {
+	return fixedPoint_divide(FixedPoint::fromWhole(left), right);
+}
+
+// Gets the real element of value's square root.
+//   Because square roots of negative numbers are only using the imaginary dimension, this results in zero for all non-positive inputs.
+FixedPoint fixedPoint_squareRoot(const FixedPoint& value);
+
+inline bool operator==(const FixedPoint &left, const FixedPoint &right) {
+	return left.getMantissa() == right.getMantissa();
+}
+inline bool operator==(const FixedPoint &left, int64_t right) {
+	return left.getMantissa() == right * 65536;
+}
+inline bool operator==(int64_t left, const FixedPoint &right) {
+	return left * 65536 == right.getMantissa();
+}
+inline bool operator!=(const FixedPoint &left, const FixedPoint &right) {
+	return left.getMantissa() != right.getMantissa();
+}
+inline bool operator!=(const FixedPoint &left, int64_t right) {
+	return left.getMantissa() != right * 65536;
+}
+inline bool operator!=(int64_t left, const FixedPoint &right) {
+	return left * 65536 != right.getMantissa();
+}
+inline bool operator>(const FixedPoint &left, const FixedPoint &right) {
+	return left.getMantissa() > right.getMantissa();
+}
+inline bool operator>(const FixedPoint &left, int64_t right) {
+	return left.getMantissa() > right * 65536;
+}
+inline bool operator>(int64_t left, const FixedPoint &right) {
+	return left * 65536 > right.getMantissa();
+}
+inline bool operator<(const FixedPoint &left, const FixedPoint &right) {
+	return left.getMantissa() < right.getMantissa();
+}
+inline bool operator<(const FixedPoint &left, int64_t right) {
+	return left.getMantissa() < right * 65536;
+}
+inline bool operator<(int64_t left, const FixedPoint &right) {
+	return left * 65536 < right.getMantissa();
+}
+inline bool operator>=(const FixedPoint &left, const FixedPoint &right) {
+	return left.getMantissa() >= right.getMantissa();
+}
+inline bool operator>=(const FixedPoint &left, int64_t right) {
+	return left.getMantissa() >= right * 65536;
+}
+inline bool operator>=(int64_t left, const FixedPoint &right) {
+	return left * 65536 >= right.getMantissa();
+}
+inline bool operator<=(const FixedPoint &left, const FixedPoint &right) {
+	return left.getMantissa() <= right.getMantissa();
+}
+inline bool operator<=(const FixedPoint &left, int64_t right) {
+	return left.getMantissa() <= right * 65536;
+}
+inline bool operator<=(int64_t left, const FixedPoint &right) {
+	return left * 65536 <= right.getMantissa();
+}
+
+
+}
+
+#endif
+
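The 1/65536 scale above means that a whole number n is stored as the mantissa n * 65536, that a product of two fixed-point values has to be divided by 65536 once, and that a dividend has to be pre-multiplied by 65536 before dividing. A minimal standalone sketch of that 16.16 convention, using plain integers instead of the FixedPoint class:

#include <stdint.h>
#include <stdio.h>

// 16.16 fixed point stored as a raw mantissa, mirroring the convention used by FixedPoint.
static int64_t toFixed(int64_t whole) { return whole * 65536; }
static int64_t mulFixed(int64_t a, int64_t b) { return (a * b) / 65536; }
static int64_t divFixed(int64_t a, int64_t b) { return (a * 65536) / b; }
static double approximate(int64_t mantissa) { return (double)mantissa / 65536.0; }

int main() {
	int64_t three = toFixed(3);
	int64_t half = 32768; // 0.5, because 32768 / 65536 = 0.5
	printf("3 * 0.5 = %f\n", approximate(mulFixed(three, half)));       // 1.500000
	printf("3 / 2   = %f\n", approximate(divFixed(three, toFixed(2)))); // 1.500000
	return 0;
}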

+ 110 - 0
Source/DFPSR/math/IRect.h

@@ -0,0 +1,110 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_IRECT
+#define DFPSR_GEOMETRY_IRECT
+
+#include <stdint.h>
+#include <math.h>
+#include "IVector.h"
+
+namespace dsr {
+
+class IRect {
+private:
+	int32_t l, t, w, h;
+public:
+	IRect() : l(0), t(0), w(0), h(0) {}
+	IRect(int32_t left, int32_t top, int32_t width, int32_t height) : l(left), t(top), w(width), h(height) {}
+public:
+	int32_t left() const { return this->l; }
+	int32_t top() const { return this->t; }
+	int32_t width() const { return this->w; }
+	int32_t height() const { return this->h; }
+	int32_t right() const { return this->l + this->w; }
+	int32_t bottom() const { return this->t + this->h; }
+	IVector2D size() const { return IVector2D(this->w, this->h); }
+	int32_t area() const { return this->w * this->h; }
+	IVector2D upperLeft() const { return IVector2D(this->l, this->t); }
+	IVector2D upperRight() const { return IVector2D(this->l + this->w, this->t); }
+	IVector2D lowerLeft() const { return IVector2D(this->l, this->t + this->h); }
+	IVector2D lowerRight() const { return IVector2D(this->l + this->w, this->t + this->h); }
+	bool hasArea() const { return this->w > 0 && this->h > 0; }
+	IRect expanded(int units) const { return IRect(this->l - units, this->t - units, this->w + units * 2, this->h + units * 2); }
+	// Returns the intersection between a and b or a rectangle that has no area if overlaps(a, b) is false
+	static IRect cut(const IRect &a, const IRect &b) {
+		int32_t left = std::max(a.left(), b.left());
+		int32_t top = std::max(a.top(), b.top());
+		int32_t right = std::min(a.right(), b.right());
+		int32_t bottom = std::min(a.bottom(), b.bottom());
+		return IRect(left, top, right - left, bottom - top);
+	}
+	// Returns true iff the rectangles have an overlapping area
+	// Equivalent to cut(a, b).hasArea()
+	static bool overlaps(const IRect& a, const IRect& b) {
+		return a.left() < b.right() && a.right() > b.left() && a.top() < b.bottom() && a.bottom() > b.top();
+	}
+	// Returns true iff the rectangles touch, including shared edges and corners
+	static inline bool touches(const IRect& a, const IRect& b) {
+		return a.left() <= b.right() && a.right() >= b.left() && a.top() <= b.bottom() && a.bottom() >= b.top();
+	}
+	// Create the rectangle from bounds, where right and bottom are exclusive
+	static IRect FromBounds(int32_t left, int32_t top, int32_t right, int32_t bottom) {
+		return IRect(left, top, right - left, bottom - top);
+	}
+	// Create the rectangle from a size
+	static IRect FromSize(int32_t width, int32_t height) {
+		return IRect(0, 0, width, height);
+	}
+	static IRect FromSize(IVector2D size) {
+		return IRect(0, 0, size.x, size.y);
+	}
+};
+
+// Move without resizing
+inline IRect operator+(const IRect &old, const IVector2D &offset) {
+	return IRect(old.left() + offset.x, old.top() + offset.y, old.width(), old.height());
+}
+
+// Scale everything around origin
+inline IRect operator*(const IRect &old, int32_t scalar) {
+	return IRect(old.left() * scalar, old.top() * scalar, old.width() * scalar, old.height() * scalar);
+}
+
+// Check equality
+inline bool operator==(const IRect &a, const IRect &b) {
+	return a.left() == b.left() && a.top() == b.top() && a.width() == b.width() && a.height() == b.height();
+}
+inline bool operator!=(const IRect &a, const IRect &b) {
+	return !(a == b);
+}
+
+inline String& string_toStreamIndented(String& target, const IRect& source, const ReadableString& indentation) {
+	string_append(target, indentation, U"(", source.left(), U",", source.top(), U",", source.width(), U",", source.height(), U")");
+	return target;
+}
+
+}
+
+#endif
+
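A typical use of cut is clipping a drawing region against an image bound before looping over pixels. A standalone sketch of the same intersection logic on plain integers, without depending on the IRect class:

#include <algorithm>
#include <stdio.h>

// Intersection of two left/top/width/height rectangles, the same logic as IRect::cut.
struct Rect { int l, t, w, h; };

static Rect cut(const Rect &a, const Rect &b) {
	int left = std::max(a.l, b.l);
	int top = std::max(a.t, b.t);
	int right = std::min(a.l + a.w, b.l + b.w);
	int bottom = std::min(a.t + a.h, b.t + b.h);
	return Rect{left, top, right - left, bottom - top};
}

int main() {
	Rect image{0, 0, 640, 480};       // The full image bound
	Rect brush{600, -20, 100, 100};   // A region that is partially outside
	Rect clipped = cut(image, brush); // (600, 0, 40, 80)
	printf("(%d, %d, %d, %d)\n", clipped.l, clipped.t, clipped.w, clipped.h);
	// A non-positive width or height means that there is nothing left to draw.
	return 0;
}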

+ 71 - 0
Source/DFPSR/math/IVector.h

@@ -0,0 +1,71 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_IVECTOR
+#define DFPSR_GEOMETRY_IVECTOR
+
+#include "vectorMethods.h"
+
+namespace dsr {
+
+struct IVector2D {
+	VECTOR_BODY_2D(IVector2D, int32_t, 0);
+};
+struct IVector3D {
+	VECTOR_BODY_3D(IVector3D, int32_t, 0);
+};
+struct IVector4D {
+	VECTOR_BODY_4D(IVector4D, int32_t, 0);
+};
+
+inline int32_t dotProduct(const IVector2D &a, const IVector2D &b) {
+	return (a.x * b.x) + (a.y * b.y);
+}
+inline int32_t dotProduct(const IVector3D &a, const IVector3D &b) {
+	return (a.x * b.x) + (a.y * b.y) + (a.z * b.z);
+}
+inline int32_t dotProduct(const IVector4D &a, const IVector4D &b) {
+	return (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + (a.w * b.w);
+}
+
+inline IVector3D crossProduct(const IVector3D &a, const IVector3D &b) {
+	return IVector3D(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
+}
+
+OPERATORS_2D(IVector2D, int32_t);
+OPERATORS_3D(IVector3D, int32_t);
+OPERATORS_4D(IVector4D, int32_t);
+SIGNED_OPERATORS_2D(IVector2D, int32_t);
+SIGNED_OPERATORS_3D(IVector3D, int32_t);
+SIGNED_OPERATORS_4D(IVector4D, int32_t);
+EXACT_COMPARE_2D(IVector2D);
+EXACT_COMPARE_3D(IVector3D);
+EXACT_COMPARE_4D(IVector4D);
+SERIALIZATION_2D(IVector2D);
+SERIALIZATION_3D(IVector3D);
+SERIALIZATION_4D(IVector4D);
+
+}
+
+#endif
+
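As a quick sanity check of the products above: the cross product of two 3D vectors is orthogonal to both inputs, so its dot product with either input is zero. A standalone sketch with plain integer vectors:

#include <stdio.h>

// Plain integer 3D vector mirroring the dot and cross products defined for IVector3D.
struct V3 { int x, y, z; };

static int dot(const V3 &a, const V3 &b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
static V3 cross(const V3 &a, const V3 &b) {
	return V3{a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}

int main() {
	V3 a{1, 2, 0};
	V3 b{0, 3, 1};
	V3 n = cross(a, b); // (2, -1, 3)
	printf("n = (%d, %d, %d)\n", n.x, n.y, n.z);
	printf("dot(n, a) = %d, dot(n, b) = %d\n", dot(n, a), dot(n, b)); // 0, 0
	return 0;
}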

+ 57 - 0
Source/DFPSR/math/LVector.h

@@ -0,0 +1,57 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_LVECTOR
+#define DFPSR_GEOMETRY_LVECTOR
+
+#include "vectorMethods.h"
+
+namespace dsr {
+
+struct LVector2D {
+	VECTOR_BODY_2D(LVector2D, int64_t, 0);
+};
+struct LVector3D {
+	VECTOR_BODY_3D(LVector3D, int64_t, 0);
+};
+struct LVector4D {
+	VECTOR_BODY_4D(LVector4D, int64_t, 0);
+};
+
+OPERATORS_2D(LVector2D, int64_t);
+OPERATORS_3D(LVector3D, int64_t);
+OPERATORS_4D(LVector4D, int64_t);
+SIGNED_OPERATORS_2D(LVector2D, int64_t);
+SIGNED_OPERATORS_3D(LVector3D, int64_t);
+SIGNED_OPERATORS_4D(LVector4D, int64_t);
+EXACT_COMPARE_2D(LVector2D);
+EXACT_COMPARE_3D(LVector3D);
+EXACT_COMPARE_4D(LVector4D);
+SERIALIZATION_2D(LVector2D);
+SERIALIZATION_3D(LVector3D);
+SERIALIZATION_4D(LVector4D);
+
+}
+
+#endif
+

+ 91 - 0
Source/DFPSR/math/Transform3D.h

@@ -0,0 +1,91 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_TRANSFORM3D
+#define DFPSR_GEOMETRY_TRANSFORM3D
+
+#include "FVector.h"
+#include "FMatrix3x3.h"
+
+namespace dsr {
+
+class Transform3D {
+public:
+	FVector3D position;
+	FMatrix3x3 transform;
+	Transform3D() : position(0.0f, 0.0f, 0.0f), transform(FVector3D(1.0f, 0.0f, 0.0f), FVector3D(0.0f, 1.0f, 0.0f), FVector3D(0.0f, 0.0f, 1.0f)) {}
+	Transform3D(const FVector3D &position, const FMatrix3x3 &transform) :
+	  position(position),
+	  transform(transform) {}
+
+	// Transform the point by multiplying with the matrix
+	FVector3D transformPoint(const FVector3D &p) const {
+		return this->transform.transform(p) + this->position;
+	}
+	// Transform the vector by multiplying with the matrix
+	FVector3D transformVector(const FVector3D &p) const {
+		return this->transform.transform(p);
+	}
+	// Transform a point by multiplying (p - position) with the transpose of the 3x3 matrix
+	// The transpose is the inverse for axis aligned normalized matrices
+	// Precondition: The transform must be normalized and axis aligned (allows rotation but neither shear nor scaling)
+	FVector3D transformPointTransposedInverse(const FVector3D &p) const {
+		return this->transform.transformTransposed(p - this->position);
+	}
+};
+
+inline Transform3D operator*(const Transform3D &left, const Transform3D &right) {
+	return Transform3D(right.transformPoint(left.position), left.transform * right.transform);
+}
+
+// The determinant of a transform is the signed volume of a unit cube transformed by the matrix.
+//   Inside-out transforms (mirrored by negating one axis or by swapping two axes) have a negative volume.
+inline float determinant(const Transform3D& m) {
+	return determinant(m.transform);
+}
+
+inline Transform3D inverseUsingInvDet(const Transform3D& m, float invDet) {
+	Transform3D result;
+	result.transform.xAxis.x = invDet * (m.transform.yAxis.y * m.transform.zAxis.z - m.transform.yAxis.z * m.transform.zAxis.y);
+	result.transform.xAxis.y = -invDet * (m.transform.xAxis.y * m.transform.zAxis.z - m.transform.xAxis.z * m.transform.zAxis.y);
+	result.transform.xAxis.z = invDet * (m.transform.xAxis.y * m.transform.yAxis.z - m.transform.xAxis.z * m.transform.yAxis.y);
+	result.transform.yAxis.x = -invDet * (m.transform.yAxis.x * m.transform.zAxis.z - m.transform.yAxis.z * m.transform.zAxis.x);
+	result.transform.yAxis.y = invDet * (m.transform.xAxis.x * m.transform.zAxis.z - m.transform.xAxis.z * m.transform.zAxis.x);
+	result.transform.yAxis.z = -invDet * (m.transform.xAxis.x * m.transform.yAxis.z - m.transform.xAxis.z * m.transform.yAxis.x);
+	result.transform.zAxis.x = invDet * (m.transform.yAxis.x * m.transform.zAxis.y - m.transform.yAxis.y * m.transform.zAxis.x);
+	result.transform.zAxis.y = -invDet * (m.transform.xAxis.x * m.transform.zAxis.y - m.transform.xAxis.y * m.transform.zAxis.x);
+	result.transform.zAxis.z = invDet * (m.transform.xAxis.x * m.transform.yAxis.y - m.transform.xAxis.y * m.transform.yAxis.x);
+	result.position.x = -(m.position.x * result.transform.xAxis.x + m.position.y * result.transform.yAxis.x + m.position.z * result.transform.zAxis.x);
+	result.position.y = -(m.position.x * result.transform.xAxis.y + m.position.y * result.transform.yAxis.y + m.position.z * result.transform.zAxis.y);
+	result.position.z = -(m.position.x * result.transform.xAxis.z + m.position.y * result.transform.yAxis.z + m.position.z * result.transform.zAxis.z);
+	return result;
+}
+
+inline Transform3D inverse(const Transform3D& m) {
+	return inverseUsingInvDet(m, 1.0f / determinant(m));
+}
+
+}
+
+#endif
+
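inverseUsingInvDet above is the classic cofactor (adjugate) inverse scaled by the reciprocal of the determinant, followed by sending the negated position through the inverted matrix. A standalone sketch of the same cofactor pattern on a plain 3x3 float array, checking that M * M⁻¹ stays close to the identity; the FMatrix3x3 memory layout is not assumed here:

#include <stdio.h>

// 3x3 inverse through cofactors divided by the determinant.
static void invert3x3(const float m[3][3], float out[3][3]) {
	float det = m[0][0] * (m[1][1] * m[2][2] - m[1][2] * m[2][1])
	          - m[0][1] * (m[1][0] * m[2][2] - m[1][2] * m[2][0])
	          + m[0][2] * (m[1][0] * m[2][1] - m[1][1] * m[2][0]);
	float invDet = 1.0f / det;
	out[0][0] =  invDet * (m[1][1] * m[2][2] - m[1][2] * m[2][1]);
	out[0][1] = -invDet * (m[0][1] * m[2][2] - m[0][2] * m[2][1]);
	out[0][2] =  invDet * (m[0][1] * m[1][2] - m[0][2] * m[1][1]);
	out[1][0] = -invDet * (m[1][0] * m[2][2] - m[1][2] * m[2][0]);
	out[1][1] =  invDet * (m[0][0] * m[2][2] - m[0][2] * m[2][0]);
	out[1][2] = -invDet * (m[0][0] * m[1][2] - m[0][2] * m[1][0]);
	out[2][0] =  invDet * (m[1][0] * m[2][1] - m[1][1] * m[2][0]);
	out[2][1] = -invDet * (m[0][0] * m[2][1] - m[0][1] * m[2][0]);
	out[2][2] =  invDet * (m[0][0] * m[1][1] - m[0][1] * m[1][0]);
}

int main() {
	// A rotation around Z combined with a uniform scale of 2
	float m[3][3] = {{0.0f, -2.0f, 0.0f}, {2.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 2.0f}};
	float inv[3][3];
	invert3x3(m, inv);
	// Multiply m * inv and measure the largest deviation from the identity matrix
	float worst = 0.0f;
	for (int r = 0; r < 3; r++) {
		for (int c = 0; c < 3; c++) {
			float sum = 0.0f;
			for (int k = 0; k < 3; k++) { sum += m[r][k] * inv[k][c]; }
			float error = sum - (r == c ? 1.0f : 0.0f);
			if (error < 0.0f) { error = -error; }
			if (error > worst) { worst = error; }
		}
	}
	printf("largest error: %f\n", worst); // Should print a value very close to zero
	return 0;
}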

+ 57 - 0
Source/DFPSR/math/UVector.h

@@ -0,0 +1,57 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_UVECTOR
+#define DFPSR_GEOMETRY_UVECTOR
+
+#include "vectorMethods.h"
+
+namespace dsr {
+
+struct UVector2D {
+	VECTOR_BODY_2D(UVector2D, uint32_t, 0u);
+};
+struct UVector3D {
+	VECTOR_BODY_3D(UVector3D, uint32_t, 0u);
+};
+struct UVector4D {
+	VECTOR_BODY_4D(UVector4D, uint32_t, 0u);
+};
+
+OPERATORS_2D(UVector2D, uint32_t);
+OPERATORS_3D(UVector3D, uint32_t);
+OPERATORS_4D(UVector4D, uint32_t);
+SIGNED_OPERATORS_2D(UVector2D, uint32_t);
+SIGNED_OPERATORS_3D(UVector3D, uint32_t);
+SIGNED_OPERATORS_4D(UVector4D, uint32_t);
+EXACT_COMPARE_2D(UVector2D);
+EXACT_COMPARE_3D(UVector3D);
+EXACT_COMPARE_4D(UVector4D);
+SERIALIZATION_2D(UVector2D);
+SERIALIZATION_3D(UVector3D);
+SERIALIZATION_4D(UVector4D);
+
+}
+
+#endif
+

+ 15 - 0
Source/DFPSR/math/includeMath.h

@@ -0,0 +1,15 @@
+
+// Header for including the most commonly needed parts of the math framework
+
+#include "FVector.h"
+#include "UVector.h"
+#include "IVector.h"
+#include "LVector.h"
+#include "IRect.h"
+#include "FMatrix2x2.h"
+#include "FMatrix3x3.h"
+#include "Transform3D.h"
+#include "FPlane3D.h"
+#include "scalar.h"
+#include "FixedPoint.h"
+

+ 123 - 0
Source/DFPSR/math/scalar.h

@@ -0,0 +1,123 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_MATH_SCALAR
+#define DFPSR_MATH_SCALAR
+
+namespace dsr {
+
+// Preconditions:
+//   0 <= a <= 255
+//   0 <= b <= 255
+// Postconditions:
+//   Returns the normalized multiplication of a and b, where the 0..255 range represents decimal values from 0.0 to 1.0.
+//   The result may not be less than zero or larger than any of the inputs.
+// Examples:
+//   mulByte_8(0, 0) = 0
+//   mulByte_8(x, 0) = 0
+//   mulByte_8(0, x) = 0
+//   mulByte_8(x, 255) = x
+//   mulByte_8(255, x) = x
+//   mulByte_8(255, 255) = 255
+static inline uint32_t mulByte_8(uint32_t a, uint32_t b) {
+	// Approximate the reciprocal of an unsigned byte's maximum value 255 for normalization
+	//   256³ / 255 ≈ 65793
+	// Truncation goes down, so add half a unit before rounding to get the closest value
+	//   2^24 / 2 = 8388608
+	// No overflow for unsigned 32-bit integers
+	//   255² * 65793 + 8388608 = 4286578433 < 2^32
+	return (a * b * 65793 + 8388608) >> 24;
+}
+
+// Returns a modulo b where 0 <= a < b
+inline int signedModulo(int a, int b) {
+	int result = 0;
+	if (b > 0) {
+		if (a >= 0) {
+			result = a % b; // Simple modulo
+		} else {
+			result = (b - (-a % b)) % b; // Negative modulo
+		}
+	}
+	return result;
+}
+
+// Round size up to the nearest multiple of alignment
+inline int roundUp(int size, int alignment) {
+	return size + (alignment - 1) - signedModulo(size - 1, alignment);
+}
+
+// Round size down to the nearest multiple of alignment
+inline int roundDown(int size, int alignment) {
+	return size - signedModulo(size, alignment);
+}
+
+inline float absDiff(float a, float b) {
+	float result = a - b;
+	if (result < 0.0f) {
+		result = -result;
+	}
+	return result;
+}
+
+inline uint8_t absDiff(uint8_t a, uint8_t b) {
+	int result = (int)a - (int)b;
+	if (result < 0) {
+		result = -result;
+	}
+	return (uint8_t)result;
+}
+
+inline uint16_t absDiff(uint16_t a, uint16_t b) {
+	int result = (int)a - (int)b;
+	if (result < 0) {
+		result = -result;
+	}
+	return (uint16_t)result;
+}
+
+template <typename T>
+inline void swap(T &a, T &b) {
+	T temp = a;
+	a = b;
+	b = temp;
+}
+
+// True iff high and low bytes are equal
+//   Equivalent to value % 257 == 0 because A + B * 256 = A * 257 when A = B.
+inline bool isUniformByteU16(uint16_t value) {
+	return (value & 0x00FF) == ((value & 0xFF00) >> 8);
+}
+
+inline int64_t safeRoundInt64(float value) {
+	// Only keep values within reasonable bounds that will not overflow from a few multiplications
+	if (value > -1048576.0f && value < 1048576.0f) {
+		return (int64_t)value;
+	} else {
+		// Out of range, infinity or NaN
+		return 0;
+	}
+}
+
+}
+
+#endif
+
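The constant 65793 used by mulByte_8 is 256³ / 255 rounded to the nearest integer, a fixed-point reciprocal of 255, and 8388608 is half of 2²⁴ so that the final shift rounds to nearest. A small exhaustive check of all 256 × 256 input pairs against exact rounded division; the reference (a * b + 127) / 255 is an assumption about the intended rounding, and a tie can never occur because 255 is odd:

#include <stdint.h>
#include <stdio.h>

// The same rounding trick as mulByte_8: multiply by a fixed-point reciprocal of 255.
static inline uint32_t mulByte_8(uint32_t a, uint32_t b) {
	return (a * b * 65793 + 8388608) >> 24;
}

int main() {
	int mismatches = 0;
	for (uint32_t a = 0; a < 256; a++) {
		for (uint32_t b = 0; b < 256; b++) {
			uint32_t exact = (a * b + 127) / 255; // Round to nearest, no exact ties possible
			if (mulByte_8(a, b) != exact) { mismatches++; }
		}
	}
	printf("mismatches: %d\n", mismatches); // Expected to print 0
	return 0;
}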

+ 255 - 0
Source/DFPSR/math/vectorMethods.h

@@ -0,0 +1,255 @@
+// zlib open source license
+//
+// Copyright (c) 2017 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#ifndef DFPSR_GEOMETRY_VECTOR_METHODS
+#define DFPSR_GEOMETRY_VECTOR_METHODS
+
+#include <stdint.h>
+#include <cassert>
+#include <math.h>
+#include "../base/text.h"
+
+// Since using templates for vector operands may generate unwanted function
+// definitions that do not make sense and would crash when called,
+// these macros allow picking exactly the methods that make sense for each element
+// type and asserting that all of them can be called in every possible combination.
+
+#define VECTOR_BODY_2D(VECTOR_TYPE, ELEMENT_TYPE, DEFAULT_VALUE) \
+ELEMENT_TYPE x, y; \
+VECTOR_TYPE() : x(DEFAULT_VALUE), y(DEFAULT_VALUE) {} \
+VECTOR_TYPE(ELEMENT_TYPE x, ELEMENT_TYPE y) : x(x), y(y) {} \
+explicit VECTOR_TYPE(ELEMENT_TYPE s) : x(s), y(s) {} \
+ELEMENT_TYPE& operator[] (int index) { \
+	assert(index >= 0 && index < 2); \
+	if (index <= 0) { \
+		return this->x; \
+	} else { \
+		return this->y; \
+	} \
+}
+
+#define VECTOR_BODY_3D(VECTOR_TYPE, ELEMENT_TYPE, DEFAULT_VALUE) \
+ELEMENT_TYPE x, y, z; \
+VECTOR_TYPE() : x(DEFAULT_VALUE), y(DEFAULT_VALUE), z(DEFAULT_VALUE) {} \
+VECTOR_TYPE(ELEMENT_TYPE x, ELEMENT_TYPE y, ELEMENT_TYPE z) : x(x), y(y), z(z) {} \
+explicit VECTOR_TYPE(ELEMENT_TYPE s) : x(s), y(s), z(s) {} \
+ELEMENT_TYPE& operator[] (int index) { \
+	assert(index >= 0 && index < 3); \
+	if (index <= 0) { \
+		return this->x; \
+	} else if (index == 1) { \
+		return this->y; \
+	} else { \
+		return this->z; \
+	} \
+}
+
+#define VECTOR_BODY_4D(VECTOR_TYPE, ELEMENT_TYPE, DEFAULT_VALUE) \
+ELEMENT_TYPE x, y, z, w; \
+VECTOR_TYPE() : x(DEFAULT_VALUE), y(DEFAULT_VALUE), z(DEFAULT_VALUE), w(DEFAULT_VALUE) {} \
+VECTOR_TYPE(ELEMENT_TYPE x, ELEMENT_TYPE y, ELEMENT_TYPE z, ELEMENT_TYPE w) : x(x), y(y), z(z), w(w) {} \
+explicit VECTOR_TYPE(ELEMENT_TYPE s) : x(s), y(s), z(s), w(s) {} \
+ELEMENT_TYPE& operator[] (int index) { \
+	assert(index >= 0 && index < 4); \
+	if (index <= 0) { \
+		return this->x; \
+	} else if (index == 1) { \
+		return this->y; \
+	} else if (index == 2) { \
+		return this->z; \
+	} else { \
+		return this->w; \
+	} \
+}
+
+#define OPERATORS_2D(VECTOR_TYPE, ELEMENT_TYPE) \
+inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x + right.x, left.y + right.y); \
+} \
+inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x + right, left.y + right); \
+} \
+inline VECTOR_TYPE operator+(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left + right.x, left + right.y); \
+} \
+inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x - right.x, left.y - right.y); \
+} \
+inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x - right, left.y - right); \
+} \
+inline VECTOR_TYPE operator-(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left - right.x, left - right.y); \
+} \
+inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x * right.x, left.y * right.y); \
+} \
+inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x * right, left.y * right); \
+} \
+inline VECTOR_TYPE operator*(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left * right.x, left * right.y); \
+} \
+inline VECTOR_TYPE operator/(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x / right.x, left.y / right.y); \
+} \
+inline VECTOR_TYPE operator/(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x / right, left.y / right); \
+} \
+inline VECTOR_TYPE operator/(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left / right.x, left / right.y); \
+}
+
+#define OPERATORS_3D(VECTOR_TYPE, ELEMENT_TYPE) \
+inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x + right.x, left.y + right.y, left.z + right.z); \
+} \
+inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x + right, left.y + right, left.z + right); \
+} \
+inline VECTOR_TYPE operator+(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left + right.x, left + right.y, left + right.z); \
+} \
+inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x - right.x, left.y - right.y, left.z - right.z); \
+} \
+inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x - right, left.y - right, left.z - right); \
+} \
+inline VECTOR_TYPE operator-(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left - right.x, left - right.y, left - right.z); \
+} \
+inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x * right.x, left.y * right.y, left.z * right.z); \
+} \
+inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x * right, left.y * right, left.z * right); \
+} \
+inline VECTOR_TYPE operator*(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left * right.x, left * right.y, left * right.z); \
+} \
+inline VECTOR_TYPE operator/(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x / right.x, left.y / right.y, left.z / right.z); \
+} \
+inline VECTOR_TYPE operator/(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x / right, left.y / right, left.z / right); \
+} \
+inline VECTOR_TYPE operator/(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left / right.x, left / right.y, left / right.z); \
+}
+
+#define OPERATORS_4D(VECTOR_TYPE, ELEMENT_TYPE) \
+inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x + right.x, left.y + right.y, left.z + right.z, left.w + right.w); \
+} \
+inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x + right, left.y + right, left.z + right, left.w + right); \
+} \
+inline VECTOR_TYPE operator+(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left + right.x, left + right.y, left + right.z, left + right.w); \
+} \
+inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x - right.x, left.y - right.y, left.z - right.z, left.w - right.w); \
+} \
+inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x - right, left.y - right, left.z - right, left.w - right); \
+} \
+inline VECTOR_TYPE operator-(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left - right.x, left - right.y, left - right.z, left - right.w); \
+} \
+inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x * right.x, left.y * right.y, left.z * right.z, left.w * right.w); \
+} \
+inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x * right, left.y * right, left.z * right, left.w * right); \
+} \
+inline VECTOR_TYPE operator*(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left * right.x, left * right.y, left * right.z, left * right.w); \
+} \
+inline VECTOR_TYPE operator/(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left.x / right.x, left.y / right.y, left.z / right.z, left.w / right.w); \
+} \
+inline VECTOR_TYPE operator/(const VECTOR_TYPE &left, ELEMENT_TYPE right) { \
+	return VECTOR_TYPE(left.x / right, left.y / right, left.z / right, left.w / right); \
+} \
+inline VECTOR_TYPE operator/(ELEMENT_TYPE left, const VECTOR_TYPE &right) { \
+	return VECTOR_TYPE(left / right.x, left / right.y, left / right.z, left / right.w); \
+}
+
+#define SIGNED_OPERATORS_2D(VECTOR_TYPE, ELEMENT_TYPE) \
+inline VECTOR_TYPE operator-(const VECTOR_TYPE &v) { \
+	return VECTOR_TYPE(-v.x, -v.y); \
+}
+
+#define SIGNED_OPERATORS_3D(VECTOR_TYPE, ELEMENT_TYPE) \
+inline VECTOR_TYPE operator-(const VECTOR_TYPE &v) { \
+	return VECTOR_TYPE(-v.x, -v.y, -v.z); \
+}
+
+#define SIGNED_OPERATORS_4D(VECTOR_TYPE, ELEMENT_TYPE) \
+inline VECTOR_TYPE operator-(const VECTOR_TYPE &v) { \
+	return VECTOR_TYPE(-v.x, -v.y, -v.z, -v.w); \
+}
+
+#define OPPOSITE_COMPARE_2D(VECTOR_TYPE) \
+inline bool operator!=(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return !(left == right); \
+}
+
+#define EXACT_COMPARE_2D(VECTOR_TYPE) \
+inline bool operator==(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return left.x == right.x && left.y == right.y; \
+} \
+OPPOSITE_COMPARE_2D(VECTOR_TYPE)
+
+#define EXACT_COMPARE_3D(VECTOR_TYPE) \
+inline bool operator==(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return left.x == right.x && left.y == right.y && left.z == right.z; \
+} \
+OPPOSITE_COMPARE_2D(VECTOR_TYPE)
+
+#define EXACT_COMPARE_4D(VECTOR_TYPE) \
+inline bool operator==(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
+	return left.x == right.x && left.y == right.y && left.z == right.z && left.w == right.w; \
+} \
+OPPOSITE_COMPARE_2D(VECTOR_TYPE)
+
+#define SERIALIZATION_2D(VECTOR_TYPE) \
+inline String& string_toStreamIndented(String& target, const VECTOR_TYPE& source, const ReadableString& indentation) { \
+	string_append(target, indentation, source.x, U",", source.y); \
+	return target; \
+}
+
+#define SERIALIZATION_3D(VECTOR_TYPE) \
+inline String& string_toStreamIndented(String& target, const VECTOR_TYPE& source, const ReadableString& indentation) { \
+	string_append(target, indentation, source.x, U",", source.y, U",", source.z); \
+	return target; \
+}
+
+#define SERIALIZATION_4D(VECTOR_TYPE) \
+inline String& string_toStreamIndented(String& target, const VECTOR_TYPE& source, const ReadableString& indentation) { \
+	string_append(target, indentation, source.x, U",", source.y, U",", source.z, U",", source.w); \
+	return target; \
+}
+
+#endif
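For readers unfamiliar with the macro style, this is roughly what VECTOR_BODY_2D and two of the OPERATORS_2D entries expand to for a hypothetical double-based vector; the serialization macro is left out because it depends on the String type:

#include <cassert>

// Hand-expanded sketch of VECTOR_BODY_2D(DVector2D, double, 0.0) plus two operators from OPERATORS_2D.
struct DVector2D {
	double x, y;
	DVector2D() : x(0.0), y(0.0) {}
	DVector2D(double x, double y) : x(x), y(y) {}
	explicit DVector2D(double s) : x(s), y(s) {}
	double& operator[] (int index) {
		assert(index >= 0 && index < 2);
		if (index <= 0) {
			return this->x;
		} else {
			return this->y;
		}
	}
};

inline DVector2D operator+(const DVector2D &left, const DVector2D &right) {
	return DVector2D(left.x + right.x, left.y + right.y);
}
inline DVector2D operator*(const DVector2D &left, double right) {
	return DVector2D(left.x * right, left.y * right);
}

int main() {
	DVector2D v = (DVector2D(1.0, 2.0) + DVector2D(3.0, 4.0)) * 0.5;
	return (v[0] == 2.0 && v[1] == 3.0) ? 0 : 1;
}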

+ 183 - 0
Source/DFPSR/persistent/ClassFactory.cpp

@@ -0,0 +1,183 @@
+// zlib open source license
+//
+// Copyright (c) 2018 to 2019 David Forsgren Piuva
+// 
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// 
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 
+//    1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 
+//    2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 
+//    3. This notice may not be removed or altered from any source
+//    distribution.
+
+#include "ClassFactory.h"
+
+using namespace dsr;
+
+// A global list of registered persistent classes
+struct ConstructorInfo {
+public:
+	String type;
+	decltype(&classConstructor) defaultConstructor;
+public:
+	ConstructorInfo(String type, decltype(&classConstructor) defaultConstructor) : type(type), defaultConstructor(defaultConstructor) {}
+};
+static List<ConstructorInfo> persistentClasses;
+
+std::shared_ptr<StructureDefinition> Persistent::getStructure() const {
+	return std::shared_ptr<StructureDefinition>();
+}
+
+static int findPersistentClass(const String &type) {
+	for (int i = 0; i < persistentClasses.length(); i++) {
+		if (string_match(persistentClasses[i].type, type)) {
+			return i;
+		}
+	}
+	return -1;
+}
+
+String Persistent::getClassName() const {
+	return this->getStructure()->name;
+}
+
+void Persistent::registerPersistentClass() {
+	int existingIndex = findPersistentClass(this->getClassName());
+	// If a class of the name doesn't already exist
+	if (existingIndex == -1) {
+		// Register its constructor using the name
+		persistentClasses.push(ConstructorInfo(this->getClassName(), this->getConstructor()));
+	}
+}
+
+bool Persistent::addChild(std::shared_ptr<Persistent> child) {
+	return false;
+}
+
+int Persistent::getChildCount() const {
+	return 0;
+}
+
+std::shared_ptr<Persistent> Persistent::getChild(int index) const {
+	return std::shared_ptr<Persistent>();
+}
+
+void Persistent::setProperty(const ReadableString &key, const ReadableString &value) {
+	Persistent* target = this->findAttribute(key);
+	if (target == nullptr) {
+		printText("setProperty: ", key, " in ", this->getClassName(), " could not be found.\n");
+	} else {
+		if (!target->assignValue(value)) {
+			printText("setProperty: The input ", value, " could not be assigned to property ", key, " because of incorrect format.\n");
+		}
+	}
+}
+
+Persistent* Persistent::findAttribute(const ReadableString &name) {
+	return nullptr;
+}
+
+void Persistent::declareAttributes(StructureDefinition &target) const {}
+
+bool Persistent::assignValue(const ReadableString &content) {
+	printText("Warning! assignValue is not implemented for ", this->getClassName(), ".\n");
+	return false;
+}
+
+String& Persistent::toStreamIndented(String& out, const ReadableString& indentation) const {
+	std::shared_ptr<StructureDefinition> structure = this->getStructure();
+	if (structure.get() == nullptr) {
+		throwError(U"Failed to get the structure of a class being serialized.\n");
+	}
+	string_append(out, indentation, U"Begin : ", structure->name, U"\n");
+	String nextIndentation = indentation + U"	";
+	// Save parameters
+	for (int i = 0; i < structure->length(); i++) {
+		String name = structure->attributes[i].name;
+		Persistent* value = ((Persistent*)this)->findAttribute(name); // Override const
+		if (value == nullptr) {
+			printText("Warning! ", name, " in ", structure->name, " was declared but not found from findAttribute.\n");
+		} else {
+			string_append(out, nextIndentation, name, U" = ");
+			value->toStream(out);
+			string_append(out, U"\n");
+		}
+	}
+	// Save child objects
+	for (int c = 0; c < this->getChildCount(); c++) {
+		this->getChild(c)->toStreamIndented(out, nextIndentation);
+	}
+	string_append(out, indentation, U"End\n");
+	return out;
+}
+
+std::shared_ptr<Persistent> dsr::createPersistentClass(const String &type, bool mustExist) {
+	// Look for the component
+	int existingIndex = findPersistentClass(type);
+	if (existingIndex > -1) {
+		return persistentClasses[existingIndex].defaultConstructor();
+	}
+	if (mustExist) {
+		throwError(U"Failed to default create a class named ", type, U". Call registerPersistentClass on a temporary instance of the class to register the name.\n");
+	}
+	// Failed to load by name
+	return std::shared_ptr<Persistent>(); // Null
+}
+
+std::shared_ptr<Persistent> dsr::createPersistentClassFromText(const ReadableString &text) {
+	std::shared_ptr<Persistent> rootObject, newObject;
+	List<std::shared_ptr<Persistent>> stack;
+	List<ReadableString> lines = text.split(U'\n');
+	for (int l = 0; l < lines.length(); l++) {
+		ReadableString line = lines[l];
+		int equalityIndex = line.findFirst('=');
+		if (equalityIndex > -1) {
+			// Assignment
+			String key = string_removeAllWhiteSpace(line.before(equalityIndex));
+			String value = string_removeAllWhiteSpace(line.after(equalityIndex));
+			stack.last()->setProperty(key, value);
+		} else {
+			int colonIndex = line.findFirst(':');
+			if (colonIndex > -1) {
+				// Declaration
+				String keyword = string_removeAllWhiteSpace(line.before(colonIndex));
+				if (string_caseInsensitiveMatch(keyword, U"Begin")) {
+					String type = string_removeAllWhiteSpace(line.after(colonIndex));
+					newObject = dsr::createPersistentClass(type);
+					if (rootObject.get() == nullptr) {
+						rootObject = newObject;
+					} else {
+						if (!(stack.last()->addChild(newObject))) {
+							throwError(U"Failed to add a child object!\n");
+						}
+					}
+					stack.push(newObject);
+				}
+			} else {
+				// Single keyword or empty line
+				String keyword = string_removeAllWhiteSpace(line);
+				if (string_caseInsensitiveMatch(keyword, U"End")) {
+					if (stack.length() > 0) {
+						stack.pop();
+					} else {
+						throwError(U"Using end outside of root object!\n");
+					}
+				}
+			}
+		}
+	}
+	// Return the root object, which is null on failure
+	return rootObject;
+}
+
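createPersistentClassFromText above parses an indented block format: each object opens with a "Begin : TypeName" line, contains "key = value" assignments and nested child objects, and closes with "End". A hedged usage sketch, where MyNode is a hypothetical class that has already registered itself through registerPersistentClass and exposes a name attribute:

#include <memory>
#include "ClassFactory.h"

using namespace dsr;

// Hypothetical usage; MyNode only stands in for any class that has called registerPersistentClass.
void loadExample() {
	String source;
	string_append(source,
	    U"Begin : MyNode\n",
	    U"	name = root\n",
	    U"	Begin : MyNode\n",
	    U"		name = child\n",
	    U"	End\n",
	    U"End\n");
	std::shared_ptr<Persistent> root = createPersistentClassFromText(source);
	if (root.get() != nullptr) {
		printText(U"Loaded ", root->getClassName(), U" with ", root->getChildCount(), U" child.\n");
	}
}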

Some files are not shown because too many files were changed in this diff.