Browse Source

Updated external libraries

raysan5 4 years ago
parent
commit
700d448d75

+ 191 - 17
src/external/cgltf.h

@@ -1,7 +1,7 @@
 /**
 /**
  * cgltf - a single-file glTF 2.0 parser written in C99.
  * cgltf - a single-file glTF 2.0 parser written in C99.
  *
  *
- * Version: 1.10
+ * Version: 1.11
  *
  *
  * Website: https://github.com/jkuhlmann/cgltf
  * Website: https://github.com/jkuhlmann/cgltf
  *
  *
@@ -234,6 +234,12 @@ typedef enum cgltf_light_type {
 	cgltf_light_type_spot,
 	cgltf_light_type_spot,
 } cgltf_light_type;
 } cgltf_light_type;
 
 
+typedef enum cgltf_data_free_method {
+	cgltf_data_free_method_none,
+	cgltf_data_free_method_file_release,
+	cgltf_data_free_method_memory_free,
+} cgltf_data_free_method;
+
 typedef struct cgltf_extras {
 typedef struct cgltf_extras {
 	cgltf_size start_offset;
 	cgltf_size start_offset;
 	cgltf_size end_offset;
 	cgltf_size end_offset;
@@ -250,6 +256,7 @@ typedef struct cgltf_buffer
 	cgltf_size size;
 	cgltf_size size;
 	char* uri;
 	char* uri;
 	void* data; /* loaded by cgltf_load_buffers */
 	void* data; /* loaded by cgltf_load_buffers */
+	cgltf_data_free_method data_free_method;
 	cgltf_extras extras;
 	cgltf_extras extras;
 	cgltf_size extensions_count;
 	cgltf_size extensions_count;
 	cgltf_extension* extensions;
 	cgltf_extension* extensions;
@@ -372,6 +379,8 @@ typedef struct cgltf_texture
 	char* name;
 	char* name;
 	cgltf_image* image;
 	cgltf_image* image;
 	cgltf_sampler* sampler;
 	cgltf_sampler* sampler;
+	cgltf_bool has_basisu;
+	cgltf_image* basisu_image;
 	cgltf_extras extras;
 	cgltf_extras extras;
 	cgltf_size extensions_count;
 	cgltf_size extensions_count;
 	cgltf_extension* extensions;
 	cgltf_extension* extensions;
@@ -382,6 +391,7 @@ typedef struct cgltf_texture_transform
 	cgltf_float offset[2];
 	cgltf_float offset[2];
 	cgltf_float rotation;
 	cgltf_float rotation;
 	cgltf_float scale[2];
 	cgltf_float scale[2];
+	cgltf_bool has_texcoord;
 	cgltf_int texcoord;
 	cgltf_int texcoord;
 } cgltf_texture_transform;
 } cgltf_texture_transform;
 
 
@@ -595,6 +605,7 @@ typedef struct cgltf_light {
 	cgltf_float range;
 	cgltf_float range;
 	cgltf_float spot_inner_cone_angle;
 	cgltf_float spot_inner_cone_angle;
 	cgltf_float spot_outer_cone_angle;
 	cgltf_float spot_outer_cone_angle;
+	cgltf_extras extras;
 } cgltf_light;
 } cgltf_light;
 
 
 struct cgltf_node {
 struct cgltf_node {
@@ -768,6 +779,7 @@ cgltf_result cgltf_load_buffers(
 
 
 cgltf_result cgltf_load_buffer_base64(const cgltf_options* options, cgltf_size size, const char* base64, void** out_data);
 cgltf_result cgltf_load_buffer_base64(const cgltf_options* options, cgltf_size size, const char* base64, void** out_data);
 
 
+void cgltf_decode_string(char* string);
 void cgltf_decode_uri(char* uri);
 void cgltf_decode_uri(char* uri);
 
 
 cgltf_result cgltf_validate(cgltf_data* data);
 cgltf_result cgltf_validate(cgltf_data* data);
@@ -813,7 +825,7 @@ cgltf_result cgltf_copy_extras_json(const cgltf_data* data, const cgltf_extras*
 #include <limits.h> /* For UINT_MAX etc */
 #include <limits.h> /* For UINT_MAX etc */
 #include <float.h>  /* For FLT_MAX */
 #include <float.h>  /* For FLT_MAX */
 
 
-#if !defined(CGLTF_MALLOC) || !defined(CGLTF_FREE) || !defined(CGLTF_ATOI) || !defined(CGLTF_ATOF)
+#if !defined(CGLTF_MALLOC) || !defined(CGLTF_FREE) || !defined(CGLTF_ATOI) || !defined(CGLTF_ATOF) || !defined(CGLTF_ATOLL)
 #include <stdlib.h> /* For malloc, free, atoi, atof */
 #include <stdlib.h> /* For malloc, free, atoi, atof */
 #endif
 #endif
 
 
@@ -883,6 +895,9 @@ static const uint32_t GlbMagicBinChunk = 0x004E4942;
 #ifndef CGLTF_ATOF
 #ifndef CGLTF_ATOF
 #define CGLTF_ATOF(str) atof(str)
 #define CGLTF_ATOF(str) atof(str)
 #endif
 #endif
+#ifndef CGLTF_ATOLL
+#define CGLTF_ATOLL(str) atoll(str)
+#endif
 #ifndef CGLTF_VALIDATE_ENABLE_ASSERTS
 #ifndef CGLTF_VALIDATE_ENABLE_ASSERTS
 #define CGLTF_VALIDATE_ENABLE_ASSERTS 0
 #define CGLTF_VALIDATE_ENABLE_ASSERTS 0
 #endif
 #endif
@@ -932,7 +947,12 @@ static cgltf_result cgltf_default_file_read(const struct cgltf_memory_options* m
 	{
 	{
 		fseek(file, 0, SEEK_END);
 		fseek(file, 0, SEEK_END);
 
 
+#ifdef _WIN32
+		__int64 length = _ftelli64(file);
+#else
 		long length = ftell(file);
 		long length = ftell(file);
+#endif
+
 		if (length < 0)
 		if (length < 0)
 		{
 		{
 			fclose(file);
 			fclose(file);
@@ -1120,8 +1140,8 @@ cgltf_result cgltf_parse_file(const cgltf_options* options, const char* path, cg
 		return cgltf_result_invalid_options;
 		return cgltf_result_invalid_options;
 	}
 	}
 
 
-	void (*memory_free)(void*, void*) = options->memory.free ? options->memory.free : &cgltf_default_free;
 	cgltf_result (*file_read)(const struct cgltf_memory_options*, const struct cgltf_file_options*, const char*, cgltf_size*, void**) = options->file.read ? options->file.read : &cgltf_default_file_read;
 	cgltf_result (*file_read)(const struct cgltf_memory_options*, const struct cgltf_file_options*, const char*, cgltf_size*, void**) = options->file.read ? options->file.read : &cgltf_default_file_read;
+	void (*file_release)(const struct cgltf_memory_options*, const struct cgltf_file_options*, void* data) = options->file.release ? options->file.release : cgltf_default_file_release;
 
 
 	void* file_data = NULL;
 	void* file_data = NULL;
 	cgltf_size file_size = 0;
 	cgltf_size file_size = 0;
@@ -1135,7 +1155,7 @@ cgltf_result cgltf_parse_file(const cgltf_options* options, const char* path, cg
 
 
 	if (result != cgltf_result_success)
 	if (result != cgltf_result_success)
 	{
 	{
-		memory_free(options->memory.user_data, file_data);
+		file_release(&options->memory, &options->file, file_data);
 		return result;
 		return result;
 	}
 	}
 
 
@@ -1246,6 +1266,72 @@ static int cgltf_unhex(char ch)
 		-1;
 		-1;
 }
 }
 
 
+void cgltf_decode_string(char* string)
+{
+	char* read = strchr(string, '\\');
+	if (read == NULL)
+	{
+		return;
+	}
+	char* write = string;
+	char* last = string;
+
+	while (read)
+	{
+		// Copy characters since last escaped sequence
+		cgltf_size written = read - last;
+		strncpy(write, last, written);
+		write += written;
+
+		// jsmn already checked that all escape sequences are valid
+		++read;
+		switch (*read++)
+		{
+		case '\"': *write++ = '\"'; break;
+		case '/':  *write++ = '/';  break;
+		case '\\': *write++ = '\\'; break;
+		case 'b':  *write++ = '\b'; break;
+		case 'f':  *write++ = '\f'; break;
+		case 'r':  *write++ = '\r'; break;
+		case 'n':  *write++ = '\n'; break;
+		case 't':  *write++ = '\t'; break;
+		case 'u':
+		{
+			// UCS-2 codepoint \uXXXX to UTF-8
+			int character = 0;
+			for (cgltf_size i = 0; i < 4; ++i)
+			{
+				character = (character << 4) + cgltf_unhex(*read++);
+			}
+
+			if (character <= 0x7F)
+			{
+				*write++ = character & 0xFF;
+			}
+			else if (character <= 0x7FF)
+			{
+				*write++ = 0xC0 | ((character >> 6) & 0xFF);
+				*write++ = 0x80 | (character & 0x3F);
+			}
+			else
+			{
+				*write++ = 0xE0 | ((character >> 12) & 0xFF);
+				*write++ = 0x80 | ((character >> 6) & 0x3F);
+				*write++ = 0x80 | (character & 0x3F);
+			}
+			break;
+		}
+		default:
+			break;
+		}
+
+		last = read;
+		read = strchr(read, '\\');
+	}
+
+	strcpy(write, last);
+}
+
 void cgltf_decode_uri(char* uri)
 void cgltf_decode_uri(char* uri)
 {
 {
 	char* write = uri;
 	char* write = uri;
@@ -1291,6 +1377,7 @@ cgltf_result cgltf_load_buffers(const cgltf_options* options, cgltf_data* data,
 		}
 		}
 
 
 		data->buffers[0].data = (void*)data->bin;
 		data->buffers[0].data = (void*)data->bin;
+		data->buffers[0].data_free_method = cgltf_data_free_method_none;
 	}
 	}
 
 
 	for (cgltf_size i = 0; i < data->buffers_count; ++i)
 	for (cgltf_size i = 0; i < data->buffers_count; ++i)
@@ -1314,6 +1401,7 @@ cgltf_result cgltf_load_buffers(const cgltf_options* options, cgltf_data* data,
 			if (comma && comma - uri >= 7 && strncmp(comma - 7, ";base64", 7) == 0)
 			if (comma && comma - uri >= 7 && strncmp(comma - 7, ";base64", 7) == 0)
 			{
 			{
 				cgltf_result res = cgltf_load_buffer_base64(options, data->buffers[i].size, comma + 1, &data->buffers[i].data);
 				cgltf_result res = cgltf_load_buffer_base64(options, data->buffers[i].size, comma + 1, &data->buffers[i].data);
+				data->buffers[i].data_free_method = cgltf_data_free_method_memory_free;
 
 
 				if (res != cgltf_result_success)
 				if (res != cgltf_result_success)
 				{
 				{
@@ -1328,6 +1416,7 @@ cgltf_result cgltf_load_buffers(const cgltf_options* options, cgltf_data* data,
 		else if (strstr(uri, "://") == NULL && gltf_path)
 		else if (strstr(uri, "://") == NULL && gltf_path)
 		{
 		{
 			cgltf_result res = cgltf_load_buffer_file(options, data->buffers[i].size, uri, gltf_path, &data->buffers[i].data);
 			cgltf_result res = cgltf_load_buffer_file(options, data->buffers[i].size, uri, gltf_path, &data->buffers[i].data);
+			data->buffers[i].data_free_method = cgltf_data_free_method_file_release;
 
 
 			if (res != cgltf_result_success)
 			if (res != cgltf_result_success)
 			{
 			{
@@ -1655,10 +1744,15 @@ void cgltf_free(cgltf_data* data)
 	{
 	{
 		data->memory.free(data->memory.user_data, data->buffers[i].name);
 		data->memory.free(data->memory.user_data, data->buffers[i].name);
 
 
-		if (data->buffers[i].data != data->bin)
+		if (data->buffers[i].data_free_method == cgltf_data_free_method_file_release)
 		{
 		{
 			file_release(&data->memory, &data->file, data->buffers[i].data);
 			file_release(&data->memory, &data->file, data->buffers[i].data);
 		}
 		}
+		else if (data->buffers[i].data_free_method == cgltf_data_free_method_memory_free)
+		{
+			data->memory.free(data->memory.user_data, data->buffers[i].data);
+		}
+
 		data->memory.free(data->memory.user_data, data->buffers[i].uri);
 		data->memory.free(data->memory.user_data, data->buffers[i].uri);
 
 
 		cgltf_free_extensions(data, data->buffers[i].extensions, data->buffers[i].extensions_count);
 		cgltf_free_extensions(data, data->buffers[i].extensions, data->buffers[i].extensions_count);
@@ -2259,6 +2353,7 @@ cgltf_size cgltf_accessor_read_index(const cgltf_accessor* accessor, cgltf_size
 #define CGLTF_ERROR_LEGACY -3
 #define CGLTF_ERROR_LEGACY -3
 
 
 #define CGLTF_CHECK_TOKTYPE(tok_, type_) if ((tok_).type != (type_)) { return CGLTF_ERROR_JSON; }
 #define CGLTF_CHECK_TOKTYPE(tok_, type_) if ((tok_).type != (type_)) { return CGLTF_ERROR_JSON; }
+#define CGLTF_CHECK_TOKTYPE_RETTYPE(tok_, type_, ret_) if ((tok_).type != (type_)) { return (ret_)CGLTF_ERROR_JSON; }
 #define CGLTF_CHECK_KEY(tok_) if ((tok_).type != JSMN_STRING || (tok_).size == 0) { return CGLTF_ERROR_JSON; } /* checking size for 0 verifies that a value follows the key */
 #define CGLTF_CHECK_KEY(tok_) if ((tok_).type != JSMN_STRING || (tok_).size == 0) { return CGLTF_ERROR_JSON; } /* checking size for 0 verifies that a value follows the key */
 
 
 #define CGLTF_PTRINDEX(type, idx) (type*)((cgltf_size)idx + 1)
 #define CGLTF_PTRINDEX(type, idx) (type*)((cgltf_size)idx + 1)
@@ -2283,6 +2378,16 @@ static int cgltf_json_to_int(jsmntok_t const* tok, const uint8_t* json_chunk)
 	return CGLTF_ATOI(tmp);
 	return CGLTF_ATOI(tmp);
 }
 }
 
 
+static cgltf_size cgltf_json_to_size(jsmntok_t const* tok, const uint8_t* json_chunk)
+{
+	CGLTF_CHECK_TOKTYPE_RETTYPE(*tok, JSMN_PRIMITIVE, cgltf_size);
+	char tmp[128];
+	int size = (cgltf_size)(tok->end - tok->start) < sizeof(tmp) ? tok->end - tok->start : (int)(sizeof(tmp) - 1);
+	strncpy(tmp, (const char*)json_chunk + tok->start, size);
+	tmp[size] = 0;
+	return (cgltf_size)CGLTF_ATOLL(tmp);
+}
+
 static cgltf_float cgltf_json_to_float(jsmntok_t const* tok, const uint8_t* json_chunk)
 static cgltf_float cgltf_json_to_float(jsmntok_t const* tok, const uint8_t* json_chunk)
 {
 {
 	CGLTF_CHECK_TOKTYPE(*tok, JSMN_PRIMITIVE);
 	CGLTF_CHECK_TOKTYPE(*tok, JSMN_PRIMITIVE);
@@ -3024,7 +3129,7 @@ static int cgltf_parse_json_accessor_sparse(cgltf_options* options, jsmntok_t co
 				else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0)
 				else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0)
 				{
 				{
 					++i;
 					++i;
-					out_sparse->indices_byte_offset = cgltf_json_to_int(tokens + i, json_chunk);
+					out_sparse->indices_byte_offset = cgltf_json_to_size(tokens + i, json_chunk);
 					++i;
 					++i;
 				}
 				}
 				else if (cgltf_json_strcmp(tokens+i, json_chunk, "componentType") == 0)
 				else if (cgltf_json_strcmp(tokens+i, json_chunk, "componentType") == 0)
@@ -3073,7 +3178,7 @@ static int cgltf_parse_json_accessor_sparse(cgltf_options* options, jsmntok_t co
 				else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0)
 				else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0)
 				{
 				{
 					++i;
 					++i;
-					out_sparse->values_byte_offset = cgltf_json_to_int(tokens + i, json_chunk);
+					out_sparse->values_byte_offset = cgltf_json_to_size(tokens + i, json_chunk);
 					++i;
 					++i;
 				}
 				}
 				else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0)
 				else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0)
@@ -3142,7 +3247,7 @@ static int cgltf_parse_json_accessor(cgltf_options* options, jsmntok_t const* to
 		{
 		{
 			++i;
 			++i;
 			out_accessor->offset =
 			out_accessor->offset =
-					cgltf_json_to_int(tokens+i, json_chunk);
+					cgltf_json_to_size(tokens+i, json_chunk);
 			++i;
 			++i;
 		}
 		}
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "componentType") == 0)
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "componentType") == 0)
@@ -3268,6 +3373,7 @@ static int cgltf_parse_json_texture_transform(jsmntok_t const* tokens, int i, co
 		else if (cgltf_json_strcmp(tokens + i, json_chunk, "texCoord") == 0)
 		else if (cgltf_json_strcmp(tokens + i, json_chunk, "texCoord") == 0)
 		{
 		{
 			++i;
 			++i;
+			out_texture_transform->has_texcoord = 1;
 			out_texture_transform->texcoord = cgltf_json_to_int(tokens + i, json_chunk);
 			out_texture_transform->texcoord = cgltf_json_to_int(tokens + i, json_chunk);
 			++i;
 			++i;
 		}
 		}
@@ -3885,7 +3991,62 @@ static int cgltf_parse_json_texture(cgltf_options* options, jsmntok_t const* tok
 		}
 		}
 		else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0)
 		else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0)
 		{
 		{
-			i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_texture->extensions_count, &out_texture->extensions);
+			++i;
+
+			CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT);
+			if (out_texture->extensions)
+			{
+				return CGLTF_ERROR_JSON;
+			}
+
+			int extensions_size = tokens[i].size;
+			++i;
+			out_texture->extensions = (cgltf_extension*)cgltf_calloc(options, sizeof(cgltf_extension), extensions_size);
+			out_texture->extensions_count = 0;
+
+			if (!out_texture->extensions)
+			{
+				return CGLTF_ERROR_NOMEM;
+			}
+
+			for (int k = 0; k < extensions_size; ++k)
+			{
+				CGLTF_CHECK_KEY(tokens[i]);
+
+				if (cgltf_json_strcmp(tokens + i, json_chunk, "KHR_texture_basisu") == 0)
+				{
+					out_texture->has_basisu = 1;
+					++i;
+					CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT);
+					int num_properties = tokens[i].size;
+					++i;
+
+					for (int t = 0; t < num_properties; ++t)
+					{
+						CGLTF_CHECK_KEY(tokens[i]);
+
+						if (cgltf_json_strcmp(tokens + i, json_chunk, "source") == 0)
+						{
+							++i;
+							out_texture->basisu_image = CGLTF_PTRINDEX(cgltf_image, cgltf_json_to_int(tokens + i, json_chunk));
+							++i;
+						}
+						else
+						{
+							i = cgltf_skip_json(tokens, i + 1);
+						}
+					}
+				}
+				else
+				{
+					i = cgltf_parse_json_unprocessed_extension(options, tokens, i, json_chunk, &(out_texture->extensions[out_texture->extensions_count++]));
+				}
+
+				if (i < 0)
+				{
+					return i;
+				}
+			}
 		}
 		}
 		else
 		else
 		{
 		{
@@ -4192,19 +4353,19 @@ static int cgltf_parse_json_meshopt_compression(cgltf_options* options, jsmntok_
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0)
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0)
 		{
 		{
 			++i;
 			++i;
-			out_meshopt_compression->offset = cgltf_json_to_int(tokens+i, json_chunk);
+			out_meshopt_compression->offset = cgltf_json_to_size(tokens+i, json_chunk);
 			++i;
 			++i;
 		}
 		}
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteLength") == 0)
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteLength") == 0)
 		{
 		{
 			++i;
 			++i;
-			out_meshopt_compression->size = cgltf_json_to_int(tokens+i, json_chunk);
+			out_meshopt_compression->size = cgltf_json_to_size(tokens+i, json_chunk);
 			++i;
 			++i;
 		}
 		}
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteStride") == 0)
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteStride") == 0)
 		{
 		{
 			++i;
 			++i;
-			out_meshopt_compression->stride = cgltf_json_to_int(tokens+i, json_chunk);
+			out_meshopt_compression->stride = cgltf_json_to_size(tokens+i, json_chunk);
 			++i;
 			++i;
 		}
 		}
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "count") == 0)
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "count") == 0)
@@ -4290,21 +4451,21 @@ static int cgltf_parse_json_buffer_view(cgltf_options* options, jsmntok_t const*
 		{
 		{
 			++i;
 			++i;
 			out_buffer_view->offset =
 			out_buffer_view->offset =
-					cgltf_json_to_int(tokens+i, json_chunk);
+					cgltf_json_to_size(tokens+i, json_chunk);
 			++i;
 			++i;
 		}
 		}
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteLength") == 0)
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteLength") == 0)
 		{
 		{
 			++i;
 			++i;
 			out_buffer_view->size =
 			out_buffer_view->size =
-					cgltf_json_to_int(tokens+i, json_chunk);
+					cgltf_json_to_size(tokens+i, json_chunk);
 			++i;
 			++i;
 		}
 		}
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteStride") == 0)
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteStride") == 0)
 		{
 		{
 			++i;
 			++i;
 			out_buffer_view->stride =
 			out_buffer_view->stride =
-					cgltf_json_to_int(tokens+i, json_chunk);
+					cgltf_json_to_size(tokens+i, json_chunk);
 			++i;
 			++i;
 		}
 		}
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "target") == 0)
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "target") == 0)
@@ -4422,7 +4583,7 @@ static int cgltf_parse_json_buffer(cgltf_options* options, jsmntok_t const* toke
 		{
 		{
 			++i;
 			++i;
 			out_buffer->size =
 			out_buffer->size =
-					cgltf_json_to_int(tokens+i, json_chunk);
+					cgltf_json_to_size(tokens+i, json_chunk);
 			++i;
 			++i;
 		}
 		}
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "uri") == 0)
 		else if (cgltf_json_strcmp(tokens+i, json_chunk, "uri") == 0)
@@ -4737,6 +4898,14 @@ static int cgltf_parse_json_light(cgltf_options* options, jsmntok_t const* token
 {
 {
 	CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT);
 	CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT);
 
 
+	out_light->color[0] = 1.f;
+	out_light->color[1] = 1.f;
+	out_light->color[2] = 1.f;
+	out_light->intensity = 1.f;
+
+	out_light->spot_inner_cone_angle = 0.f;
+	out_light->spot_outer_cone_angle = 3.1415926535f / 4.0f;
+
 	int size = tokens[i].size;
 	int size = tokens[i].size;
 	++i;
 	++i;
 
 
@@ -4817,6 +4986,10 @@ static int cgltf_parse_json_light(cgltf_options* options, jsmntok_t const* token
 				}
 				}
 			}
 			}
 		}
 		}
+		else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0)
+		{
+			i = cgltf_parse_json_extras(tokens, i + 1, json_chunk, &out_light->extras);
+		}
 		else
 		else
 		{
 		{
 			i = cgltf_skip_json(tokens, i+1);
 			i = cgltf_skip_json(tokens, i+1);
@@ -5851,6 +6024,7 @@ static int cgltf_fixup_pointers(cgltf_data* data)
 	for (cgltf_size i = 0; i < data->textures_count; ++i)
 	for (cgltf_size i = 0; i < data->textures_count; ++i)
 	{
 	{
 		CGLTF_PTRFIXUP(data->textures[i].image, data->images, data->images_count);
 		CGLTF_PTRFIXUP(data->textures[i].image, data->images, data->images_count);
+		CGLTF_PTRFIXUP(data->textures[i].basisu_image, data->images, data->images_count);
 		CGLTF_PTRFIXUP(data->textures[i].sampler, data->samplers, data->samplers_count);
 		CGLTF_PTRFIXUP(data->textures[i].sampler, data->samplers, data->samplers_count);
 	}
 	}
 
 
@@ -6305,7 +6479,7 @@ static void jsmn_init(jsmn_parser *parser) {
 
 
 /* cgltf is distributed under MIT license:
 /* cgltf is distributed under MIT license:
  *
  *
- * Copyright (c) 2018 Johannes Kuhlmann
+ * Copyright (c) 2018-2021 Johannes Kuhlmann
 
 
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
  * of this software and associated documentation files (the "Software"), to deal

+ 10 - 4
src/external/dr_flac.h

@@ -1,6 +1,6 @@
 /*
 /*
 FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
 FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_flac - v0.12.29 - 2021-04-02
+dr_flac - v0.12.31 - 2021-08-16
 
 
 David Reid - [email protected]
 David Reid - [email protected]
 
 
@@ -232,7 +232,7 @@ extern "C" {
 
 
 #define DRFLAC_VERSION_MAJOR     0
 #define DRFLAC_VERSION_MAJOR     0
 #define DRFLAC_VERSION_MINOR     12
 #define DRFLAC_VERSION_MINOR     12
-#define DRFLAC_VERSION_REVISION  29
+#define DRFLAC_VERSION_REVISION  31
 #define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
 #define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
 
 
 #include <stddef.h> /* For size_t. */
 #include <stddef.h> /* For size_t. */
@@ -261,7 +261,7 @@ typedef unsigned int            drflac_uint32;
         #pragma GCC diagnostic pop
         #pragma GCC diagnostic pop
     #endif
     #endif
 #endif
 #endif
-#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__)
     typedef drflac_uint64       drflac_uintptr;
     typedef drflac_uint64       drflac_uintptr;
 #else
 #else
     typedef drflac_uint32       drflac_uintptr;
     typedef drflac_uint32       drflac_uintptr;
@@ -11516,7 +11516,7 @@ static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned
         DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
         DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
     } else {                                                                                                                                                        \
     } else {                                                                                                                                                        \
         drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
         drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
-        if (dataSize > DRFLAC_SIZE_MAX) {                                                                                                                           \
+        if (dataSize > (drflac_uint64)DRFLAC_SIZE_MAX) {                                                                                                            \
             goto on_error;  /* The decoded data is too big. */                                                                                                      \
             goto on_error;  /* The decoded data is too big. */                                                                                                      \
         }                                                                                                                                                           \
         }                                                                                                                                                           \
                                                                                                                                                                     \
                                                                                                                                                                     \
@@ -11851,6 +11851,12 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
 /*
 /*
 REVISION HISTORY
 REVISION HISTORY
 ================
 ================
+v0.12.31 - 2021-08-16
+  - Silence some warnings.
+
+v0.12.30 - 2021-07-31
+  - Fix platform detection for ARM64.
+
 v0.12.29 - 2021-04-02
 v0.12.29 - 2021-04-02
   - Fix a bug where the running PCM frame index is set to an invalid value when over-seeking.
   - Fix a bug where the running PCM frame index is set to an invalid value when over-seeking.
   - Fix a decoding error due to an incorrect validation check.
   - Fix a decoding error due to an incorrect validation check.

+ 96 - 59
src/external/dr_mp3.h

@@ -1,6 +1,6 @@
 /*
 /*
 MP3 audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
 MP3 audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_mp3 - v0.6.27 - 2021-02-21
+dr_mp3 - v0.6.31 - 2021-08-22
 
 
 David Reid - [email protected]
 David Reid - [email protected]
 
 
@@ -95,7 +95,7 @@ extern "C" {
 
 
 #define DRMP3_VERSION_MAJOR     0
 #define DRMP3_VERSION_MAJOR     0
 #define DRMP3_VERSION_MINOR     6
 #define DRMP3_VERSION_MINOR     6
-#define DRMP3_VERSION_REVISION  27
+#define DRMP3_VERSION_REVISION  31
 #define DRMP3_VERSION_STRING    DRMP3_XSTRINGIFY(DRMP3_VERSION_MAJOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_MINOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_REVISION)
 #define DRMP3_VERSION_STRING    DRMP3_XSTRINGIFY(DRMP3_VERSION_MAJOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_MINOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_REVISION)
 
 
 #include <stddef.h> /* For size_t. */
 #include <stddef.h> /* For size_t. */
@@ -124,7 +124,7 @@ typedef unsigned int            drmp3_uint32;
         #pragma GCC diagnostic pop
         #pragma GCC diagnostic pop
     #endif
     #endif
 #endif
 #endif
-#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__)
     typedef drmp3_uint64        drmp3_uintptr;
     typedef drmp3_uint64        drmp3_uintptr;
 #else
 #else
     typedef drmp3_uint32        drmp3_uintptr;
     typedef drmp3_uint32        drmp3_uintptr;
@@ -701,7 +701,7 @@ static int drmp3_have_simd(void)
 
 
 #if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64)
 #if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64)
 #define DRMP3_HAVE_ARMV6 1
 #define DRMP3_HAVE_ARMV6 1
-static __inline__ __attribute__((always_inline)) drmp3_int32 drmp3_clip_int16_arm(int32_t a)
+static __inline__ __attribute__((always_inline)) drmp3_int32 drmp3_clip_int16_arm(drmp3_int32 a)
 {
 {
     drmp3_int32 x = 0;
     drmp3_int32 x = 0;
     __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
     __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
@@ -712,6 +712,31 @@ static __inline__ __attribute__((always_inline)) drmp3_int32 drmp3_clip_int16_ar
 #endif
 #endif
 
 
 
 
+/* Standard library stuff. */
+#ifndef DRMP3_ASSERT
+#include <assert.h>
+#define DRMP3_ASSERT(expression) assert(expression)
+#endif
+#ifndef DRMP3_COPY_MEMORY
+#define DRMP3_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz))
+#endif
+#ifndef DRMP3_MOVE_MEMORY
+#define DRMP3_MOVE_MEMORY(dst, src, sz) memmove((dst), (src), (sz))
+#endif
+#ifndef DRMP3_ZERO_MEMORY
+#define DRMP3_ZERO_MEMORY(p, sz) memset((p), 0, (sz))
+#endif
+#define DRMP3_ZERO_OBJECT(p) DRMP3_ZERO_MEMORY((p), sizeof(*(p)))
+#ifndef DRMP3_MALLOC
+#define DRMP3_MALLOC(sz) malloc((sz))
+#endif
+#ifndef DRMP3_REALLOC
+#define DRMP3_REALLOC(p, sz) realloc((p), (sz))
+#endif
+#ifndef DRMP3_FREE
+#define DRMP3_FREE(p) free((p))
+#endif
+
 typedef struct
 typedef struct
 {
 {
     const drmp3_uint8 *buf;
     const drmp3_uint8 *buf;
@@ -978,7 +1003,7 @@ static int drmp3_L12_dequantize_granule(float *grbuf, drmp3_bs *bs, drmp3_L12_sc
 static void drmp3_L12_apply_scf_384(drmp3_L12_scale_info *sci, const float *scf, float *dst)
 static void drmp3_L12_apply_scf_384(drmp3_L12_scale_info *sci, const float *scf, float *dst)
 {
 {
     int i, k;
     int i, k;
-    memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));
+    DRMP3_COPY_MEMORY(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));
     for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6)
     for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6)
     {
     {
         for (k = 0; k < 12; k++)
         for (k = 0; k < 12; k++)
@@ -1123,14 +1148,14 @@ static void drmp3_L3_read_scalefactors(drmp3_uint8 *scf, drmp3_uint8 *ist_pos, c
         int cnt = scf_count[i];
         int cnt = scf_count[i];
         if (scfsi & 8)
         if (scfsi & 8)
         {
         {
-            memcpy(scf, ist_pos, cnt);
+            DRMP3_COPY_MEMORY(scf, ist_pos, cnt);
         } else
         } else
         {
         {
             int bits = scf_size[i];
             int bits = scf_size[i];
             if (!bits)
             if (!bits)
             {
             {
-                memset(scf, 0, cnt);
-                memset(ist_pos, 0, cnt);
+                DRMP3_ZERO_MEMORY(scf, cnt);
+                DRMP3_ZERO_MEMORY(ist_pos, cnt);
             } else
             } else
             {
             {
                 int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;
                 int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;
@@ -1390,12 +1415,22 @@ static void drmp3_L3_midside_stereo(float *left, int n)
     int i = 0;
     int i = 0;
     float *right = left + 576;
     float *right = left + 576;
 #if DRMP3_HAVE_SIMD
 #if DRMP3_HAVE_SIMD
-    if (drmp3_have_simd()) for (; i < n - 3; i += 4)
+    if (drmp3_have_simd())
     {
     {
-        drmp3_f4 vl = DRMP3_VLD(left + i);
-        drmp3_f4 vr = DRMP3_VLD(right + i);
-        DRMP3_VSTORE(left + i, DRMP3_VADD(vl, vr));
-        DRMP3_VSTORE(right + i, DRMP3_VSUB(vl, vr));
+        for (; i < n - 3; i += 4)
+        {
+            drmp3_f4 vl = DRMP3_VLD(left + i);
+            drmp3_f4 vr = DRMP3_VLD(right + i);
+            DRMP3_VSTORE(left + i, DRMP3_VADD(vl, vr));
+            DRMP3_VSTORE(right + i, DRMP3_VSUB(vl, vr));
+        }
+#ifdef __GNUC__
+        /* Workaround for spurious -Waggressive-loop-optimizations warning from gcc.
+         * For more info see: https://github.com/lieff/minimp3/issues/88
+         */
+        if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0)
+            return;
+#endif
     }
     }
 #endif
 #endif
     for (; i < n; i++)
     for (; i < n; i++)
@@ -1505,7 +1540,7 @@ static void drmp3_L3_reorder(float *grbuf, float *scratch, const drmp3_uint8 *sf
             *dst++ = src[2*len];
             *dst++ = src[2*len];
         }
         }
     }
     }
-    memcpy(grbuf, scratch, (dst - scratch)*sizeof(float));
+    DRMP3_COPY_MEMORY(grbuf, scratch, (dst - scratch)*sizeof(float));
 }
 }
 
 
 static void drmp3_L3_antialias(float *grbuf, int nbands)
 static void drmp3_L3_antialias(float *grbuf, int nbands)
@@ -1674,8 +1709,8 @@ static void drmp3_L3_imdct_short(float *grbuf, float *overlap, int nbands)
     for (;nbands > 0; nbands--, overlap += 9, grbuf += 18)
     for (;nbands > 0; nbands--, overlap += 9, grbuf += 18)
     {
     {
         float tmp[18];
         float tmp[18];
-        memcpy(tmp, grbuf, sizeof(tmp));
-        memcpy(grbuf, overlap, 6*sizeof(float));
+        DRMP3_COPY_MEMORY(tmp, grbuf, sizeof(tmp));
+        DRMP3_COPY_MEMORY(grbuf, overlap, 6*sizeof(float));
         drmp3_L3_imdct12(tmp, grbuf + 6, overlap + 6);
         drmp3_L3_imdct12(tmp, grbuf + 6, overlap + 6);
         drmp3_L3_imdct12(tmp + 1, grbuf + 12, overlap + 6);
         drmp3_L3_imdct12(tmp + 1, grbuf + 12, overlap + 6);
         drmp3_L3_imdct12(tmp + 2, overlap, overlap + 6);
         drmp3_L3_imdct12(tmp + 2, overlap, overlap + 6);
@@ -1719,7 +1754,7 @@ static void drmp3_L3_save_reservoir(drmp3dec *h, drmp3dec_scratch *s)
     }
     }
     if (remains > 0)
     if (remains > 0)
     {
     {
-        memmove(h->reserv_buf, s->maindata + pos, remains);
+        DRMP3_MOVE_MEMORY(h->reserv_buf, s->maindata + pos, remains);
     }
     }
     h->reserv = remains;
     h->reserv = remains;
 }
 }
@@ -1728,8 +1763,8 @@ static int drmp3_L3_restore_reservoir(drmp3dec *h, drmp3_bs *bs, drmp3dec_scratc
 {
 {
     int frame_bytes = (bs->limit - bs->pos)/8;
     int frame_bytes = (bs->limit - bs->pos)/8;
     int bytes_have = DRMP3_MIN(h->reserv, main_data_begin);
     int bytes_have = DRMP3_MIN(h->reserv, main_data_begin);
-    memcpy(s->maindata, h->reserv_buf + DRMP3_MAX(0, h->reserv - main_data_begin), DRMP3_MIN(h->reserv, main_data_begin));
-    memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
+    DRMP3_COPY_MEMORY(s->maindata, h->reserv_buf + DRMP3_MAX(0, h->reserv - main_data_begin), DRMP3_MIN(h->reserv, main_data_begin));
+    DRMP3_COPY_MEMORY(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
     drmp3_bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);
     drmp3_bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);
     return h->reserv >= main_data_begin;
     return h->reserv >= main_data_begin;
 }
 }
@@ -2136,7 +2171,7 @@ static void drmp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int
         drmp3d_DCT_II(grbuf + 576*i, nbands);
         drmp3d_DCT_II(grbuf + 576*i, nbands);
     }
     }
 
 
-    memcpy(lins, qmf_state, sizeof(float)*15*64);
+    DRMP3_COPY_MEMORY(lins, qmf_state, sizeof(float)*15*64);
 
 
     for (i = 0; i < nbands; i += 2)
     for (i = 0; i < nbands; i += 2)
     {
     {
@@ -2152,7 +2187,7 @@ static void drmp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int
     } else
     } else
 #endif
 #endif
     {
     {
-        memcpy(qmf_state, lins + nbands*64, sizeof(float)*15*64);
+        DRMP3_COPY_MEMORY(qmf_state, lins + nbands*64, sizeof(float)*15*64);
     }
     }
 }
 }
 
 
@@ -2230,7 +2265,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
     }
     }
     if (!frame_size)
     if (!frame_size)
     {
     {
-        memset(dec, 0, sizeof(drmp3dec));
+        DRMP3_ZERO_MEMORY(dec, sizeof(drmp3dec));
         i = drmp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
         i = drmp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
         if (!frame_size || i + frame_size > mp3_bytes)
         if (!frame_size || i + frame_size > mp3_bytes)
         {
         {
@@ -2240,7 +2275,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
     }
     }
 
 
     hdr = mp3 + i;
     hdr = mp3 + i;
-    memcpy(dec->header, hdr, DRMP3_HDR_SIZE);
+    DRMP3_COPY_MEMORY(dec->header, hdr, DRMP3_HDR_SIZE);
     info->frame_bytes = i + frame_size;
     info->frame_bytes = i + frame_size;
     info->channels = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2;
     info->channels = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2;
     info->hz = drmp3_hdr_sample_rate_hz(hdr);
     info->hz = drmp3_hdr_sample_rate_hz(hdr);
@@ -2266,7 +2301,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
         {
         {
             for (igr = 0; igr < (DRMP3_HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*576*info->channels))
             for (igr = 0; igr < (DRMP3_HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*576*info->channels))
             {
             {
-                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
+                DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
                 drmp3_L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
                 drmp3_L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
                 drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
                 drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
             }
             }
@@ -2285,7 +2320,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
 
 
         drmp3_L12_read_scale_info(hdr, bs_frame, sci);
         drmp3_L12_read_scale_info(hdr, bs_frame, sci);
 
 
-        memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
+        DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
         for (i = 0, igr = 0; igr < 3; igr++)
         for (i = 0, igr = 0; igr < 3; igr++)
         {
         {
             if (12 == (i += drmp3_L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
             if (12 == (i += drmp3_L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
@@ -2293,7 +2328,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
                 i = 0;
                 i = 0;
                 drmp3_L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
                 drmp3_L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
                 drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
                 drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
-                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
+                DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
                 pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*384*info->channels);
                 pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*384*info->channels);
             }
             }
             if (bs_frame->pos > bs_frame->limit)
             if (bs_frame->pos > bs_frame->limit)
@@ -2396,28 +2431,6 @@ DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num
 #endif
 #endif
 
 
 
 
-/* Standard library stuff. */
-#ifndef DRMP3_ASSERT
-#include <assert.h>
-#define DRMP3_ASSERT(expression) assert(expression)
-#endif
-#ifndef DRMP3_COPY_MEMORY
-#define DRMP3_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz))
-#endif
-#ifndef DRMP3_ZERO_MEMORY
-#define DRMP3_ZERO_MEMORY(p, sz) memset((p), 0, (sz))
-#endif
-#define DRMP3_ZERO_OBJECT(p) DRMP3_ZERO_MEMORY((p), sizeof(*(p)))
-#ifndef DRMP3_MALLOC
-#define DRMP3_MALLOC(sz) malloc((sz))
-#endif
-#ifndef DRMP3_REALLOC
-#define DRMP3_REALLOC(p, sz) realloc((p), (sz))
-#endif
-#ifndef DRMP3_FREE
-#define DRMP3_FREE(p) free((p))
-#endif
-
 #define DRMP3_COUNTOF(x)        (sizeof(x) / sizeof(x[0]))
 #define DRMP3_COUNTOF(x)        (sizeof(x) / sizeof(x[0]))
 #define DRMP3_CLAMP(x, lo, hi)  (DRMP3_MAX(lo, DRMP3_MIN(x, hi)))
 #define DRMP3_CLAMP(x, lo, hi)  (DRMP3_MAX(lo, DRMP3_MIN(x, hi)))
 
 
@@ -2649,7 +2662,7 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
 
 
             /* First we need to move the data down. */
             /* First we need to move the data down. */
             if (pMP3->pData != NULL) {
             if (pMP3->pData != NULL) {
-                memmove(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
+                DRMP3_MOVE_MEMORY(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
             }
             }
 
 
             pMP3->dataConsumed = 0;
             pMP3->dataConsumed = 0;
@@ -2709,7 +2722,7 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
             size_t bytesRead;
             size_t bytesRead;
 
 
             /* First we need to move the data down. */
             /* First we need to move the data down. */
-            memmove(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
+            DRMP3_MOVE_MEMORY(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
             pMP3->dataConsumed = 0;
             pMP3->dataConsumed = 0;
 
 
             if (pMP3->dataCapacity == pMP3->dataSize) {
             if (pMP3->dataCapacity == pMP3->dataSize) {
@@ -2754,12 +2767,22 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__memory(drmp3* pMP3, drmp3d_sampl
         return 0;
         return 0;
     }
     }
 
 
-    pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->memory.pData + pMP3->memory.currentReadPos, (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos), pPCMFrames, &info);
-    if (pcmFramesRead > 0) {
-        pMP3->pcmFramesConsumedInMP3Frame  = 0;
-        pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
-        pMP3->mp3FrameChannels             = info.channels;
-        pMP3->mp3FrameSampleRate           = info.hz;
+    for (;;) {
+        pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->memory.pData + pMP3->memory.currentReadPos, (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos), pPCMFrames, &info);
+        if (pcmFramesRead > 0) {
+            pcmFramesRead = drmp3_hdr_frame_samples(pMP3->decoder.header);
+            pMP3->pcmFramesConsumedInMP3Frame  = 0;
+            pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
+            pMP3->mp3FrameChannels             = info.channels;
+            pMP3->mp3FrameSampleRate           = info.hz;
+            break;
+        } else if (info.frame_bytes > 0) {
+            /* No frames were read, but it looks like we skipped past one. Read the next MP3 frame. */
+            pMP3->memory.currentReadPos += (size_t)info.frame_bytes;
+        } else {
+            /* Nothing at all was read. Abort. */
+            break;
+        }
     }
     }
 
 
     /* Consume the data. */
     /* Consume the data. */
@@ -2822,7 +2845,7 @@ static drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drm
     }
     }
 
 
     /* Decode the first frame to confirm that it is indeed a valid MP3 stream. */
     /* Decode the first frame to confirm that it is indeed a valid MP3 stream. */
-    if (!drmp3_decode_next_frame(pMP3)) {
+    if (drmp3_decode_next_frame(pMP3) == 0) {
         drmp3__free_from_callbacks(pMP3->pData, &pMP3->allocationCallbacks);    /* The call above may have allocated memory. Need to make sure it's freed before aborting. */
         drmp3__free_from_callbacks(pMP3->pData, &pMP3->allocationCallbacks);    /* The call above may have allocated memory. Need to make sure it's freed before aborting. */
         return DRMP3_FALSE; /* Not a valid MP3 stream. */
         return DRMP3_FALSE; /* Not a valid MP3 stream. */
     }
     }
@@ -4177,7 +4200,7 @@ static float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig,
 
 
             oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(float);
             oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(float);
             newFramesBufferSize = newFramesCap   * pMP3->channels * sizeof(float);
             newFramesBufferSize = newFramesCap   * pMP3->channels * sizeof(float);
-            if (newFramesBufferSize > DRMP3_SIZE_MAX) {
+            if (newFramesBufferSize > (drmp3_uint64)DRMP3_SIZE_MAX) {
                 break;
                 break;
             }
             }
 
 
@@ -4244,7 +4267,7 @@ static drmp3_int16* drmp3__full_read_and_close_s16(drmp3* pMP3, drmp3_config* pC
 
 
             oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(drmp3_int16);
             oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(drmp3_int16);
             newFramesBufferSize = newFramesCap   * pMP3->channels * sizeof(drmp3_int16);
             newFramesBufferSize = newFramesCap   * pMP3->channels * sizeof(drmp3_int16);
-            if (newFramesBufferSize > DRMP3_SIZE_MAX) {
+            if (newFramesBufferSize > (drmp3_uint64)DRMP3_SIZE_MAX) {
                 break;
                 break;
             }
             }
 
 
@@ -4450,6 +4473,20 @@ counts rather than sample counts.
 /*
 /*
 REVISION HISTORY
 REVISION HISTORY
 ================
 ================
+v0.6.31 - 2021-08-22
+  - Fix a bug when loading from memory.
+
+v0.6.30 - 2021-08-16
+  - Silence some warnings.
+  - Replace memory operations with DRMP3_* macros.
+
+v0.6.29 - 2021-08-08
+  - Bring up to date with minimp3.
+
+v0.6.28 - 2021-07-31
+  - Fix platform detection for ARM64.
+  - Fix a compilation error with C89.
+
 v0.6.27 - 2021-02-21
 v0.6.27 - 2021-02-21
   - Fix a warning due to referencing _MSC_VER when it is undefined.
   - Fix a warning due to referencing _MSC_VER when it is undefined.
 
 

File diff suppressed because it is too large
+ 1229 - 128
src/external/dr_wav.h


+ 230 - 121
src/external/msf_gif.h

@@ -13,19 +13,31 @@ USAGE EXAMPLE:
 
 
     int width = 480, height = 320, centisecondsPerFrame = 5, bitDepth = 16;
     int width = 480, height = 320, centisecondsPerFrame = 5, bitDepth = 16;
     MsfGifState gifState = {};
     MsfGifState gifState = {};
+    // msf_gif_bgra_flag = true; //optionally, set this flag if your pixels are in BGRA format instead of RGBA
+    // msf_gif_alpha_threshold = 128; //optionally, enable transparency (see function documentation below for details)
     msf_gif_begin(&gifState, width, height);
     msf_gif_begin(&gifState, width, height);
     msf_gif_frame(&gifState, ..., centisecondsPerFrame, bitDepth, width * 4); //frame 1
     msf_gif_frame(&gifState, ..., centisecondsPerFrame, bitDepth, width * 4); //frame 1
     msf_gif_frame(&gifState, ..., centisecondsPerFrame, bitDepth, width * 4); //frame 2
     msf_gif_frame(&gifState, ..., centisecondsPerFrame, bitDepth, width * 4); //frame 2
     msf_gif_frame(&gifState, ..., centisecondsPerFrame, bitDepth, width * 4); //frame 3, etc...
     msf_gif_frame(&gifState, ..., centisecondsPerFrame, bitDepth, width * 4); //frame 3, etc...
     MsfGifResult result = msf_gif_end(&gifState);
     MsfGifResult result = msf_gif_end(&gifState);
-    FILE * fp = fopen("MyGif.gif", "wb");
-    fwrite(result.data, result.dataSize, 1, fp);
-    fclose(fp);
+    if (result.data) {
+        FILE * fp = fopen("MyGif.gif", "wb");
+        fwrite(result.data, result.dataSize, 1, fp);
+        fclose(fp);
+    }
     msf_gif_free(result);
     msf_gif_free(result);
 
 
 Detailed function documentation can be found in the header section below.
 Detailed function documentation can be found in the header section below.
 
 
 
 
+ERROR HANDLING:
+
+    If memory allocation fails, the functions will signal the error via their return values.
+    If one function call fails, the library will free all of its allocations,
+    and all subsequent calls will safely no-op and return 0 until the next call to `msf_gif_begin()`.
+    Therefore, it's safe to check only the return value of `msf_gif_end()`.
+
+
 REPLACING MALLOC:
 REPLACING MALLOC:
 
 
     This library uses malloc+realloc+free internally for memory allocation.
     This library uses malloc+realloc+free internally for memory allocation.
@@ -39,10 +51,20 @@ REPLACING MALLOC:
     If your allocator needs a context pointer, you can set the `customAllocatorContext` field of the MsfGifState struct
     If your allocator needs a context pointer, you can set the `customAllocatorContext` field of the MsfGifState struct
     before calling msf_gif_begin(), and it will be passed to all subsequent allocator macro calls.
     before calling msf_gif_begin(), and it will be passed to all subsequent allocator macro calls.
 
 
+    The maximum number of bytes the library will allocate to encode a single gif is bounded by the following formula:
+    `(2 * 1024 * 1024) + (width * height * 8) + ((1024 + width * height * 1.5) * 3 * frameCount)`
+    The peak heap memory usage in bytes, if using a general-purpose heap allocator, is bounded by the following formula:
+    `(2 * 1024 * 1024) + (width * height * 9.5) + 1024 + (16 * frameCount) + (2 * sizeOfResultingGif)
+
+
 See end of file for license information.
 See end of file for license information.
 */
 */
 
 
-//version 2.1
+//version 2.2
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/// HEADER                                                                                                           ///
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
 #ifndef MSF_GIF_H
 #ifndef MSF_GIF_H
 #define MSF_GIF_H
 #define MSF_GIF_H
@@ -63,12 +85,24 @@ typedef struct { //internal use
     int depth, count, rbits, gbits, bbits;
     int depth, count, rbits, gbits, bbits;
 } MsfCookedFrame;
 } MsfCookedFrame;
 
 
+typedef struct MsfGifBuffer {
+    struct MsfGifBuffer * next;
+    size_t size;
+    uint8_t data[1];
+} MsfGifBuffer;
+
+typedef size_t (* MsfGifFileWriteFunc) (const void * buffer, size_t size, size_t count, void * stream);
 typedef struct {
 typedef struct {
+    MsfGifFileWriteFunc fileWriteFunc;
+    void * fileWriteData;
     MsfCookedFrame previousFrame;
     MsfCookedFrame previousFrame;
-    uint8_t * listHead;
-    uint8_t * listTail;
+    MsfCookedFrame currentFrame;
+    int16_t * lzwMem;
+    MsfGifBuffer * listHead;
+    MsfGifBuffer * listTail;
     int width, height;
     int width, height;
     void * customAllocatorContext;
     void * customAllocatorContext;
+    int framesSubmitted; //needed for transparency to work correctly (because we reach into the previous frame)
 } MsfGifState;
 } MsfGifState;
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -83,7 +117,8 @@ extern "C" {
 int msf_gif_begin(MsfGifState * handle, int width, int height);
 int msf_gif_begin(MsfGifState * handle, int width, int height);
 
 
 /**
 /**
- * @param pixelData            Pointer to raw framebuffer data. Rows must be contiguous in memory, in RGBA8 format.
+ * @param pixelData            Pointer to raw framebuffer data. Rows must be contiguous in memory, in RGBA8 format
+ *                             (or BGRA8 if you have set `msf_gif_bgra_flag = true`).
  *                             Note: This function does NOT free `pixelData`. You must free it yourself afterwards.
  *                             Note: This function does NOT free `pixelData`. You must free it yourself afterwards.
  * @param centiSecondsPerFrame How many hundredths of a second this frame should be displayed for.
  * @param centiSecondsPerFrame How many hundredths of a second this frame should be displayed for.
  *                             Note: This being specified in centiseconds is a limitation of the GIF format.
  *                             Note: This being specified in centiseconds is a limitation of the GIF format.
@@ -111,6 +146,35 @@ MsfGifResult msf_gif_end(MsfGifState * handle);
  */
  */
 void msf_gif_free(MsfGifResult result);
 void msf_gif_free(MsfGifResult result);
 
 
+//The gif format only supports 1-bit transparency, meaning a pixel will either be fully transparent or fully opaque.
+//Pixels with an alpha value less than the alpha threshold will be treated as transparent.
+//To enable exporting transparent gifs, set it to a value between 1 and 255 (inclusive) before calling msf_gif_frame().
+//Setting it to 0 causes the alpha channel to be ignored. Its initial value is 0.
+extern int msf_gif_alpha_threshold;
+
+//Set `msf_gif_bgra_flag = true` before calling `msf_gif_frame()` if your pixels are in BGRA byte order instead of RBGA.
+extern int msf_gif_bgra_flag;
+
+
+
+//TO-FILE FUNCTIONS
+//These functions are equivalent to the ones above, but they write results to a file incrementally,
+//instead of building a buffer in memory. This can result in lower memory usage when saving large gifs,
+//because memory usage is bounded by only the size of a single frame, and is not dependent on the number of frames.
+//There is currently no reason to use these unless you are on a memory-constrained platform.
+//If in doubt about which API to use, for now you should use the normal (non-file) functions above.
+//The signature of MsfGifFileWriteFunc matches fwrite for convenience, so that you can use the C file API like so:
+//  FILE * fp = fopen("MyGif.gif", "wb");
+//  msf_gif_begin_to_file(&handle, width, height, (MsfGifFileWriteFunc) fwrite, (void *) fp);
+//  msf_gif_frame_to_file(...)
+//  msf_gif_end_to_file(&handle);
+//  fclose(fp);
+//If you use a custom file write function, you must take care to return the same values that fwrite() would return.
+//Note that all three functions will potentially write to the file.
+int msf_gif_begin_to_file(MsfGifState * handle, int width, int height, MsfGifFileWriteFunc func, void * filePointer);
+int msf_gif_frame_to_file(MsfGifState * handle, uint8_t * pixelData, int centiSecondsPerFame, int maxBitDepth, int pitchInBytes);
+int msf_gif_end_to_file(MsfGifState * handle); //returns 0 on error and non-zero on success
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif //__cplusplus
 #endif //__cplusplus
@@ -125,10 +189,6 @@ void msf_gif_free(MsfGifResult result);
 #ifndef MSF_GIF_ALREADY_IMPLEMENTED_IN_THIS_TRANSLATION_UNIT
 #ifndef MSF_GIF_ALREADY_IMPLEMENTED_IN_THIS_TRANSLATION_UNIT
 #define MSF_GIF_ALREADY_IMPLEMENTED_IN_THIS_TRANSLATION_UNIT
 #define MSF_GIF_ALREADY_IMPLEMENTED_IN_THIS_TRANSLATION_UNIT
 
 
-#ifndef MSF_GIF_BUFFER_INIT_SIZE
-#define MSF_GIF_BUFFER_INIT_SIZE 1024 * 1024 * 4 //4MB by default, you can increase this if you want to realloc less
-#endif
-
 //ensure the library user has either defined all of malloc/realloc/free, or none
 //ensure the library user has either defined all of malloc/realloc/free, or none
 #if defined(MSF_GIF_MALLOC) && defined(MSF_GIF_REALLOC) && defined(MSF_GIF_FREE) //ok
 #if defined(MSF_GIF_MALLOC) && defined(MSF_GIF_REALLOC) && defined(MSF_GIF_FREE) //ok
 #elif !defined(MSF_GIF_MALLOC) && !defined(MSF_GIF_REALLOC) && !defined(MSF_GIF_FREE) //ok
 #elif !defined(MSF_GIF_MALLOC) && !defined(MSF_GIF_REALLOC) && !defined(MSF_GIF_FREE) //ok
@@ -189,13 +249,21 @@ static inline int msf_imax(int a, int b) { return b < a? a : b; }
 #include <emmintrin.h>
 #include <emmintrin.h>
 #endif
 #endif
 
 
-static MsfCookedFrame msf_cook_frame(void * allocContext, uint8_t * raw, uint8_t * used,
-                                     int width, int height, int pitch, int depth)
+int msf_gif_alpha_threshold = 0;
+int msf_gif_bgra_flag = 0;
+
+static void msf_cook_frame(MsfCookedFrame * frame, uint8_t * raw, uint8_t * used,
+                           int width, int height, int pitch, int depth)
 { MsfTimeFunc
 { MsfTimeFunc
     //bit depth for each channel
     //bit depth for each channel
-    const static int rdepths[17] = { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5 };
-    const static int gdepths[17] = { 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
-    const static int bdepths[17] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5 };
+    const static int rdepthsArray[17] = { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5 };
+    const static int gdepthsArray[17] = { 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
+    const static int bdepthsArray[17] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5 };
+    //this extra level of indirection looks unnecessary but we need to explicitly decay the arrays to pointers
+    //in order to be able to swap them because of C's annoying not-quite-pointers, not-quite-value-types stack arrays.
+    const int * rdepths = msf_gif_bgra_flag? bdepthsArray : rdepthsArray;
+    const int * gdepths =                                   gdepthsArray;
+    const int * bdepths = msf_gif_bgra_flag? rdepthsArray : bdepthsArray;
 
 
     const static int ditherKernel[16] = {
     const static int ditherKernel[16] = {
          0 << 12,  8 << 12,  2 << 12, 10 << 12,
          0 << 12,  8 << 12,  2 << 12, 10 << 12,
@@ -204,13 +272,11 @@ static MsfCookedFrame msf_cook_frame(void * allocContext, uint8_t * raw, uint8_t
         15 << 12,  7 << 12, 13 << 12,  5 << 12,
         15 << 12,  7 << 12, 13 << 12,  5 << 12,
     };
     };
 
 
-    uint32_t * cooked = (uint32_t *) MSF_GIF_MALLOC(allocContext, width * height * sizeof(uint32_t));
-    if (!cooked) { MsfCookedFrame blank = {0}; return blank; }
-
+    uint32_t * cooked = frame->pixels;
     int count = 0;
     int count = 0;
     MsfTimeLoop("do") do {
     MsfTimeLoop("do") do {
         int rbits = rdepths[depth], gbits = gdepths[depth], bbits = bdepths[depth];
         int rbits = rdepths[depth], gbits = gdepths[depth], bbits = bdepths[depth];
-        int paletteSize = 1 << (rbits + gbits + bbits);
+        int paletteSize = (1 << (rbits + gbits + bbits)) + 1;
         memset(used, 0, paletteSize * sizeof(uint8_t));
         memset(used, 0, paletteSize * sizeof(uint8_t));
 
 
         //TODO: document what this math does and why it's correct
         //TODO: document what this math does and why it's correct
@@ -230,7 +296,6 @@ static MsfCookedFrame msf_cook_frame(void * allocContext, uint8_t * raw, uint8_t
             #if (defined (__SSE2__) || defined (_M_X64) || _M_IX86_FP == 2) && !defined(MSF_GIF_NO_SSE2)
             #if (defined (__SSE2__) || defined (_M_X64) || _M_IX86_FP == 2) && !defined(MSF_GIF_NO_SSE2)
                 __m128i k = _mm_loadu_si128((__m128i *) &ditherKernel[(y & 3) * 4]);
                 __m128i k = _mm_loadu_si128((__m128i *) &ditherKernel[(y & 3) * 4]);
                 __m128i k2 = _mm_or_si128(_mm_srli_epi32(k, rbits), _mm_slli_epi32(_mm_srli_epi32(k, bbits), 16));
                 __m128i k2 = _mm_or_si128(_mm_srli_epi32(k, rbits), _mm_slli_epi32(_mm_srli_epi32(k, bbits), 16));
-                // MsfTimeLoop("SIMD")
                 for (; x < width - 3; x += 4) {
                 for (; x < width - 3; x += 4) {
                     uint8_t * pixels = &raw[y * pitch + x * 4];
                     uint8_t * pixels = &raw[y * pitch + x * 4];
                     __m128i p = _mm_loadu_si128((__m128i *) pixels);
                     __m128i p = _mm_loadu_si128((__m128i *) pixels);
@@ -246,17 +311,30 @@ static MsfCookedFrame msf_cook_frame(void * allocContext, uint8_t * raw, uint8_t
                     __m128i g2 = _mm_adds_epu16(g1, _mm_srli_epi32(k, gbits));
                     __m128i g2 = _mm_adds_epu16(g1, _mm_srli_epi32(k, gbits));
                     __m128i g3 = _mm_and_si128(_mm_srli_epi32(g2, 16 - rbits - gbits), _mm_set1_epi32(gmask));
                     __m128i g3 = _mm_and_si128(_mm_srli_epi32(g2, 16 - rbits - gbits), _mm_set1_epi32(gmask));
 
 
+                    __m128i out = _mm_or_si128(_mm_or_si128(r3, g3), b3);
+
+                    //mask in transparency based on threshold
+                    //NOTE: we can theoretically do a sub instead of srli by doing an unsigned compare via bias
+                    //      to maybe save a TINY amount of throughput? but lol who cares maybe I'll do it later -m
+                    __m128i invAlphaMask = _mm_cmplt_epi32(_mm_srli_epi32(p, 24), _mm_set1_epi32(msf_gif_alpha_threshold));
+                    out = _mm_or_si128(_mm_and_si128(invAlphaMask, _mm_set1_epi32(paletteSize - 1)), _mm_andnot_si128(invAlphaMask, out));
+
                     //TODO: does storing this as a __m128i then reading it back as a uint32_t violate strict aliasing?
                     //TODO: does storing this as a __m128i then reading it back as a uint32_t violate strict aliasing?
                     uint32_t * c = &cooked[y * width + x];
                     uint32_t * c = &cooked[y * width + x];
-                    __m128i out = _mm_or_si128(_mm_or_si128(r3, g3), b3);
                     _mm_storeu_si128((__m128i *) c, out);
                     _mm_storeu_si128((__m128i *) c, out);
                 }
                 }
             #endif
             #endif
 
 
             //scalar cleanup loop
             //scalar cleanup loop
-            // MsfTimeLoop("scalar")
             for (; x < width; ++x) {
             for (; x < width; ++x) {
                 uint8_t * p = &raw[y * pitch + x * 4];
                 uint8_t * p = &raw[y * pitch + x * 4];
+
+                //transparent pixel if alpha is low
+                if (p[3] < msf_gif_alpha_threshold) {
+                    cooked[y * width + x] = paletteSize - 1;
+                    continue;
+                }
+
                 int dx = x & 3, dy = y & 3;
                 int dx = x & 3, dy = y & 3;
                 int k = ditherKernel[dy * 4 + dx];
                 int k = ditherKernel[dy * 4 + dx];
                 cooked[y * width + x] =
                 cooked[y * width + x] =
@@ -267,30 +345,25 @@ static MsfCookedFrame msf_cook_frame(void * allocContext, uint8_t * raw, uint8_t
         }
         }
 
 
         count = 0;
         count = 0;
-        MsfTimeLoop("mark and count") for (int i = 0; i < width * height; ++i) {
+        MsfTimeLoop("mark") for (int i = 0; i < width * height; ++i) {
             used[cooked[i]] = 1;
             used[cooked[i]] = 1;
         }
         }
 
 
-        //count used colors
-        MsfTimeLoop("count") for (int j = 0; j < paletteSize; ++j) {
+        //count used colors, transparent is ignored
+        MsfTimeLoop("count") for (int j = 0; j < paletteSize - 1; ++j) {
             count += used[j];
             count += used[j];
         }
         }
     } while (count >= 256 && --depth);
     } while (count >= 256 && --depth);
 
 
     MsfCookedFrame ret = { cooked, depth, count, rdepths[depth], gdepths[depth], bdepths[depth] };
     MsfCookedFrame ret = { cooked, depth, count, rdepths[depth], gdepths[depth], bdepths[depth] };
-    return ret;
+    *frame = ret;
 }
 }
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 /// Frame Compression                                                                                                ///
 /// Frame Compression                                                                                                ///
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
-typedef struct {
-    uint8_t * next;
-    size_t size;
-} MsfBufferHeader;
-
-static inline int msf_put_code(uint8_t * * writeHead, uint32_t * blockBits, int len, uint32_t code) {
+static inline void msf_put_code(uint8_t * * writeHead, uint32_t * blockBits, int len, uint32_t code) {
     //insert new code into block buffer
     //insert new code into block buffer
     int idx = *blockBits / 8;
     int idx = *blockBits / 8;
     int bit = *blockBits % 8;
     int bit = *blockBits % 8;
@@ -308,8 +381,6 @@ static inline int msf_put_code(uint8_t * * writeHead, uint32_t * blockBits, int
         (*writeHead)[0] = 255;
         (*writeHead)[0] = 255;
         memset((*writeHead) + 4, 0, 256);
         memset((*writeHead) + 4, 0, 256);
     }
     }
-
-    return 1;
 }
 }
 
 
 typedef struct {
 typedef struct {
@@ -324,30 +395,29 @@ static inline void msf_lzw_reset(MsfStridedList * lzw, int tableSize, int stride
     lzw->stride = stride;
     lzw->stride = stride;
 }
 }
 
 
-static uint8_t * msf_compress_frame(void * allocContext, int width, int height, int centiSeconds,
-                                    MsfCookedFrame frame, MsfCookedFrame previous, uint8_t * used)
+static MsfGifBuffer * msf_compress_frame(void * allocContext, int width, int height, int centiSeconds,
+                                         MsfCookedFrame frame, MsfGifState * handle, uint8_t * used, int16_t * lzwMem)
 { MsfTimeFunc
 { MsfTimeFunc
     //NOTE: we reserve enough memory for theoretical the worst case upfront because it's a reasonable amount,
     //NOTE: we reserve enough memory for theoretical the worst case upfront because it's a reasonable amount,
     //      and prevents us from ever having to check size or realloc during compression
     //      and prevents us from ever having to check size or realloc during compression
-    int maxBufSize = sizeof(MsfBufferHeader) + 32 + 256 * 3 + width * height * 3 / 2; //headers + color table + data
-    uint8_t * allocation = (uint8_t *) MSF_GIF_MALLOC(allocContext, maxBufSize);
-    if (!allocation) { return NULL; }
-    uint8_t * writeBase = allocation + sizeof(MsfBufferHeader);
-    uint8_t * writeHead = writeBase;
-    int lzwAllocSize = 4096 * (frame.count + 1) * sizeof(int16_t);
-    MsfStridedList lzw = { (int16_t *) MSF_GIF_MALLOC(allocContext, lzwAllocSize) };
-    if (!lzw.data) { MSF_GIF_FREE(allocContext, allocation, maxBufSize); return NULL; }
+    int maxBufSize = offsetof(MsfGifBuffer, data) + 32 + 256 * 3 + width * height * 3 / 2; //headers + color table + data
+    MsfGifBuffer * buffer = (MsfGifBuffer *) MSF_GIF_MALLOC(allocContext, maxBufSize);
+    if (!buffer) { return NULL; }
+    uint8_t * writeHead = buffer->data;
+    MsfStridedList lzw = { lzwMem };
 
 
     //allocate tlb
     //allocate tlb
     int totalBits = frame.rbits + frame.gbits + frame.bbits;
     int totalBits = frame.rbits + frame.gbits + frame.bbits;
-    int tlbSize = 1 << totalBits;
-    uint8_t tlb[1 << 16]; //only 64k, so stack allocating is fine
+    int tlbSize = (1 << totalBits) + 1;
+    uint8_t tlb[(1 << 16) + 1]; //only 64k, so stack allocating is fine
 
 
     //generate palette
     //generate palette
     typedef struct { uint8_t r, g, b; } Color3;
     typedef struct { uint8_t r, g, b; } Color3;
     Color3 table[256] = { {0} };
     Color3 table[256] = { {0} };
     int tableIdx = 1; //we start counting at 1 because 0 is the transparent color
     int tableIdx = 1; //we start counting at 1 because 0 is the transparent color
-    MsfTimeLoop("table") for (int i = 0; i < tlbSize; ++i) {
+    //transparent is always last in the table
+    tlb[tlbSize-1] = 0;
+    MsfTimeLoop("table") for (int i = 0; i < tlbSize-1; ++i) {
         if (used[i]) {
         if (used[i]) {
             tlb[i] = tableIdx;
             tlb[i] = tableIdx;
             int rmask = (1 << frame.rbits) - 1;
             int rmask = (1 << frame.rbits) - 1;
@@ -363,19 +433,32 @@ static uint8_t * msf_compress_frame(void * allocContext, int width, int height,
             table[tableIdx].r = r | r >> frame.rbits | r >> (frame.rbits * 2) | r >> (frame.rbits * 3);
             table[tableIdx].r = r | r >> frame.rbits | r >> (frame.rbits * 2) | r >> (frame.rbits * 3);
             table[tableIdx].g = g | g >> frame.gbits | g >> (frame.gbits * 2) | g >> (frame.gbits * 3);
             table[tableIdx].g = g | g >> frame.gbits | g >> (frame.gbits * 2) | g >> (frame.gbits * 3);
             table[tableIdx].b = b | b >> frame.bbits | b >> (frame.bbits * 2) | b >> (frame.bbits * 3);
             table[tableIdx].b = b | b >> frame.bbits | b >> (frame.bbits * 2) | b >> (frame.bbits * 3);
+            if (msf_gif_bgra_flag) {
+                uint8_t temp = table[tableIdx].r;
+                table[tableIdx].r = table[tableIdx].b;
+                table[tableIdx].b = temp;
+            }
             ++tableIdx;
             ++tableIdx;
         }
         }
     }
     }
+    int hasTransparentPixels = used[tlbSize-1];
 
 
     //SPEC: "Because of some algorithmic constraints however, black & white images which have one color bit
     //SPEC: "Because of some algorithmic constraints however, black & white images which have one color bit
     //       must be indicated as having a code size of 2."
     //       must be indicated as having a code size of 2."
     int tableBits = msf_imax(2, msf_bit_log(tableIdx - 1));
     int tableBits = msf_imax(2, msf_bit_log(tableIdx - 1));
     int tableSize = 1 << tableBits;
     int tableSize = 1 << tableBits;
     //NOTE: we don't just compare `depth` field here because it will be wrong for the first frame and we will segfault
     //NOTE: we don't just compare `depth` field here because it will be wrong for the first frame and we will segfault
+    MsfCookedFrame previous = handle->previousFrame;
     int hasSamePal = frame.rbits == previous.rbits && frame.gbits == previous.gbits && frame.bbits == previous.bbits;
     int hasSamePal = frame.rbits == previous.rbits && frame.gbits == previous.gbits && frame.bbits == previous.bbits;
+    int framesCompatible = hasSamePal && !hasTransparentPixels;
 
 
     //NOTE: because __attribute__((__packed__)) is annoyingly compiler-specific, we do this unreadable weirdness
     //NOTE: because __attribute__((__packed__)) is annoyingly compiler-specific, we do this unreadable weirdness
     char headerBytes[19] = "\x21\xF9\x04\x05\0\0\0\0" "\x2C\0\0\0\0\0\0\0\0\x80";
     char headerBytes[19] = "\x21\xF9\x04\x05\0\0\0\0" "\x2C\0\0\0\0\0\0\0\0\x80";
+    //NOTE: we need to check the frame number because if we reach into the buffer prior to the first frame,
+    //      we'll just clobber the file header instead, which is a bug
+    if (hasTransparentPixels && handle->framesSubmitted > 0) {
+        handle->listTail->data[3] = 0x09; //set the previous frame's disposal to background, so transparency is possible
+    }
     memcpy(&headerBytes[4], &centiSeconds, 2);
     memcpy(&headerBytes[4], &centiSeconds, 2);
     memcpy(&headerBytes[13], &width, 2);
     memcpy(&headerBytes[13], &width, 2);
     memcpy(&headerBytes[15], &height, 2);
     memcpy(&headerBytes[15], &height, 2);
@@ -397,12 +480,10 @@ static uint8_t * msf_compress_frame(void * allocContext, int width, int height,
     msf_lzw_reset(&lzw, tableSize, tableIdx);
     msf_lzw_reset(&lzw, tableSize, tableIdx);
     msf_put_code(&writeHead, &blockBits, msf_bit_log(lzw.len - 1), tableSize);
     msf_put_code(&writeHead, &blockBits, msf_bit_log(lzw.len - 1), tableSize);
 
 
-    int lastCode = hasSamePal && frame.pixels[0] == previous.pixels[0]? 0 : tlb[frame.pixels[0]];
+    int lastCode = framesCompatible && frame.pixels[0] == previous.pixels[0]? 0 : tlb[frame.pixels[0]];
     MsfTimeLoop("compress") for (int i = 1; i < width * height; ++i) {
     MsfTimeLoop("compress") for (int i = 1; i < width * height; ++i) {
         //PERF: branching vs. branchless version of this line is observed to have no discernable impact on speed
         //PERF: branching vs. branchless version of this line is observed to have no discernable impact on speed
-        int color = hasSamePal && frame.pixels[i] == previous.pixels[i]? 0 : tlb[frame.pixels[i]];
-        //PERF: branchless version must use && otherwise it will segfault on frame 1, but it's well-predicted so OK
-        // int color = (!(hasSamePal && frame.pixels[i] == previous.pixels[i])) * tlb[frame.pixels[i]];
+        int color = framesCompatible && frame.pixels[i] == previous.pixels[i]? 0 : tlb[frame.pixels[i]];
         int code = (&lzw.data[lastCode * lzw.stride])[color];
         int code = (&lzw.data[lastCode * lzw.stride])[color];
         if (code < 0) {
         if (code < 0) {
             //write to code stream
             //write to code stream
@@ -424,9 +505,6 @@ static uint8_t * msf_compress_frame(void * allocContext, int width, int height,
         }
         }
     }
     }
 
 
-    MSF_GIF_FREE(allocContext, lzw.data, lzwAllocSize);
-    MSF_GIF_FREE(allocContext, previous.pixels, width * height * sizeof(uint32_t));
-
     //write code for leftover index buffer contents, then the end code
     //write code for leftover index buffer contents, then the end code
     msf_put_code(&writeHead, &blockBits, msf_imin(12, msf_bit_log(lzw.len - 1)), lastCode);
     msf_put_code(&writeHead, &blockBits, msf_imin(12, msf_bit_log(lzw.len - 1)), lastCode);
     msf_put_code(&writeHead, &blockBits, msf_imin(12, msf_bit_log(lzw.len)), tableSize + 1);
     msf_put_code(&writeHead, &blockBits, msf_imin(12, msf_bit_log(lzw.len)), tableSize + 1);
@@ -439,38 +517,72 @@ static uint8_t * msf_compress_frame(void * allocContext, int width, int height,
     }
     }
     *writeHead++ = 0; //terminating block
     *writeHead++ = 0; //terminating block
 
 
-    //filling in buffer header and shrink buffer to fit data
-    MsfBufferHeader * header = (MsfBufferHeader *) allocation;
-    header->next = NULL;
-    header->size = writeHead - writeBase;
-    uint8_t * moved = (uint8_t *) MSF_GIF_REALLOC(allocContext, allocation, maxBufSize, writeHead - allocation);
-    if (!moved) { MSF_GIF_FREE(allocContext, allocation, maxBufSize); return NULL; }
+    //fill in buffer header and shrink buffer to fit data
+    buffer->next = NULL;
+    buffer->size = writeHead - buffer->data;
+    MsfGifBuffer * moved =
+        (MsfGifBuffer *) MSF_GIF_REALLOC(allocContext, buffer, maxBufSize, offsetof(MsfGifBuffer, data) + buffer->size);
+    if (!moved) { MSF_GIF_FREE(allocContext, buffer, maxBufSize); return NULL; }
     return moved;
     return moved;
 }
 }
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-/// Incremental API                                                                                                  ///
+/// To-memory API                                                                                                    ///
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
+static const int lzwAllocSize = 4096 * 256 * sizeof(int16_t);
+
+//NOTE: by C standard library conventions, freeing NULL should be a no-op,
+//      but just in case the user's custom free doesn't follow that rule, we do null checks on our end as well.
+static void msf_free_gif_state(MsfGifState * handle) {
+    if (handle->previousFrame.pixels) MSF_GIF_FREE(handle->customAllocatorContext, handle->previousFrame.pixels,
+                                                   handle->width * handle->height * sizeof(uint32_t));
+    if (handle->currentFrame.pixels)  MSF_GIF_FREE(handle->customAllocatorContext, handle->currentFrame.pixels,
+                                                   handle->width * handle->height * sizeof(uint32_t));
+    if (handle->lzwMem) MSF_GIF_FREE(handle->customAllocatorContext, handle->lzwMem, lzwAllocSize);
+    for (MsfGifBuffer * node = handle->listHead; node;) {
+        MsfGifBuffer * next = node->next; //NOTE: we have to copy the `next` pointer BEFORE freeing the node holding it
+        MSF_GIF_FREE(handle->customAllocatorContext, node, offsetof(MsfGifBuffer, data) + node->size);
+        node = next;
+    }
+    handle->listHead = NULL; //this implicitly marks the handle as invalid until the next msf_gif_begin() call
+}
+
 int msf_gif_begin(MsfGifState * handle, int width, int height) { MsfTimeFunc
 int msf_gif_begin(MsfGifState * handle, int width, int height) { MsfTimeFunc
+    //NOTE: we cannot stomp the entire struct to zero because we must preserve `customAllocatorContext`.
     MsfCookedFrame empty = {0}; //god I hate MSVC...
     MsfCookedFrame empty = {0}; //god I hate MSVC...
     handle->previousFrame = empty;
     handle->previousFrame = empty;
+    handle->currentFrame = empty;
     handle->width = width;
     handle->width = width;
     handle->height = height;
     handle->height = height;
+    handle->framesSubmitted = 0;
+
+    //allocate memory for LZW buffer
+    //NOTE: Unfortunately we can't just use stack memory for the LZW table because it's 2MB,
+    //      which is more stack space than most operating systems give by default,
+    //      and we can't realistically expect users to be willing to override that just to use our library,
+    //      so we have to allocate this on the heap.
+    handle->lzwMem = (int16_t *) MSF_GIF_MALLOC(handle->customAllocatorContext, lzwAllocSize);
+    handle->previousFrame.pixels =
+        (uint32_t *) MSF_GIF_MALLOC(handle->customAllocatorContext, handle->width * handle->height * sizeof(uint32_t));
+    handle->currentFrame.pixels =
+        (uint32_t *) MSF_GIF_MALLOC(handle->customAllocatorContext, handle->width * handle->height * sizeof(uint32_t));
 
 
     //setup header buffer header (lol)
     //setup header buffer header (lol)
-    handle->listHead = (uint8_t *) MSF_GIF_MALLOC(handle->customAllocatorContext, sizeof(MsfBufferHeader) + 32);
-    if (!handle->listHead) { return 0; }
+    handle->listHead = (MsfGifBuffer *) MSF_GIF_MALLOC(handle->customAllocatorContext, offsetof(MsfGifBuffer, data) + 32);
+    if (!handle->listHead || !handle->lzwMem || !handle->previousFrame.pixels || !handle->currentFrame.pixels) {
+        msf_free_gif_state(handle);
+        return 0;
+    }
     handle->listTail = handle->listHead;
     handle->listTail = handle->listHead;
-    MsfBufferHeader * header = (MsfBufferHeader *) handle->listHead;
-    header->next = NULL;
-    header->size = 32;
+    handle->listHead->next = NULL;
+    handle->listHead->size = 32;
 
 
     //NOTE: because __attribute__((__packed__)) is annoyingly compiler-specific, we do this unreadable weirdness
     //NOTE: because __attribute__((__packed__)) is annoyingly compiler-specific, we do this unreadable weirdness
-    char headerBytes[33] = "GIF89a\0\0\0\0\x10\0\0" "\x21\xFF\x0BNETSCAPE2.0\x03\x01\0\0\0";
+    char headerBytes[33] = "GIF89a\0\0\0\0\x70\0\0" "\x21\xFF\x0BNETSCAPE2.0\x03\x01\0\0\0";
     memcpy(&headerBytes[6], &width, 2);
     memcpy(&headerBytes[6], &width, 2);
     memcpy(&headerBytes[8], &height, 2);
     memcpy(&headerBytes[8], &height, 2);
-    memcpy(handle->listHead + sizeof(MsfBufferHeader), headerBytes, 32);
+    memcpy(handle->listHead->data, headerBytes, 32);
     return 1;
     return 1;
 }
 }
 
 
@@ -482,86 +594,83 @@ int msf_gif_frame(MsfGifState * handle, uint8_t * pixelData, int centiSecondsPer
     if (pitchInBytes == 0) pitchInBytes = handle->width * 4;
     if (pitchInBytes == 0) pitchInBytes = handle->width * 4;
     if (pitchInBytes < 0) pixelData -= pitchInBytes * (handle->height - 1);
     if (pitchInBytes < 0) pixelData -= pitchInBytes * (handle->height - 1);
 
 
-    uint8_t used[1 << 16]; //only 64k, so stack allocating is fine
-    MsfCookedFrame frame =
-        msf_cook_frame(handle->customAllocatorContext, pixelData, used, handle->width, handle->height, pitchInBytes,
-            msf_imin(maxBitDepth, handle->previousFrame.depth + 160 / msf_imax(1, handle->previousFrame.count)));
-    //TODO: de-duplicate cleanup code
-    if (!frame.pixels) {
-        MSF_GIF_FREE(handle->customAllocatorContext,
-                     handle->previousFrame.pixels, handle->width * handle->height * sizeof(uint32_t));
-        for (uint8_t * node = handle->listHead; node;) {
-            MsfBufferHeader * header = (MsfBufferHeader *) node;
-            node = header->next;
-            MSF_GIF_FREE(handle->customAllocatorContext, header, sizeof(MsfBufferHeader) + header->size);
-        }
-        handle->listHead = handle->listTail = NULL;
-        return 0;
-    }
+    uint8_t used[(1 << 16) + 1]; //only 64k, so stack allocating is fine
+    msf_cook_frame(&handle->currentFrame, pixelData, used, handle->width, handle->height, pitchInBytes,
+        msf_imin(maxBitDepth, handle->previousFrame.depth + 160 / msf_imax(1, handle->previousFrame.count)));
 
 
-    uint8_t * buffer = msf_compress_frame(handle->customAllocatorContext,
-        handle->width, handle->height, centiSecondsPerFame, frame, handle->previousFrame, used);
-    ((MsfBufferHeader *) handle->listTail)->next = buffer;
+    MsfGifBuffer * buffer = msf_compress_frame(handle->customAllocatorContext, handle->width, handle->height,
+        centiSecondsPerFame, handle->currentFrame, handle, used, handle->lzwMem);
+    if (!buffer) { msf_free_gif_state(handle); return 0; }
+    handle->listTail->next = buffer;
     handle->listTail = buffer;
     handle->listTail = buffer;
-    if (!buffer) {
-        MSF_GIF_FREE(handle->customAllocatorContext, frame.pixels, handle->width * handle->height * sizeof(uint32_t));
-        MSF_GIF_FREE(handle->customAllocatorContext,
-                     handle->previousFrame.pixels, handle->width * handle->height * sizeof(uint32_t));
-        for (uint8_t * node = handle->listHead; node;) {
-            MsfBufferHeader * header = (MsfBufferHeader *) node;
-            node = header->next;
-            MSF_GIF_FREE(handle->customAllocatorContext, header, sizeof(MsfBufferHeader) + header->size);
-        }
-        handle->listHead = handle->listTail = NULL;
-        return 0;
-    }
 
 
-    handle->previousFrame = frame;
+    //swap current and previous frames
+    MsfCookedFrame tmp = handle->previousFrame;
+    handle->previousFrame = handle->currentFrame;
+    handle->currentFrame = tmp;
+
+    handle->framesSubmitted += 1;
     return 1;
     return 1;
 }
 }
 
 
 MsfGifResult msf_gif_end(MsfGifState * handle) { MsfTimeFunc
 MsfGifResult msf_gif_end(MsfGifState * handle) { MsfTimeFunc
     if (!handle->listHead) { MsfGifResult empty = {0}; return empty; }
     if (!handle->listHead) { MsfGifResult empty = {0}; return empty; }
 
 
-    MSF_GIF_FREE(handle->customAllocatorContext,
-                 handle->previousFrame.pixels, handle->width * handle->height * sizeof(uint32_t));
-
     //first pass: determine total size
     //first pass: determine total size
     size_t total = 1; //1 byte for trailing marker
     size_t total = 1; //1 byte for trailing marker
-    for (uint8_t * node = handle->listHead; node;) {
-        MsfBufferHeader * header = (MsfBufferHeader *) node;
-        node = header->next;
-        total += header->size;
-    }
+    for (MsfGifBuffer * node = handle->listHead; node; node = node->next) { total += node->size; }
 
 
     //second pass: write data
     //second pass: write data
     uint8_t * buffer = (uint8_t *) MSF_GIF_MALLOC(handle->customAllocatorContext, total);
     uint8_t * buffer = (uint8_t *) MSF_GIF_MALLOC(handle->customAllocatorContext, total);
     if (buffer) {
     if (buffer) {
         uint8_t * writeHead = buffer;
         uint8_t * writeHead = buffer;
-        for (uint8_t * node = handle->listHead; node;) {
-            MsfBufferHeader * header = (MsfBufferHeader *) node;
-            memcpy(writeHead, node + sizeof(MsfBufferHeader), header->size);
-            writeHead += header->size;
-            node = header->next;
+        for (MsfGifBuffer * node = handle->listHead; node; node = node->next) {
+            memcpy(writeHead, node->data, node->size);
+            writeHead += node->size;
         }
         }
         *writeHead++ = 0x3B;
         *writeHead++ = 0x3B;
     }
     }
 
 
     //third pass: free buffers
     //third pass: free buffers
-    for (uint8_t * node = handle->listHead; node;) {
-        MsfBufferHeader * header = (MsfBufferHeader *) node;
-        node = header->next;
-        MSF_GIF_FREE(handle->customAllocatorContext, header, sizeof(MsfBufferHeader) + header->size);
-    }
+    msf_free_gif_state(handle);
 
 
     MsfGifResult ret = { buffer, total, total, handle->customAllocatorContext };
     MsfGifResult ret = { buffer, total, total, handle->customAllocatorContext };
     return ret;
     return ret;
 }
 }
 
 
-void msf_gif_free(MsfGifResult result) {
+void msf_gif_free(MsfGifResult result) { MsfTimeFunc
     if (result.data) { MSF_GIF_FREE(result.contextPointer, result.data, result.allocSize); }
     if (result.data) { MSF_GIF_FREE(result.contextPointer, result.data, result.allocSize); }
 }
 }
 
 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/// To-file API                                                                                                      ///
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+int msf_gif_begin_to_file(MsfGifState * handle, int width, int height, MsfGifFileWriteFunc func, void * filePointer) {
+    handle->fileWriteFunc = func;
+    handle->fileWriteData = filePointer;
+    return msf_gif_begin(handle, width, height);
+}
+
+int msf_gif_frame_to_file(MsfGifState * handle, uint8_t * pixelData, int centiSecondsPerFame, int maxBitDepth, int pitchInBytes) {
+    if (!msf_gif_frame(handle, pixelData, centiSecondsPerFame, maxBitDepth, pitchInBytes)) { return 0; }
+
+    //NOTE: this is a somewhat hacky implementation which is not perfectly efficient, but it's good enough for now
+    MsfGifBuffer * head = handle->listHead;
+    if (!handle->fileWriteFunc(head->data, head->size, 1, handle->fileWriteData)) { msf_free_gif_state(handle); return 0; }
+    handle->listHead = head->next;
+    MSF_GIF_FREE(handle->customAllocatorContext, head, offsetof(MsfGifBuffer, data) + head->size);
+    return 1;
+}
+
+int msf_gif_end_to_file(MsfGifState * handle) {
+    //NOTE: this is a somewhat hacky implementation which is not perfectly efficient, but it's good enough for now
+    MsfGifResult result = msf_gif_end(handle);
+    int ret = (int) handle->fileWriteFunc(result.data, result.dataSize, 1, handle->fileWriteData);
+    msf_gif_free(result);
+    return ret;
+}
+
 #endif //MSF_GIF_ALREADY_IMPLEMENTED_IN_THIS_TRANSLATION_UNIT
 #endif //MSF_GIF_ALREADY_IMPLEMENTED_IN_THIS_TRANSLATION_UNIT
 #endif //MSF_GIF_IMPL
 #endif //MSF_GIF_IMPL
 
 
@@ -570,7 +679,7 @@ void msf_gif_free(MsfGifResult result) {
 This software is available under 2 licenses -- choose whichever you prefer.
 This software is available under 2 licenses -- choose whichever you prefer.
 ------------------------------------------------------------------------------
 ------------------------------------------------------------------------------
 ALTERNATIVE A - MIT License
 ALTERNATIVE A - MIT License
-Copyright (c) 2020 Miles Fogle
+Copyright (c) 2021 Miles Fogle
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
 this software and associated documentation files (the "Software"), to deal in
 the Software without restriction, including without limitation the rights to
 the Software without restriction, including without limitation the rights to

+ 10 - 11
src/external/sdefl.h

@@ -1,5 +1,4 @@
-/*
-# Small Deflate
+/*# Small Deflate
 `sdefl` is a small bare bone lossless compression library in ANSI C (ISO C90)
 `sdefl` is a small bare bone lossless compression library in ANSI C (ISO C90)
 which implements the Deflate (RFC 1951) compressed data format specification standard.
 which implements the Deflate (RFC 1951) compressed data format specification standard.
 It is mainly tuned to get as much speed and compression ratio from as little code
 It is mainly tuned to get as much speed and compression ratio from as little code
@@ -33,16 +32,16 @@ this file implementation in *one* C or C++ file to prevent collisions.
 
 
 | Compressor name         | Compression| Decompress.| Compr. size | Ratio |
 | Compressor name         | Compression| Decompress.| Compr. size | Ratio |
 | ------------------------| -----------| -----------| ----------- | ----- |
 | ------------------------| -----------| -----------| ----------- | ----- |
-| sdefl 1.0 -0            |   127 MB/s |   233 MB/s |    40004116 | 39.88 |
-| sdefl 1.0 -1            |   111 MB/s |   259 MB/s |    38940674 | 38.82 |
-| sdefl 1.0 -5            |    45 MB/s |   275 MB/s |    36577183 | 36.46 |
-| sdefl 1.0 -7            |    38 MB/s |   276 MB/s |    36523781 | 36.41 |
-| zlib 1.2.11 -1          |    72 MB/s |   307 MB/s |    42298774 | 42.30 |
-| zlib 1.2.11 -6          |    24 MB/s |   313 MB/s |    36548921 | 36.55 |
-| zlib 1.2.11 -9          |    20 MB/s |   314 MB/s |    36475792 | 36.48 |
 | miniz 1.0 -1            |   122 MB/s |   208 MB/s |    48510028 | 48.51 |
 | miniz 1.0 -1            |   122 MB/s |   208 MB/s |    48510028 | 48.51 |
 | miniz 1.0 -6            |    27 MB/s |   260 MB/s |    36513697 | 36.51 |
 | miniz 1.0 -6            |    27 MB/s |   260 MB/s |    36513697 | 36.51 |
 | miniz 1.0 -9            |    23 MB/s |   261 MB/s |    36460101 | 36.46 |
 | miniz 1.0 -9            |    23 MB/s |   261 MB/s |    36460101 | 36.46 |
+| zlib 1.2.11 -1          |    72 MB/s |   307 MB/s |    42298774 | 42.30 |
+| zlib 1.2.11 -6          |    24 MB/s |   313 MB/s |    36548921 | 36.55 |
+| zlib 1.2.11 -9          |    20 MB/s |   314 MB/s |    36475792 | 36.48 |
+| sdefl 1.0 -0            |   127 MB/s |   371 MB/s |    40004116 | 39.88 |
+| sdefl 1.0 -1            |   111 MB/s |   398 MB/s |    38940674 | 38.82 |
+| sdefl 1.0 -5            |    45 MB/s |   420 MB/s |    36577183 | 36.46 |
+| sdefl 1.0 -7            |    38 MB/s |   423 MB/s |    36523781 | 36.41 |
 | libdeflate 1.3 -1       |   147 MB/s |   667 MB/s |    39597378 | 39.60 |
 | libdeflate 1.3 -1       |   147 MB/s |   667 MB/s |    39597378 | 39.60 |
 | libdeflate 1.3 -6       |    69 MB/s |   689 MB/s |    36648318 | 36.65 |
 | libdeflate 1.3 -6       |    69 MB/s |   689 MB/s |    36648318 | 36.65 |
 | libdeflate 1.3 -9       |    13 MB/s |   672 MB/s |    35197141 | 35.20 |
 | libdeflate 1.3 -9       |    13 MB/s |   672 MB/s |    35197141 | 35.20 |
@@ -398,8 +397,8 @@ sdefl_precode(struct sdefl_symcnt *cnt, unsigned *freqs, unsigned *items,
     if (offlen[cnt->off - 1]) break;
     if (offlen[cnt->off - 1]) break;
 
 
   total = (unsigned)(cnt->lit + cnt->off);
   total = (unsigned)(cnt->lit + cnt->off);
-  memcpy(lens, litlen, sizeof(unsigned char) * cnt->lit);
-  memcpy(lens + cnt->lit, offlen, sizeof(unsigned char) * cnt->off);
+  memcpy(lens, litlen, sizeof(unsigned char) * (size_t)cnt->lit);
+  memcpy(lens + cnt->lit, offlen, sizeof(unsigned char) * (size_t)cnt->off);
   do {
   do {
     unsigned len = lens[run_start];
     unsigned len = lens[run_start];
     unsigned run_end = run_start;
     unsigned run_end = run_start;

+ 202 - 82
src/external/sinfl.h

@@ -33,16 +33,16 @@ this file implementation in *one* C or C++ file to prevent collisions.
 
 
 | Compressor name         | Compression| Decompress.| Compr. size | Ratio |
 | Compressor name         | Compression| Decompress.| Compr. size | Ratio |
 | ------------------------| -----------| -----------| ----------- | ----- |
 | ------------------------| -----------| -----------| ----------- | ----- |
-| sdefl 1.0 -0            |   127 MB/s |   233 MB/s |    40004116 | 39.88 |
-| sdefl 1.0 -1            |   111 MB/s |   259 MB/s |    38940674 | 38.82 |
-| sdefl 1.0 -5            |    45 MB/s |   275 MB/s |    36577183 | 36.46 |
-| sdefl 1.0 -7            |    38 MB/s |   276 MB/s |    36523781 | 36.41 |
-| zlib 1.2.11 -1          |    72 MB/s |   307 MB/s |    42298774 | 42.30 |
-| zlib 1.2.11 -6          |    24 MB/s |   313 MB/s |    36548921 | 36.55 |
-| zlib 1.2.11 -9          |    20 MB/s |   314 MB/s |    36475792 | 36.48 |
 | miniz 1.0 -1            |   122 MB/s |   208 MB/s |    48510028 | 48.51 |
 | miniz 1.0 -1            |   122 MB/s |   208 MB/s |    48510028 | 48.51 |
 | miniz 1.0 -6            |    27 MB/s |   260 MB/s |    36513697 | 36.51 |
 | miniz 1.0 -6            |    27 MB/s |   260 MB/s |    36513697 | 36.51 |
 | miniz 1.0 -9            |    23 MB/s |   261 MB/s |    36460101 | 36.46 |
 | miniz 1.0 -9            |    23 MB/s |   261 MB/s |    36460101 | 36.46 |
+| zlib 1.2.11 -1          |    72 MB/s |   307 MB/s |    42298774 | 42.30 |
+| zlib 1.2.11 -6          |    24 MB/s |   313 MB/s |    36548921 | 36.55 |
+| zlib 1.2.11 -9          |    20 MB/s |   314 MB/s |    36475792 | 36.48 |
+| sdefl 1.0 -0            |   127 MB/s |   371 MB/s |    40004116 | 39.88 |
+| sdefl 1.0 -1            |   111 MB/s |   398 MB/s |    38940674 | 38.82 |
+| sdefl 1.0 -5            |    45 MB/s |   420 MB/s |    36577183 | 36.46 |
+| sdefl 1.0 -7            |    38 MB/s |   423 MB/s |    36523781 | 36.41 |
 | libdeflate 1.3 -1       |   147 MB/s |   667 MB/s |    39597378 | 39.60 |
 | libdeflate 1.3 -1       |   147 MB/s |   667 MB/s |    39597378 | 39.60 |
 | libdeflate 1.3 -6       |    69 MB/s |   689 MB/s |    36648318 | 36.65 |
 | libdeflate 1.3 -6       |    69 MB/s |   689 MB/s |    36648318 | 36.65 |
 | libdeflate 1.3 -9       |    13 MB/s |   672 MB/s |    35197141 | 35.20 |
 | libdeflate 1.3 -9       |    13 MB/s |   672 MB/s |    35197141 | 35.20 |
@@ -51,7 +51,7 @@ this file implementation in *one* C or C++ file to prevent collisions.
 ### Compression
 ### Compression
 Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia):
 Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia):
 
 
-| File    |   Original | `sdefl 0`  	| `sdefl 5` 	| `sdefl 7` |
+| File    |   Original | `sdefl 0`    | `sdefl 5`   | `sdefl 7` |
 | :------ | ---------: | -----------------: | ---------: | ----------: |
 | :------ | ---------: | -----------------: | ---------: | ----------: |
 | dickens | 10.192.446 |  4,260,187|  3,845,261|   3,833,657 |
 | dickens | 10.192.446 |  4,260,187|  3,845,261|   3,833,657 |
 | mozilla | 51.220.480 | 20,774,706 | 19,607,009 |  19,565,867 |
 | mozilla | 51.220.480 | 20,774,706 | 19,607,009 |  19,565,867 |
@@ -121,12 +121,15 @@ extern "C" {
 #define SINFL_OFF_TBL_SIZE 402
 #define SINFL_OFF_TBL_SIZE 402
 
 
 struct sinfl {
 struct sinfl {
-  int bits, bitcnt;
+  const unsigned char *bitptr;
+  unsigned long long bitbuf;
+  int bitcnt;
+
   unsigned lits[SINFL_LIT_TBL_SIZE];
   unsigned lits[SINFL_LIT_TBL_SIZE];
   unsigned dsts[SINFL_OFF_TBL_SIZE];
   unsigned dsts[SINFL_OFF_TBL_SIZE];
 };
 };
-extern int sinflate(void *out, const void *in, int size);
-extern int zsinflate(void *out, const void *in, int size);
+extern int sinflate(void *out, int cap, const void *in, int size);
+extern int zsinflate(void *out, int cap, const void *in, int size);
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
@@ -137,6 +140,33 @@ extern int zsinflate(void *out, const void *in, int size);
 #ifdef SINFL_IMPLEMENTATION
 #ifdef SINFL_IMPLEMENTATION
 
 
 #include <string.h> /* memcpy, memset */
 #include <string.h> /* memcpy, memset */
+#include <assert.h> /* assert */
+
+#if defined(__GNUC__) || defined(__clang__)
+#define sinfl_likely(x)       __builtin_expect((x),1)
+#define sinfl_unlikely(x)     __builtin_expect((x),0)
+#else
+#define sinfl_likely(x)       (x)
+#define sinfl_unlikely(x)     (x)
+#endif
+
+#ifndef SINFL_NO_SIMD
+#if __x86_64__ || defined(_WIN32) || defined(_WIN64)
+  #include <emmintrin.h>
+  #define sinfl_char16 __m128i
+  #define sinfl_char16_ld(p) _mm_loadu_si128((const __m128i *)(void*)(p))
+  #define sinfl_char16_str(d,v)  _mm_storeu_si128((__m128i*)(void*)(d), v)
+  #define sinfl_char16_char(c) _mm_set1_epi8(c)
+#elif defined(__arm__) || defined(__aarch64__)
+  #include <arm_neon.h>
+  #define sinfl_char16 uint8x16_t
+  #define sinfl_char16_ld(p) vld1q_u8((const unsigned char*)(p))
+  #define sinfl_char16_str(d,v) vst1q_u8((unsigned char*)(d), v)
+  #define sinfl_char16_char(c) vdupq_n_u8(c)
+#else
+  #define SINFL_NO_SIMD
+#endif
+#endif
 
 
 static int
 static int
 sinfl_bsr(unsigned n) {
 sinfl_bsr(unsigned n) {
@@ -147,20 +177,66 @@ sinfl_bsr(unsigned n) {
   return 31 - __builtin_clz(n);
   return 31 - __builtin_clz(n);
 #endif
 #endif
 }
 }
+static unsigned long long
+sinfl_read64(const void *p) {
+  unsigned long long n;
+  memcpy(&n, p, 8);
+  return n;
+}
+#ifndef SINFL_NO_SIMD
+static unsigned char*
+sinfl_write128(unsigned char *dst, sinfl_char16 w) {
+  sinfl_char16_str(dst, w);
+  return dst + 8;
+}
+static void
+sinfl_copy128(unsigned char **dst, unsigned char **src) {
+  sinfl_char16 n = sinfl_char16_ld(*src);
+  sinfl_char16_str(*dst, n);
+  *dst += 16, *src += 16;
+}
+#else
+static unsigned char*
+sinfl_write64(unsigned char *dst, unsigned long long w) {
+  memcpy(dst, &w, 8);
+  return dst + 8;
+}
+static void
+sinfl_copy64(unsigned char **dst, unsigned char **src) {
+  unsigned long long n;
+  memcpy(&n, *src, 8);
+  memcpy(*dst, &n, 8);
+  *dst += 8, *src += 8;
+}
+#endif
+static void
+sinfl_refill(struct sinfl *s) {
+  s->bitbuf |= sinfl_read64(s->bitptr) << s->bitcnt;
+  s->bitptr += (63 - s->bitcnt) >> 3;
+  s->bitcnt |= 56; /* bitcount is in range [56,63] */
+}
 static int
 static int
-sinfl_get(const unsigned char **src, const unsigned char *end, struct sinfl *s,
-          int n) {
-  const unsigned char *in = *src;
-  int v = s->bits & ((1 << n)-1);
-  s->bits >>= n;
-  s->bitcnt = s->bitcnt - n;
-  s->bitcnt = s->bitcnt < 0 ? 0 : s->bitcnt;
-  while (s->bitcnt < 16 && in < end) {
-    s->bits |= (*in++) << s->bitcnt;
-    s->bitcnt += 8;
-  }
-  *src = in;
-  return v;
+sinfl_peek(struct sinfl *s, int cnt) {
+  assert(cnt >= 0 && cnt <= 56);
+  assert(cnt <= s->bitcnt);
+  return s->bitbuf & ((1ull << cnt) - 1);
+}
+static void
+sinfl_consume(struct sinfl *s, int cnt) {
+  assert(cnt <= s->bitcnt);
+  s->bitbuf >>= cnt;
+  s->bitcnt -= cnt;
+}
+static int
+sinfl__get(struct sinfl *s, int cnt) {
+  int res = sinfl_peek(s, cnt);
+  sinfl_consume(s, cnt);
+  return res;
+}
+static int
+sinfl_get(struct sinfl *s, int cnt) {
+  sinfl_refill(s);
+  return sinfl__get(s, cnt);
 }
 }
 struct sinfl_gen {
 struct sinfl_gen {
   int len;
   int len;
@@ -276,22 +352,22 @@ sinfl_build(unsigned *tbl, unsigned char *lens, int tbl_bits, int maxlen,
   }
   }
 }
 }
 static int
 static int
-sinfl_decode(const unsigned char **in, const unsigned char *end,
-             struct sinfl *s, const unsigned *tbl, int bit_len) {
-  int idx = s->bits & ((1 << bit_len) - 1);
+sinfl_decode(struct sinfl *s, const unsigned *tbl, int bit_len) {
+  sinfl_refill(s);
+  {int idx = sinfl_peek(s, bit_len);
   unsigned key = tbl[idx];
   unsigned key = tbl[idx];
   if (key & 0x10) {
   if (key & 0x10) {
     /* sub-table lookup */
     /* sub-table lookup */
     int len = key & 0x0f;
     int len = key & 0x0f;
-    sinfl_get(in, end, s, bit_len);
-    idx = s->bits & ((1 << len)-1);
+    sinfl_consume(s, bit_len);
+    idx = sinfl_peek(s, len);
     key = tbl[((key >> 16) & 0xffff) + (unsigned)idx];
     key = tbl[((key >> 16) & 0xffff) + (unsigned)idx];
   }
   }
-  sinfl_get(in, end, s, key & 0x0f);
-  return (key >> 16) & 0x0fff;
+  sinfl_consume(s, key & 0x0f);
+  return (key >> 16) & 0x0fff;}
 }
 }
 static int
 static int
-sinfl_decompress(unsigned char *out, const unsigned char *in, int size) {
+sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) {
   static const unsigned char order[] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
   static const unsigned char order[] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
   static const short dbase[30+2] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
   static const short dbase[30+2] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
       257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
       257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
@@ -302,19 +378,22 @@ sinfl_decompress(unsigned char *out, const unsigned char *in, int size) {
   static const unsigned char lbits[29+2] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,
   static const unsigned char lbits[29+2] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,
       4,4,4,5,5,5,5,0,0,0};
       4,4,4,5,5,5,5,0,0,0};
 
 
+  const unsigned char *oe = out + cap;
   const unsigned char *e = in + size, *o = out;
   const unsigned char *e = in + size, *o = out;
   enum sinfl_states {hdr,stored,fixed,dyn,blk};
   enum sinfl_states {hdr,stored,fixed,dyn,blk};
   enum sinfl_states state = hdr;
   enum sinfl_states state = hdr;
   struct sinfl s = {0};
   struct sinfl s = {0};
   int last = 0;
   int last = 0;
 
 
-  sinfl_get(&in,e,&s,0); /* buffer input */
-  while (in < e || s.bitcnt) {
+  s.bitptr = in;
+  while (1) {
     switch (state) {
     switch (state) {
     case hdr: {
     case hdr: {
-      int type = 0; /* block header */
-      last = sinfl_get(&in,e,&s,1);
-      type = sinfl_get(&in,e,&s,2);
+      /* block header */
+      int type = 0;
+      sinfl_refill(&s);
+      last = sinfl__get(&s,1);
+      type = sinfl__get(&s,2);
 
 
       switch (type) {default: return (int)(out-o);
       switch (type) {default: return (int)(out-o);
       case 0x00: state = stored; break;
       case 0x00: state = stored; break;
@@ -322,10 +401,12 @@ sinfl_decompress(unsigned char *out, const unsigned char *in, int size) {
       case 0x02: state = dyn; break;}
       case 0x02: state = dyn; break;}
     } break;
     } break;
     case stored: {
     case stored: {
-      int len; /* uncompressed block */
-      sinfl_get(&in,e,&s,s.bitcnt & 7);
-      len = sinfl_get(&in,e,&s,16);
-      //int nlen = sinfl_get(&in,e,&s,16);
+      /* uncompressed block */
+      int len;
+      sinfl_refill(&s);
+      sinfl__get(&s,s.bitcnt & 7);
+      len = sinfl__get(&s,16);
+      //int nlen = sinfl__get(&s,16);   // @raysan5: Unused variable?
       in -= 2; s.bitcnt = 0;
       in -= 2; s.bitcnt = 0;
 
 
       if (len > (e-in) || !len)
       if (len > (e-in) || !len)
@@ -353,72 +434,111 @@ sinfl_decompress(unsigned char *out, const unsigned char *in, int size) {
         int n, i;
         int n, i;
         unsigned hlens[SINFL_PRE_TBL_SIZE];
         unsigned hlens[SINFL_PRE_TBL_SIZE];
         unsigned char nlens[19] = {0}, lens[288+32];
         unsigned char nlens[19] = {0}, lens[288+32];
-        int nlit = 257 + sinfl_get(&in,e,&s,5);
-        int ndist = 1 + sinfl_get(&in,e,&s,5);
-        int nlen = 4 + sinfl_get(&in,e,&s,4);
+
+        sinfl_refill(&s);
+        {int nlit = 257 + sinfl__get(&s,5);
+        int ndist = 1 + sinfl__get(&s,5);
+        int nlen = 4 + sinfl__get(&s,4);
         for (n = 0; n < nlen; n++)
         for (n = 0; n < nlen; n++)
-          nlens[order[n]] = (unsigned char)sinfl_get(&in,e,&s,3);
+          nlens[order[n]] = (unsigned char)sinfl_get(&s,3);
         sinfl_build(hlens, nlens, 7, 7, 19);
         sinfl_build(hlens, nlens, 7, 7, 19);
 
 
         /* decode code lengths */
         /* decode code lengths */
         for (n = 0; n < nlit + ndist;) {
         for (n = 0; n < nlit + ndist;) {
-          int sym = sinfl_decode(&in, e, &s, hlens, 7);
+          int sym = sinfl_decode(&s, hlens, 7);
           switch (sym) {default: lens[n++] = (unsigned char)sym; break;
           switch (sym) {default: lens[n++] = (unsigned char)sym; break;
-          case 16: for (i=3+sinfl_get(&in,e,&s,2);i;i--,n++) lens[n]=lens[n-1]; break;
-          case 17: for (i=3+sinfl_get(&in,e,&s,3);i;i--,n++) lens[n]=0; break;
-          case 18: for (i=11+sinfl_get(&in,e,&s,7);i;i--,n++) lens[n]=0; break;}
+          case 16: for (i=3+sinfl_get(&s,2);i;i--,n++) lens[n]=lens[n-1]; break;
+          case 17: for (i=3+sinfl_get(&s,3);i;i--,n++) lens[n]=0; break;
+          case 18: for (i=11+sinfl_get(&s,7);i;i--,n++) lens[n]=0; break;}
         }
         }
         /* build lit/dist tables */
         /* build lit/dist tables */
         sinfl_build(s.lits, lens, 10, 15, nlit);
         sinfl_build(s.lits, lens, 10, 15, nlit);
         sinfl_build(s.dsts, lens + nlit, 8, 15, ndist);
         sinfl_build(s.dsts, lens + nlit, 8, 15, ndist);
-        state = blk;
+        state = blk;}
     } break;
     } break;
     case blk: {
     case blk: {
       /* decompress block */
       /* decompress block */
-      int i, sym = sinfl_decode(&in, e, &s, s.lits, 10);
-      if (sym > 256) {sym -= 257; /* match symbol */
-        {int len = sinfl_get(&in, e, &s, lbits[sym]) + lbase[sym];
-        int dsym = sinfl_decode(&in, e, &s, s.dsts, 8);
-        int offs = sinfl_get(&in, e, &s, dbits[dsym]) + dbase[dsym];
-        if (offs > (int)(out-o)) {
+      int sym = sinfl_decode(&s, s.lits, 10);
+      if (sym < 256) {
+        /* literal */
+        *out++ = (unsigned char)sym;
+      } else if (sym > 256) {sym -= 257; /* match symbol */
+        sinfl_refill(&s);
+        {int len = sinfl__get(&s, lbits[sym]) + lbase[sym];
+        int dsym = sinfl_decode(&s, s.dsts, 8);
+        int offs = sinfl__get(&s, dbits[dsym]) + dbase[dsym];
+        unsigned char *dst = out, *src = out - offs;
+        if (sinfl_unlikely(offs > (int)(out-o))) {
           return (int)(out-o);
           return (int)(out-o);
-        } else if (offs == 1) {
-          /* rle match copying */
-          unsigned char c = *(out - offs);
-          unsigned long w = (c << 24) | (c << 16) | (c << 8) | c;
-          for (i = 0; i < len >> 2; ++i) {
-            memcpy(out, &w, 4);
-            out += 4;
+        }
+        out = out + len;
+
+#ifndef SINFL_NO_SIMD
+        if (sinfl_likely(oe - out >= 16 * 3)) {
+          if (offs >= 16) {
+            /* copy match */
+            sinfl_copy128(&dst, &src);
+            sinfl_copy128(&dst, &src);
+            do sinfl_copy128(&dst, &src);
+            while (dst < out);
+          } else if (offs == 1) {
+            /* rle match copying */
+            sinfl_char16 w = sinfl_char16_char(src[0]);
+            dst = sinfl_write128(dst, w);
+            dst = sinfl_write128(dst, w);
+            do dst = sinfl_write128(dst, w);
+            while (dst < out);
+          } else {
+            *dst++ = *src++;
+            *dst++ = *src++;
+            do *dst++ = *src++;
+            while (dst < out);
           }
           }
-          len = len & 3;
-        } else if (offs >= 4) {
-          /* copy match */
-          int wcnt = len >> 2;
-          for (i = 0; i < wcnt; ++i) {
-            unsigned long w = 0;
-            memcpy(&w, out - offs, 4);
-            memcpy(out, &w, 4);
-            out += 4;
+        }
+#else
+        if (sinfl_likely(oe - out >= 3 * 8 - 3)) {
+          if (offs >= 8) {
+            /* copy match */
+            sinfl_copy64(&dst, &src);
+            sinfl_copy64(&dst, &src);
+            do sinfl_copy64(&dst, &src);
+            while (dst < out);
+          } else if (offs == 1) {
+            /* rle match copying */
+            unsigned int c = src[0];
+            unsigned int hw = (c << 24u) | (c << 16u) | (c << 8u) | (unsigned)c;
+            unsigned long long w = (unsigned long long)hw << 32llu | hw;
+            dst = sinfl_write64(dst, w);
+            dst = sinfl_write64(dst, w);
+            do dst = sinfl_write64(dst, w);
+            while (dst < out);
+          } else {
+            *dst++ = *src++;
+            *dst++ = *src++;
+            do *dst++ = *src++;
+            while (dst < out);
           }
           }
-          len = len & 3;
         }
         }
-        for (i = 0; i < len; ++i)
-          {*out = *(out-offs), out++;}
+#endif
+        else {
+          *dst++ = *src++;
+          *dst++ = *src++;
+          do *dst++ = *src++;
+          while (dst < out);}
         }
         }
-      } else if (sym == 256) {
+      } else {
         /* end of block */
         /* end of block */
         if (last) return (int)(out-o);
         if (last) return (int)(out-o);
         state = hdr;
         state = hdr;
         break;
         break;
-        /* literal */
-      } else *out++ = (unsigned char)sym;
+      }
     } break;}
     } break;}
   }
   }
   return (int)(out-o);
   return (int)(out-o);
 }
 }
 extern int
 extern int
-sinflate(void *out, const void *in, int size) {
-  return sinfl_decompress((unsigned char*)out, (const unsigned char*)in, size);
+sinflate(void *out, int cap, const void *in, int size) {
+  return sinfl_decompress((unsigned char*)out, cap, (const unsigned char*)in, size);
 }
 }
 static unsigned
 static unsigned
 sinfl_adler32(unsigned adler32, const unsigned char *in, int in_len) {
 sinfl_adler32(unsigned adler32, const unsigned char *in, int in_len) {
@@ -448,11 +568,11 @@ sinfl_adler32(unsigned adler32, const unsigned char *in, int in_len) {
   } return (unsigned)(s2 << 16) + (unsigned)s1;
   } return (unsigned)(s2 << 16) + (unsigned)s1;
 }
 }
 extern int
 extern int
-zsinflate(void *out, const void *mem, int size) {
+zsinflate(void *out, int cap, const void *mem, int size) {
   const unsigned char *in = (const unsigned char*)mem;
   const unsigned char *in = (const unsigned char*)mem;
   if (size >= 6) {
   if (size >= 6) {
     const unsigned char *eob = in + size - 4;
     const unsigned char *eob = in + size - 4;
-    int n = sinfl_decompress((unsigned char*)out, in + 2u, size);
+    int n = sinfl_decompress((unsigned char*)out, cap, in + 2u, size);
     unsigned a = sinfl_adler32(1u, (unsigned char*)out, n);
     unsigned a = sinfl_adler32(1u, (unsigned char*)out, n);
     unsigned h = eob[0] << 24 | eob[1] << 16 | eob[2] << 8 | eob[3] << 0;
     unsigned h = eob[0] << 24 | eob[1] << 16 | eob[2] << 8 | eob[3] << 0;
     return a == h ? n : -1;
     return a == h ? n : -1;

+ 208 - 73
src/external/stb_image.h

@@ -1,4 +1,4 @@
-/* stb_image - v2.26 - public domain image loader - http://nothings.org/stb
+/* stb_image - v2.27 - public domain image loader - http://nothings.org/stb
                                   no warranty implied; use at your own risk
                                   no warranty implied; use at your own risk
 
 
    Do this:
    Do this:
@@ -48,6 +48,7 @@ LICENSE
 
 
 RECENT REVISION HISTORY:
 RECENT REVISION HISTORY:
 
 
+      2.27  (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
       2.26  (2020-07-13) many minor fixes
       2.26  (2020-07-13) many minor fixes
       2.25  (2020-02-02) fix warnings
       2.25  (2020-02-02) fix warnings
       2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
       2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
@@ -89,7 +90,7 @@ RECENT REVISION HISTORY:
                                            Jeremy Sawicki (handle all ImageNet JPGs)
                                            Jeremy Sawicki (handle all ImageNet JPGs)
  Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
  Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
     Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
     Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
-    Arseny Kapoulkine
+    Arseny Kapoulkine                      Simon Breuss (16-bit PNM)
     John-Mark Allen
     John-Mark Allen
     Carmelo J Fdez-Aguera
     Carmelo J Fdez-Aguera
 
 
@@ -102,7 +103,7 @@ RECENT REVISION HISTORY:
     Thomas Ruf              Ronny Chevalier                         github:rlyeh
     Thomas Ruf              Ronny Chevalier                         github:rlyeh
     Janez Zemva             John Bartholomew   Michal Cichon        github:romigrou
     Janez Zemva             John Bartholomew   Michal Cichon        github:romigrou
     Jonathan Blow           Ken Hamada         Tero Hanninen        github:svdijk
     Jonathan Blow           Ken Hamada         Tero Hanninen        github:svdijk
-                            Laurent Gomila     Cort Stratton        github:snagar
+    Eugene Golushkov        Laurent Gomila     Cort Stratton        github:snagar
     Aruelien Pocheville     Sergio Gonzalez    Thibault Reuille     github:Zelex
     Aruelien Pocheville     Sergio Gonzalez    Thibault Reuille     github:Zelex
     Cass Everitt            Ryamond Barbiero                        github:grim210
     Cass Everitt            Ryamond Barbiero                        github:grim210
     Paul Du Bois            Engin Manap        Aldo Culquicondor    github:sammyhw
     Paul Du Bois            Engin Manap        Aldo Culquicondor    github:sammyhw
@@ -110,11 +111,13 @@ RECENT REVISION HISTORY:
     Josh Tobin                                 Matthew Gregan       github:poppolopoppo
     Josh Tobin                                 Matthew Gregan       github:poppolopoppo
     Julian Raschke          Gregory Mullen     Christian Floisand   github:darealshinji
     Julian Raschke          Gregory Mullen     Christian Floisand   github:darealshinji
     Baldur Karlsson         Kevin Schmidt      JR Smith             github:Michaelangel007
     Baldur Karlsson         Kevin Schmidt      JR Smith             github:Michaelangel007
-                            Brad Weinberger    Matvey Cherevko      [reserved]
+                            Brad Weinberger    Matvey Cherevko      github:mosra
     Luca Sas                Alexander Veselov  Zack Middleton       [reserved]
     Luca Sas                Alexander Veselov  Zack Middleton       [reserved]
     Ryan C. Gordon          [reserved]                              [reserved]
     Ryan C. Gordon          [reserved]                              [reserved]
                      DO NOT ADD YOUR NAME HERE
                      DO NOT ADD YOUR NAME HERE
 
 
+                     Jacko Dirks
+
   To add your name to the credits, pick a random blank space in the middle and fill it.
   To add your name to the credits, pick a random blank space in the middle and fill it.
   80% of merge conflicts on stb PRs are due to people adding their name at the end
   80% of merge conflicts on stb PRs are due to people adding their name at the end
   of the credits.
   of the credits.
@@ -176,6 +179,32 @@ RECENT REVISION HISTORY:
 //
 //
 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
 //
 //
+// To query the width, height and component count of an image without having to
+// decode the full file, you can use the stbi_info family of functions:
+//
+//   int x,y,n,ok;
+//   ok = stbi_info(filename, &x, &y, &n);
+//   // returns ok=1 and sets x, y, n if image is a supported format,
+//   // 0 otherwise.
+//
+// Note that stb_image pervasively uses ints in its public API for sizes,
+// including sizes of memory buffers. This is now part of the API and thus
+// hard to change without causing breakage. As a result, the various image
+// loaders all have certain limits on image size; these differ somewhat
+// by format but generally boil down to either just under 2GB or just under
+// 1GB. When the decoded image would be larger than this, stb_image decoding
+// will fail.
+//
+// Additionally, stb_image will reject image files that have any of their
+// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
+// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
+// the only way to have an image with such dimensions load correctly
+// is for it to have a rather extreme aspect ratio. Either way, the
+// assumption here is that such larger images are likely to be malformed
+// or malicious. If you do need to load an image with individual dimensions
+// larger than that, and it still fits in the overall size limit, you can
+// #define STBI_MAX_DIMENSIONS on your own to be something larger.
+//
 // ===========================================================================
 // ===========================================================================
 //
 //
 // UNICODE:
 // UNICODE:
@@ -281,11 +310,10 @@ RECENT REVISION HISTORY:
 //
 //
 // iPhone PNG support:
 // iPhone PNG support:
 //
 //
-// By default we convert iphone-formatted PNGs back to RGB, even though
-// they are internally encoded differently. You can disable this conversion
-// by calling stbi_convert_iphone_png_to_rgb(0), in which case
-// you will always just get the native iphone "format" through (which
-// is BGR stored in RGB).
+// We optionally support converting iPhone-formatted PNGs (which store
+// premultiplied BGRA) back to RGB, even though they're internally encoded
+// differently. To enable this conversion, call
+// stbi_convert_iphone_png_to_rgb(1).
 //
 //
 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
 // pixel to remove any premultiplied alpha *only* if the image file explicitly
 // pixel to remove any premultiplied alpha *only* if the image file explicitly
@@ -489,6 +517,8 @@ STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
 // as above, but only applies to images loaded on the thread that calls the function
 // as above, but only applies to images loaded on the thread that calls the function
 // this function is only available if your compiler supports thread-local variables;
 // this function is only available if your compiler supports thread-local variables;
 // calling it will fail to link if your compiler doesn't
 // calling it will fail to link if your compiler doesn't
+STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
 STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
 STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
 
 
 // ZLIB client - used by PNG, available for other purposes
 // ZLIB client - used by PNG, available for other purposes
@@ -634,7 +664,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
 #ifdef STBI_HAS_LROTL
 #ifdef STBI_HAS_LROTL
    #define stbi_lrot(x,y)  _lrotl(x,y)
    #define stbi_lrot(x,y)  _lrotl(x,y)
 #else
 #else
-   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
+   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
 #endif
 #endif
 
 
 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
@@ -748,9 +778,12 @@ static int stbi__sse2_available(void)
 
 
 #ifdef STBI_NEON
 #ifdef STBI_NEON
 #include <arm_neon.h>
 #include <arm_neon.h>
-// assume GCC or Clang on ARM targets
+#ifdef _MSC_VER
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+#else
 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 #endif
 #endif
+#endif
 
 
 #ifndef STBI_SIMD_ALIGN
 #ifndef STBI_SIMD_ALIGN
 #define STBI_SIMD_ALIGN(type, name) type name
 #define STBI_SIMD_ALIGN(type, name) type name
@@ -924,6 +957,7 @@ static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
 static int      stbi__pnm_test(stbi__context *s);
 static int      stbi__pnm_test(stbi__context *s);
 static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
+static int      stbi__pnm_is16(stbi__context *s);
 #endif
 #endif
 
 
 static
 static
@@ -998,7 +1032,7 @@ static int stbi__mad3sizes_valid(int a, int b, int c, int add)
 }
 }
 
 
 // returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
 // returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
 static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
 static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
 {
 {
    return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
    return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
@@ -1021,7 +1055,7 @@ static void *stbi__malloc_mad3(int a, int b, int c, int add)
    return stbi__malloc(a*b*c + add);
    return stbi__malloc(a*b*c + add);
 }
 }
 
 
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
 static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
 static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
 {
 {
    if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
    if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
@@ -1087,9 +1121,8 @@ static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int re
    ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
    ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
    ri->num_channels = 0;
    ri->num_channels = 0;
 
 
-   #ifndef STBI_NO_JPEG
-   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
-   #endif
+   // test the formats with a very explicit header first (at least a FOURCC
+   // or distinctive magic number first)
    #ifndef STBI_NO_PNG
    #ifndef STBI_NO_PNG
    if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
    if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
    #endif
    #endif
@@ -1107,6 +1140,13 @@ static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int re
    #ifndef STBI_NO_PIC
    #ifndef STBI_NO_PIC
    if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
    if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
    #endif
    #endif
+
+   // then the formats that can end up attempting to load with just 1 or 2
+   // bytes matching expectations; these are prone to false positives, so
+   // try them later
+   #ifndef STBI_NO_JPEG
+   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
+   #endif
    #ifndef STBI_NO_PNM
    #ifndef STBI_NO_PNM
    if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
    if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
    #endif
    #endif
@@ -1262,12 +1302,12 @@ static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, in
 
 
 #ifndef STBI_NO_STDIO
 #ifndef STBI_NO_STDIO
 
 
-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
 STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
 STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
 STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
 STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
 #endif
 #endif
 
 
-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
 STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
 STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
 {
 {
 	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
 	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
@@ -1277,16 +1317,16 @@ STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wch
 static FILE *stbi__fopen(char const *filename, char const *mode)
 static FILE *stbi__fopen(char const *filename, char const *mode)
 {
 {
    FILE *f;
    FILE *f;
-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
    wchar_t wMode[64];
    wchar_t wMode[64];
    wchar_t wFilename[1024];
    wchar_t wFilename[1024];
-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)))
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
       return 0;
       return 0;
 
 
-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)))
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
       return 0;
       return 0;
 
 
-#if _MSC_VER >= 1400
+#if defined(_MSC_VER) && _MSC_VER >= 1400
 	if (0 != _wfopen_s(&f, wFilename, wMode))
 	if (0 != _wfopen_s(&f, wFilename, wMode))
 		f = 0;
 		f = 0;
 #else
 #else
@@ -1662,7 +1702,8 @@ static int stbi__get16le(stbi__context *s)
 static stbi__uint32 stbi__get32le(stbi__context *s)
 static stbi__uint32 stbi__get32le(stbi__context *s)
 {
 {
    stbi__uint32 z = stbi__get16le(s);
    stbi__uint32 z = stbi__get16le(s);
-   return z + (stbi__get16le(s) << 16);
+   z += (stbi__uint32)stbi__get16le(s) << 16;
+   return z;
 }
 }
 #endif
 #endif
 
 
@@ -2090,13 +2131,12 @@ stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
    int sgn;
    int sgn;
    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
 
 
-   sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
+   sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
    k = stbi_lrot(j->code_buffer, n);
    k = stbi_lrot(j->code_buffer, n);
-   if (n < 0 || n >= (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))) return 0;
    j->code_buffer = k & ~stbi__bmask[n];
    j->code_buffer = k & ~stbi__bmask[n];
    k &= stbi__bmask[n];
    k &= stbi__bmask[n];
    j->code_bits -= n;
    j->code_bits -= n;
-   return k + (stbi__jbias[n] & ~sgn);
+   return k + (stbi__jbias[n] & (sgn - 1));
 }
 }
 
 
 // get some unsigned bits
 // get some unsigned bits
@@ -2146,7 +2186,7 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman
 
 
    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
    t = stbi__jpeg_huff_decode(j, hdc);
    t = stbi__jpeg_huff_decode(j, hdc);
-   if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
 
 
    // 0 all the ac values now so we can do it 32-bits at a time
    // 0 all the ac values now so we can do it 32-bits at a time
    memset(data,0,64*sizeof(data[0]));
    memset(data,0,64*sizeof(data[0]));
@@ -2203,12 +2243,12 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__
       // first scan for DC coefficient, must be first
       // first scan for DC coefficient, must be first
       memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
       memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
       t = stbi__jpeg_huff_decode(j, hdc);
       t = stbi__jpeg_huff_decode(j, hdc);
-      if (t == -1) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+      if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
       diff = t ? stbi__extend_receive(j, t) : 0;
       diff = t ? stbi__extend_receive(j, t) : 0;
 
 
       dc = j->img_comp[b].dc_pred + diff;
       dc = j->img_comp[b].dc_pred + diff;
       j->img_comp[b].dc_pred = dc;
       j->img_comp[b].dc_pred = dc;
-      data[0] = (short) (dc << j->succ_low);
+      data[0] = (short) (dc * (1 << j->succ_low));
    } else {
    } else {
       // refinement scan for DC coefficient
       // refinement scan for DC coefficient
       if (stbi__jpeg_get_bit(j))
       if (stbi__jpeg_get_bit(j))
@@ -2245,7 +2285,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
             j->code_buffer <<= s;
             j->code_buffer <<= s;
             j->code_bits -= s;
             j->code_bits -= s;
             zig = stbi__jpeg_dezigzag[k++];
             zig = stbi__jpeg_dezigzag[k++];
-            data[zig] = (short) ((r >> 8) << shift);
+            data[zig] = (short) ((r >> 8) * (1 << shift));
          } else {
          } else {
             int rs = stbi__jpeg_huff_decode(j, hac);
             int rs = stbi__jpeg_huff_decode(j, hac);
             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
@@ -2263,7 +2303,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
             } else {
             } else {
                k += r;
                k += r;
                zig = stbi__jpeg_dezigzag[k++];
                zig = stbi__jpeg_dezigzag[k++];
-               data[zig] = (short) (stbi__extend_receive(j,s) << shift);
+               data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
             }
             }
          }
          }
       } while (k <= j->spec_end);
       } while (k <= j->spec_end);
@@ -3227,6 +3267,13 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
       if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
       if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
    }
    }
 
 
+   // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
+   // and I've never seen a non-corrupted JPEG file actually use them
+   for (i=0; i < s->img_n; ++i) {
+      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
+      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
+   }
+
    // compute interleaved mcu info
    // compute interleaved mcu info
    z->img_h_max = h_max;
    z->img_h_max = h_max;
    z->img_v_max = v_max;
    z->img_v_max = v_max;
@@ -3782,6 +3829,10 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
    else
    else
       decode_n = z->s->img_n;
       decode_n = z->s->img_n;
 
 
+   // nothing to do if no components requested; check this now to avoid
+   // accessing uninitialized coutput[0] later
+   if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
+
    // resample and color-convert
    // resample and color-convert
    {
    {
       int k;
       int k;
@@ -3924,6 +3975,7 @@ static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int re
 {
 {
    unsigned char* result;
    unsigned char* result;
    stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
    stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
+   if (!j) return stbi__errpuc("outofmem", "Out of memory");
    STBI_NOTUSED(ri);
    STBI_NOTUSED(ri);
    j->s = s;
    j->s = s;
    stbi__setup_jpeg(j);
    stbi__setup_jpeg(j);
@@ -3936,6 +3988,7 @@ static int stbi__jpeg_test(stbi__context *s)
 {
 {
    int r;
    int r;
    stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
    stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
+   if (!j) return stbi__err("outofmem", "Out of memory");
    j->s = s;
    j->s = s;
    stbi__setup_jpeg(j);
    stbi__setup_jpeg(j);
    r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
    r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
@@ -3960,6 +4013,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
 {
 {
    int result;
    int result;
    stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
    stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
+   if (!j) return stbi__err("outofmem", "Out of memory");
    j->s = s;
    j->s = s;
    result = stbi__jpeg_info_raw(j, x, y, comp);
    result = stbi__jpeg_info_raw(j, x, y, comp);
    STBI_FREE(j);
    STBI_FREE(j);
@@ -3979,6 +4033,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
 // fast-way is faster to check than jpeg huffman, but slow way is slower
 // fast-way is faster to check than jpeg huffman, but slow way is slower
 #define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
 #define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
 #define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
 #define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
+#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
 
 
 // zlib-style huffman encoding
 // zlib-style huffman encoding
 // (jpegs packs from left, zlib from right, so can't share code)
 // (jpegs packs from left, zlib from right, so can't share code)
@@ -3988,8 +4043,8 @@ typedef struct
    stbi__uint16 firstcode[16];
    stbi__uint16 firstcode[16];
    int maxcode[17];
    int maxcode[17];
    stbi__uint16 firstsymbol[16];
    stbi__uint16 firstsymbol[16];
-   stbi_uc  size[288];
-   stbi__uint16 value[288];
+   stbi_uc  size[STBI__ZNSYMS];
+   stbi__uint16 value[STBI__ZNSYMS];
 } stbi__zhuffman;
 } stbi__zhuffman;
 
 
 stbi_inline static int stbi__bitreverse16(int n)
 stbi_inline static int stbi__bitreverse16(int n)
@@ -4120,7 +4175,7 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
    if (s >= 16) return -1; // invalid code!
    if (s >= 16) return -1; // invalid code!
    // code size is s, so:
    // code size is s, so:
    b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
    b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
-   if (b >= sizeof (z->size)) return -1; // some data was corrupt somewhere!
+   if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
    if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead.
    if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead.
    a->code_buffer >>= s;
    a->code_buffer >>= s;
    a->num_bits -= s;
    a->num_bits -= s;
@@ -4317,7 +4372,7 @@ static int stbi__parse_zlib_header(stbi__zbuf *a)
    return 1;
    return 1;
 }
 }
 
 
-static const stbi_uc stbi__zdefault_length[288] =
+static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
 {
 {
    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
@@ -4363,7 +4418,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
       } else {
       } else {
          if (type == 1) {
          if (type == 1) {
             // use fixed code lengths
             // use fixed code lengths
-            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
+            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , STBI__ZNSYMS)) return 0;
             if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
             if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
          } else {
          } else {
             if (!stbi__compute_huffman_codes(a)) return 0;
             if (!stbi__compute_huffman_codes(a)) return 0;
@@ -4759,6 +4814,7 @@ static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint3
 
 
    // de-interlacing
    // de-interlacing
    final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
    final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
+   if (!final) return stbi__err("outofmem", "Out of memory");
    for (p=0; p < 7; ++p) {
    for (p=0; p < 7; ++p) {
       int xorig[] = { 0,4,0,2,0,1,0 };
       int xorig[] = { 0,4,0,2,0,1,0 };
       int yorig[] = { 0,0,4,0,2,0,1 };
       int yorig[] = { 0,0,4,0,2,0,1 };
@@ -4879,19 +4935,46 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
    return 1;
    return 1;
 }
 }
 
 
-static int stbi__unpremultiply_on_load = 0;
-static int stbi__de_iphone_flag = 0;
+static int stbi__unpremultiply_on_load_global = 0;
+static int stbi__de_iphone_flag_global = 0;
 
 
 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
 {
 {
-   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
+   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
 }
 }
 
 
 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
 {
 {
-   stbi__de_iphone_flag = flag_true_if_should_convert;
+   stbi__de_iphone_flag_global = flag_true_if_should_convert;
+}
+
+#ifndef STBI_THREAD_LOCAL
+#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global
+#define stbi__de_iphone_flag  stbi__de_iphone_flag_global
+#else
+static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
+static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
+
+STBIDEF void stbi__unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
+{
+   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
+   stbi__unpremultiply_on_load_set = 1;
+}
+
+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
+{
+   stbi__de_iphone_flag_local = flag_true_if_should_convert;
+   stbi__de_iphone_flag_set = 1;
 }
 }
 
 
+#define stbi__unpremultiply_on_load  (stbi__unpremultiply_on_load_set           \
+                                       ? stbi__unpremultiply_on_load_local      \
+                                       : stbi__unpremultiply_on_load_global)
+#define stbi__de_iphone_flag  (stbi__de_iphone_flag_set                         \
+                                ? stbi__de_iphone_flag_local                    \
+                                : stbi__de_iphone_flag_global)
+#endif // STBI_THREAD_LOCAL
+
 static void stbi__de_iphone(stbi__png *z)
 static void stbi__de_iphone(stbi__png *z)
 {
 {
    stbi__context *s = z->s;
    stbi__context *s = z->s;
@@ -5272,6 +5355,32 @@ typedef struct
    int extra_read;
    int extra_read;
 } stbi__bmp_data;
 } stbi__bmp_data;
 
 
+static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
+{
+   // BI_BITFIELDS specifies masks explicitly, don't override
+   if (compress == 3)
+      return 1;
+
+   if (compress == 0) {
+      if (info->bpp == 16) {
+         info->mr = 31u << 10;
+         info->mg = 31u <<  5;
+         info->mb = 31u <<  0;
+      } else if (info->bpp == 32) {
+         info->mr = 0xffu << 16;
+         info->mg = 0xffu <<  8;
+         info->mb = 0xffu <<  0;
+         info->ma = 0xffu << 24;
+         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
+      } else {
+         // otherwise, use defaults, which is all-0
+         info->mr = info->mg = info->mb = info->ma = 0;
+      }
+      return 1;
+   }
+   return 0; // error
+}
+
 static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
 static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
 {
 {
    int hsz;
    int hsz;
@@ -5299,6 +5408,8 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
    if (hsz != 12) {
    if (hsz != 12) {
       int compress = stbi__get32le(s);
       int compress = stbi__get32le(s);
       if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
       if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+      if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
+      if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
       stbi__get32le(s); // discard sizeof
       stbi__get32le(s); // discard sizeof
       stbi__get32le(s); // discard hres
       stbi__get32le(s); // discard hres
       stbi__get32le(s); // discard vres
       stbi__get32le(s); // discard vres
@@ -5313,17 +5424,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
          }
          }
          if (info->bpp == 16 || info->bpp == 32) {
          if (info->bpp == 16 || info->bpp == 32) {
             if (compress == 0) {
             if (compress == 0) {
-               if (info->bpp == 32) {
-                  info->mr = 0xffu << 16;
-                  info->mg = 0xffu <<  8;
-                  info->mb = 0xffu <<  0;
-                  info->ma = 0xffu << 24;
-                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
-               } else {
-                  info->mr = 31u << 10;
-                  info->mg = 31u <<  5;
-                  info->mb = 31u <<  0;
-               }
+               stbi__bmp_set_mask_defaults(info, compress);
             } else if (compress == 3) {
             } else if (compress == 3) {
                info->mr = stbi__get32le(s);
                info->mr = stbi__get32le(s);
                info->mg = stbi__get32le(s);
                info->mg = stbi__get32le(s);
@@ -5338,6 +5439,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
                return stbi__errpuc("bad BMP", "bad BMP");
                return stbi__errpuc("bad BMP", "bad BMP");
          }
          }
       } else {
       } else {
+         // V4/V5 header
          int i;
          int i;
          if (hsz != 108 && hsz != 124)
          if (hsz != 108 && hsz != 124)
             return stbi__errpuc("bad BMP", "bad BMP");
             return stbi__errpuc("bad BMP", "bad BMP");
@@ -5345,6 +5447,8 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
          info->mg = stbi__get32le(s);
          info->mg = stbi__get32le(s);
          info->mb = stbi__get32le(s);
          info->mb = stbi__get32le(s);
          info->ma = stbi__get32le(s);
          info->ma = stbi__get32le(s);
+         if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
+            stbi__bmp_set_mask_defaults(info, compress);
          stbi__get32le(s); // discard color space
          stbi__get32le(s); // discard color space
          for (i=0; i < 12; ++i)
          for (i=0; i < 12; ++i)
             stbi__get32le(s); // discard color space parameters
             stbi__get32le(s); // discard color space parameters
@@ -5394,8 +5498,7 @@ static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req
          psize = (info.offset - info.extra_read - info.hsz) >> 2;
          psize = (info.offset - info.extra_read - info.hsz) >> 2;
    }
    }
    if (psize == 0) {
    if (psize == 0) {
-      STBI_ASSERT(info.offset == s->callback_already_read + (int) (s->img_buffer - s->img_buffer_original));
-      if (info.offset != s->callback_already_read + (s->img_buffer - s->buffer_start)) {
+      if (info.offset != s->callback_already_read + (s->img_buffer - s->img_buffer_original)) {
         return stbi__errpuc("bad offset", "Corrupt BMP");
         return stbi__errpuc("bad offset", "Corrupt BMP");
       }
       }
    }
    }
@@ -6342,6 +6445,7 @@ static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_c
 
 
    // intermediate buffer is RGBA
    // intermediate buffer is RGBA
    result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
    result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
+   if (!result) return stbi__errpuc("outofmem", "Out of memory");
    memset(result, 0xff, x*y*4);
    memset(result, 0xff, x*y*4);
 
 
    if (!stbi__pic_load_core(s,x,y,comp, result)) {
    if (!stbi__pic_load_core(s,x,y,comp, result)) {
@@ -6457,6 +6561,7 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in
 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
 {
 {
    stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
    stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
+   if (!g) return stbi__err("outofmem", "Out of memory");
    if (!stbi__gif_header(s, g, comp, 1)) {
    if (!stbi__gif_header(s, g, comp, 1)) {
       STBI_FREE(g);
       STBI_FREE(g);
       stbi__rewind( s );
       stbi__rewind( s );
@@ -6766,6 +6871,17 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
    }
    }
 }
 }
 
 
+static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
+{
+   STBI_FREE(g->out);
+   STBI_FREE(g->history);
+   STBI_FREE(g->background);
+
+   if (out) STBI_FREE(out);
+   if (delays && *delays) STBI_FREE(*delays);
+   return stbi__errpuc("outofmem", "Out of memory");
+}
+
 static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
 static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
 {
 {
    if (stbi__gif_test(s)) {
    if (stbi__gif_test(s)) {
@@ -6777,6 +6893,10 @@ static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y,
       int stride;
       int stride;
       int out_size = 0;
       int out_size = 0;
       int delays_size = 0;
       int delays_size = 0;
+
+      STBI_NOTUSED(out_size);
+      STBI_NOTUSED(delays_size);
+
       memset(&g, 0, sizeof(g));
       memset(&g, 0, sizeof(g));
       if (delays) {
       if (delays) {
          *delays = 0;
          *delays = 0;
@@ -6794,26 +6914,29 @@ static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y,
 
 
             if (out) {
             if (out) {
                void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
                void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
-               if (NULL == tmp) {
-                  STBI_FREE(g.out);
-                  STBI_FREE(g.history);
-                  STBI_FREE(g.background);
-                  return stbi__errpuc("outofmem", "Out of memory");
-               }
+               if (!tmp)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
                else {
                else {
                    out = (stbi_uc*) tmp;
                    out = (stbi_uc*) tmp;
                    out_size = layers * stride;
                    out_size = layers * stride;
                }
                }
 
 
                if (delays) {
                if (delays) {
-                  *delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
+                  int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
+                  if (!new_delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
+                  *delays = new_delays;
                   delays_size = layers * sizeof(int);
                   delays_size = layers * sizeof(int);
                }
                }
             } else {
             } else {
                out = (stbi_uc*)stbi__malloc( layers * stride );
                out = (stbi_uc*)stbi__malloc( layers * stride );
+               if (!out)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
                out_size = layers * stride;
                out_size = layers * stride;
                if (delays) {
                if (delays) {
                   *delays = (int*) stbi__malloc( layers * sizeof(int) );
                   *delays = (int*) stbi__malloc( layers * sizeof(int) );
+                  if (!*delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
                   delays_size = layers * sizeof(int);
                   delays_size = layers * sizeof(int);
                }
                }
             }
             }
@@ -7138,9 +7261,10 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
 
 
    info.all_a = 255;
    info.all_a = 255;
    p = stbi__bmp_parse_header(s, &info);
    p = stbi__bmp_parse_header(s, &info);
-   stbi__rewind( s );
-   if (p == NULL)
+   if (p == NULL) {
+      stbi__rewind( s );
       return 0;
       return 0;
+   }
    if (x) *x = s->img_x;
    if (x) *x = s->img_x;
    if (y) *y = s->img_y;
    if (y) *y = s->img_y;
    if (comp) {
    if (comp) {
@@ -7206,8 +7330,8 @@ static int stbi__psd_is16(stbi__context *s)
        stbi__rewind( s );
        stbi__rewind( s );
        return 0;
        return 0;
    }
    }
-   (void) stbi__get32be(s);
-   (void) stbi__get32be(s);
+   STBI_NOTUSED(stbi__get32be(s));
+   STBI_NOTUSED(stbi__get32be(s));
    depth = stbi__get16be(s);
    depth = stbi__get16be(s);
    if (depth != 16) {
    if (depth != 16) {
        stbi__rewind( s );
        stbi__rewind( s );
@@ -7286,7 +7410,6 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
 // Known limitations:
 // Known limitations:
 //    Does not support comments in the header section
 //    Does not support comments in the header section
 //    Does not support ASCII image data (formats P2 and P3)
 //    Does not support ASCII image data (formats P2 and P3)
-//    Does not support 16-bit-per-channel
 
 
 #ifndef STBI_NO_PNM
 #ifndef STBI_NO_PNM
 
 
@@ -7307,7 +7430,8 @@ static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req
    stbi_uc *out;
    stbi_uc *out;
    STBI_NOTUSED(ri);
    STBI_NOTUSED(ri);
 
 
-   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
+   ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
+   if (ri->bits_per_channel == 0)
       return 0;
       return 0;
 
 
    if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
    if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
@@ -7317,12 +7441,12 @@ static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req
    *y = s->img_y;
    *y = s->img_y;
    if (comp) *comp = s->img_n;
    if (comp) *comp = s->img_n;
 
 
-   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
+   if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
       return stbi__errpuc("too large", "PNM too large");
       return stbi__errpuc("too large", "PNM too large");
 
 
-   out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
+   out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
-   stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
+   stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8));
 
 
    if (req_comp && req_comp != s->img_n) {
    if (req_comp && req_comp != s->img_n) {
       out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
       out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
@@ -7398,11 +7522,19 @@ static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
    stbi__pnm_skip_whitespace(s, &c);
    stbi__pnm_skip_whitespace(s, &c);
 
 
    maxv = stbi__pnm_getinteger(s, &c);  // read max value
    maxv = stbi__pnm_getinteger(s, &c);  // read max value
-
-   if (maxv > 255)
-      return stbi__err("max value > 255", "PPM image not 8-bit");
+   if (maxv > 65535)
+      return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
+   else if (maxv > 255)
+      return 16;
    else
    else
-      return 1;
+      return 8;
+}
+
+static int stbi__pnm_is16(stbi__context *s)
+{
+   if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
+	   return 1;
+   return 0;
 }
 }
 #endif
 #endif
 
 
@@ -7458,6 +7590,9 @@ static int stbi__is_16_main(stbi__context *s)
    if (stbi__psd_is16(s))  return 1;
    if (stbi__psd_is16(s))  return 1;
    #endif
    #endif
 
 
+   #ifndef STBI_NO_PNM
+   if (stbi__pnm_is16(s))  return 1;
+   #endif
    return 0;
    return 0;
 }
 }
 
 

+ 12 - 9
src/external/stb_image_resize.h

@@ -1,4 +1,4 @@
-/* stb_image_resize - v0.96 - public domain image resizing
+/* stb_image_resize - v0.97 - public domain image resizing
    by Jorge L Rodriguez (@VinoBS) - 2014
    by Jorge L Rodriguez (@VinoBS) - 2014
    http://github.com/nothings/stb
    http://github.com/nothings/stb
 
 
@@ -1064,7 +1064,11 @@ static void stbir__calculate_coefficients_upsample(stbir_filter filter, float sc
         total_filter += coefficient_group[i];
         total_filter += coefficient_group[i];
     }
     }
 
 
-    STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(in_last_pixel + 1) + 0.5f - in_center_of_out, 1/scale) == 0);
+    // NOTE(fg): Not actually true in general, nor is there any reason to expect it should be.
+    // It would be true in exact math but is at best approximately true in floating-point math,
+    // and it would not make sense to try and put actual bounds on this here because it depends
+    // on the image aspect ratio which can get pretty extreme.
+    //STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(in_last_pixel + 1) + 0.5f - in_center_of_out, 1/scale) == 0);
 
 
     STBIR_ASSERT(total_filter > 0.9);
     STBIR_ASSERT(total_filter > 0.9);
     STBIR_ASSERT(total_filter < 1.1f); // Make sure it's not way off.
     STBIR_ASSERT(total_filter < 1.1f); // Make sure it's not way off.
@@ -1089,7 +1093,7 @@ static void stbir__calculate_coefficients_downsample(stbir_filter filter, float
 {
 {
     int i;
     int i;
 
 
-     STBIR_ASSERT(out_last_pixel - out_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(scale_ratio) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
+    STBIR_ASSERT(out_last_pixel - out_first_pixel <= (int)ceil(stbir__filter_info_table[filter].support(scale_ratio) * 2)); // Taken directly from stbir__get_coefficient_width() which we can't call because we don't know if we're horizontal or vertical.
 
 
     contributor->n0 = out_first_pixel;
     contributor->n0 = out_first_pixel;
     contributor->n1 = out_last_pixel;
     contributor->n1 = out_last_pixel;
@@ -1103,7 +1107,11 @@ static void stbir__calculate_coefficients_downsample(stbir_filter filter, float
         coefficient_group[i] = stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio;
         coefficient_group[i] = stbir__filter_info_table[filter].kernel(x, scale_ratio) * scale_ratio;
     }
     }
 
 
-    STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(out_last_pixel + 1) + 0.5f - out_center_of_in, scale_ratio) == 0);
+    // NOTE(fg): Not actually true in general, nor is there any reason to expect it should be.
+    // It would be true in exact math but is at best approximately true in floating-point math,
+    // and it would not make sense to try and put actual bounds on this here because it depends
+    // on the image aspect ratio which can get pretty extreme.
+    //STBIR_ASSERT(stbir__filter_info_table[filter].kernel((float)(out_last_pixel + 1) + 0.5f - out_center_of_in, scale_ratio) == 0);
 
 
     for (i = out_last_pixel - out_first_pixel; i >= 0; i--)
     for (i = out_last_pixel - out_first_pixel; i >= 0; i--)
     {
     {
@@ -1552,7 +1560,6 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float
                 {
                 {
                     int out_pixel_index = k * 1;
                     int out_pixel_index = k * 1;
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
-                    STBIR_ASSERT(coefficient != 0);
                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
                 }
                 }
             }
             }
@@ -1573,7 +1580,6 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float
                 {
                 {
                     int out_pixel_index = k * 2;
                     int out_pixel_index = k * 2;
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
-                    STBIR_ASSERT(coefficient != 0);
                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
                 }
                 }
@@ -1595,7 +1601,6 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float
                 {
                 {
                     int out_pixel_index = k * 3;
                     int out_pixel_index = k * 3;
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
-                    STBIR_ASSERT(coefficient != 0);
                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
                     output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
                     output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
@@ -1618,7 +1623,6 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float
                 {
                 {
                     int out_pixel_index = k * 4;
                     int out_pixel_index = k * 4;
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
-                    STBIR_ASSERT(coefficient != 0);
                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
                     output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
                     output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
                     output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
                     output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
@@ -1643,7 +1647,6 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, float
                     int c;
                     int c;
                     int out_pixel_index = k * channels;
                     int out_pixel_index = k * channels;
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
                     float coefficient = horizontal_coefficients[coefficient_group + k - n0];
-                    STBIR_ASSERT(coefficient != 0);
                     for (c = 0; c < channels; c++)
                     for (c = 0; c < channels; c++)
                         output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
                         output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
                 }
                 }

+ 57 - 23
src/external/stb_image_write.h

@@ -1,4 +1,4 @@
-/* stb_image_write - v1.15 - public domain - http://nothings.org/stb
+/* stb_image_write - v1.16 - public domain - http://nothings.org/stb
    writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015
    writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015
                                      no warranty implied; use at your own risk
                                      no warranty implied; use at your own risk
 
 
@@ -140,6 +140,7 @@ CREDITS:
       Ivan Tikhonov
       Ivan Tikhonov
       github:ignotion
       github:ignotion
       Adam Schackart
       Adam Schackart
+      Andrew Kensler
 
 
 LICENSE
 LICENSE
 
 
@@ -166,9 +167,9 @@ LICENSE
 #endif
 #endif
 
 
 #ifndef STB_IMAGE_WRITE_STATIC  // C++ forbids static forward declarations
 #ifndef STB_IMAGE_WRITE_STATIC  // C++ forbids static forward declarations
-extern int stbi_write_tga_with_rle;
-extern int stbi_write_png_compression_level;
-extern int stbi_write_force_png_filter;
+STBIWDEF int stbi_write_tga_with_rle;
+STBIWDEF int stbi_write_png_compression_level;
+STBIWDEF int stbi_write_force_png_filter;
 #endif
 #endif
 
 
 #ifndef STBI_WRITE_NO_STDIO
 #ifndef STBI_WRITE_NO_STDIO
@@ -178,7 +179,7 @@ STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const
 STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
 STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
 STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void  *data, int quality);
 STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void  *data, int quality);
 
 
-#ifdef STBI_WINDOWS_UTF8
+#ifdef STBIW_WINDOWS_UTF8
 STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
 STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
 #endif
 #endif
 #endif
 #endif
@@ -285,7 +286,7 @@ static void stbi__stdio_write(void *context, void *data, int size)
    fwrite(data,1,size,(FILE*) context);
    fwrite(data,1,size,(FILE*) context);
 }
 }
 
 
-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
+#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8)
 #ifdef __cplusplus
 #ifdef __cplusplus
 #define STBIW_EXTERN extern "C"
 #define STBIW_EXTERN extern "C"
 #else
 #else
@@ -296,25 +297,25 @@ STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned in
 
 
 STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
 STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
 {
 {
-	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
+   return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
 }
 }
 #endif
 #endif
 
 
 static FILE *stbiw__fopen(char const *filename, char const *mode)
 static FILE *stbiw__fopen(char const *filename, char const *mode)
 {
 {
    FILE *f;
    FILE *f;
-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
+#if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8)
    wchar_t wMode[64];
    wchar_t wMode[64];
    wchar_t wFilename[1024];
    wchar_t wFilename[1024];
-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)))
+   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
       return 0;
       return 0;
 
 
-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)))
+   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
       return 0;
       return 0;
 
 
-#if _MSC_VER >= 1400
-	if (0 != _wfopen_s(&f, wFilename, wMode))
-		f = 0;
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+   if (0 != _wfopen_s(&f, wFilename, wMode))
+      f = 0;
 #else
 #else
    f = _wfopen(wFilename, wMode);
    f = _wfopen(wFilename, wMode);
 #endif
 #endif
@@ -397,7 +398,7 @@ static void stbiw__putc(stbi__write_context *s, unsigned char c)
 
 
 static void stbiw__write1(stbi__write_context *s, unsigned char a)
 static void stbiw__write1(stbi__write_context *s, unsigned char a)
 {
 {
-   if (s->buf_used + 1 > sizeof(s->buffer))
+   if ((size_t)s->buf_used + 1 > sizeof(s->buffer))
       stbiw__write_flush(s);
       stbiw__write_flush(s);
    s->buffer[s->buf_used++] = a;
    s->buffer[s->buf_used++] = a;
 }
 }
@@ -405,7 +406,7 @@ static void stbiw__write1(stbi__write_context *s, unsigned char a)
 static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c)
 static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c)
 {
 {
    int n;
    int n;
-   if (s->buf_used + 3 > sizeof(s->buffer))
+   if ((size_t)s->buf_used + 3 > sizeof(s->buffer))
       stbiw__write_flush(s);
       stbiw__write_flush(s);
    n = s->buf_used;
    n = s->buf_used;
    s->buf_used = n+3;
    s->buf_used = n+3;
@@ -490,11 +491,22 @@ static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x,
 
 
 static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data)
 static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data)
 {
 {
-   int pad = (-x*3) & 3;
-   return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad,
-           "11 4 22 4" "4 44 22 444444",
-           'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40,  // file header
-            40, x,y, 1,24, 0,0,0,0,0,0);             // bitmap header
+   if (comp != 4) {
+      // write RGB bitmap
+      int pad = (-x*3) & 3;
+      return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad,
+              "11 4 22 4" "4 44 22 444444",
+              'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40,  // file header
+               40, x,y, 1,24, 0,0,0,0,0,0);             // bitmap header
+   } else {
+      // RGBA bitmaps need a v4 header
+      // use BI_BITFIELDS mode with 32bpp and alpha mask
+      // (straight BI_RGB with alpha mask doesn't work in most readers)
+      return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *)data,1,0,
+         "11 4 22 4" "4 44 22 444444 4444 4 444 444 444 444",
+         'B', 'M', 14+108+x*y*4, 0, 0, 14+108, // file header
+         108, x,y, 1,32, 3,0,0,0,0,0, 0xff0000,0xff00,0xff,0xff000000u, 0, 0,0,0, 0,0,0, 0,0,0, 0,0,0); // bitmap V4 header
+   }
 }
 }
 
 
 STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
 STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
@@ -622,6 +634,8 @@ STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const
 
 
 #define stbiw__max(a, b)  ((a) > (b) ? (a) : (b))
 #define stbiw__max(a, b)  ((a) > (b) ? (a) : (b))
 
 
+#ifndef STBI_WRITE_NO_STDIO
+
 static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
 static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
 {
 {
    int exponent;
    int exponent;
@@ -756,7 +770,7 @@ static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, f
       char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";
       char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";
       s->func(s->context, header, sizeof(header)-1);
       s->func(s->context, header, sizeof(header)-1);
 
 
-#ifdef __STDC_WANT_SECURE_LIB__
+#ifdef __STDC_LIB_EXT1__
       len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);
       len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);
 #else
 #else
       len = sprintf(buffer, "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);
       len = sprintf(buffer, "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);
@@ -777,7 +791,6 @@ STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x,
    return stbi_write_hdr_core(&s, x, y, comp, (float *) data);
    return stbi_write_hdr_core(&s, x, y, comp, (float *) data);
 }
 }
 
 
-#ifndef STBI_WRITE_NO_STDIO
 STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data)
 STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data)
 {
 {
    stbi__write_context s = { 0 };
    stbi__write_context s = { 0 };
@@ -968,6 +981,23 @@ STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, i
       (void) stbiw__sbfree(hash_table[i]);
       (void) stbiw__sbfree(hash_table[i]);
    STBIW_FREE(hash_table);
    STBIW_FREE(hash_table);
 
 
+   // store uncompressed instead if compression was worse
+   if (stbiw__sbn(out) > data_len + 2 + ((data_len+32766)/32767)*5) {
+      stbiw__sbn(out) = 2;  // truncate to DEFLATE 32K window and FLEVEL = 1
+      for (j = 0; j < data_len;) {
+         int blocklen = data_len - j;
+         if (blocklen > 32767) blocklen = 32767;
+         stbiw__sbpush(out, data_len - j == blocklen); // BFINAL = ?, BTYPE = 0 -- no compression
+         stbiw__sbpush(out, STBIW_UCHAR(blocklen)); // LEN
+         stbiw__sbpush(out, STBIW_UCHAR(blocklen >> 8));
+         stbiw__sbpush(out, STBIW_UCHAR(~blocklen)); // NLEN
+         stbiw__sbpush(out, STBIW_UCHAR(~blocklen >> 8));
+         memcpy(out+stbiw__sbn(out), data+j, blocklen);
+         stbiw__sbn(out) += blocklen;
+         j += blocklen;
+      }
+   }
+
    {
    {
       // compute adler32 on input
       // compute adler32 on input
       unsigned int s1=1, s2=0;
       unsigned int s1=1, s2=0;
@@ -1598,6 +1628,10 @@ STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const
 #endif // STB_IMAGE_WRITE_IMPLEMENTATION
 #endif // STB_IMAGE_WRITE_IMPLEMENTATION
 
 
 /* Revision history
 /* Revision history
+      1.16  (2021-07-11)
+             make Deflate code emit uncompressed blocks when it would otherwise expand
+             support writing BMPs with alpha channel
+      1.15  (2020-07-13) unknown
       1.14  (2020-02-02) updated JPEG writer to downsample chroma channels
       1.14  (2020-02-02) updated JPEG writer to downsample chroma channels
       1.13
       1.13
       1.12
       1.12
@@ -1635,7 +1669,7 @@ STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const
              add HDR output
              add HDR output
              fix monochrome BMP
              fix monochrome BMP
       0.95 (2014-08-17)
       0.95 (2014-08-17)
-		       add monochrome TGA output
+             add monochrome TGA output
       0.94 (2014-05-31)
       0.94 (2014-05-31)
              rename private functions to avoid conflicts with stb_image.h
              rename private functions to avoid conflicts with stb_image.h
       0.93 (2014-05-27)
       0.93 (2014-05-27)

+ 15 - 20
src/external/stb_rect_pack.h

@@ -1,9 +1,15 @@
-// stb_rect_pack.h - v1.00 - public domain - rectangle packing
+// stb_rect_pack.h - v1.01 - public domain - rectangle packing
 // Sean Barrett 2014
 // Sean Barrett 2014
 //
 //
 // Useful for e.g. packing rectangular textures into an atlas.
 // Useful for e.g. packing rectangular textures into an atlas.
 // Does not do rotation.
 // Does not do rotation.
 //
 //
+// Before #including,
+//
+//    #define STB_RECT_PACK_IMPLEMENTATION
+//
+// in the file that you want to have the implementation.
+//
 // Not necessarily the awesomest packing method, but better than
 // Not necessarily the awesomest packing method, but better than
 // the totally naive one in stb_truetype (which is primarily what
 // the totally naive one in stb_truetype (which is primarily what
 // this is meant to replace).
 // this is meant to replace).
@@ -35,6 +41,7 @@
 //
 //
 // Version history:
 // Version history:
 //
 //
+//     1.01  (2021-07-11)  always use large rect mode, expose STBRP__MAXVAL in public section
 //     1.00  (2019-02-25)  avoid small space waste; gracefully fail too-wide rectangles
 //     1.00  (2019-02-25)  avoid small space waste; gracefully fail too-wide rectangles
 //     0.99  (2019-02-07)  warning fixes
 //     0.99  (2019-02-07)  warning fixes
 //     0.11  (2017-03-03)  return packing success/fail result
 //     0.11  (2017-03-03)  return packing success/fail result
@@ -75,11 +82,10 @@ typedef struct stbrp_context stbrp_context;
 typedef struct stbrp_node    stbrp_node;
 typedef struct stbrp_node    stbrp_node;
 typedef struct stbrp_rect    stbrp_rect;
 typedef struct stbrp_rect    stbrp_rect;
 
 
-#ifdef STBRP_LARGE_RECTS
 typedef int            stbrp_coord;
 typedef int            stbrp_coord;
-#else
-typedef unsigned short stbrp_coord;
-#endif
+
+#define STBRP__MAXVAL  0x7fffffff
+// Mostly for internal use, but this is the maximum supported coordinate value.
 
 
 STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects);
 STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects);
 // Assign packed locations to rectangles. The rectangles are of type
 // Assign packed locations to rectangles. The rectangles are of type
@@ -209,8 +215,10 @@ struct stbrp_context
 
 
 #ifdef _MSC_VER
 #ifdef _MSC_VER
 #define STBRP__NOTUSED(v)  (void)(v)
 #define STBRP__NOTUSED(v)  (void)(v)
+#define STBRP__CDECL       __cdecl
 #else
 #else
 #define STBRP__NOTUSED(v)  (void)sizeof(v)
 #define STBRP__NOTUSED(v)  (void)sizeof(v)
+#define STBRP__CDECL
 #endif
 #endif
 
 
 enum
 enum
@@ -253,9 +261,6 @@ STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_ou
 STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes)
 STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes)
 {
 {
    int i;
    int i;
-#ifndef STBRP_LARGE_RECTS
-   STBRP_ASSERT(width <= 0xffff && height <= 0xffff);
-#endif
 
 
    for (i=0; i < num_nodes-1; ++i)
    for (i=0; i < num_nodes-1; ++i)
       nodes[i].next = &nodes[i+1];
       nodes[i].next = &nodes[i+1];
@@ -274,11 +279,7 @@ STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height,
    context->extra[0].y = 0;
    context->extra[0].y = 0;
    context->extra[0].next = &context->extra[1];
    context->extra[0].next = &context->extra[1];
    context->extra[1].x = (stbrp_coord) width;
    context->extra[1].x = (stbrp_coord) width;
-#ifdef STBRP_LARGE_RECTS
    context->extra[1].y = (1<<30);
    context->extra[1].y = (1<<30);
-#else
-   context->extra[1].y = 65535;
-#endif
    context->extra[1].next = NULL;
    context->extra[1].next = NULL;
 }
 }
 
 
@@ -520,7 +521,7 @@ static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, i
    return res;
    return res;
 }
 }
 
 
-static int rect_height_compare(const void *a, const void *b)
+static int STBRP__CDECL rect_height_compare(const void *a, const void *b)
 {
 {
    const stbrp_rect *p = (const stbrp_rect *) a;
    const stbrp_rect *p = (const stbrp_rect *) a;
    const stbrp_rect *q = (const stbrp_rect *) b;
    const stbrp_rect *q = (const stbrp_rect *) b;
@@ -531,19 +532,13 @@ static int rect_height_compare(const void *a, const void *b)
    return (p->w > q->w) ? -1 : (p->w < q->w);
    return (p->w > q->w) ? -1 : (p->w < q->w);
 }
 }
 
 
-static int rect_original_order(const void *a, const void *b)
+static int STBRP__CDECL rect_original_order(const void *a, const void *b)
 {
 {
    const stbrp_rect *p = (const stbrp_rect *) a;
    const stbrp_rect *p = (const stbrp_rect *) a;
    const stbrp_rect *q = (const stbrp_rect *) b;
    const stbrp_rect *q = (const stbrp_rect *) b;
    return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed);
    return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed);
 }
 }
 
 
-#ifdef STBRP_LARGE_RECTS
-#define STBRP__MAXVAL  0xffffffff
-#else
-#define STBRP__MAXVAL  0xffff
-#endif
-
 STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects)
 STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects)
 {
 {
    int i, all_rects_packed = 1;
    int i, all_rects_packed = 1;

+ 347 - 281
src/external/stb_truetype.h

@@ -1,5 +1,5 @@
-// stb_truetype.h - v1.24 - public domain
-// authored from 2009-2020 by Sean Barrett / RAD Game Tools
+// stb_truetype.h - v1.26 - public domain
+// authored from 2009-2021 by Sean Barrett / RAD Game Tools
 //
 //
 // =======================================================================
 // =======================================================================
 //
 //
@@ -53,11 +53,13 @@
 //       Johan Duparc               Thomas Fields
 //       Johan Duparc               Thomas Fields
 //       Hou Qiming                 Derek Vinyard
 //       Hou Qiming                 Derek Vinyard
 //       Rob Loach                  Cort Stratton
 //       Rob Loach                  Cort Stratton
-//       Kenney Phillis Jr.         Brian Costabile            
-//       Ken Voskuil (kaesve)       
+//       Kenney Phillis Jr.         Brian Costabile
+//       Ken Voskuil (kaesve)
 //
 //
 // VERSION HISTORY
 // VERSION HISTORY
 //
 //
+//   1.26 (2021-08-28) fix broken rasterizer
+//   1.25 (2021-07-11) many fixes
 //   1.24 (2020-02-05) fix warning
 //   1.24 (2020-02-05) fix warning
 //   1.23 (2020-02-02) query SVG data for glyphs; query whole kerning table (but only kern not GPOS)
 //   1.23 (2020-02-02) query SVG data for glyphs; query whole kerning table (but only kern not GPOS)
 //   1.22 (2019-08-11) minimize missing-glyph duplication; fix kerning if both 'GPOS' and 'kern' are defined
 //   1.22 (2019-08-11) minimize missing-glyph duplication; fix kerning if both 'GPOS' and 'kern' are defined
@@ -270,8 +272,8 @@
 ////  SAMPLE PROGRAMS
 ////  SAMPLE PROGRAMS
 ////
 ////
 //
 //
-//  Incomplete text-in-3d-api example, which draws quads properly aligned to be lossless
-//
+//  Incomplete text-in-3d-api example, which draws quads properly aligned to be lossless.
+//  See "tests/truetype_demo_win32.c" for a complete version.
 #if 0
 #if 0
 #define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
 #define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
 #include "stb_truetype.h"
 #include "stb_truetype.h"
@@ -297,6 +299,8 @@ void my_stbtt_initfont(void)
 void my_stbtt_print(float x, float y, char *text)
 void my_stbtt_print(float x, float y, char *text)
 {
 {
    // assume orthographic projection with units = screen pixels, origin at top left
    // assume orthographic projection with units = screen pixels, origin at top left
+   glEnable(GL_BLEND);
+   glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
    glEnable(GL_TEXTURE_2D);
    glEnable(GL_TEXTURE_2D);
    glBindTexture(GL_TEXTURE_2D, ftex);
    glBindTexture(GL_TEXTURE_2D, ftex);
    glBegin(GL_QUADS);
    glBegin(GL_QUADS);
@@ -304,10 +308,10 @@ void my_stbtt_print(float x, float y, char *text)
       if (*text >= 32 && *text < 128) {
       if (*text >= 32 && *text < 128) {
          stbtt_aligned_quad q;
          stbtt_aligned_quad q;
          stbtt_GetBakedQuad(cdata, 512,512, *text-32, &x,&y,&q,1);//1=opengl & d3d10+,0=d3d9
          stbtt_GetBakedQuad(cdata, 512,512, *text-32, &x,&y,&q,1);//1=opengl & d3d10+,0=d3d9
-         glTexCoord2f(q.s0,q.t1); glVertex2f(q.x0,q.y0);
-         glTexCoord2f(q.s1,q.t1); glVertex2f(q.x1,q.y0);
-         glTexCoord2f(q.s1,q.t0); glVertex2f(q.x1,q.y1);
-         glTexCoord2f(q.s0,q.t0); glVertex2f(q.x0,q.y1);
+         glTexCoord2f(q.s0,q.t0); glVertex2f(q.x0,q.y0);
+         glTexCoord2f(q.s1,q.t0); glVertex2f(q.x1,q.y0);
+         glTexCoord2f(q.s1,q.t1); glVertex2f(q.x1,q.y1);
+         glTexCoord2f(q.s0,q.t1); glVertex2f(q.x0,q.y1);
       }
       }
       ++text;
       ++text;
    }
    }
@@ -853,6 +857,7 @@ STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, s
 STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *vertices);
 STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *vertices);
 // frees the data allocated above
 // frees the data allocated above
 
 
+STBTT_DEF unsigned char *stbtt_FindSVGDoc(const stbtt_fontinfo *info, int gl);
 STBTT_DEF int stbtt_GetCodepointSVG(const stbtt_fontinfo *info, int unicode_codepoint, const char **svg);
 STBTT_DEF int stbtt_GetCodepointSVG(const stbtt_fontinfo *info, int unicode_codepoint, const char **svg);
 STBTT_DEF int stbtt_GetGlyphSVG(const stbtt_fontinfo *info, int gl, const char **svg);
 STBTT_DEF int stbtt_GetGlyphSVG(const stbtt_fontinfo *info, int gl, const char **svg);
 // fills svg with the character's SVG data.
 // fills svg with the character's SVG data.
@@ -1539,12 +1544,12 @@ STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codep
       search += 2;
       search += 2;
 
 
       {
       {
-         stbtt_uint16 offset, start;
+         stbtt_uint16 offset, start, last;
          stbtt_uint16 item = (stbtt_uint16) ((search - endCount) >> 1);
          stbtt_uint16 item = (stbtt_uint16) ((search - endCount) >> 1);
 
 
-         STBTT_assert(unicode_codepoint <= ttUSHORT(data + endCount + 2*item));
          start = ttUSHORT(data + index_map + 14 + segcount*2 + 2 + 2*item);
          start = ttUSHORT(data + index_map + 14 + segcount*2 + 2 + 2*item);
-         if (unicode_codepoint < start)
+         last = ttUSHORT(data + endCount + 2*item);
+         if (unicode_codepoint < start || unicode_codepoint > last)
             return 0;
             return 0;
 
 
          offset = ttUSHORT(data + index_map + 14 + segcount*6 + 2 + 2*item);
          offset = ttUSHORT(data + index_map + 14 + segcount*6 + 2 + 2*item);
@@ -1871,7 +1876,7 @@ static int stbtt__GetGlyphShapeTT(const stbtt_fontinfo *info, int glyph_index, s
                if (comp_verts) STBTT_free(comp_verts, info->userdata);
                if (comp_verts) STBTT_free(comp_verts, info->userdata);
                return 0;
                return 0;
             }
             }
-            if (num_vertices > 0) STBTT_memcpy(tmp, vertices, num_vertices*sizeof(stbtt_vertex));
+            if (num_vertices > 0 && vertices) STBTT_memcpy(tmp, vertices, num_vertices*sizeof(stbtt_vertex));
             STBTT_memcpy(tmp+num_vertices, comp_verts, comp_num_verts*sizeof(stbtt_vertex));
             STBTT_memcpy(tmp+num_vertices, comp_verts, comp_num_verts*sizeof(stbtt_vertex));
             if (vertices) STBTT_free(vertices, info->userdata);
             if (vertices) STBTT_free(vertices, info->userdata);
             vertices = tmp;
             vertices = tmp;
@@ -2134,7 +2139,7 @@ static int stbtt__run_charstring(const stbtt_fontinfo *info, int glyph_index, st
                subrs = stbtt__cid_get_glyph_subrs(info, glyph_index);
                subrs = stbtt__cid_get_glyph_subrs(info, glyph_index);
             has_subrs = 1;
             has_subrs = 1;
          }
          }
-         // fallthrough
+         // FALLTHROUGH
       case 0x1D: // callgsubr
       case 0x1D: // callgsubr
          if (sp < 1) return STBTT__CSERR("call(g|)subr stack");
          if (sp < 1) return STBTT__CSERR("call(g|)subr stack");
          v = (int) s[--sp];
          v = (int) s[--sp];
@@ -2239,7 +2244,7 @@ static int stbtt__run_charstring(const stbtt_fontinfo *info, int glyph_index, st
       } break;
       } break;
 
 
       default:
       default:
-         if (b0 != 255 && b0 != 28 && (b0 < 32 || b0 > 254))
+         if (b0 != 255 && b0 != 28 && b0 < 32)
             return STBTT__CSERR("reserved operator");
             return STBTT__CSERR("reserved operator");
 
 
          // push immediate
          // push immediate
@@ -2351,7 +2356,7 @@ STBTT_DEF int stbtt_GetKerningTable(const stbtt_fontinfo *info, stbtt_kerningent
    return length;
    return length;
 }
 }
 
 
-static int  stbtt__GetGlyphKernInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
+static int stbtt__GetGlyphKernInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
 {
 {
    stbtt_uint8 *data = info->data + info->kern;
    stbtt_uint8 *data = info->data + info->kern;
    stbtt_uint32 needle, straw;
    stbtt_uint32 needle, straw;
@@ -2381,243 +2386,225 @@ static int  stbtt__GetGlyphKernInfoAdvance(const stbtt_fontinfo *info, int glyph
    return 0;
    return 0;
 }
 }
 
 
-static stbtt_int32  stbtt__GetCoverageIndex(stbtt_uint8 *coverageTable, int glyph)
-{
-    stbtt_uint16 coverageFormat = ttUSHORT(coverageTable);
-    switch(coverageFormat) {
-        case 1: {
-            stbtt_uint16 glyphCount = ttUSHORT(coverageTable + 2);
-
-            // Binary search.
-            stbtt_int32 l=0, r=glyphCount-1, m;
-            int straw, needle=glyph;
-            while (l <= r) {
-                stbtt_uint8 *glyphArray = coverageTable + 4;
-                stbtt_uint16 glyphID;
-                m = (l + r) >> 1;
-                glyphID = ttUSHORT(glyphArray + 2 * m);
-                straw = glyphID;
-                if (needle < straw)
-                    r = m - 1;
-                else if (needle > straw)
-                    l = m + 1;
-                else {
-                     return m;
-                }
+static stbtt_int32 stbtt__GetCoverageIndex(stbtt_uint8 *coverageTable, int glyph)
+{
+   stbtt_uint16 coverageFormat = ttUSHORT(coverageTable);
+   switch (coverageFormat) {
+      case 1: {
+         stbtt_uint16 glyphCount = ttUSHORT(coverageTable + 2);
+
+         // Binary search.
+         stbtt_int32 l=0, r=glyphCount-1, m;
+         int straw, needle=glyph;
+         while (l <= r) {
+            stbtt_uint8 *glyphArray = coverageTable + 4;
+            stbtt_uint16 glyphID;
+            m = (l + r) >> 1;
+            glyphID = ttUSHORT(glyphArray + 2 * m);
+            straw = glyphID;
+            if (needle < straw)
+               r = m - 1;
+            else if (needle > straw)
+               l = m + 1;
+            else {
+               return m;
             }
             }
-        } break;
-
-        case 2: {
-            stbtt_uint16 rangeCount = ttUSHORT(coverageTable + 2);
-            stbtt_uint8 *rangeArray = coverageTable + 4;
-
-            // Binary search.
-            stbtt_int32 l=0, r=rangeCount-1, m;
-            int strawStart, strawEnd, needle=glyph;
-            while (l <= r) {
-                stbtt_uint8 *rangeRecord;
-                m = (l + r) >> 1;
-                rangeRecord = rangeArray + 6 * m;
-                strawStart = ttUSHORT(rangeRecord);
-                strawEnd = ttUSHORT(rangeRecord + 2);
-                if (needle < strawStart)
-                    r = m - 1;
-                else if (needle > strawEnd)
-                    l = m + 1;
-                else {
-                    stbtt_uint16 startCoverageIndex = ttUSHORT(rangeRecord + 4);
-                    return startCoverageIndex + glyph - strawStart;
-                }
+         }
+         break;
+      }
+
+      case 2: {
+         stbtt_uint16 rangeCount = ttUSHORT(coverageTable + 2);
+         stbtt_uint8 *rangeArray = coverageTable + 4;
+
+         // Binary search.
+         stbtt_int32 l=0, r=rangeCount-1, m;
+         int strawStart, strawEnd, needle=glyph;
+         while (l <= r) {
+            stbtt_uint8 *rangeRecord;
+            m = (l + r) >> 1;
+            rangeRecord = rangeArray + 6 * m;
+            strawStart = ttUSHORT(rangeRecord);
+            strawEnd = ttUSHORT(rangeRecord + 2);
+            if (needle < strawStart)
+               r = m - 1;
+            else if (needle > strawEnd)
+               l = m + 1;
+            else {
+               stbtt_uint16 startCoverageIndex = ttUSHORT(rangeRecord + 4);
+               return startCoverageIndex + glyph - strawStart;
             }
             }
-        } break;
+         }
+         break;
+      }
 
 
-        default: {
-            // There are no other cases.
-            STBTT_assert(0);
-        } break;
-    }
+      default: return -1; // unsupported
+   }
 
 
-    return -1;
+   return -1;
 }
 }
 
 
 static stbtt_int32  stbtt__GetGlyphClass(stbtt_uint8 *classDefTable, int glyph)
 static stbtt_int32  stbtt__GetGlyphClass(stbtt_uint8 *classDefTable, int glyph)
 {
 {
-    stbtt_uint16 classDefFormat = ttUSHORT(classDefTable);
-    switch(classDefFormat)
-    {
-        case 1: {
-            stbtt_uint16 startGlyphID = ttUSHORT(classDefTable + 2);
-            stbtt_uint16 glyphCount = ttUSHORT(classDefTable + 4);
-            stbtt_uint8 *classDef1ValueArray = classDefTable + 6;
-
-            if (glyph >= startGlyphID && glyph < startGlyphID + glyphCount)
-                return (stbtt_int32)ttUSHORT(classDef1ValueArray + 2 * (glyph - startGlyphID));
-
-            classDefTable = classDef1ValueArray + 2 * glyphCount;
-        } break;
-
-        case 2: {
-            stbtt_uint16 classRangeCount = ttUSHORT(classDefTable + 2);
-            stbtt_uint8 *classRangeRecords = classDefTable + 4;
-
-            // Binary search.
-            stbtt_int32 l=0, r=classRangeCount-1, m;
-            int strawStart, strawEnd, needle=glyph;
-            while (l <= r) {
-                stbtt_uint8 *classRangeRecord;
-                m = (l + r) >> 1;
-                classRangeRecord = classRangeRecords + 6 * m;
-                strawStart = ttUSHORT(classRangeRecord);
-                strawEnd = ttUSHORT(classRangeRecord + 2);
-                if (needle < strawStart)
-                    r = m - 1;
-                else if (needle > strawEnd)
-                    l = m + 1;
-                else
-                    return (stbtt_int32)ttUSHORT(classRangeRecord + 4);
-            }
+   stbtt_uint16 classDefFormat = ttUSHORT(classDefTable);
+   switch (classDefFormat)
+   {
+      case 1: {
+         stbtt_uint16 startGlyphID = ttUSHORT(classDefTable + 2);
+         stbtt_uint16 glyphCount = ttUSHORT(classDefTable + 4);
+         stbtt_uint8 *classDef1ValueArray = classDefTable + 6;
 
 
-            classDefTable = classRangeRecords + 6 * classRangeCount;
-        } break;
+         if (glyph >= startGlyphID && glyph < startGlyphID + glyphCount)
+            return (stbtt_int32)ttUSHORT(classDef1ValueArray + 2 * (glyph - startGlyphID));
+         break;
+      }
 
 
-        default: {
-            // There are no other cases.
-            STBTT_assert(0);
-        } break;
-    }
+      case 2: {
+         stbtt_uint16 classRangeCount = ttUSHORT(classDefTable + 2);
+         stbtt_uint8 *classRangeRecords = classDefTable + 4;
+
+         // Binary search.
+         stbtt_int32 l=0, r=classRangeCount-1, m;
+         int strawStart, strawEnd, needle=glyph;
+         while (l <= r) {
+            stbtt_uint8 *classRangeRecord;
+            m = (l + r) >> 1;
+            classRangeRecord = classRangeRecords + 6 * m;
+            strawStart = ttUSHORT(classRangeRecord);
+            strawEnd = ttUSHORT(classRangeRecord + 2);
+            if (needle < strawStart)
+               r = m - 1;
+            else if (needle > strawEnd)
+               l = m + 1;
+            else
+               return (stbtt_int32)ttUSHORT(classRangeRecord + 4);
+         }
+         break;
+      }
 
 
-    return -1;
+      default:
+         return -1; // Unsupported definition type, return an error.
+   }
+
+   // "All glyphs not assigned to a class fall into class 0". (OpenType spec)
+   return 0;
 }
 }
 
 
 // Define to STBTT_assert(x) if you want to break on unimplemented formats.
 // Define to STBTT_assert(x) if you want to break on unimplemented formats.
 #define STBTT_GPOS_TODO_assert(x)
 #define STBTT_GPOS_TODO_assert(x)
 
 
-static stbtt_int32  stbtt__GetGlyphGPOSInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
-{
-    stbtt_uint16 lookupListOffset;
-    stbtt_uint8 *lookupList;
-    stbtt_uint16 lookupCount;
-    stbtt_uint8 *data;
-    stbtt_int32 i;
-
-    if (!info->gpos) return 0;
-
-    data = info->data + info->gpos;
-
-    if (ttUSHORT(data+0) != 1) return 0; // Major version 1
-    if (ttUSHORT(data+2) != 0) return 0; // Minor version 0
-
-    lookupListOffset = ttUSHORT(data+8);
-    lookupList = data + lookupListOffset;
-    lookupCount = ttUSHORT(lookupList);
-
-    for (i=0; i<lookupCount; ++i) {
-        stbtt_uint16 lookupOffset = ttUSHORT(lookupList + 2 + 2 * i);
-        stbtt_uint8 *lookupTable = lookupList + lookupOffset;
-
-        stbtt_uint16 lookupType = ttUSHORT(lookupTable);
-        stbtt_uint16 subTableCount = ttUSHORT(lookupTable + 4);
-        stbtt_uint8 *subTableOffsets = lookupTable + 6;
-        switch(lookupType) {
-            case 2: { // Pair Adjustment Positioning Subtable
-                stbtt_int32 sti;
-                for (sti=0; sti<subTableCount; sti++) {
-                    stbtt_uint16 subtableOffset = ttUSHORT(subTableOffsets + 2 * sti);
-                    stbtt_uint8 *table = lookupTable + subtableOffset;
-                    stbtt_uint16 posFormat = ttUSHORT(table);
-                    stbtt_uint16 coverageOffset = ttUSHORT(table + 2);
-                    stbtt_int32 coverageIndex = stbtt__GetCoverageIndex(table + coverageOffset, glyph1);
-                    if (coverageIndex == -1) continue;
-
-                    switch (posFormat) {
-                        case 1: {
-                            stbtt_int32 l, r, m;
-                            int straw, needle;
-                            stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
-                            stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
-                            stbtt_int32 valueRecordPairSizeInBytes = 2;
-                            stbtt_uint16 pairSetCount = ttUSHORT(table + 8);
-                            stbtt_uint16 pairPosOffset = ttUSHORT(table + 10 + 2 * coverageIndex);
-                            stbtt_uint8 *pairValueTable = table + pairPosOffset;
-                            stbtt_uint16 pairValueCount = ttUSHORT(pairValueTable);
-                            stbtt_uint8 *pairValueArray = pairValueTable + 2;
-                            // TODO: Support more formats.
-                            STBTT_GPOS_TODO_assert(valueFormat1 == 4);
-                            if (valueFormat1 != 4) return 0;
-                            STBTT_GPOS_TODO_assert(valueFormat2 == 0);
-                            if (valueFormat2 != 0) return 0;
-
-                            STBTT_assert(coverageIndex < pairSetCount);
-                            STBTT__NOTUSED(pairSetCount);
-
-                            needle=glyph2;
-                            r=pairValueCount-1;
-                            l=0;
-
-                            // Binary search.
-                            while (l <= r) {
-                                stbtt_uint16 secondGlyph;
-                                stbtt_uint8 *pairValue;
-                                m = (l + r) >> 1;
-                                pairValue = pairValueArray + (2 + valueRecordPairSizeInBytes) * m;
-                                secondGlyph = ttUSHORT(pairValue);
-                                straw = secondGlyph;
-                                if (needle < straw)
-                                    r = m - 1;
-                                else if (needle > straw)
-                                    l = m + 1;
-                                else {
-                                    stbtt_int16 xAdvance = ttSHORT(pairValue + 2);
-                                    return xAdvance;
-                                }
-                            }
-                        } break;
-
-                        case 2: {
-                            stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
-                            stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
-
-                            stbtt_uint16 classDef1Offset = ttUSHORT(table + 8);
-                            stbtt_uint16 classDef2Offset = ttUSHORT(table + 10);
-                            int glyph1class = stbtt__GetGlyphClass(table + classDef1Offset, glyph1);
-                            int glyph2class = stbtt__GetGlyphClass(table + classDef2Offset, glyph2);
-
-                            stbtt_uint16 class1Count = ttUSHORT(table + 12);
-                            stbtt_uint16 class2Count = ttUSHORT(table + 14);
-                            STBTT_assert(glyph1class < class1Count);
-                            STBTT_assert(glyph2class < class2Count);
-
-                            // TODO: Support more formats.
-                            STBTT_GPOS_TODO_assert(valueFormat1 == 4);
-                            if (valueFormat1 != 4) return 0;
-                            STBTT_GPOS_TODO_assert(valueFormat2 == 0);
-                            if (valueFormat2 != 0) return 0;
-
-                            if (glyph1class >= 0 && glyph1class < class1Count && glyph2class >= 0 && glyph2class < class2Count) {
-                                stbtt_uint8 *class1Records = table + 16;
-                                stbtt_uint8 *class2Records = class1Records + 2 * (glyph1class * class2Count);
-                                stbtt_int16 xAdvance = ttSHORT(class2Records + 2 * glyph2class);
-                                return xAdvance;
-                            }
-                        } break;
-
-                        default: {
-                            // There are no other cases.
-                            STBTT_assert(0);
-                            break;
-                        };
-                    }
-                }
-                break;
-            };
+static stbtt_int32 stbtt__GetGlyphGPOSInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
+{
+   stbtt_uint16 lookupListOffset;
+   stbtt_uint8 *lookupList;
+   stbtt_uint16 lookupCount;
+   stbtt_uint8 *data;
+   stbtt_int32 i, sti;
+
+   if (!info->gpos) return 0;
+
+   data = info->data + info->gpos;
+
+   if (ttUSHORT(data+0) != 1) return 0; // Major version 1
+   if (ttUSHORT(data+2) != 0) return 0; // Minor version 0
+
+   lookupListOffset = ttUSHORT(data+8);
+   lookupList = data + lookupListOffset;
+   lookupCount = ttUSHORT(lookupList);
+
+   for (i=0; i<lookupCount; ++i) {
+      stbtt_uint16 lookupOffset = ttUSHORT(lookupList + 2 + 2 * i);
+      stbtt_uint8 *lookupTable = lookupList + lookupOffset;
+
+      stbtt_uint16 lookupType = ttUSHORT(lookupTable);
+      stbtt_uint16 subTableCount = ttUSHORT(lookupTable + 4);
+      stbtt_uint8 *subTableOffsets = lookupTable + 6;
+      if (lookupType != 2) // Pair Adjustment Positioning Subtable
+         continue;
+
+      for (sti=0; sti<subTableCount; sti++) {
+         stbtt_uint16 subtableOffset = ttUSHORT(subTableOffsets + 2 * sti);
+         stbtt_uint8 *table = lookupTable + subtableOffset;
+         stbtt_uint16 posFormat = ttUSHORT(table);
+         stbtt_uint16 coverageOffset = ttUSHORT(table + 2);
+         stbtt_int32 coverageIndex = stbtt__GetCoverageIndex(table + coverageOffset, glyph1);
+         if (coverageIndex == -1) continue;
+
+         switch (posFormat) {
+            case 1: {
+               stbtt_int32 l, r, m;
+               int straw, needle;
+               stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
+               stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
+               if (valueFormat1 == 4 && valueFormat2 == 0) { // Support more formats?
+                  stbtt_int32 valueRecordPairSizeInBytes = 2;
+                  stbtt_uint16 pairSetCount = ttUSHORT(table + 8);
+                  stbtt_uint16 pairPosOffset = ttUSHORT(table + 10 + 2 * coverageIndex);
+                  stbtt_uint8 *pairValueTable = table + pairPosOffset;
+                  stbtt_uint16 pairValueCount = ttUSHORT(pairValueTable);
+                  stbtt_uint8 *pairValueArray = pairValueTable + 2;
+
+                  if (coverageIndex >= pairSetCount) return 0;
+
+                  needle=glyph2;
+                  r=pairValueCount-1;
+                  l=0;
+
+                  // Binary search.
+                  while (l <= r) {
+                     stbtt_uint16 secondGlyph;
+                     stbtt_uint8 *pairValue;
+                     m = (l + r) >> 1;
+                     pairValue = pairValueArray + (2 + valueRecordPairSizeInBytes) * m;
+                     secondGlyph = ttUSHORT(pairValue);
+                     straw = secondGlyph;
+                     if (needle < straw)
+                        r = m - 1;
+                     else if (needle > straw)
+                        l = m + 1;
+                     else {
+                        stbtt_int16 xAdvance = ttSHORT(pairValue + 2);
+                        return xAdvance;
+                     }
+                  }
+               } else
+                  return 0;
+               break;
+            }
+
+            case 2: {
+               stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
+               stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
+               if (valueFormat1 == 4 && valueFormat2 == 0) { // Support more formats?
+                  stbtt_uint16 classDef1Offset = ttUSHORT(table + 8);
+                  stbtt_uint16 classDef2Offset = ttUSHORT(table + 10);
+                  int glyph1class = stbtt__GetGlyphClass(table + classDef1Offset, glyph1);
+                  int glyph2class = stbtt__GetGlyphClass(table + classDef2Offset, glyph2);
+
+                  stbtt_uint16 class1Count = ttUSHORT(table + 12);
+                  stbtt_uint16 class2Count = ttUSHORT(table + 14);
+                  stbtt_uint8 *class1Records, *class2Records;
+                  stbtt_int16 xAdvance;
+
+                  if (glyph1class < 0 || glyph1class >= class1Count) return 0; // malformed
+                  if (glyph2class < 0 || glyph2class >= class2Count) return 0; // malformed
+
+                  class1Records = table + 16;
+                  class2Records = class1Records + 2 * (glyph1class * class2Count);
+                  xAdvance = ttSHORT(class2Records + 2 * glyph2class);
+                  return xAdvance;
+               } else
+                  return 0;
+               break;
+            }
 
 
             default:
             default:
-                // TODO: Implement other stuff.
-                break;
-        }
-    }
+               return 0; // Unsupported position format
+         }
+      }
+   }
 
 
-    return 0;
+   return 0;
 }
 }
 
 
 STBTT_DEF int  stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int g1, int g2)
 STBTT_DEF int  stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int g1, int g2)
@@ -3075,6 +3062,23 @@ static void stbtt__handle_clipped_edge(float *scanline, int x, stbtt__active_edg
    }
    }
 }
 }
 
 
+static float stbtt__sized_trapezoid_area(float height, float top_width, float bottom_width)
+{
+   STBTT_assert(top_width >= 0);
+   STBTT_assert(bottom_width >= 0);
+   return (top_width + bottom_width) / 2.0f * height;
+}
+
+static float stbtt__position_trapezoid_area(float height, float tx0, float tx1, float bx0, float bx1)
+{
+   return stbtt__sized_trapezoid_area(height, tx1 - tx0, bx1 - bx0);
+}
+
+static float stbtt__sized_triangle_area(float height, float width)
+{
+   return height * width / 2;
+}
+
 static void stbtt__fill_active_edges_new(float *scanline, float *scanline_fill, int len, stbtt__active_edge *e, float y_top)
 static void stbtt__fill_active_edges_new(float *scanline, float *scanline_fill, int len, stbtt__active_edge *e, float y_top)
 {
 {
    float y_bottom = y_top+1;
    float y_bottom = y_top+1;
@@ -3129,13 +3133,13 @@ static void stbtt__fill_active_edges_new(float *scanline, float *scanline_fill,
                float height;
                float height;
                // simple case, only spans one pixel
                // simple case, only spans one pixel
                int x = (int) x_top;
                int x = (int) x_top;
-               height = sy1 - sy0;
+               height = (sy1 - sy0) * e->direction;
                STBTT_assert(x >= 0 && x < len);
                STBTT_assert(x >= 0 && x < len);
-               scanline[x] += e->direction * (1-((x_top - x) + (x_bottom-x))/2)  * height;
-               scanline_fill[x] += e->direction * height; // everything right of this pixel is filled
+               scanline[x]      += stbtt__position_trapezoid_area(height, x_top, x+1.0f, x_bottom, x+1.0f);
+               scanline_fill[x] += height; // everything right of this pixel is filled
             } else {
             } else {
                int x,x1,x2;
                int x,x1,x2;
-               float y_crossing, step, sign, area;
+               float y_crossing, y_final, step, sign, area;
                // covers 2+ pixels
                // covers 2+ pixels
                if (x_top > x_bottom) {
                if (x_top > x_bottom) {
                   // flip scanline vertically; signed area is the same
                   // flip scanline vertically; signed area is the same
@@ -3148,29 +3152,79 @@ static void stbtt__fill_active_edges_new(float *scanline, float *scanline_fill,
                   dy = -dy;
                   dy = -dy;
                   t = x0, x0 = xb, xb = t;
                   t = x0, x0 = xb, xb = t;
                }
                }
+               STBTT_assert(dy >= 0);
+               STBTT_assert(dx >= 0);
 
 
                x1 = (int) x_top;
                x1 = (int) x_top;
                x2 = (int) x_bottom;
                x2 = (int) x_bottom;
                // compute intersection with y axis at x1+1
                // compute intersection with y axis at x1+1
-               y_crossing = (x1+1 - x0) * dy + y_top;
+               y_crossing = y_top + dy * (x1+1 - x0);
+
+               // compute intersection with y axis at x2
+               y_final = y_top + dy * (x2 - x0);
+
+               //           x1    x_top                            x2    x_bottom
+               //     y_top  +------|-----+------------+------------+--------|---+------------+
+               //            |            |            |            |            |            |
+               //            |            |            |            |            |            |
+               //       sy0  |      Txxxxx|............|............|............|............|
+               // y_crossing |            *xxxxx.......|............|............|............|
+               //            |            |     xxxxx..|............|............|............|
+               //            |            |     /-   xx*xxxx........|............|............|
+               //            |            | dy <       |    xxxxxx..|............|............|
+               //   y_final  |            |     \-     |          xx*xxx.........|............|
+               //       sy1  |            |            |            |   xxxxxB...|............|
+               //            |            |            |            |            |            |
+               //            |            |            |            |            |            |
+               //  y_bottom  +------------+------------+------------+------------+------------+
+               //
+               // goal is to measure the area covered by '.' in each pixel
+
+               // if x2 is right at the right edge of x1, y_crossing can blow up, github #1057
+               // @TODO: maybe test against sy1 rather than y_bottom?
+               if (y_crossing > y_bottom)
+                  y_crossing = y_bottom;
 
 
                sign = e->direction;
                sign = e->direction;
-               // area of the rectangle covered from y0..y_crossing
+
+               // area of the rectangle covered from sy0..y_crossing
                area = sign * (y_crossing-sy0);
                area = sign * (y_crossing-sy0);
-               // area of the triangle (x_top,y0), (x+1,y0), (x+1,y_crossing)
-               scanline[x1] += area * (1-((x_top - x1)+(x1+1-x1))/2);
 
 
-               step = sign * dy;
+               // area of the triangle (x_top,sy0), (x1+1,sy0), (x1+1,y_crossing)
+               scanline[x1] += stbtt__sized_triangle_area(area, x1+1 - x_top);
+
+               // check if final y_crossing is blown up; no test case for this
+               if (y_final > y_bottom) {
+                  y_final = y_bottom;
+                  dy = (y_final - y_crossing ) / (x2 - (x1+1)); // if denom=0, y_final = y_crossing, so y_final <= y_bottom
+               }
+
+               // in second pixel, area covered by line segment found in first pixel
+               // is always a rectangle 1 wide * the height of that line segment; this
+               // is exactly what the variable 'area' stores. it also gets a contribution
+               // from the line segment within it. the THIRD pixel will get the first
+               // pixel's rectangle contribution, the second pixel's rectangle contribution,
+               // and its own contribution. the 'own contribution' is the same in every pixel except
+               // the leftmost and rightmost, a trapezoid that slides down in each pixel.
+               // the second pixel's contribution to the third pixel will be the
+               // rectangle 1 wide times the height change in the second pixel, which is dy.
+
+               step = sign * dy * 1; // dy is dy/dx, change in y for every 1 change in x,
+               // which multiplied by 1-pixel-width is how much pixel area changes for each step in x
+               // so the area advances by 'step' every time
+
                for (x = x1+1; x < x2; ++x) {
                for (x = x1+1; x < x2; ++x) {
-                  scanline[x] += area + step/2;
+                  scanline[x] += area + step/2; // area of trapezoid is 1*step/2
                   area += step;
                   area += step;
                }
                }
-               y_crossing += dy * (x2 - (x1+1));
-
-               STBTT_assert(STBTT_fabs(area) <= 1.01f);
+               STBTT_assert(STBTT_fabs(area) <= 1.01f); // accumulated error from area += step unless we round step down
+               STBTT_assert(sy1 > y_final-0.01f);
 
 
-               scanline[x2] += area + sign * (1-((x2-x2)+(x_bottom-x2))/2) * (sy1-y_crossing);
+               // area covered in the last pixel is the rectangle from all the pixels to the left,
+               // plus the trapezoid filled by the line segment in this pixel all the way to the right edge
+               scanline[x2] += area + sign * stbtt__position_trapezoid_area(sy1-y_final, (float) x2, x2+1.0f, x_bottom, x2+1.0f);
 
 
+               // the rest of the line is filled based on the total height of the line segment in this pixel
                scanline_fill[x2] += sign * (sy1-sy0);
                scanline_fill[x2] += sign * (sy1-sy0);
             }
             }
          } else {
          } else {
@@ -3178,6 +3232,9 @@ static void stbtt__fill_active_edges_new(float *scanline, float *scanline_fill,
             // clipping logic. since this does not match the intended use
             // clipping logic. since this does not match the intended use
             // of this library, we use a different, very slow brute
             // of this library, we use a different, very slow brute
             // force implementation
             // force implementation
+            // note though that this does happen some of the time because
+            // x_top and x_bottom can be extrapolated at the top & bottom of
+            // the shape and actually lie outside the bounding box
             int x;
             int x;
             for (x=0; x < len; ++x) {
             for (x=0; x < len; ++x) {
                // cases:
                // cases:
@@ -4414,15 +4471,14 @@ static int stbtt__compute_crossings_x(float x, float y, int nverts, stbtt_vertex
    float y_frac;
    float y_frac;
    int winding = 0;
    int winding = 0;
 
 
-   orig[0] = x;
-   orig[1] = y;
-
    // make sure y never passes through a vertex of the shape
    // make sure y never passes through a vertex of the shape
    y_frac = (float) STBTT_fmod(y, 1.0f);
    y_frac = (float) STBTT_fmod(y, 1.0f);
    if (y_frac < 0.01f)
    if (y_frac < 0.01f)
       y += 0.01f;
       y += 0.01f;
    else if (y_frac > 0.99f)
    else if (y_frac > 0.99f)
       y -= 0.01f;
       y -= 0.01f;
+
+   orig[0] = x;
    orig[1] = y;
    orig[1] = y;
 
 
    // test a ray from (-infinity,y) to (x,y)
    // test a ray from (-infinity,y) to (x,y)
@@ -4484,35 +4540,35 @@ static float stbtt__cuberoot( float x )
       return  (float) STBTT_pow( x,1.0f/3.0f);
       return  (float) STBTT_pow( x,1.0f/3.0f);
 }
 }
 
 
-// x^3 + c*x^2 + b*x + a = 0
+// x^3 + a*x^2 + b*x + c = 0
 static int stbtt__solve_cubic(float a, float b, float c, float* r)
 static int stbtt__solve_cubic(float a, float b, float c, float* r)
 {
 {
-	float s = -a / 3;
-	float p = b - a*a / 3;
-	float q = a * (2*a*a - 9*b) / 27 + c;
+   float s = -a / 3;
+   float p = b - a*a / 3;
+   float q = a * (2*a*a - 9*b) / 27 + c;
    float p3 = p*p*p;
    float p3 = p*p*p;
-	float d = q*q + 4*p3 / 27;
-	if (d >= 0) {
-		float z = (float) STBTT_sqrt(d);
-		float u = (-q + z) / 2;
-		float v = (-q - z) / 2;
-		u = stbtt__cuberoot(u);
-		v = stbtt__cuberoot(v);
-		r[0] = s + u + v;
-		return 1;
-	} else {
-	   float u = (float) STBTT_sqrt(-p/3);
-	   float v = (float) STBTT_acos(-STBTT_sqrt(-27/p3) * q / 2) / 3; // p3 must be negative, since d is negative
-	   float m = (float) STBTT_cos(v);
+   float d = q*q + 4*p3 / 27;
+   if (d >= 0) {
+      float z = (float) STBTT_sqrt(d);
+      float u = (-q + z) / 2;
+      float v = (-q - z) / 2;
+      u = stbtt__cuberoot(u);
+      v = stbtt__cuberoot(v);
+      r[0] = s + u + v;
+      return 1;
+   } else {
+      float u = (float) STBTT_sqrt(-p/3);
+      float v = (float) STBTT_acos(-STBTT_sqrt(-27/p3) * q / 2) / 3; // p3 must be negative, since d is negative
+      float m = (float) STBTT_cos(v);
       float n = (float) STBTT_cos(v-3.141592/2)*1.732050808f;
       float n = (float) STBTT_cos(v-3.141592/2)*1.732050808f;
-	   r[0] = s + u * 2 * m;
-	   r[1] = s - u * (m + n);
-	   r[2] = s - u * (m - n);
+      r[0] = s + u * 2 * m;
+      r[1] = s - u * (m + n);
+      r[2] = s - u * (m - n);
 
 
       //STBTT_assert( STBTT_fabs(((r[0]+a)*r[0]+b)*r[0]+c) < 0.05f);  // these asserts may not be safe at all scales, though they're in bezier t parameter units so maybe?
       //STBTT_assert( STBTT_fabs(((r[0]+a)*r[0]+b)*r[0]+c) < 0.05f);  // these asserts may not be safe at all scales, though they're in bezier t parameter units so maybe?
       //STBTT_assert( STBTT_fabs(((r[1]+a)*r[1]+b)*r[1]+c) < 0.05f);
       //STBTT_assert( STBTT_fabs(((r[1]+a)*r[1]+b)*r[1]+c) < 0.05f);
       //STBTT_assert( STBTT_fabs(((r[2]+a)*r[2]+b)*r[2]+c) < 0.05f);
       //STBTT_assert( STBTT_fabs(((r[2]+a)*r[2]+b)*r[2]+c) < 0.05f);
-   	return 3;
+      return 3;
    }
    }
 }
 }
 
 
@@ -4589,18 +4645,17 @@ STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float sc
             for (i=0; i < num_verts; ++i) {
             for (i=0; i < num_verts; ++i) {
                float x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y;
                float x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y;
 
 
-               // check against every point here rather than inside line/curve primitives -- @TODO: wrong if multiple 'moves' in a row produce a garbage point, and given culling, probably more efficient to do within line/curve
-               float dist2 = (x0-sx)*(x0-sx) + (y0-sy)*(y0-sy);
-               if (dist2 < min_dist*min_dist)
-                  min_dist = (float) STBTT_sqrt(dist2);
-
-               if (verts[i].type == STBTT_vline) {
+               if (verts[i].type == STBTT_vline && precompute[i] != 0.0f) {
                   float x1 = verts[i-1].x*scale_x, y1 = verts[i-1].y*scale_y;
                   float x1 = verts[i-1].x*scale_x, y1 = verts[i-1].y*scale_y;
 
 
+                  float dist,dist2 = (x0-sx)*(x0-sx) + (y0-sy)*(y0-sy);
+                  if (dist2 < min_dist*min_dist)
+                     min_dist = (float) STBTT_sqrt(dist2);
+
                   // coarse culling against bbox
                   // coarse culling against bbox
                   //if (sx > STBTT_min(x0,x1)-min_dist && sx < STBTT_max(x0,x1)+min_dist &&
                   //if (sx > STBTT_min(x0,x1)-min_dist && sx < STBTT_max(x0,x1)+min_dist &&
                   //    sy > STBTT_min(y0,y1)-min_dist && sy < STBTT_max(y0,y1)+min_dist)
                   //    sy > STBTT_min(y0,y1)-min_dist && sy < STBTT_max(y0,y1)+min_dist)
-                  float dist = (float) STBTT_fabs((x1-x0)*(y0-sy) - (y1-y0)*(x0-sx)) * precompute[i];
+                  dist = (float) STBTT_fabs((x1-x0)*(y0-sy) - (y1-y0)*(x0-sx)) * precompute[i];
                   STBTT_assert(i != 0);
                   STBTT_assert(i != 0);
                   if (dist < min_dist) {
                   if (dist < min_dist) {
                      // check position along line
                      // check position along line
@@ -4627,7 +4682,8 @@ STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float sc
                      float ax = x1-x0, ay = y1-y0;
                      float ax = x1-x0, ay = y1-y0;
                      float bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2;
                      float bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2;
                      float mx = x0 - sx, my = y0 - sy;
                      float mx = x0 - sx, my = y0 - sy;
-                     float res[3],px,py,t,it;
+                     float res[3] = {0.f,0.f,0.f};
+                     float px,py,t,it,dist2;
                      float a_inv = precompute[i];
                      float a_inv = precompute[i];
                      if (a_inv == 0.0) { // if a_inv is 0, it's 2nd degree so use quadratic formula
                      if (a_inv == 0.0) { // if a_inv is 0, it's 2nd degree so use quadratic formula
                         float a = 3*(ax*bx + ay*by);
                         float a = 3*(ax*bx + ay*by);
@@ -4654,6 +4710,10 @@ STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float sc
                         float d = (mx*ax+my*ay) * a_inv;
                         float d = (mx*ax+my*ay) * a_inv;
                         num = stbtt__solve_cubic(b, c, d, res);
                         num = stbtt__solve_cubic(b, c, d, res);
                      }
                      }
+                     dist2 = (x0-sx)*(x0-sx) + (y0-sy)*(y0-sy);
+                     if (dist2 < min_dist*min_dist)
+                        min_dist = (float) STBTT_sqrt(dist2);
+
                      if (num >= 1 && res[0] >= 0.0f && res[0] <= 1.0f) {
                      if (num >= 1 && res[0] >= 0.0f && res[0] <= 1.0f) {
                         t = res[0], it = 1.0f - t;
                         t = res[0], it = 1.0f - t;
                         px = it*it*x0 + 2*t*it*x1 + t*t*x2;
                         px = it*it*x0 + 2*t*it*x1 + t*t*x2;
@@ -4913,6 +4973,12 @@ STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const
 
 
 // FULL VERSION HISTORY
 // FULL VERSION HISTORY
 //
 //
+//   1.25 (2021-07-11) many fixes
+//   1.24 (2020-02-05) fix warning
+//   1.23 (2020-02-02) query SVG data for glyphs; query whole kerning table (but only kern not GPOS)
+//   1.22 (2019-08-11) minimize missing-glyph duplication; fix kerning if both 'GPOS' and 'kern' are defined
+//   1.21 (2019-02-25) fix warning
+//   1.20 (2019-02-07) PackFontRange skips missing codepoints; GetScaleFontVMetrics()
 //   1.19 (2018-02-11) OpenType GPOS kerning (horizontal only), STBTT_fmod
 //   1.19 (2018-02-11) OpenType GPOS kerning (horizontal only), STBTT_fmod
 //   1.18 (2018-01-29) add missing function
 //   1.18 (2018-01-29) add missing function
 //   1.17 (2017-07-23) make more arguments const; doc fix
 //   1.17 (2017-07-23) make more arguments const; doc fix

+ 2 - 1
src/rcore.c

@@ -148,6 +148,7 @@
 
 
 #if defined(SUPPORT_COMPRESSION_API)
 #if defined(SUPPORT_COMPRESSION_API)
     #define SINFL_IMPLEMENTATION
     #define SINFL_IMPLEMENTATION
+    #define SINFL_NO_SIMD
     #include "external/sinfl.h"     // Deflate (RFC 1951) decompressor
     #include "external/sinfl.h"     // Deflate (RFC 1951) decompressor
 
 
     #define SDEFL_IMPLEMENTATION
     #define SDEFL_IMPLEMENTATION
@@ -3004,7 +3005,7 @@ unsigned char *DecompressData(unsigned char *compData, int compDataLength, int *
 #if defined(SUPPORT_COMPRESSION_API)
 #if defined(SUPPORT_COMPRESSION_API)
     // Decompress data from a valid DEFLATE stream
     // Decompress data from a valid DEFLATE stream
     data = RL_CALLOC(MAX_DECOMPRESSION_SIZE*1024*1024, 1);
     data = RL_CALLOC(MAX_DECOMPRESSION_SIZE*1024*1024, 1);
-    int length = sinflate(data, compData, compDataLength);
+    int length = sinflate(data, MAX_DECOMPRESSION_SIZE, compData, compDataLength);
     unsigned char *temp = RL_REALLOC(data, length);
     unsigned char *temp = RL_REALLOC(data, length);
 
 
     if (temp != NULL) data = temp;
     if (temp != NULL) data = temp;

+ 1 - 1
src/rmodels.c

@@ -1830,7 +1830,7 @@ void UpdateModelAnimation(Model model, ModelAnimation anim, int frame)
                         continue;
                         continue;
                     }
                     }
                     boneId = mesh.boneIds[boneCounter];
                     boneId = mesh.boneIds[boneCounter];
-                    int boneIdParent = model.bones[boneId].parent;
+                    //int boneIdParent = model.bones[boneId].parent;
                     inTranslation = model.bindPose[boneId].translation;
                     inTranslation = model.bindPose[boneId].translation;
                     inRotation = model.bindPose[boneId].rotation;
                     inRotation = model.bindPose[boneId].rotation;
                     // inScale = model.bindPose[boneId].scale;
                     // inScale = model.bindPose[boneId].scale;

Some files were not shown because too many files changed in this diff