3 years ago · e79c272140
--- a/license.txt
+++ b/license.txt
@@ -88,7 +88,7 @@ This distribution contains code from the following projects (full license text b
 
				 

			
 
				  - dr_flac

			
 
				 	Website: https://github.com/mackron/dr_libs

			
 
				-	Source download: https://github.com/mackron/dr_libs/blob/c5e5355/dr_flac.h

			
 
				+	Source download: https://github.com/mackron/dr_libs/blob/15f37e3/dr_flac.h

			
 
				 	License: MIT/Expat

			
 
				 	Copyright 2018 David Reid

			
 
				 

			
--- a/src/libraries/dr/dr_flac.h
+++ b/src/libraries/dr/dr_flac.h
@@ -1,6 +1,6 @@
 
				 /*
			
 
				 FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
			
 
				-dr_flac - v0.12.33 - 2021-12-22
			
 
				+dr_flac - v0.12.38 - 2022-04-10
			
 
				 
			
 
				 David Reid - [email protected]
			
 
				 
			
@@ -232,7 +232,7 @@ extern "C" {
 
				 
			
 
				 #define DRFLAC_VERSION_MAJOR     0
			
 
				 #define DRFLAC_VERSION_MINOR     12
			
 
				-#define DRFLAC_VERSION_REVISION  33
			
 
				+#define DRFLAC_VERSION_REVISION  38
			
 
				 #define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
			
 
				 
			
 
				 #include <stddef.h> /* For size_t. */
			
@@ -1363,9 +1363,15 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
 
				     I am using "__inline__" only when we're compiling in strict ANSI mode.
			
 
				     */
			
 
				     #if defined(__STRICT_ANSI__)
			
 
				-        #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
			
 
				+        #define DRFLAC_GNUC_INLINE_HINT __inline__
			
 
				     #else
			
 
				-        #define DRFLAC_INLINE inline __attribute__((always_inline))
			
 
				+        #define DRFLAC_GNUC_INLINE_HINT inline
			
 
				+    #endif
			
 
				+
			
 
				+    #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
			
 
				+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT __attribute__((always_inline))
			
 
				+    #else
			
 
				+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT
			
 
				     #endif
			
 
				 #elif defined(__WATCOMC__)
			
 
				     #define DRFLAC_INLINE __inline
			
@@ -1378,7 +1384,7 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
 
				     #define DRFLAC_X64
			
 
				 #elif defined(__i386) || defined(_M_IX86)
			
 
				     #define DRFLAC_X86
			
 
				-#elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
			
 
				+#elif defined(__arm__) || defined(_M_ARM) || defined(__arm64) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
			
 
				     #define DRFLAC_ARM
			
 
				 #endif
			
 
				 
			
@@ -1431,16 +1437,6 @@ Unfortuantely dr_flac depends on this for a few things so we're just going to di
 
				     #if defined(DRFLAC_ARM)
			
 
				         #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
			
 
				             #define DRFLAC_SUPPORT_NEON
			
 
				-        #endif
			
 
				-
			
 
				-        /* Fall back to looking for the #include file. */
			
 
				-        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
			
 
				-            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
			
 
				-                #define DRFLAC_SUPPORT_NEON
			
 
				-            #endif
			
 
				-        #endif
			
 
				-
			
 
				-        #if defined(DRFLAC_SUPPORT_NEON)
			
 
				             #include <arm_neon.h>
			
 
				         #endif
			
 
				     #endif
			
@@ -1909,6 +1905,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
 
				     return n;
			
 
				 }
			
 
				 
			
 
				+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32_ptr_unaligned(const void* pData)
			
 
				+{   /* Builds a 32-bit value from the 4 bytes at pData, first byte most significant. Reads byte-by-byte, so no 32-bit load is made through pData. */
			
 
				+    const drflac_uint8* pNum = (const drflac_uint8*)pData;
			
 
				+    return ((drflac_uint32)pNum[0] << 24) | ((drflac_uint32)pNum[1] << 16) | ((drflac_uint32)pNum[2] << 8) | (drflac_uint32)pNum[3];   /* Widen each byte before shifting: a byte promoted to int and shifted left by 24 can overflow a signed int, which is undefined. */
			
 
				+}
			
 
				+
			
 
				 static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
			
 
				 {
			
 
				     if (drflac__is_little_endian()) {
			
@@ -1928,6 +1930,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
 
				     return n;
			
 
				 }
			
 
				 
			
 
				+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32_ptr_unaligned(const void* pData)
			
 
				+{   /* Builds a 32-bit value from the 4 bytes at pData, first byte least significant. Reads byte-by-byte, so no 32-bit load is made through pData. */
			
 
				+    const drflac_uint8* pNum = (const drflac_uint8*)pData;
			
 
				+    return (drflac_uint32)pNum[0] | ((drflac_uint32)pNum[1] << 8) | ((drflac_uint32)pNum[2] << 16) | ((drflac_uint32)pNum[3] << 24);   /* Widen each byte before shifting: a byte promoted to int and shifted left by 24 can overflow a signed int, which is undefined. */
			
 
				+}
			
 
				+
			
 
				 
			
 
				 static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
			
 
				 {
			
@@ -2429,6 +2437,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i
 
				         if (!drflac__reload_cache(bs)) {
			
 
				             return DRFLAC_FALSE;
			
 
				         }
			
 
				+        if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
			
 
				+            /* This happens when we get to end of stream */
			
 
				+            return DRFLAC_FALSE;
			
 
				+        }
			
 
				 
			
 
				         *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
			
 
				         bs->consumedBits += bitCountLo;
			
@@ -2872,9 +2884,24 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs,
 
				         }
			
 
				     }
			
 
				 
			
 
				+    if (bs->cache == 1) {
			
 
				+        /* Not catching this would lead to undefined behaviour: a shift of a 32-bit number by 32 or more is undefined */
			
 
				+        *pOffsetOut = zeroCounter + (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs) - 1;
			
 
				+        if (!drflac__reload_cache(bs)) {
			
 
				+            return DRFLAC_FALSE;
			
 
				+        }
			
 
				+
			
 
				+        return DRFLAC_TRUE;
			
 
				+    }
			
 
				+
			
 
				     setBitOffsetPlus1 = drflac__clz(bs->cache);
			
 
				     setBitOffsetPlus1 += 1;
			
 
				 
			
 
				+    if (setBitOffsetPlus1 > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
			
 
				+        /* This happens when we get to end of stream */
			
 
				+        return DRFLAC_FALSE;
			
 
				+    }
			
 
				+
			
 
				     bs->consumedBits += setBitOffsetPlus1;
			
 
				     bs->cache <<= setBitOffsetPlus1;
			
 
				 
			
@@ -2989,6 +3016,25 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64
 
				 }
			
 
				 
			
 
				 
			
 
				+static DRFLAC_INLINE drflac_uint32 drflac__ilog2_u32(drflac_uint32 x)
			
 
				+{
			
 
				+#if 1   /* Needs optimizing. Returns how many right-shifts empty x: 0 for x == 0, otherwise floor(log2(x)) + 1. */
			
 
				+    drflac_uint32 bitCount;
			
 
				+
			
 
				+    for (bitCount = 0; x != 0; x >>= 1) {
			
 
				+        bitCount += 1;   /* One more significant bit was present. */
			
 
				+    }
			
 
				+
			
 
				+    return bitCount;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+static DRFLAC_INLINE drflac_bool32 drflac__use_64_bit_prediction(drflac_uint32 bitsPerSample, drflac_uint32 order, drflac_uint32 precision)
			
 
				+{
			
 
				+    /* Non-zero when sample bits + coefficient precision + drflac__ilog2_u32(order) exceed 32. See https://web.archive.org/web/20220205005724/https://github.com/ietf-wg-cellar/flac-specification/blob/37a49aa48ba4ba12e8757badfc59c0df35435fec/rfc_backmatter.md */
			
 
				+    return (drflac__ilog2_u32(order) + precision + bitsPerSample) > 32;
			
 
				+}
			
 
				+
			
 
				 
			
 
				 /*
			
 
				 The next two functions are responsible for calculating the prediction.
			
@@ -2996,6 +3042,9 @@ The next two functions are responsible for calculating the prediction.
 
				 When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
			
 
				 safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
			
 
				 */
			
 
				+#if defined(__clang__)
			
 
				+__attribute__((no_sanitize("signed-integer-overflow")))
			
 
				+#endif
			
 
				 static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
			
 
				 {
			
 
				     drflac_int32 prediction = 0;
			
@@ -3231,7 +3280,7 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32
 
				 Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
			
 
				 sake of readability and should only be used as a reference.
			
 
				 */
			
 
				-static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				 {
			
 
				     drflac_uint32 i;
			
 
				 
			
@@ -3270,10 +3319,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drfla
 
				         }
			
 
				 
			
 
				 
			
 
				-        if (bitsPerSample+shift >= 32) {
			
 
				-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
			
 
				+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
			
 
				+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
			
 
				         } else {
			
 
				-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
			
 
				+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
			
 
				         }
			
 
				     }
			
 
				 
			
@@ -3370,6 +3419,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac
 
				             if (!drflac__reload_cache(bs)) {
			
 
				                 return DRFLAC_FALSE;
			
 
				             }
			
 
				+            if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
			
 
				+                /* This happens when we get to end of stream */
			
 
				+                return DRFLAC_FALSE;
			
 
				+            }
			
 
				         }
			
 
				 
			
 
				         riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
			
@@ -3450,6 +3503,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drf
 
				                 if (!drflac__reload_cache(bs)) {
			
 
				                     return DRFLAC_FALSE;
			
 
				                 }
			
 
				+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
			
 
				+                    /* This happens when we get to end of stream */
			
 
				+                    return DRFLAC_FALSE;
			
 
				+                }
			
 
				 
			
 
				                 bs_cache = bs->cache;
			
 
				                 bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
			
@@ -3560,6 +3617,11 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac
 
				                     return DRFLAC_FALSE;
			
 
				                 }
			
 
				 
			
 
				+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
			
 
				+                    /* This happens when we get to end of stream */
			
 
				+                    return DRFLAC_FALSE;
			
 
				+                }
			
 
				+
			
 
				                 bs_cache = bs->cache;
			
 
				                 bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
			
 
				             }
			
@@ -3646,7 +3708,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorde
 
				     return DRFLAC_TRUE;
			
 
				 }
			
 
				 
			
 
				-static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				 {
			
 
				     drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
			
 
				     drflac_uint32 zeroCountPart0 = 0;
			
@@ -3664,14 +3726,14 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
 
				     DRFLAC_ASSERT(bs != NULL);
			
 
				     DRFLAC_ASSERT(pSamplesOut != NULL);
			
 
				 
			
 
				-    if (order == 0) {
			
 
				-        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+    if (lpcOrder == 0) {
			
 
				+        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
			
 
				     }
			
 
				 
			
 
				     riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
			
 
				     pSamplesOutEnd = pSamplesOut + (count & ~3);
			
 
				 
			
 
				-    if (bitsPerSample+shift > 32) {
			
 
				+    if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
			
 
				         while (pSamplesOut < pSamplesOutEnd) {
			
 
				             /*
			
 
				             Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
			
@@ -3699,10 +3761,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
 
				             riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
			
 
				             riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
			
 
				 
			
 
				-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
			
 
				-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
			
 
				-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
			
 
				-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
			
 
				+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
			
 
				+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
			
 
				+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
			
 
				+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
			
 
				 
			
 
				             pSamplesOut += 4;
			
 
				         }
			
@@ -3730,10 +3792,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
 
				             riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
			
 
				             riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
			
 
				 
			
 
				-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
			
 
				-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
			
 
				-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
			
 
				-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
			
 
				+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
			
 
				+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
			
 
				+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
			
 
				+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
			
 
				 
			
 
				             pSamplesOut += 4;
			
 
				         }
			
@@ -3753,10 +3815,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
 
				         /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
			
 
				 
			
 
				         /* Sample reconstruction. */
			
 
				-        if (bitsPerSample+shift > 32) {
			
 
				-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
			
 
				+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
			
 
				+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
			
 
				         } else {
			
 
				-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
			
 
				+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
			
 
				         }
			
 
				 
			
 
				         i += 1;
			
@@ -4212,20 +4274,20 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac
 
				     return DRFLAC_TRUE;
			
 
				 }
			
 
				 
			
 
				-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				 {
			
 
				     DRFLAC_ASSERT(bs != NULL);
			
 
				     DRFLAC_ASSERT(pSamplesOut != NULL);
			
 
				 
			
 
				     /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
			
 
				-    if (order > 0 && order <= 12) {
			
 
				-        if (bitsPerSample+shift > 32) {
			
 
				-            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+    if (lpcOrder > 0 && lpcOrder <= 12) {
			
 
				+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
			
 
				+            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
			
 
				         } else {
			
 
				-            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
			
 
				         }
			
 
				     } else {
			
 
				-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
			
 
				     }
			
 
				 }
			
 
				 #endif
			
@@ -4562,7 +4624,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
 
				 
			
 
				     /*
			
 
				     Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
			
 
				-    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
			
 
				+    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
			
 
				     in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
			
 
				     so I think there's opportunity for this to be simplified.
			
 
				     */
			
@@ -4710,41 +4772,41 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
 
				     return DRFLAC_TRUE;
			
 
				 }
			
 
				 
			
 
				-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				 {
			
 
				     DRFLAC_ASSERT(bs != NULL);
			
 
				     DRFLAC_ASSERT(pSamplesOut != NULL);
			
 
				 
			
 
				     /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
			
 
				-    if (order > 0 && order <= 12) {
			
 
				-        if (bitsPerSample+shift > 32) {
			
 
				-            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+    if (lpcOrder > 0 && lpcOrder <= 12) {
			
 
				+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
			
 
				+            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
			
 
				         } else {
			
 
				-            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
			
 
				         }
			
 
				     } else {
			
 
				-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
			
 
				     }
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				 {
			
 
				 #if defined(DRFLAC_SUPPORT_SSE41)
			
 
				     if (drflac__gIsSSE41Supported) {
			
 
				-        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
			
 
				     } else
			
 
				 #elif defined(DRFLAC_SUPPORT_NEON)
			
 
				     if (drflac__gIsNEONSupported) {
			
 
				-        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
			
 
				     } else
			
 
				 #endif
			
 
				     {
			
 
				         /* Scalar fallback. */
			
 
				     #if 0
			
 
				-        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
			
 
				     #else
			
 
				-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
			
 
				+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
			
 
				     #endif
			
 
				     }
			
 
				 }
			
@@ -4765,7 +4827,10 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_
 
				     return DRFLAC_TRUE;
			
 
				 }
			
 
				 
			
 
				-static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				+#if defined(__clang__)
			
 
				+__attribute__((no_sanitize("signed-integer-overflow")))
			
 
				+#endif
			
 
				+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
			
 
				 {
			
 
				     drflac_uint32 i;
			
 
				 
			
@@ -4782,10 +4847,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs*
 
				             pSamplesOut[i] = 0;
			
 
				         }
			
 
				 
			
 
				-        if (bitsPerSample >= 24) {
			
 
				-            pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
			
 
				+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
			
 
				+            pSamplesOut[i] += drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
			
 
				         } else {
			
 
				-            pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
			
 
				+            pSamplesOut[i] += drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
			
 
				         }
			
 
				     }
			
 
				 
			
@@ -4798,7 +4863,7 @@ Reads and decodes the residual for the sub-frame the decoder is currently sittin
 
				 when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
			
 
				 <blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
			
 
				 */
			
 
				-static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
			
 
				+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
			
 
				 {
			
 
				     drflac_uint8 residualMethod;
			
 
				     drflac_uint8 partitionOrder;
			
@@ -4818,7 +4883,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
 
				     }
			
 
				 
			
 
				     /* Ignore the first <order> values. */
			
 
				-    pDecodedSamples += order;
			
 
				+    pDecodedSamples += lpcOrder;
			
 
				 
			
 
				     if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
			
 
				         return DRFLAC_FALSE;
			
@@ -4833,11 +4898,11 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
 
				     }
			
 
				 
			
 
				     /* Validation check. */
			
 
				-    if ((blockSize / (1 << partitionOrder)) < order) {
			
 
				+    if ((blockSize / (1 << partitionOrder)) < lpcOrder) {
			
 
				         return DRFLAC_FALSE;
			
 
				     }
			
 
				 
			
 
				-    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
			
 
				+    samplesInPartition = (blockSize / (1 << partitionOrder)) - lpcOrder;
			
 
				     partitionsRemaining = (1 << partitionOrder);
			
 
				     for (;;) {
			
 
				         drflac_uint8 riceParam = 0;
			
@@ -4858,7 +4923,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
 
				         }
			
 
				 
			
 
				         if (riceParam != 0xFF) {
			
 
				-            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
			
 
				+            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
			
 
				                 return DRFLAC_FALSE;
			
 
				             }
			
 
				         } else {
			
@@ -4867,7 +4932,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
 
				                 return DRFLAC_FALSE;
			
 
				             }
			
 
				 
			
 
				-            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
			
 
				+            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
			
 
				                 return DRFLAC_FALSE;
			
 
				             }
			
 
				         }
			
@@ -5036,7 +5101,7 @@ static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32
 
				         pDecodedSamples[i] = sample;
			
 
				     }
			
 
				 
			
 
				-    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
			
 
				+    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, 4, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
			
 
				         return DRFLAC_FALSE;
			
 
				     }
			
 
				 
			
@@ -5091,7 +5156,7 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl
 
				         }
			
 
				     }
			
 
				 
			
 
				-    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
			
 
				+    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
			
 
				         return DRFLAC_FALSE;
			
 
				     }
			
 
				 
			
@@ -5219,6 +5284,9 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
 
				                 return DRFLAC_FALSE;
			
 
				             }
			
 
				             crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
			
 
				+            if (header->blockSizeInPCMFrames == 0xFFFF) {
			
 
				+                return DRFLAC_FALSE;    /* Frame is too big. This is the size of the frame minus 1. The STREAMINFO block defines the max block size which is 16-bits. Adding one will make it 17 bits and therefore too big. */
			
 
				+            }
			
 
				             header->blockSizeInPCMFrames += 1;
			
 
				         } else {
			
 
				             DRFLAC_ASSERT(blockSize >= 8);
			
@@ -5257,6 +5325,11 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
 
				             header->bitsPerSample = streaminfoBitsPerSample;
			
 
				         }
			
 
				 
			
 
				+        if (header->bitsPerSample != streaminfoBitsPerSample) {
			
 
				+            /* If this subframe has a different bitsPerSample than streaminfo or the first frame, reject it */
			
 
				+            return DRFLAC_FALSE;
			
 
				+        }
			
 
				+
			
 
				         if (!drflac__read_uint8(bs, 8, &header->crc8)) {
			
 
				             return DRFLAC_FALSE;
			
 
				         }
			
@@ -5343,6 +5416,11 @@ static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame,
 
				         subframeBitsPerSample += 1;
			
 
				     }
			
 
				 
			
 
				+    if (subframeBitsPerSample > 32) {
			
 
				+        /* libFLAC and ffmpeg reject 33-bit subframes as well */
			
 
				+        return DRFLAC_FALSE;
			
 
				+    }
			
 
				+
			
 
				     /* Need to handle wasted bits per sample. */
			
 
				     if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
			
 
				         return DRFLAC_FALSE;
			
@@ -6485,7 +6563,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
				                     pRunningData    = (const char*)pRawData;
			
 
				                     pRunningDataEnd = (const char*)pRawData + blockSize;
			
 
				 
			
 
				-                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				 
			
 
				                     /* Need space for the rest of the block */
			
 
				                     if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
			
@@ -6493,7 +6571,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
				                         return DRFLAC_FALSE;
			
 
				                     }
			
 
				                     metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
			
 
				-                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				 
			
 
				                     /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
			
 
				                     if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
			
@@ -6511,7 +6589,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
				                             return DRFLAC_FALSE;
			
 
				                         }
			
 
				 
			
 
				-                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				+                        commentLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				                         if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
			
 
				                             drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
			
 
				                             return DRFLAC_FALSE;
			
@@ -6620,8 +6698,8 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
				                     pRunningData    = (const char*)pRawData;
			
 
				                     pRunningDataEnd = (const char*)pRawData + blockSize;
			
 
				 
			
 
				-                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				-                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.picture.type       = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.picture.mimeLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				 
			
 
				                     /* Need space for the rest of the block */
			
 
				                     if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
			
@@ -6629,7 +6707,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
				                         return DRFLAC_FALSE;
			
 
				                     }
			
 
				                     metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
			
 
				-                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.picture.descriptionLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				 
			
 
				                     /* Need space for the rest of the block */
			
 
				                     if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
			
@@ -6637,11 +6715,11 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
				                         return DRFLAC_FALSE;
			
 
				                     }
			
 
				                     metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
			
 
				-                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				-                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				-                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				-                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				-                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.picture.width           = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.picture.height          = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.picture.colorDepth      = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.picture.indexColorCount = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				+                    metadata.data.picture.pictureDataSize = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
			
 
				                     metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
			
 
				 
			
 
				                     /* Need space for the picture after the block */
			
@@ -7865,7 +7943,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
 
				 #ifndef DR_FLAC_NO_OGG
			
 
				     if (init.container == drflac_container_ogg) {
			
 
				         drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
			
 
				-        *pInternalOggbs = oggbs;
			
 
				+        DRFLAC_COPY_MEMORY(pInternalOggbs, &oggbs, sizeof(oggbs));
			
 
				 
			
 
				         /* The Ogg bistream needs to be layered on top of the original bitstream. */
			
 
				         pFlac->bs.onRead = drflac__on_read_ogg;
			
@@ -11786,7 +11864,7 @@ DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator
 
				         return NULL;
			
 
				     }
			
 
				 
			
 
				-    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
			
 
				+    length = drflac__le2host_32_ptr_unaligned(pIter->pRunningData);
			
 
				     pIter->pRunningData += 4;
			
 
				 
			
 
				     pComment = pIter->pRunningData;
			
@@ -11856,6 +11934,22 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
 
				 /*
			
 
				 REVISION HISTORY
			
 
				 ================
			
 
				+v0.12.38 - 2022-04-10
			
 
				+  - Fix compilation error on older versions of GCC.
			
 
				+
			
 
				+v0.12.37 - 2022-02-12
			
 
				+  - Improve ARM detection.
			
 
				+
			
 
				+v0.12.36 - 2022-02-07
			
 
				+  - Fix a compilation error with the ARM build.
			
 
				+
			
 
				+v0.12.35 - 2022-02-06
			
 
				+  - Fix a bug due to underestimating the amount of precision required for the prediction stage.
			
 
				+  - Fix some bugs found from fuzz testing.
			
 
				+
			
 
				+v0.12.34 - 2022-01-07
			
 
				+  - Fix some misalignment bugs when reading metadata.
			
 
				+
			
 
				 v0.12.33 - 2021-12-22
			
 
				   - Fix a bug with seeking when the seek table does not start at PCM frame 0.