3 years ago · e79c272140
--- a/license.txt
+++ b/license.txt
@@ -88,7 +88,7 @@ This distribution contains code from the following projects (full license text b
 
															  - dr_flac

														
 
															 	Website: https://github.com/mackron/dr_libs

														
 
															-	Source download: https://github.com/mackron/dr_libs/blob/c5e5355/dr_flac.h

														
 
															+	Source download: https://github.com/mackron/dr_libs/blob/15f37e3/dr_flac.h

														
 
															 	License: MIT/Expat

														
 
															 	Copyright 2018 David Reid

														
--- a/src/libraries/dr/dr_flac.h
+++ b/src/libraries/dr/dr_flac.h
@@ -1,6 +1,6 @@
 
															 /*
														
 
															 FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
														
 
															-dr_flac - v0.12.33 - 2021-12-22
														
 
															+dr_flac - v0.12.38 - 2022-04-10
														
 
															 David Reid - [email protected]
														
@@ -232,7 +232,7 @@ extern "C" {
 
															 #define DRFLAC_VERSION_MAJOR     0
														
 
															 #define DRFLAC_VERSION_MINOR     12
														
 
															-#define DRFLAC_VERSION_REVISION  33
														
 
															+#define DRFLAC_VERSION_REVISION  38
														
 
															 #define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
														
 
															 #include <stddef.h> /* For size_t. */
														
@@ -1363,9 +1363,15 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
 
															     I am using "__inline__" only when we're compiling in strict ANSI mode.
														
 
															     */
														
 
															     #if defined(__STRICT_ANSI__)
														
 
															-        #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
														
 
															+        #define DRFLAC_GNUC_INLINE_HINT __inline__
														
 
															     #else
														
 
															-        #define DRFLAC_INLINE inline __attribute__((always_inline))
														
 
															+        #define DRFLAC_GNUC_INLINE_HINT inline
														
 
															+    #endif
														
 
															+
														
 
															+    #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
														
 
															+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT __attribute__((always_inline))
														
 
															+    #else
														
 
															+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT
														
 
															     #endif
														
 
															 #elif defined(__WATCOMC__)
														
 
															     #define DRFLAC_INLINE __inline
														
@@ -1378,7 +1384,7 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
 
															     #define DRFLAC_X64
														
 
															 #elif defined(__i386) || defined(_M_IX86)
														
 
															     #define DRFLAC_X86
														
 
															-#elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
														
 
															+#elif defined(__arm__) || defined(_M_ARM) || defined(__arm64) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
														
 
															     #define DRFLAC_ARM
														
 
															 #endif
														
@@ -1431,16 +1437,6 @@ Unfortuantely dr_flac depends on this for a few things so we're just going to di
 
															     #if defined(DRFLAC_ARM)
														
 
															         #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
														
 
															             #define DRFLAC_SUPPORT_NEON
														
 
															-        #endif
														
 
															-
														
 
															-        /* Fall back to looking for the #include file. */
														
 
															-        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
														
 
															-            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
														
 
															-                #define DRFLAC_SUPPORT_NEON
														
 
															-            #endif
														
 
															-        #endif
														
 
															-
														
 
															-        #if defined(DRFLAC_SUPPORT_NEON)
														
 
															             #include <arm_neon.h>
														
 
															         #endif
														
 
															     #endif
														
@@ -1909,6 +1905,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
 
															     return n;
														
 
															 }
														
 
															+/*
															+Reads a big-endian 32-bit unsigned integer from pData one byte at a time, so pData does not need to be aligned for a
															+32-bit load. The result is assembled arithmetically and therefore does not depend on the host's endianness.
															+
															+Each byte is widened to drflac_uint32 before shifting. Without the cast the byte operand (assuming drflac_uint8 is an
															+8-bit unsigned type and int is 32 bits) is promoted to signed int, and shifting a value >= 0x80 left by 24 overflows
															+int, which is undefined behaviour.
															+*/
															+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32_ptr_unaligned(const void* pData)
															+{
															+    const drflac_uint8* pNum = (const drflac_uint8*)pData;
															+    return ((drflac_uint32)pNum[0] << 24) | ((drflac_uint32)pNum[1] << 16) | ((drflac_uint32)pNum[2] << 8) | (drflac_uint32)pNum[3];
															+}
														
 
															+
														
 
															 static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
														
 
															 {
														
 
															     if (drflac__is_little_endian()) {
														
@@ -1928,6 +1930,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
 
															     return n;
														
 
															 }
														
 
															+/*
															+Reads a little-endian 32-bit unsigned integer from pData one byte at a time, so pData does not need to be aligned for a
															+32-bit load. The result is assembled arithmetically and therefore does not depend on the host's endianness.
															+
															+Each byte is widened to drflac_uint32 before shifting. Without the cast the byte operand (assuming drflac_uint8 is an
															+8-bit unsigned type and int is 32 bits) is promoted to signed int, and shifting a value >= 0x80 left by 24 overflows
															+int, which is undefined behaviour.
															+*/
															+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32_ptr_unaligned(const void* pData)
															+{
															+    const drflac_uint8* pNum = (const drflac_uint8*)pData;
															+    return (drflac_uint32)pNum[0] | ((drflac_uint32)pNum[1] << 8) | ((drflac_uint32)pNum[2] << 16) | ((drflac_uint32)pNum[3] << 24);
															+}
														
 
															+
														
 
															 static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
														
 
															 {
														
@@ -2429,6 +2437,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i
 
															         if (!drflac__reload_cache(bs)) {
														
 
															             return DRFLAC_FALSE;
														
 
															         }
														
 
															+        if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
														
 
															+            /* This happens when we reach the end of the stream. */
														
 
															+            return DRFLAC_FALSE;
														
 
															+        }
														
 
															         *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
														
 
															         bs->consumedBits += bitCountLo;
														
@@ -2872,9 +2884,24 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs,
 
															         }
														
 
															     }
														
 
															+    if (bs->cache == 1) {
														
 
															+        /* Not catching this would lead to undefined behaviour: a shift of a 32-bit number by 32 or more is undefined */
														
 
															+        *pOffsetOut = zeroCounter + (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs) - 1;
														
 
															+        if (!drflac__reload_cache(bs)) {
														
 
															+            return DRFLAC_FALSE;
														
 
															+        }
														
 
															+
														
 
															+        return DRFLAC_TRUE;
														
 
															+    }
														
 
															+
														
 
															     setBitOffsetPlus1 = drflac__clz(bs->cache);
														
 
															     setBitOffsetPlus1 += 1;
														
 
															+    if (setBitOffsetPlus1 > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
														
 
															+        /* This happens when we reach the end of the stream. */
														
 
															+        return DRFLAC_FALSE;
														
 
															+    }
														
 
															+
														
 
															     bs->consumedBits += setBitOffsetPlus1;
														
 
															     bs->cache <<= setBitOffsetPlus1;
														
@@ -2989,6 +3016,25 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64
 
															 }
														
 
															+/*
															+Returns the number of significant bits in x, i.e. how many right shifts by one it takes to reduce x to zero. This is 0
															+for x == 0 and floor(log2(x)) + 1 otherwise, so it is one larger than a conventional integer log2 for non-zero input.
															+*/
															+static DRFLAC_INLINE drflac_uint32 drflac__ilog2_u32(drflac_uint32 x)
															+{
															+    drflac_uint32 bitCount;
															+
															+    /* Needs optimizing. */
															+    for (bitCount = 0; x > 0; x >>= 1) {
															+        bitCount += 1;
															+    }
															+
															+    return bitCount;
															+}
														
 
															+
														
 
															+/*
															+Reports whether the prediction should be calculated with the 64-bit routine rather than the 32-bit one. The result is
															+non-zero when bitsPerSample + precision + drflac__ilog2_u32(order) is greater than 32.
															+
															+Reference:
															+https://web.archive.org/web/20220205005724/https://github.com/ietf-wg-cellar/flac-specification/blob/37a49aa48ba4ba12e8757badfc59c0df35435fec/rfc_backmatter.md
															+*/
															+static DRFLAC_INLINE drflac_bool32 drflac__use_64_bit_prediction(drflac_uint32 bitsPerSample, drflac_uint32 order, drflac_uint32 precision)
															+{
															+    drflac_uint32 requiredBits = bitsPerSample + precision + drflac__ilog2_u32(order);
															+
															+    return requiredBits > 32;
															+}
														
 
															+
														
 
															 /*
														
 
															 The next two functions are responsible for calculating the prediction.
														
@@ -2996,6 +3042,9 @@ The next two functions are responsible for calculating the prediction.
 
															 When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
														
 
															 safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
														
 
															 */
														
 
															+#if defined(__clang__)
														
 
															+__attribute__((no_sanitize("signed-integer-overflow")))
														
 
															+#endif
														
 
															 static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
														
 
															 {
														
 
															     drflac_int32 prediction = 0;
														
@@ -3231,7 +3280,7 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32
 
															 Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
														
 
															 sake of readability and should only be used as a reference.
														
 
															 */
														
 
															-static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															 {
														
 
															     drflac_uint32 i;
														
@@ -3270,10 +3319,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drfla
 
															         }
														
 
															-        if (bitsPerSample+shift >= 32) {
														
 
															-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
														
 
															+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
														
 
															+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
														
 
															         } else {
														
 
															-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
														
 
															+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
														
 
															         }
														
 
															     }
														
@@ -3370,6 +3419,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac
 
															             if (!drflac__reload_cache(bs)) {
														
 
															                 return DRFLAC_FALSE;
														
 
															             }
														
 
															+            if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
														
 
															+                /* This happens when we reach the end of the stream. */
														
 
															+                return DRFLAC_FALSE;
														
 
															+            }
														
 
															         }
														
 
															         riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
														
@@ -3450,6 +3503,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drf
 
															                 if (!drflac__reload_cache(bs)) {
														
 
															                     return DRFLAC_FALSE;
														
 
															                 }
														
 
															+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
														
 
															+                    /* This happens when we reach the end of the stream. */
														
 
															+                    return DRFLAC_FALSE;
														
 
															+                }
														
 
															                 bs_cache = bs->cache;
														
 
															                 bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
														
@@ -3560,6 +3617,11 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac
 
															                     return DRFLAC_FALSE;
														
 
															                 }
														
 
															+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
														
 
															+                    /* This happens when we reach the end of the stream. */
														
 
															+                    return DRFLAC_FALSE;
														
 
															+                }
														
 
															+
														
 
															                 bs_cache = bs->cache;
														
 
															                 bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
														
 
															             }
														
@@ -3646,7 +3708,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorde
 
															     return DRFLAC_TRUE;
														
 
															 }
														
 
															-static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															 {
														
 
															     drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
														
 
															     drflac_uint32 zeroCountPart0 = 0;
														
@@ -3664,14 +3726,14 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
 
															     DRFLAC_ASSERT(bs != NULL);
														
 
															     DRFLAC_ASSERT(pSamplesOut != NULL);
														
 
															-    if (order == 0) {
														
 
															-        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+    if (lpcOrder == 0) {
														
 
															+        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
														
 
															     }
														
 
															     riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
														
 
															     pSamplesOutEnd = pSamplesOut + (count & ~3);
														
 
															-    if (bitsPerSample+shift > 32) {
														
 
															+    if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
														
 
															         while (pSamplesOut < pSamplesOutEnd) {
														
 
															             /*
														
 
															             Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
														
@@ -3699,10 +3761,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
 
															             riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
														
 
															             riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
														
 
															-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
														
 
															-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
														
 
															-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
														
 
															-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
														
 
															+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
														
 
															+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
														
 
															+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
														
 
															+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
														
 
															             pSamplesOut += 4;
														
 
															         }
														
@@ -3730,10 +3792,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
 
															             riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
														
 
															             riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
														
 
															-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
														
 
															-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
														
 
															-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
														
 
															-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
														
 
															+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
														
 
															+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
														
 
															+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
														
 
															+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
														
 
															             pSamplesOut += 4;
														
 
															         }
														
@@ -3753,10 +3815,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
 
															         /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
														
 
															         /* Sample reconstruction. */
														
 
															-        if (bitsPerSample+shift > 32) {
														
 
															-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
														
 
															+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
														
 
															+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
														
 
															         } else {
														
 
															-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
														
 
															+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
														
 
															         }
														
 
															         i += 1;
														
@@ -4212,20 +4274,20 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac
 
															     return DRFLAC_TRUE;
														
 
															 }
														
 
															-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															 {
														
 
															     DRFLAC_ASSERT(bs != NULL);
														
 
															     DRFLAC_ASSERT(pSamplesOut != NULL);
														
 
															     /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
														
 
															-    if (order > 0 && order <= 12) {
														
 
															-        if (bitsPerSample+shift > 32) {
														
 
															-            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+    if (lpcOrder > 0 && lpcOrder <= 12) {
														
 
															+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
														
 
															+            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
														
 
															         } else {
														
 
															-            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
														
 
															         }
														
 
															     } else {
														
 
															-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
														
 
															     }
														
 
															 }
														
 
															 #endif
														
@@ -4562,7 +4624,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
 
															     /*
														
 
															     Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
														
 
															-    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
														
 
															+    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
														
 
															     in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
														
 
															     so I think there's opportunity for this to be simplified.
														
 
															     */
														
@@ -4710,41 +4772,41 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
 
															     return DRFLAC_TRUE;
														
 
															 }
														
 
															-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															 {
														
 
															     DRFLAC_ASSERT(bs != NULL);
														
 
															     DRFLAC_ASSERT(pSamplesOut != NULL);
														
 
															     /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
														
 
															-    if (order > 0 && order <= 12) {
														
 
															-        if (bitsPerSample+shift > 32) {
														
 
															-            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+    if (lpcOrder > 0 && lpcOrder <= 12) {
														
 
															+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
														
 
															+            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
														
 
															         } else {
														
 
															-            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
														
 
															         }
														
 
															     } else {
														
 
															-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
														
 
															     }
														
 
															 }
														
 
															 #endif
														
 
															-static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															 {
														
 
															 #if defined(DRFLAC_SUPPORT_SSE41)
														
 
															     if (drflac__gIsSSE41Supported) {
														
 
															-        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
														
 
															     } else
														
 
															 #elif defined(DRFLAC_SUPPORT_NEON)
														
 
															     if (drflac__gIsNEONSupported) {
														
 
															-        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
														
 
															     } else
														
 
															 #endif
														
 
															     {
														
 
															         /* Scalar fallback. */
														
 
															     #if 0
														
 
															-        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
														
 
															     #else
														
 
															-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
														
 
															+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
														
 
															     #endif
														
 
															     }
														
 
															 }
														
@@ -4765,7 +4827,10 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_
 
															     return DRFLAC_TRUE;
														
 
															 }
														
 
															-static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															+#if defined(__clang__)
														
 
															+__attribute__((no_sanitize("signed-integer-overflow")))
														
 
															+#endif
														
 
															+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
														
 
															 {
														
 
															     drflac_uint32 i;
														
@@ -4782,10 +4847,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs*
 
															             pSamplesOut[i] = 0;
														
 
															         }
														
 
															-        if (bitsPerSample >= 24) {
														
 
															-            pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
														
 
															+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
														
 
															+            pSamplesOut[i] += drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
														
 
															         } else {
														
 
															-            pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
														
 
															+            pSamplesOut[i] += drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
														
 
															         }
														
 
															     }
														
@@ -4798,7 +4863,7 @@ Reads and decodes the residual for the sub-frame the decoder is currently sittin
 
															 when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
														
 
															 <blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
														
 
															 */
														
 
															-static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
														
 
															+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
														
 
															 {
														
 
															     drflac_uint8 residualMethod;
														
 
															     drflac_uint8 partitionOrder;
														
@@ -4818,7 +4883,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
 
															     }
														
 
															     /* Ignore the first <order> values. */
														
 
															-    pDecodedSamples += order;
														
 
															+    pDecodedSamples += lpcOrder;
														
 
															     if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
														
 
															         return DRFLAC_FALSE;
														
@@ -4833,11 +4898,11 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
 
															     }
														
 
															     /* Validation check. */
														
 
															-    if ((blockSize / (1 << partitionOrder)) < order) {
														
 
															+    if ((blockSize / (1 << partitionOrder)) < lpcOrder) {
														
 
															         return DRFLAC_FALSE;
														
 
															     }
														
 
															-    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
														
 
															+    samplesInPartition = (blockSize / (1 << partitionOrder)) - lpcOrder;
														
 
															     partitionsRemaining = (1 << partitionOrder);
														
 
															     for (;;) {
														
 
															         drflac_uint8 riceParam = 0;
														
@@ -4858,7 +4923,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
 
															         }
														
 
															         if (riceParam != 0xFF) {
														
 
															-            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
														
 
															+            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
														
 
															                 return DRFLAC_FALSE;
														
 
															             }
														
 
															         } else {
														
@@ -4867,7 +4932,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
 
															                 return DRFLAC_FALSE;
														
 
															             }
														
 
															-            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
														
 
															+            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
														
 
															                 return DRFLAC_FALSE;
														
 
															             }
														
 
															         }
														
@@ -5036,7 +5101,7 @@ static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32
 
															         pDecodedSamples[i] = sample;
														
 
															     }
														
 
															-    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
														
 
															+    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, 4, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
														
 
															         return DRFLAC_FALSE;
														
 
															     }
														
@@ -5091,7 +5156,7 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl
 
															         }
														
 
															     }
														
 
															-    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
														
 
															+    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
														
 
															         return DRFLAC_FALSE;
														
 
															     }
														
@@ -5219,6 +5284,9 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
 
															                 return DRFLAC_FALSE;
														
 
															             }
														
 
															             crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
														
 
															+            if (header->blockSizeInPCMFrames == 0xFFFF) {
														
 
															+                return DRFLAC_FALSE;    /* Frame is too big. This is the size of the frame minus 1. The STREAMINFO block defines the max block size which is 16-bits. Adding one will make it 17 bits and therefore too big. */
														
 
															+            }
														
 
															             header->blockSizeInPCMFrames += 1;
														
 
															         } else {
														
 
															             DRFLAC_ASSERT(blockSize >= 8);
														
@@ -5257,6 +5325,11 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
 
															             header->bitsPerSample = streaminfoBitsPerSample;
														
 
															         }
														
 
															+        if (header->bitsPerSample != streaminfoBitsPerSample) {
														
 
															+            /* If this subframe has a different bitsPerSample than streaminfo or the first frame, reject it */
														
 
															+            return DRFLAC_FALSE;
														
 
															+        }
														
 
															+
														
 
															         if (!drflac__read_uint8(bs, 8, &header->crc8)) {
														
 
															             return DRFLAC_FALSE;
														
 
															         }
														
@@ -5343,6 +5416,11 @@ static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame,
 
															         subframeBitsPerSample += 1;
														
 
															     }
														
 
															+    if (subframeBitsPerSample > 32) {
														
 
															+        /* libFLAC and ffmpeg reject 33-bit subframes as well */
														
 
															+        return DRFLAC_FALSE;
														
 
															+    }
														
 
															+
														
 
															     /* Need to handle wasted bits per sample. */
														
 
															     if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
														
 
															         return DRFLAC_FALSE;
														
@@ -6485,7 +6563,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
															                     pRunningData    = (const char*)pRawData;
														
 
															                     pRunningDataEnd = (const char*)pRawData + blockSize;
														
 
															-                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															                     /* Need space for the rest of the block */
														
 
															                     if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
														
@@ -6493,7 +6571,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
															                         return DRFLAC_FALSE;
														
 
															                     }
														
 
															                     metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
														
 
															-                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															                     /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
														
 
															                     if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
														
@@ -6511,7 +6589,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
															                             return DRFLAC_FALSE;
														
 
															                         }
														
 
															-                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															+                        commentLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															                         if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
														
 
															                             drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
														
 
															                             return DRFLAC_FALSE;
														
@@ -6620,8 +6698,8 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
															                     pRunningData    = (const char*)pRawData;
														
 
															                     pRunningDataEnd = (const char*)pRawData + blockSize;
														
 
															-                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															-                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.picture.type       = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.picture.mimeLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															                     /* Need space for the rest of the block */
														
 
															                     if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
														
@@ -6629,7 +6707,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
															                         return DRFLAC_FALSE;
														
 
															                     }
														
 
															                     metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
														
 
															-                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.picture.descriptionLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															                     /* Need space for the rest of the block */
														
 
															                     if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
														
@@ -6637,11 +6715,11 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
															                         return DRFLAC_FALSE;
														
 
															                     }
														
 
															                     metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
														
 
															-                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															-                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															-                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															-                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															-                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.picture.width           = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.picture.height          = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.picture.colorDepth      = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.picture.indexColorCount = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															+                    metadata.data.picture.pictureDataSize = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
														
 
															                     metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
														
 
															                     /* Need space for the picture after the block */
														
@@ -7865,7 +7943,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
 
															 #ifndef DR_FLAC_NO_OGG
														
 
															     if (init.container == drflac_container_ogg) {
														
 
															         drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
														
 
															-        *pInternalOggbs = oggbs;
														
 
															+        DRFLAC_COPY_MEMORY(pInternalOggbs, &oggbs, sizeof(oggbs));
														
 
															         /* The Ogg bistream needs to be layered on top of the original bitstream. */
														
 
															         pFlac->bs.onRead = drflac__on_read_ogg;
														
@@ -11786,7 +11864,7 @@ DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator
 
															         return NULL;
														
 
															     }
														
 
															-    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
														
 
															+    length = drflac__le2host_32_ptr_unaligned(pIter->pRunningData);
														
 
															     pIter->pRunningData += 4;
														
 
															     pComment = pIter->pRunningData;
														
@@ -11856,6 +11934,22 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
 
															 /*
														
 
															 REVISION HISTORY
														
 
															 ================
														
 
															+v0.12.38 - 2022-04-10
														
 
															+  - Fix compilation error on older versions of GCC.
														
 
															+
														
 
															+v0.12.37 - 2022-02-12
														
 
															+  - Improve ARM detection.
														
 
															+
														
 
															+v0.12.36 - 2022-02-07
														
 
															+  - Fix a compilation error with the ARM build.
														
 
															+
														
 
															+v0.12.35 - 2022-02-06
														
 
															+  - Fix a bug due to underestimating the amount of precision required for the prediction stage.
														
 
															+  - Fix some bugs found from fuzz testing.
														
 
															+
														
 
															+v0.12.34 - 2022-01-07
														
 
															+  - Fix some misalignment bugs when reading metadata.
														
 
															+
														
 
															 v0.12.33 - 2021-12-22
														
 
															   - Fix a bug with seeking when the seek table does not start at PCM frame 0.