|
@@ -1,6 +1,6 @@
|
|
|
/*
|
|
|
FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
|
|
|
-dr_flac - v0.12.33 - 2021-12-22
|
|
|
+dr_flac - v0.12.38 - 2022-04-10
|
|
|
|
|
|
David Reid - [email protected]
|
|
|
|
|
@@ -232,7 +232,7 @@ extern "C" {
|
|
|
|
|
|
#define DRFLAC_VERSION_MAJOR 0
|
|
|
#define DRFLAC_VERSION_MINOR 12
|
|
|
-#define DRFLAC_VERSION_REVISION 33
|
|
|
+#define DRFLAC_VERSION_REVISION 38
|
|
|
#define DRFLAC_VERSION_STRING DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
|
|
|
|
|
|
#include <stddef.h> /* For size_t. */
|
|
@@ -1363,9 +1363,15 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
|
|
|
I am using "__inline__" only when we're compiling in strict ANSI mode.
|
|
|
*/
|
|
|
#if defined(__STRICT_ANSI__)
|
|
|
- #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
|
|
|
+ #define DRFLAC_GNUC_INLINE_HINT __inline__
|
|
|
#else
|
|
|
- #define DRFLAC_INLINE inline __attribute__((always_inline))
|
|
|
+ #define DRFLAC_GNUC_INLINE_HINT inline
|
|
|
+ #endif
|
|
|
+
|
|
|
+ #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
|
|
|
+ #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT __attribute__((always_inline))
|
|
|
+ #else
|
|
|
+ #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT
|
|
|
#endif
|
|
|
#elif defined(__WATCOMC__)
|
|
|
#define DRFLAC_INLINE __inline
|
|
@@ -1378,7 +1384,7 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
|
|
|
#define DRFLAC_X64
|
|
|
#elif defined(__i386) || defined(_M_IX86)
|
|
|
#define DRFLAC_X86
|
|
|
-#elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
|
|
|
+#elif defined(__arm__) || defined(_M_ARM) || defined(__arm64) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
|
|
|
#define DRFLAC_ARM
|
|
|
#endif
|
|
|
|
|
@@ -1431,16 +1437,6 @@ Unfortuantely dr_flac depends on this for a few things so we're just going to di
|
|
|
#if defined(DRFLAC_ARM)
|
|
|
#if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
|
|
|
#define DRFLAC_SUPPORT_NEON
|
|
|
- #endif
|
|
|
-
|
|
|
- /* Fall back to looking for the #include file. */
|
|
|
- #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
|
|
|
- #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
|
|
|
- #define DRFLAC_SUPPORT_NEON
|
|
|
- #endif
|
|
|
- #endif
|
|
|
-
|
|
|
- #if defined(DRFLAC_SUPPORT_NEON)
|
|
|
#include <arm_neon.h>
|
|
|
#endif
|
|
|
#endif
|
|
@@ -1909,6 +1905,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
|
|
|
return n;
|
|
|
}
|
|
|
|
|
|
+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32_ptr_unaligned(const void* pData)
|
|
|
+{ /* Assembles a 32-bit value from four bytes stored most-significant first; reading byte-by-byte means pData does not need 4-byte alignment. */
|
|
|
+ const drflac_uint8* pNum = (const drflac_uint8*)pData; /* Keep the const qualifier; the bytes are only read. */
|
|
|
+ return ((drflac_uint32)pNum[0] << 24) | ((drflac_uint32)pNum[1] << 16) | ((drflac_uint32)pNum[2] << 8) | (drflac_uint32)pNum[3]; /* Widen to unsigned before shifting: a byte >= 0x80 promoted to int and shifted by 24 would overflow a signed int. */
|
|
|
+}
|
|
|
+
|
|
|
static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
|
|
|
{
|
|
|
if (drflac__is_little_endian()) {
|
|
@@ -1928,6 +1930,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
|
|
|
return n;
|
|
|
}
|
|
|
|
|
|
+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32_ptr_unaligned(const void* pData)
|
|
|
+{ /* Assembles a 32-bit value from four bytes stored least-significant first; reading byte-by-byte means pData does not need 4-byte alignment. */
|
|
|
+ const drflac_uint8* pNum = (const drflac_uint8*)pData; /* Keep the const qualifier; the bytes are only read. */
|
|
|
+ return (drflac_uint32)pNum[0] | ((drflac_uint32)pNum[1] << 8) | ((drflac_uint32)pNum[2] << 16) | ((drflac_uint32)pNum[3] << 24); /* Widen to unsigned before shifting: a byte >= 0x80 promoted to int and shifted by 24 would overflow a signed int. */
|
|
|
+}
|
|
|
+
|
|
|
|
|
|
static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
|
|
|
{
|
|
@@ -2429,6 +2437,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i
|
|
|
if (!drflac__reload_cache(bs)) {
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
+ if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
|
|
|
+ /* This happens when we get to end of stream */
|
|
|
+ return DRFLAC_FALSE;
|
|
|
+ }
|
|
|
|
|
|
*pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
|
|
|
bs->consumedBits += bitCountLo;
|
|
@@ -2872,9 +2884,24 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ if (bs->cache == 1) {
|
|
|
+ /* Not catching this would lead to undefined behaviour: a shift of a 32-bit number by 32 or more is undefined */
|
|
|
+ *pOffsetOut = zeroCounter + (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs) - 1;
|
|
|
+ if (!drflac__reload_cache(bs)) {
|
|
|
+ return DRFLAC_FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+ return DRFLAC_TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
setBitOffsetPlus1 = drflac__clz(bs->cache);
|
|
|
setBitOffsetPlus1 += 1;
|
|
|
|
|
|
+ if (setBitOffsetPlus1 > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
|
|
|
+ /* This happens when we get to end of stream */
|
|
|
+ return DRFLAC_FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
bs->consumedBits += setBitOffsetPlus1;
|
|
|
bs->cache <<= setBitOffsetPlus1;
|
|
|
|
|
@@ -2989,6 +3016,25 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64
|
|
|
}
|
|
|
|
|
|
|
|
|
+static DRFLAC_INLINE drflac_uint32 drflac__ilog2_u32(drflac_uint32 x)
|
|
|
+{ /* Counts how many right shifts it takes to reduce x to zero, i.e. the number of significant bits in x. */
|
|
|
+#if 1 /* Needs optimizing. */
|
|
|
+ drflac_uint32 result = 0;
|
|
|
+ while (x > 0) { /* One iteration per significant bit of x. */
|
|
|
+ result += 1;
|
|
|
+ x >>= 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ return result; /* 0 for x == 0, otherwise floor(log2(x)) + 1 (so 1 -> 1, 2 -> 2, 4 -> 3); note this is one more than a plain floor(log2(x)). */
|
|
|
+#endif
|
|
|
+}
|
|
|
+
|
|
|
+static DRFLAC_INLINE drflac_bool32 drflac__use_64_bit_prediction(drflac_uint32 bitsPerSample, drflac_uint32 order, drflac_uint32 precision)
|
|
|
+{ /* Decides between the two prediction routines: the residual decoders in this file call drflac__calculate_prediction_64 when this returns nonzero and drflac__calculate_prediction_32 otherwise. */
|
|
|
+ /* https://web.archive.org/web/20220205005724/https://github.com/ietf-wg-cellar/flac-specification/blob/37a49aa48ba4ba12e8757badfc59c0df35435fec/rfc_backmatter.md */
|
|
|
+ return bitsPerSample + precision + drflac__ilog2_u32(order) > 32; /* Nonzero when the summed bit counts exceed 32; drflac__ilog2_u32(order) contributes the bit length of order. */
|
|
|
+}
|
|
|
+
|
|
|
|
|
|
/*
|
|
|
The next two functions are responsible for calculating the prediction.
|
|
@@ -2996,6 +3042,9 @@ The next two functions are responsible for calculating the prediction.
|
|
|
When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
|
|
|
safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
|
|
|
*/
|
|
|
+#if defined(__clang__)
|
|
|
+__attribute__((no_sanitize("signed-integer-overflow")))
|
|
|
+#endif
|
|
|
static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
|
|
|
{
|
|
|
drflac_int32 prediction = 0;
|
|
@@ -3231,7 +3280,7 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32
|
|
|
Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
|
|
|
sake of readability and should only be used as a reference.
|
|
|
*/
|
|
|
-static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
{
|
|
|
drflac_uint32 i;
|
|
|
|
|
@@ -3270,10 +3319,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drfla
|
|
|
}
|
|
|
|
|
|
|
|
|
- if (bitsPerSample+shift >= 32) {
|
|
|
- pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
|
|
|
+ if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
|
|
|
+ pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
|
|
|
} else {
|
|
|
- pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
|
|
|
+ pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -3370,6 +3419,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac
|
|
|
if (!drflac__reload_cache(bs)) {
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
+ if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
|
|
|
+ /* This happens when we get to end of stream */
|
|
|
+ return DRFLAC_FALSE;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
|
|
@@ -3450,6 +3503,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drf
|
|
|
if (!drflac__reload_cache(bs)) {
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
+ if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
|
|
|
+ /* This happens when we get to end of stream */
|
|
|
+ return DRFLAC_FALSE;
|
|
|
+ }
|
|
|
|
|
|
bs_cache = bs->cache;
|
|
|
bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
|
|
@@ -3560,6 +3617,11 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
|
|
|
+ if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
|
|
|
+ /* This happens when we get to end of stream */
|
|
|
+ return DRFLAC_FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
bs_cache = bs->cache;
|
|
|
bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
|
|
|
}
|
|
@@ -3646,7 +3708,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorde
|
|
|
return DRFLAC_TRUE;
|
|
|
}
|
|
|
|
|
|
-static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
{
|
|
|
drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
|
|
|
drflac_uint32 zeroCountPart0 = 0;
|
|
@@ -3664,14 +3726,14 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
|
|
|
DRFLAC_ASSERT(bs != NULL);
|
|
|
DRFLAC_ASSERT(pSamplesOut != NULL);
|
|
|
|
|
|
- if (order == 0) {
|
|
|
- return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ if (lpcOrder == 0) {
|
|
|
+ return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
|
|
|
}
|
|
|
|
|
|
riceParamMask = (drflac_uint32)~((~0UL) << riceParam);
|
|
|
pSamplesOutEnd = pSamplesOut + (count & ~3);
|
|
|
|
|
|
- if (bitsPerSample+shift > 32) {
|
|
|
+ if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
|
|
|
while (pSamplesOut < pSamplesOutEnd) {
|
|
|
/*
|
|
|
Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
|
|
@@ -3699,10 +3761,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
|
|
|
riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
|
|
|
riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
|
|
|
|
|
|
- pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
|
|
|
- pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
|
|
|
- pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
|
|
|
- pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
|
|
|
+ pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
|
|
|
+ pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
|
|
|
+ pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
|
|
|
+ pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
|
|
|
|
|
|
pSamplesOut += 4;
|
|
|
}
|
|
@@ -3730,10 +3792,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
|
|
|
riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
|
|
|
riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
|
|
|
|
|
|
- pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
|
|
|
- pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
|
|
|
- pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
|
|
|
- pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
|
|
|
+ pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
|
|
|
+ pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
|
|
|
+ pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
|
|
|
+ pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
|
|
|
|
|
|
pSamplesOut += 4;
|
|
|
}
|
|
@@ -3753,10 +3815,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
|
|
|
/*riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
|
|
|
|
|
|
/* Sample reconstruction. */
|
|
|
- if (bitsPerSample+shift > 32) {
|
|
|
- pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
|
|
|
+ if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
|
|
|
+ pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
|
|
|
} else {
|
|
|
- pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
|
|
|
+ pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
|
|
|
}
|
|
|
|
|
|
i += 1;
|
|
@@ -4212,20 +4274,20 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac
|
|
|
return DRFLAC_TRUE;
|
|
|
}
|
|
|
|
|
|
-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
{
|
|
|
DRFLAC_ASSERT(bs != NULL);
|
|
|
DRFLAC_ASSERT(pSamplesOut != NULL);
|
|
|
|
|
|
/* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
|
|
|
- if (order > 0 && order <= 12) {
|
|
|
- if (bitsPerSample+shift > 32) {
|
|
|
- return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ if (lpcOrder > 0 && lpcOrder <= 12) {
|
|
|
+ if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
|
|
|
+ return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
|
|
|
} else {
|
|
|
- return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
|
|
|
}
|
|
|
} else {
|
|
|
- return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
|
|
|
}
|
|
|
}
|
|
|
#endif
|
|
@@ -4562,7 +4624,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
|
|
|
|
|
|
/*
|
|
|
Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
|
|
|
- what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
|
|
|
+ what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
|
|
|
in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
|
|
|
so I think there's opportunity for this to be simplified.
|
|
|
*/
|
|
@@ -4710,41 +4772,41 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
|
|
|
return DRFLAC_TRUE;
|
|
|
}
|
|
|
|
|
|
-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
{
|
|
|
DRFLAC_ASSERT(bs != NULL);
|
|
|
DRFLAC_ASSERT(pSamplesOut != NULL);
|
|
|
|
|
|
/* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
|
|
|
- if (order > 0 && order <= 12) {
|
|
|
- if (bitsPerSample+shift > 32) {
|
|
|
- return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ if (lpcOrder > 0 && lpcOrder <= 12) {
|
|
|
+ if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
|
|
|
+ return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
|
|
|
} else {
|
|
|
- return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
|
|
|
}
|
|
|
} else {
|
|
|
- return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
|
|
|
}
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
-static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
{
|
|
|
#if defined(DRFLAC_SUPPORT_SSE41)
|
|
|
if (drflac__gIsSSE41Supported) {
|
|
|
- return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
|
|
|
} else
|
|
|
#elif defined(DRFLAC_SUPPORT_NEON)
|
|
|
if (drflac__gIsNEONSupported) {
|
|
|
- return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
|
|
|
} else
|
|
|
#endif
|
|
|
{
|
|
|
/* Scalar fallback. */
|
|
|
#if 0
|
|
|
- return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
|
|
|
#else
|
|
|
- return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
|
|
|
+ return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
|
|
|
#endif
|
|
|
}
|
|
|
}
|
|
@@ -4765,7 +4827,10 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_
|
|
|
return DRFLAC_TRUE;
|
|
|
}
|
|
|
|
|
|
-static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
+#if defined(__clang__)
|
|
|
+__attribute__((no_sanitize("signed-integer-overflow")))
|
|
|
+#endif
|
|
|
+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
|
|
|
{
|
|
|
drflac_uint32 i;
|
|
|
|
|
@@ -4782,10 +4847,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs*
|
|
|
pSamplesOut[i] = 0;
|
|
|
}
|
|
|
|
|
|
- if (bitsPerSample >= 24) {
|
|
|
- pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
|
|
|
+ if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
|
|
|
+ pSamplesOut[i] += drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
|
|
|
} else {
|
|
|
- pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
|
|
|
+ pSamplesOut[i] += drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -4798,7 +4863,7 @@ Reads and decodes the residual for the sub-frame the decoder is currently sittin
|
|
|
when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
|
|
|
<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
|
|
|
*/
|
|
|
-static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
|
|
|
+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
|
|
|
{
|
|
|
drflac_uint8 residualMethod;
|
|
|
drflac_uint8 partitionOrder;
|
|
@@ -4818,7 +4883,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
|
|
|
}
|
|
|
|
|
|
/* Ignore the first <order> values. */
|
|
|
- pDecodedSamples += order;
|
|
|
+ pDecodedSamples += lpcOrder;
|
|
|
|
|
|
if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
|
|
|
return DRFLAC_FALSE;
|
|
@@ -4833,11 +4898,11 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
|
|
|
}
|
|
|
|
|
|
/* Validation check. */
|
|
|
- if ((blockSize / (1 << partitionOrder)) < order) {
|
|
|
+ if ((blockSize / (1 << partitionOrder)) < lpcOrder) {
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
|
|
|
- samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
|
|
|
+ samplesInPartition = (blockSize / (1 << partitionOrder)) - lpcOrder;
|
|
|
partitionsRemaining = (1 << partitionOrder);
|
|
|
for (;;) {
|
|
|
drflac_uint8 riceParam = 0;
|
|
@@ -4858,7 +4923,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
|
|
|
}
|
|
|
|
|
|
if (riceParam != 0xFF) {
|
|
|
- if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
|
|
|
+ if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
} else {
|
|
@@ -4867,7 +4932,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
|
|
|
- if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
|
|
|
+ if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
}
|
|
@@ -5036,7 +5101,7 @@ static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32
|
|
|
pDecodedSamples[i] = sample;
|
|
|
}
|
|
|
|
|
|
- if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
|
|
|
+ if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, 4, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
|
|
@@ -5091,7 +5156,7 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
|
|
|
+ if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
|
|
@@ -5219,6 +5284,9 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
|
|
|
+ if (header->blockSizeInPCMFrames == 0xFFFF) {
|
|
|
+ return DRFLAC_FALSE; /* Frame is too big. This is the size of the frame minus 1. The STREAMINFO block defines the max block size which is 16-bits. Adding one will make it 17 bits and therefore too big. */
|
|
|
+ }
|
|
|
header->blockSizeInPCMFrames += 1;
|
|
|
} else {
|
|
|
DRFLAC_ASSERT(blockSize >= 8);
|
|
@@ -5257,6 +5325,11 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
|
|
|
header->bitsPerSample = streaminfoBitsPerSample;
|
|
|
}
|
|
|
|
|
|
+ if (header->bitsPerSample != streaminfoBitsPerSample) {
|
|
|
+ /* If this subframe has a different bitsPerSample than streaminfo or the first frame, reject it */
|
|
|
+ return DRFLAC_FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
if (!drflac__read_uint8(bs, 8, &header->crc8)) {
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
@@ -5343,6 +5416,11 @@ static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame,
|
|
|
subframeBitsPerSample += 1;
|
|
|
}
|
|
|
|
|
|
+ if (subframeBitsPerSample > 32) {
|
|
|
+ /* libFLAC and ffmpeg reject 33-bit subframes as well */
|
|
|
+ return DRFLAC_FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
/* Need to handle wasted bits per sample. */
|
|
|
if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
|
|
|
return DRFLAC_FALSE;
|
|
@@ -6485,7 +6563,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
|
|
|
pRunningData = (const char*)pRawData;
|
|
|
pRunningDataEnd = (const char*)pRawData + blockSize;
|
|
|
|
|
|
- metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.vorbis_comment.vendorLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
|
|
|
/* Need space for the rest of the block */
|
|
|
if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
|
|
@@ -6493,7 +6571,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
metadata.data.vorbis_comment.vendor = pRunningData; pRunningData += metadata.data.vorbis_comment.vendorLength;
|
|
|
- metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.vorbis_comment.commentCount = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
|
|
|
/* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
|
|
|
if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
|
|
@@ -6511,7 +6589,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
|
|
|
- commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
+ commentLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
|
|
|
drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
|
|
|
return DRFLAC_FALSE;
|
|
@@ -6620,8 +6698,8 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
|
|
|
pRunningData = (const char*)pRawData;
|
|
|
pRunningDataEnd = (const char*)pRawData + blockSize;
|
|
|
|
|
|
- metadata.data.picture.type = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
- metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.picture.type = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.picture.mimeLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
|
|
|
/* Need space for the rest of the block */
|
|
|
if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
|
|
@@ -6629,7 +6707,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
metadata.data.picture.mime = pRunningData; pRunningData += metadata.data.picture.mimeLength;
|
|
|
- metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.picture.descriptionLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
|
|
|
/* Need space for the rest of the block */
|
|
|
if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
|
|
@@ -6637,11 +6715,11 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
|
|
|
return DRFLAC_FALSE;
|
|
|
}
|
|
|
metadata.data.picture.description = pRunningData; pRunningData += metadata.data.picture.descriptionLength;
|
|
|
- metadata.data.picture.width = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
- metadata.data.picture.height = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
- metadata.data.picture.colorDepth = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
- metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
- metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.picture.width = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.picture.height = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.picture.colorDepth = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.picture.indexColorCount = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
+ metadata.data.picture.pictureDataSize = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
|
|
|
metadata.data.picture.pPictureData = (const drflac_uint8*)pRunningData;
|
|
|
|
|
|
/* Need space for the picture after the block */
|
|
@@ -7865,7 +7943,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
|
|
|
#ifndef DR_FLAC_NO_OGG
|
|
|
if (init.container == drflac_container_ogg) {
|
|
|
drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
|
|
|
- *pInternalOggbs = oggbs;
|
|
|
+ DRFLAC_COPY_MEMORY(pInternalOggbs, &oggbs, sizeof(oggbs));
|
|
|
|
|
|
/* The Ogg bistream needs to be layered on top of the original bitstream. */
|
|
|
pFlac->bs.onRead = drflac__on_read_ogg;
|
|
@@ -11786,7 +11864,7 @@ DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
- length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
|
|
|
+ length = drflac__le2host_32_ptr_unaligned(pIter->pRunningData);
|
|
|
pIter->pRunningData += 4;
|
|
|
|
|
|
pComment = pIter->pRunningData;
|
|
@@ -11856,6 +11934,22 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
|
|
|
/*
|
|
|
REVISION HISTORY
|
|
|
================
|
|
|
+v0.12.38 - 2022-04-10
|
|
|
+ - Fix compilation error on older versions of GCC.
|
|
|
+
|
|
|
+v0.12.37 - 2022-02-12
|
|
|
+ - Improve ARM detection.
|
|
|
+
|
|
|
+v0.12.36 - 2022-02-07
|
|
|
+ - Fix a compilation error with the ARM build.
|
|
|
+
|
|
|
+v0.12.35 - 2022-02-06
|
|
|
+ - Fix a bug due to underestimating the amount of precision required for the prediction stage.
|
|
|
+ - Fix some bugs found from fuzz testing.
|
|
|
+
|
|
|
+v0.12.34 - 2022-01-07
|
|
|
+ - Fix some misalignment bugs when reading metadata.
|
|
|
+
|
|
|
v0.12.33 - 2021-12-22
|
|
|
- Fix a bug with seeking when the seek table does not start at PCM frame 0.
|
|
|
|