|
|
@@ -63,6 +63,10 @@
|
|
|
#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7)
|
|
|
#endif
|
|
|
|
|
|
+#ifndef __has_builtin
|
|
|
+#define __has_builtin(x) 0
|
|
|
+#endif
|
|
|
+
|
|
|
namespace meshopt
|
|
|
{
|
|
|
|
|
|
@@ -185,9 +189,7 @@ inline uint64_t rotateleft64(uint64_t v, int x)
|
|
|
{
|
|
|
#if defined(_MSC_VER) && !defined(__clang__)
|
|
|
return _rotl64(v, x);
|
|
|
-// Apple's Clang 8 is actually vanilla Clang 3.9, there we need to look for
|
|
|
-// version 11 instead: https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
|
|
|
-#elif defined(__clang__) && ((!defined(__apple_build_version__) && __clang_major__ >= 8) || __clang_major__ >= 11)
|
|
|
+#elif defined(__clang__) && __has_builtin(__builtin_rotateleft64)
|
|
|
return __builtin_rotateleft64(v, x);
|
|
|
#else
|
|
|
return (v << (x & 63)) | (v >> ((64 - x) & 63));
|
|
|
@@ -791,6 +793,33 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count)
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
+// optimized variant of frexp
|
|
|
+inline int optlog2(float v)
|
|
|
+{
|
|
|
+ union
|
|
|
+ {
|
|
|
+ float f;
|
|
|
+ unsigned int ui;
|
|
|
+ } u;
|
|
|
+
|
|
|
+ u.f = v;
|
|
|
+ // +1 accounts for implicit 1. in mantissa; denormalized numbers will end up clamped to min_exp by calling code
|
|
|
+ return u.ui == 0 ? 0 : int((u.ui >> 23) & 0xff) - 127 + 1;
|
|
|
+}
|
|
|
+
|
|
|
+// optimized variant of ldexp
|
|
|
+inline float optexp2(int e)
|
|
|
+{
|
|
|
+ union
|
|
|
+ {
|
|
|
+ float f;
|
|
|
+ unsigned int ui;
|
|
|
+ } u;
|
|
|
+
|
|
|
+ u.ui = unsigned(e + 127) << 23;
|
|
|
+ return u.f;
|
|
|
+}
|
|
|
+
|
|
|
} // namespace meshopt
|
|
|
|
|
|
void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride)
|
|
|
@@ -918,39 +947,78 @@ void meshopt_encodeFilterQuat(void* destination_, size_t count, size_t stride, i
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data)
|
|
|
+void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode)
|
|
|
{
|
|
|
- assert(stride > 0 && stride % 4 == 0);
|
|
|
+ using namespace meshopt;
|
|
|
+
|
|
|
+ assert(stride > 0 && stride % 4 == 0 && stride <= 256);
|
|
|
assert(bits >= 1 && bits <= 24);
|
|
|
|
|
|
unsigned int* destination = static_cast<unsigned int*>(destination_);
|
|
|
size_t stride_float = stride / sizeof(float);
|
|
|
|
|
|
+ int component_exp[64];
|
|
|
+ assert(stride_float <= sizeof(component_exp) / sizeof(int));
|
|
|
+
|
|
|
+ const int min_exp = -100;
|
|
|
+
|
|
|
+ if (mode == meshopt_EncodeExpSharedComponent)
|
|
|
+ {
|
|
|
+ for (size_t j = 0; j < stride_float; ++j)
|
|
|
+ component_exp[j] = min_exp;
|
|
|
+
|
|
|
+ for (size_t i = 0; i < count; ++i)
|
|
|
+ {
|
|
|
+ const float* v = &data[i * stride_float];
|
|
|
+
|
|
|
+ // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1]
|
|
|
+ for (size_t j = 0; j < stride_float; ++j)
|
|
|
+ {
|
|
|
+ int e = optlog2(v[j]);
|
|
|
+
|
|
|
+ component_exp[j] = (component_exp[j] < e) ? e : component_exp[j];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
for (size_t i = 0; i < count; ++i)
|
|
|
{
|
|
|
const float* v = &data[i * stride_float];
|
|
|
unsigned int* d = &destination[i * stride_float];
|
|
|
|
|
|
- // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1]
|
|
|
- int exp = -100;
|
|
|
+ int vector_exp = min_exp;
|
|
|
|
|
|
- for (size_t j = 0; j < stride_float; ++j)
|
|
|
+ if (mode == meshopt_EncodeExpSharedVector)
|
|
|
{
|
|
|
- int e;
|
|
|
- frexp(v[j], &e);
|
|
|
+ // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1]
|
|
|
+ for (size_t j = 0; j < stride_float; ++j)
|
|
|
+ {
|
|
|
+ int e = optlog2(v[j]);
|
|
|
|
|
|
- exp = (exp < e) ? e : exp;
|
|
|
+ vector_exp = (vector_exp < e) ? e : vector_exp;
|
|
|
+ }
|
|
|
}
|
|
|
+ else if (mode == meshopt_EncodeExpSeparate)
|
|
|
+ {
|
|
|
+ for (size_t j = 0; j < stride_float; ++j)
|
|
|
+ {
|
|
|
+ int e = optlog2(v[j]);
|
|
|
|
|
|
- // note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude)
|
|
|
- exp -= (bits - 1);
|
|
|
-
|
|
|
- // compute renormalized rounded mantissa for each component
|
|
|
- int mmask = (1 << 24) - 1;
|
|
|
+ component_exp[j] = (min_exp < e) ? e : min_exp;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
for (size_t j = 0; j < stride_float; ++j)
|
|
|
{
|
|
|
- int m = int(ldexp(v[j], -exp) + (v[j] >= 0 ? 0.5f : -0.5f));
|
|
|
+ int exp = (mode == meshopt_EncodeExpSharedVector) ? vector_exp : component_exp[j];
|
|
|
+
|
|
|
+ // note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude)
|
|
|
+ exp -= (bits - 1);
|
|
|
+
|
|
|
+ // compute renormalized rounded mantissa for each component
|
|
|
+ int mmask = (1 << 24) - 1;
|
|
|
+
|
|
|
+ int m = int(v[j] * optexp2(-exp) + (v[j] >= 0 ? 0.5f : -0.5f));
|
|
|
|
|
|
d[j] = (m & mmask) | (unsigned(exp) << 24);
|
|
|
}
|