| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516 |
- // TODO(syoyo) Report fatal error
- //
- // Copyright 2017 Pixar
- //
- // Licensed under the Apache License, Version 2.0 (the "Apache License")
- // with the following modification; you may not use this file except in
- // compliance with the Apache License and the following modification to it:
- // Section 6. Trademarks. is deleted and replaced with:
- //
- // 6. Trademarks. This License does not grant permission to use the trade
- // names, trademarks, service marks, or product names of the Licensor
- // and its affiliates, except as required to comply with Section 4(c) of
- // the License and to reproduce the content of the NOTICE file.
- //
- // You may obtain a copy of the Apache License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the Apache License with the above modification is
- // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- // KIND, either express or implied. See the Apache License for the specific
- // language governing permissions and limitations under the Apache License.
- //
- //#include "pxr/pxr.h"
- //#include "pxr/base/tf/diagnostic.h"
- //#include "pxr/base/tf/fastCompression.h"
- //#include "pxr/usd/usd/integerCoding.h"
- #include "lz4-compression.hh"
- #include "integerCoding.h"
- #include <cstdint>
- #include <cstring>
- #include <limits>
- #include <memory>
- #include <unordered_map>
- //PXR_NAMESPACE_OPEN_SCOPE
- namespace tinyusdz {
- /*
- These integer coding & compression routines are tailored for what are typically
- lists of indexes into other tables. The binary "usdc" file format has lots of
- these in its "structural sections" that define the object hierarchy.
- The basic idea is to take a contiguous list of 32-bit integers and encode them
- in a buffer that is not only smaller, but also still quite compressible by a
- general compression algorithm, and then compress that buffer to produce a final
- result. Decompression proceeds by going in reverse. The general compressor is
- LZ4 via TfFastCompression. The integer coding scheme implemented here is
- described below.
- We encode a list of integers as follows. First we transform the input to
- produce a new list of integers where each element is the difference between it
- and the previous integer in the input sequence. This is the sequence we encode.
- Next we find the most common value in the sequence and write it to the output.
- Then we write 2-bit codes, one for each integer, classifying it. Finally we
- write a variable length section of integer data. The decoder uses the 2-bit
- codes to understand how to interpret this variable length data.
- Given a list of integers, say:
- input = [123, 124, 125, 100125, 100125, 100126, 100126]
- We encode as follows. First, we transform the list to be the list of
- differences to the previous integer, or the integer itself for the first element
- in the list (this can be considered a difference to 0) to get:
- input_diffs = [123, 1, 1, 100000, 0, 1, 0]
- Then we find the most commonly occurring value in this sequence, which is '1'.
- We write this most commonly occurring value into the output stream.
- output = [int32(1)]
- Next we write two sections, first a fixed length section, 2-bit codes per
- integer, followed by a variable length section of integer data. The two bit
- code indicates what "kind" of integer we have:
- 00: The most common value
- 01: 8-bit integer
- 10: 16-bit integer
- 11: 32-bit integer
- For our example this gives:
- input = [123, 124, 125, 100125, 100125, 100126, 10026]
- output = [int32(1) 01 00 00 11 01 00 01 XX int8(123) int32(100000) int8(0) int8(0)]
- Where 'XX' represents unused bits in the last byte of the codes section to round
- up to an even number of bytes.
- In this case the output size is 12 bytes compared to the original input which
- was 28 bytes. In the best possible case the output is (asymptotically) 2 bits
- per integer (6.25% the original size), in the worst possible case it is
- (asymptotically) 34 bits per integer (106.25% the original size).
- */
- namespace {
- template <class Int>
- inline typename std::enable_if<
- std::is_integral<Int>::value &&
- std::is_unsigned<Int>::value &&
- sizeof(Int) == 4,
- int32_t>::type
- _Signed(Int x)
- {
- if (x <= static_cast<uint32_t>(INT32_MAX))
- return static_cast<int32_t>(x);
- if (x >= static_cast<uint32_t>(INT32_MIN))
- return static_cast<int32_t>(x - INT32_MIN) + INT32_MIN;
- //TF_FATAL_ERROR("Unsupported C++ integer representation");
- return 0;
- }
- template <class Int>
- inline typename std::enable_if<
- std::is_integral<Int>::value &&
- std::is_signed<Int>::value &&
- sizeof(Int) == 4,
- int32_t>::type
- _Signed(Int x)
- {
- return x;
- }
- template <class Int>
- inline typename std::enable_if<
- std::is_integral<Int>::value &&
- std::is_unsigned<Int>::value &&
- sizeof(Int) == 8,
- int64_t>::type
- _Signed(Int x)
- {
- if (x <= static_cast<uint64_t>(INT64_MAX))
- return static_cast<int64_t>(x);
- if (x >= static_cast<uint64_t>(INT64_MIN))
- return static_cast<int64_t>(x - INT64_MIN) + INT64_MIN;
- //TF_FATAL_ERROR("Unsupported C++ integer representation");
- return 0;
- }
- template <class Int>
- inline typename std::enable_if<
- std::is_integral<Int>::value &&
- std::is_signed<Int>::value &&
- sizeof(Int) == 8,
- int64_t>::type
- _Signed(Int x)
- {
- return x;
- }
- template <class T>
- inline char *_WriteBits(char *p, T val)
- {
- memcpy(p, &val, sizeof(val));
- return p + sizeof(val);
- }
- template <class T>
- inline T _ReadBits(char const *&p)
- {
- T ret;
- memcpy(&ret, p, sizeof(ret));
- p += sizeof(ret);
- return ret;
- }
- template <class Int>
- constexpr size_t
- _GetEncodedBufferSize(size_t numInts)
- {
- // Calculate encoded integer size.
- return numInts ?
- /* commonValue */ (sizeof(Int)) +
- /* numCodesBytes */ ((numInts * 2 + 7) / 8) +
- /* maxIntBytes */ (numInts * sizeof(Int))
- : 0;
- }
- template <class Int>
- struct _SmallTypes
- {
- typedef typename std::conditional<
- sizeof(Int) == 4, int8_t, int16_t>::type SmallInt;
- typedef typename std::conditional<
- sizeof(Int) == 4, int16_t, int32_t>::type MediumInt;
- };
- template <int N, class Iterator>
- void _EncodeNHelper(
- Iterator &cur,
- typename std::iterator_traits<Iterator>::value_type commonValue,
- typename std::make_signed<
- typename std::iterator_traits<Iterator>::value_type
- >::type &prevVal,
- char *&codesOut,
- char *&vintsOut) {
- using Int = typename std::iterator_traits<Iterator>::value_type;
- using SInt = typename std::make_signed<Int>::type;
- using SmallInt = typename _SmallTypes<Int>::SmallInt;
- using MediumInt = typename _SmallTypes<Int>::MediumInt;
- static_assert(1 <= N && N <= 4, "");
- enum Code { Common, Small, Medium, Large };
-
- auto getCode = [commonValue](SInt x) {
- std::numeric_limits<SmallInt> smallLimit;
- std::numeric_limits<MediumInt> mediumLimit;
- if (x == _Signed(commonValue)) { return Common; }
- if (x >= smallLimit.min() && x <= smallLimit.max()) { return Small; }
- if (x >= mediumLimit.min() && x <= mediumLimit.max()) { return Medium; }
- return Large;
- };
- uint8_t codeByte = 0;
- for (int i = 0; i != N; ++i) {
- SInt val = _Signed(*cur) - prevVal;
- prevVal = _Signed(*cur++);
- Code code = getCode(val);
- codeByte |= (code << (2 * i));
- switch (code) {
- default:
- case Common:
- break;
- case Small:
- vintsOut = _WriteBits(vintsOut, static_cast<SmallInt>(val));
- break;
- case Medium:
- vintsOut = _WriteBits(vintsOut, static_cast<MediumInt>(val));
- break;
- case Large:
- vintsOut = _WriteBits(vintsOut, _Signed(val));
- break;
- };
- }
- codesOut = _WriteBits(codesOut, codeByte);
- }
- template <int N, class Iterator>
- void _DecodeNHelper(
- char const *&codesIn,
- char const *&vintsIn,
- typename std::iterator_traits<Iterator>::value_type commonValue,
- typename std::make_signed<
- typename std::iterator_traits<Iterator>::value_type
- >::type &prevVal,
- Iterator &output)
- {
- using Int = typename std::iterator_traits<Iterator>::value_type;
- using SInt = typename std::make_signed<Int>::type;
- using SmallInt = typename _SmallTypes<Int>::SmallInt;
- using MediumInt = typename _SmallTypes<Int>::MediumInt;
- enum Code { Common, Small, Medium, Large };
- uint8_t codeByte = *codesIn++;
- for (int i = 0; i != N; ++i) {
- switch ((codeByte & (3 << (2 * i))) >> (2 * i)) {
- default:
- case Common:
- prevVal += commonValue;
- break;
- case Small:
- prevVal += _ReadBits<SmallInt>(vintsIn);
- break;
- case Medium:
- prevVal += _ReadBits<MediumInt>(vintsIn);
- break;
- case Large:
- prevVal += _ReadBits<SInt>(vintsIn);
- break;
- }
- *output++ = static_cast<Int>(prevVal);
- }
- }
- template <class Int>
- size_t
- _EncodeIntegers(Int const *begin, size_t numInts, char *output)
- {
- using SInt = typename std::make_signed<Int>::type;
-
- if (numInts == 0)
- return 0;
- // First find the most common element value.
- SInt commonValue = 0;
- {
- size_t commonCount = 0;
- std::unordered_map<SInt, size_t> counts;
- SInt prevVal = 0;
- for (Int const *cur = begin, *end = begin + numInts;
- cur != end; ++cur) {
- SInt val = _Signed(*cur) - prevVal;
- const size_t count = ++counts[val];
- if (count > commonCount) {
- commonValue = val;
- commonCount = count;
- } else if (count == commonCount && val > commonValue) {
- // Take the largest common value in case of a tie -- this gives
- // the biggest potential savings in the encoded stream.
- commonValue = val;
- }
- prevVal = _Signed(*cur);
- }
- }
- // Now code the values.
-
- // Write most common value.
- char *p = _WriteBits(output, commonValue);
- char *codesOut = p;
- char *vintsOut = p + (numInts * 2 + 7) / 8;
- Int const *cur = begin;
- SInt prevVal = 0;
- while (numInts >= 4) {
- _EncodeNHelper<4>(cur, commonValue, prevVal, codesOut, vintsOut);
- numInts -= 4;
- }
- switch (numInts) {
- case 0: default: break;
- case 1: _EncodeNHelper<1>(cur, commonValue, prevVal, codesOut, vintsOut);
- break;
- case 2: _EncodeNHelper<2>(cur, commonValue, prevVal, codesOut, vintsOut);
- break;
- case 3: _EncodeNHelper<3>(cur, commonValue, prevVal, codesOut, vintsOut);
- break;
- };
-
- return vintsOut - output;
- }
- template <class Int>
- size_t _DecodeIntegers(char const *data, size_t numInts, Int *result)
- {
- using SInt = typename std::make_signed<Int>::type;
- auto commonValue = _ReadBits<SInt>(data);
- size_t numCodesBytes = (numInts * 2 + 7) / 8;
- char const *codesIn = data;
- char const *vintsIn = data + numCodesBytes;
- SInt prevVal = 0;
- auto intsLeft = numInts;
- while (intsLeft >= 4) {
- _DecodeNHelper<4>(codesIn, vintsIn, commonValue, prevVal, result);
- intsLeft -= 4;
- }
- switch (intsLeft) {
- case 0: default: break;
- case 1: _DecodeNHelper<1>(codesIn, vintsIn, commonValue, prevVal, result);
- break;
- case 2: _DecodeNHelper<2>(codesIn, vintsIn, commonValue, prevVal, result);
- break;
- case 3: _DecodeNHelper<3>(codesIn, vintsIn, commonValue, prevVal, result);
- break;
- };
- return numInts;
- }
- template <class Int>
- size_t
- _CompressIntegers(Int const *begin, size_t numInts, char *output, std::string *err)
- {
- // Working space.
- std::unique_ptr<char[]>
- encodeBuffer(new char[_GetEncodedBufferSize<Int>(numInts)]);
-
- // Encode first.
- size_t encodedSize = _EncodeIntegers(begin, numInts, encodeBuffer.get());
- // Then compress.
- return LZ4Compression::CompressToBuffer(
- encodeBuffer.get(), output, encodedSize, err);
- }
- template <class Int>
- size_t _DecompressIntegers(char const *compressed, size_t compressedSize,
- Int *ints, size_t numInts, std::string *err, char *workingSpace)
- {
- // Working space.
- size_t workingSpaceSize =
- Usd_IntegerCompression::GetDecompressionWorkingSpaceSize(numInts);
- std::unique_ptr<char[]> tmpSpace;
- if (!workingSpace) {
- tmpSpace.reset(new char[workingSpaceSize]);
- workingSpace = tmpSpace.get();
- }
- size_t decompSz = LZ4Compression::DecompressFromBuffer(
- compressed, workingSpace, compressedSize, workingSpaceSize, err);
- if (decompSz == 0)
- return 0;
- return _DecodeIntegers(workingSpace, numInts, ints);
- }
- } // anon
- ////////////////////////////////////////////////////////////////////////
- // 32 bit.
- size_t
- Usd_IntegerCompression::GetCompressedBufferSize(size_t numInts)
- {
- return LZ4Compression::GetCompressedBufferSize(
- _GetEncodedBufferSize<int32_t>(numInts));
- }
- size_t
- Usd_IntegerCompression::GetDecompressionWorkingSpaceSize(size_t numInts)
- {
- return _GetEncodedBufferSize<int32_t>(numInts);
- }
- size_t
- Usd_IntegerCompression::CompressToBuffer(
- int32_t const *ints, size_t numInts, char *compressed, std::string *err)
- {
- return _CompressIntegers(ints, numInts, compressed, err);
- }
- size_t
- Usd_IntegerCompression::CompressToBuffer(
- uint32_t const *ints, size_t numInts, char *compressed, std::string *err)
- {
- return _CompressIntegers(ints, numInts, compressed, err);
- }
- size_t
- Usd_IntegerCompression::DecompressFromBuffer(
- char const *compressed, size_t compressedSize,
- int32_t *ints, size_t numInts, std::string *err, char *workingSpace)
- {
- return _DecompressIntegers(compressed, compressedSize,
- ints, numInts, err, workingSpace);
- }
- size_t
- Usd_IntegerCompression::DecompressFromBuffer(
- char const *compressed, size_t compressedSize,
- uint32_t *ints, size_t numInts, std::string *err, char *workingSpace)
- {
- return _DecompressIntegers(compressed, compressedSize,
- ints, numInts, err, workingSpace);
- }
- ////////////////////////////////////////////////////////////////////////
- // 64 bit.
- size_t
- Usd_IntegerCompression64::GetCompressedBufferSize(size_t numInts)
- {
- return LZ4Compression::GetCompressedBufferSize(
- _GetEncodedBufferSize<int64_t>(numInts));
- }
- size_t
- Usd_IntegerCompression64::GetDecompressionWorkingSpaceSize(size_t numInts)
- {
- return _GetEncodedBufferSize<int64_t>(numInts);
- }
- size_t
- Usd_IntegerCompression64::CompressToBuffer(
- int64_t const *ints, size_t numInts, char *compressed, std::string *err)
- {
- return _CompressIntegers(ints, numInts, compressed, err);
- }
- size_t
- Usd_IntegerCompression64::CompressToBuffer(
- uint64_t const *ints, size_t numInts, char *compressed, std::string *err)
- {
- return _CompressIntegers(ints, numInts, compressed, err);
- }
- size_t
- Usd_IntegerCompression64::DecompressFromBuffer(
- char const *compressed, size_t compressedSize,
- int64_t *ints, size_t numInts, std::string *err, char *workingSpace)
- {
- return _DecompressIntegers(compressed, compressedSize,
- ints, numInts, err, workingSpace);
- }
- size_t
- Usd_IntegerCompression64::DecompressFromBuffer(
- char const *compressed, size_t compressedSize,
- uint64_t *ints, size_t numInts, std::string *err, char *workingSpace)
- {
- return _DecompressIntegers(compressed, compressedSize,
- ints, numInts, err, workingSpace);
- }
- //PXR_NAMESPACE_CLOSE_SCOPE
- } // namespace tinyusdz
|