| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363 |
- /*
- lz4opt.h - Optimal Mode of LZ4
- Copyright (C) 2015-2016, Przemyslaw Skibinski <[email protected]>
- Note : this file is intended to be included within lz4hc.c
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- You can contact the author at :
- - LZ4 source repository : https://github.com/lz4/lz4
- - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
- */
- #define LZ4_OPT_NUM (1<<12)
- typedef struct
- {
- int off;
- int len;
- } LZ4HC_match_t;
- typedef struct
- {
- int price;
- int off;
- int mlen;
- int litlen;
- } LZ4HC_optimal_t;
- /* price in bits */
- FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen)
- {
- size_t price = 8*litlen;
- if (litlen >= (size_t)RUN_MASK) price+=8*(1+(litlen-RUN_MASK)/255);
- return price;
- }
- /* requires mlen >= MINMATCH */
- FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen)
- {
- size_t price = 16 + 8; /* 16-bit offset + token */
- price += LZ4HC_literalsPrice(litlen);
- mlen -= MINMATCH;
- if (mlen >= (size_t)ML_MASK) price+=8*(1+(mlen-ML_MASK)/255);
- return price;
- }
- /*-*************************************
- * Binary Tree search
- ***************************************/
- FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches (
- LZ4HC_CCtx_internal* ctx,
- const BYTE* const ip,
- const BYTE* const iHighLimit,
- size_t best_mlen,
- LZ4HC_match_t* matches,
- int* matchNum)
- {
- U16* const chainTable = ctx->chainTable;
- U32* const HashTable = ctx->hashTable;
- const BYTE* const base = ctx->base;
- const U32 dictLimit = ctx->dictLimit;
- const U32 current = (U32)(ip - base);
- const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);
- const BYTE* const dictBase = ctx->dictBase;
- const BYTE* match;
- int nbAttempts = ctx->searchNum;
- int mnum = 0;
- U16 *ptr0, *ptr1, delta0, delta1;
- U32 matchIndex;
- size_t matchLength = 0;
- U32* HashPos;
- if (ip + MINMATCH > iHighLimit) return 1;
- /* HC4 match finder */
- HashPos = &HashTable[LZ4HC_hashPtr(ip)];
- matchIndex = *HashPos;
- *HashPos = current;
- ptr0 = &DELTANEXTMAXD(current*2+1);
- ptr1 = &DELTANEXTMAXD(current*2);
- delta0 = delta1 = (U16)(current - matchIndex);
- while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
- nbAttempts--;
- if (matchIndex >= dictLimit) {
- match = base + matchIndex;
- matchLength = LZ4_count(ip, match, iHighLimit);
- } else {
- const BYTE* vLimit = ip + (dictLimit - matchIndex);
- match = dictBase + matchIndex;
- if (vLimit > iHighLimit) vLimit = iHighLimit;
- matchLength = LZ4_count(ip, match, vLimit);
- if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
- matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
- }
- if (matchLength > best_mlen) {
- best_mlen = matchLength;
- if (matches) {
- if (matchIndex >= dictLimit)
- matches[mnum].off = (int)(ip - match);
- else
- matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
- matches[mnum].len = (int)matchLength;
- mnum++;
- }
- if (best_mlen > LZ4_OPT_NUM) break;
- }
- if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */
- break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
- if (*(ip+matchLength) < *(match+matchLength)) {
- *ptr0 = delta0;
- ptr0 = &DELTANEXTMAXD(matchIndex*2);
- if (*ptr0 == (U16)-1) break;
- delta0 = *ptr0;
- delta1 += delta0;
- matchIndex -= delta0;
- } else {
- *ptr1 = delta1;
- ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
- if (*ptr1 == (U16)-1) break;
- delta1 = *ptr1;
- delta0 += delta1;
- matchIndex -= delta1;
- }
- }
- *ptr0 = (U16)-1;
- *ptr1 = (U16)-1;
- if (matchNum) *matchNum = mnum;
- /* if (best_mlen > 8) return best_mlen-8; */
- if (!matchNum) return 1;
- return 1;
- }
- FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
- {
- const BYTE* const base = ctx->base;
- const U32 target = (U32)(ip - base);
- U32 idx = ctx->nextToUpdate;
- while(idx < target)
- idx += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, base+idx, iHighLimit, 8, NULL, NULL);
- }
- /** Tree updater, providing best match */
- FORCE_INLINE int LZ4HC_BinTree_GetAllMatches (
- LZ4HC_CCtx_internal* ctx,
- const BYTE* const ip, const BYTE* const iHighLimit,
- size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
- {
- int mnum = 0;
- if (ip < ctx->base + ctx->nextToUpdate) return 0; /* skipped area */
- if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit);
- best_mlen = LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &mnum);
- ctx->nextToUpdate = (U32)(ip - ctx->base + best_mlen);
- return mnum;
- }
- #define SET_PRICE(pos, mlen, offset, litlen, price) \
- { \
- while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \
- opt[pos].mlen = (int)mlen; \
- opt[pos].off = (int)offset; \
- opt[pos].litlen = (int)litlen; \
- opt[pos].price = (int)price; \
- }
- static int LZ4HC_compress_optimal (
- LZ4HC_CCtx_internal* ctx,
- const char* const source,
- char* dest,
- int inputSize,
- int maxOutputSize,
- limitedOutput_directive limit,
- const size_t sufficient_len,
- const int fullUpdate
- )
- {
- LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1];
- LZ4HC_match_t matches[LZ4_OPT_NUM + 1];
- const BYTE *inr = NULL;
- size_t res, cur, cur2;
- size_t i, llen, litlen, mlen, best_mlen, price, offset, best_off, match_num, last_pos;
- const BYTE* ip = (const BYTE*) source;
- const BYTE* anchor = ip;
- const BYTE* const iend = ip + inputSize;
- const BYTE* const mflimit = iend - MFLIMIT;
- const BYTE* const matchlimit = (iend - LASTLITERALS);
- BYTE* op = (BYTE*) dest;
- BYTE* const oend = op + maxOutputSize;
- /* init */
- ctx->end += inputSize;
- ip++;
- /* Main Loop */
- while (ip < mflimit) {
- memset(opt, 0, sizeof(LZ4HC_optimal_t));
- last_pos = 0;
- llen = ip - anchor;
- match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate);
- if (!match_num) { ip++; continue; }
- if ((size_t)matches[match_num-1].len > sufficient_len) {
- best_mlen = matches[match_num-1].len;
- best_off = matches[match_num-1].off;
- cur = 0;
- last_pos = 1;
- goto encode;
- }
- /* set prices using matches at position = 0 */
- for (i = 0; i < match_num; i++) {
- mlen = (i>0) ? (size_t)matches[i-1].len+1 : MINMATCH;
- best_mlen = (matches[i].len < LZ4_OPT_NUM) ? matches[i].len : LZ4_OPT_NUM;
- while (mlen <= best_mlen) {
- litlen = 0;
- price = LZ4HC_sequencePrice(llen + litlen, mlen) - LZ4HC_literalsPrice(llen);
- SET_PRICE(mlen, mlen, matches[i].off, litlen, price);
- mlen++;
- }
- }
- if (last_pos < MINMATCH) { ip++; continue; }
- /* check further positions */
- opt[0].mlen = opt[1].mlen = 1;
- for (cur = 1; cur <= last_pos; cur++) {
- inr = ip + cur;
- if (opt[cur-1].mlen == 1) {
- litlen = opt[cur-1].litlen + 1;
- if (cur != litlen) {
- price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen);
- } else {
- price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen);
- }
- } else {
- litlen = 1;
- price = opt[cur - 1].price + LZ4HC_literalsPrice(litlen);
- }
- mlen = 1;
- best_mlen = 0;
- if (cur > last_pos || price < (size_t)opt[cur].price)
- SET_PRICE(cur, mlen, best_mlen, litlen, price);
- if (cur == last_pos || inr >= mflimit) break;
- match_num = LZ4HC_BinTree_GetAllMatches(ctx, inr, matchlimit, MINMATCH-1, matches, fullUpdate);
- if (match_num > 0 && (size_t)matches[match_num-1].len > sufficient_len) {
- best_mlen = matches[match_num-1].len;
- best_off = matches[match_num-1].off;
- last_pos = cur + 1;
- goto encode;
- }
- /* set prices using matches at position = cur */
- for (i = 0; i < match_num; i++) {
- mlen = (i>0) ? (size_t)matches[i-1].len+1 : MINMATCH;
- cur2 = cur;
- best_mlen = (cur2 + matches[i].len < LZ4_OPT_NUM) ? (size_t)matches[i].len : LZ4_OPT_NUM - cur2;
- while (mlen <= best_mlen) {
- if (opt[cur2].mlen == 1) {
- litlen = opt[cur2].litlen;
- if (cur2 != litlen)
- price = opt[cur2 - litlen].price + LZ4HC_sequencePrice(litlen, mlen);
- else
- price = LZ4HC_sequencePrice(llen + litlen, mlen) - LZ4HC_literalsPrice(llen);
- } else {
- litlen = 0;
- price = opt[cur2].price + LZ4HC_sequencePrice(litlen, mlen);
- }
- if (cur2 + mlen > last_pos || price < (size_t)opt[cur2 + mlen].price) { // || (((int)price == opt[cur2 + mlen].price) && (opt[cur2 + mlen-1].mlen == 1))) {
- SET_PRICE(cur2 + mlen, mlen, matches[i].off, litlen, price);
- }
- mlen++;
- }
- }
- } /* for (cur = 1; cur <= last_pos; cur++) */
- best_mlen = opt[last_pos].mlen;
- best_off = opt[last_pos].off;
- cur = last_pos - best_mlen;
- encode: /* cur, last_pos, best_mlen, best_off have to be set */
- opt[0].mlen = 1;
- while (1) {
- mlen = opt[cur].mlen;
- offset = opt[cur].off;
- opt[cur].mlen = (int)best_mlen;
- opt[cur].off = (int)best_off;
- best_mlen = mlen;
- best_off = offset;
- if (mlen > cur) break;
- cur -= mlen;
- }
- cur = 0;
- while (cur < last_pos) {
- mlen = opt[cur].mlen;
- if (mlen == 1) { ip++; cur++; continue; }
- offset = opt[cur].off;
- cur += mlen;
- res = LZ4HC_encodeSequence(&ip, &op, &anchor, (int)mlen, ip - offset, limit, oend);
- if (res) return 0;
- }
- }
- /* Encode Last Literals */
- { int lastRun = (int)(iend - anchor);
- if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
- if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
- else *op++ = (BYTE)(lastRun<<ML_BITS);
- memcpy(op, anchor, iend - anchor);
- op += iend-anchor;
- }
- /* End */
- return (int) ((char*)op-dest);
- }
|