Browse Source

Revert "UPDATED: `sdefl` and `sinfl` compression libraries"

This reverts commit e190b7eee9199b681a8c50fb69f2fce07e92c7af.
Ray, 2 năm trước
mục cha
commit
7f21cf1dcf
2 tập tin đã thay đổi với 68 bổ sung và 155 xóa
  1. 57 138
      src/external/sdefl.h
  2. 11 17
      src/external/sinfl.h

+ 57 - 138
src/external/sdefl.h

@@ -71,7 +71,7 @@ Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index
 This software is available under 2 licenses -- choose whichever you prefer.
 ------------------------------------------------------------------------------
 ALTERNATIVE A - MIT License
-Copyright (c) 2020-2023 Micha Mettke
+Copyright (c) 2020 Micha Mettke
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
 the Software without restriction, including without limitation the rights to
@@ -125,7 +125,7 @@ extern "C" {
 
 #define SDEFL_MIN_MATCH 4
 #define SDEFL_BLK_MAX   (256*1024)
-#define SDEFL_SEQ_SIZ   ((SDEFL_BLK_MAX+2)/3)
+#define SDEFL_SEQ_SIZ   ((SDEFL_BLK_MAX + SDEFL_MIN_MATCH)/SDEFL_MIN_MATCH)
 
 #define SDEFL_SYM_MAX   (288)
 #define SDEFL_OFF_MAX   (32)
@@ -185,7 +185,6 @@ extern int zsdeflate(struct sdefl *s, void *o, const void *i, int n, int lvl);
 #define SDEFL_MAX_CODE_LEN      (15)
 #define SDEFL_SYM_BITS          (10u)
 #define SDEFL_SYM_MSK           ((1u << SDEFL_SYM_BITS)-1u)
-#define SDEFL_RAW_BLK_SIZE      (65535)
 #define SDEFL_LIT_LEN_CODES     (14)
 #define SDEFL_OFF_CODES         (15)
 #define SDEFL_PRE_CODES         (7)
@@ -193,7 +192,6 @@ extern int zsdeflate(struct sdefl *s, void *o, const void *i, int n, int lvl);
 #define SDEFL_EOB               (256)
 
 #define sdefl_npow2(n) (1 << (sdefl_ilog2((n)-1) + 1))
-#define sdefl_div_round_up(n,d) (((n)+((d)-1))/(d))
 
 static int
 sdefl_ilog2(int n) {
@@ -440,12 +438,12 @@ sdefl_precode(struct sdefl_symcnt *cnt, unsigned *freqs, unsigned *items,
   } while (run_start != total);
   cnt->items = (int)(at - items);
 }
-struct sdefl_match_codest {
+struct sdefl_match_codes {
   int ls, lc;
   int dc, dx;
 };
 static void
-sdefl_match_codes(struct sdefl_match_codest *cod, int dist, int len) {
+sdefl_match_codes(struct sdefl_match_codes *cod, int dist, int len) {
   static const short dxmax[] = {0,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576};
   static const unsigned char lslot[258+1] = {
     0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12,
@@ -473,44 +471,6 @@ sdefl_match_codes(struct sdefl_match_codest *cod, int dist, int len) {
   cod->dx = sdefl_ilog2(sdefl_npow2(dist) >> 2);
   cod->dc = cod->dx ? ((cod->dx + 1) << 1) + (dist > dxmax[cod->dx]) : dist-1;
 }
-enum sdefl_blk_type {
-  SDEFL_BLK_UCOMPR,
-  SDEFL_BLK_DYN
-};
-static enum sdefl_blk_type
-sdefl_blk_type(const struct sdefl *s, int blk_len, int pre_item_len,
-               const unsigned *pre_freq, const unsigned char *pre_len) {
-  static const unsigned char x_pre_bits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
-  static const unsigned char x_len_bits[] = {0,0,0,0,0,0,0,0, 1,1,1,1,2,2,2,2,
-    3,3,3,3,4,4,4,4, 5,5,5,5,0};
-  static const unsigned char x_off_bits[] = {0,0,0,0,1,1,2,2, 3,3,4,4,5,5,6,6,
-    7,7,8,8,9,9,10,10, 11,11,12,12,13,13};
-
-  int dyn_cost = 0;
-  int fix_cost = 0;
-  int sym = 0;
-
-  dyn_cost += 5 + 5 + 4 + (3 * pre_item_len);
-  for (sym = 0; sym < SDEFL_PRE_MAX; sym++)
-    dyn_cost += pre_freq[sym] * (x_pre_bits[sym] + pre_len[sym]);
-  for (sym = 0; sym < 256; sym++)
-    dyn_cost += s->freq.lit[sym] * s->cod.len.lit[sym];
-  dyn_cost += s->cod.len.lit[SDEFL_EOB];
-  for (sym = 257; sym < 286; sym++)
-    dyn_cost += s->freq.lit[sym] * (x_len_bits[sym - 257] + s->cod.len.lit[sym]);
-  for (sym = 0; sym < 30; sym++)
-    dyn_cost += s->freq.off[sym] * (x_off_bits[sym] + s->cod.len.off[sym]);
-
-  fix_cost += 8*(5 * sdefl_div_round_up(blk_len, SDEFL_RAW_BLK_SIZE) + blk_len + 1 + 2);
-  return (dyn_cost < fix_cost) ? SDEFL_BLK_DYN : SDEFL_BLK_UCOMPR;
-}
-static void
-sdefl_put16(unsigned char **dst, unsigned short x) {
-  unsigned char *val = *dst;
-  val[0] = (unsigned char)(x & 0xff);
-  val[1] = (unsigned char)(x >> 8);
-  *dst = val + 2;
-}
 static void
 sdefl_match(unsigned char **dst, struct sdefl *s, int dist, int len) {
   static const char lxn[] = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
@@ -519,7 +479,7 @@ sdefl_match(unsigned char **dst, struct sdefl *s, int dist, int len) {
   static const short dmin[] = {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,
       385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
 
-  struct sdefl_match_codest cod;
+  struct sdefl_match_codes cod;
   sdefl_match_codes(&cod, dist, len);
   sdefl_put(dst, s, (int)s->cod.word.lit[cod.lc], s->cod.len.lit[cod.lc]);
   sdefl_put(dst, s, len - lmin[cod.ls], lxn[cod.ls]);
@@ -528,8 +488,7 @@ sdefl_match(unsigned char **dst, struct sdefl *s, int dist, int len) {
 }
 static void
 sdefl_flush(unsigned char **dst, struct sdefl *s, int is_last,
-            const unsigned char *in, int blk_begin, int blk_end) {
-  int blk_len = blk_end - blk_begin;
+            const unsigned char *in) {
   int j, i = 0, item_cnt = 0;
   struct sdefl_symcnt symcnt = {0};
   unsigned codes[SDEFL_PRE_MAX];
@@ -539,7 +498,7 @@ sdefl_flush(unsigned char **dst, struct sdefl *s, int is_last,
   static const unsigned char perm[SDEFL_PRE_MAX] = {16,17,18,0,8,7,9,6,10,5,11,
       4,12,3,13,2,14,1,15};
 
-  /* calculate huffman codes */
+  /* huffman codes */
   s->freq.lit[SDEFL_EOB]++;
   sdefl_huff(s->cod.len.lit, s->cod.word.lit, s->freq.lit, SDEFL_SYM_MAX, SDEFL_LIT_LEN_CODES);
   sdefl_huff(s->cod.len.off, s->cod.word.off, s->freq.off, SDEFL_OFF_MAX, SDEFL_OFF_CODES);
@@ -550,58 +509,35 @@ sdefl_flush(unsigned char **dst, struct sdefl *s, int is_last,
       break;
     }
   }
-  /* write block */
-  switch (sdefl_blk_type(s, blk_len, item_cnt, freqs, lens)) {
-  case SDEFL_BLK_UCOMPR: {
-    /* uncompressed blocks */
-    int n = sdefl_div_round_up(blk_len, SDEFL_RAW_BLK_SIZE);
-    for (i = 0; i < n; ++i) {
-      int fin = is_last && (i + 1 == n);
-      int amount = blk_len < SDEFL_RAW_BLK_SIZE ? blk_len : SDEFL_RAW_BLK_SIZE;
-      sdefl_put(dst, s, !!fin, 1); /* block */
-      sdefl_put(dst, s, 0x00, 2); /* stored block */
-      if (s->bitcnt) {
-        sdefl_put(dst, s, 0x00, 8 - s->bitcnt);
-      }
-      assert(s->bitcnt == 0);
-      sdefl_put16(dst, (unsigned short)amount);
-      sdefl_put16(dst, ~(unsigned short)amount);
-      memcpy(*dst, in + blk_begin + i * SDEFL_RAW_BLK_SIZE, amount);
-      *dst = *dst + amount;
-      blk_len -= amount;
-    }
-  } break;
-  case SDEFL_BLK_DYN: {
-    /* dynamic huffman block */
-    sdefl_put(dst, s, !!is_last, 1); /* block */
-    sdefl_put(dst, s, 0x02, 2); /* dynamic huffman */
-    sdefl_put(dst, s, symcnt.lit - 257, 5);
-    sdefl_put(dst, s, symcnt.off - 1, 5);
-    sdefl_put(dst, s, item_cnt - 4, 4);
-    for (i = 0; i < item_cnt; ++i) {
-      sdefl_put(dst, s, lens[perm[i]], 3);
-    }
-    for (i = 0; i < symcnt.items; ++i) {
-      unsigned sym = items[i] & 0x1F;
-      sdefl_put(dst, s, (int)codes[sym], lens[sym]);
-      if (sym < 16) continue;
-      if (sym == 16) sdefl_put(dst, s, items[i] >> 5, 2);
-      else if(sym == 17) sdefl_put(dst, s, items[i] >> 5, 3);
-      else sdefl_put(dst, s, items[i] >> 5, 7);
-    }
-    /* block sequences */
-    for (i = 0; i < s->seq_cnt; ++i) {
-      if (s->seq[i].off >= 0) {
-        for (j = 0; j < s->seq[i].len; ++j) {
-          int c = in[s->seq[i].off + j];
-          sdefl_put(dst, s, (int)s->cod.word.lit[c], s->cod.len.lit[c]);
-        }
-      } else {
-        sdefl_match(dst, s, -s->seq[i].off, s->seq[i].len);
+  /* block header */
+  sdefl_put(dst, s, is_last ? 0x01 : 0x00, 1); /* block */
+  sdefl_put(dst, s, 0x02, 2); /* dynamic huffman */
+  sdefl_put(dst, s, symcnt.lit - 257, 5);
+  sdefl_put(dst, s, symcnt.off - 1, 5);
+  sdefl_put(dst, s, item_cnt - 4, 4);
+  for (i = 0; i < item_cnt; ++i) {
+    sdefl_put(dst, s, lens[perm[i]], 3);
+  }
+  for (i = 0; i < symcnt.items; ++i) {
+    unsigned sym = items[i] & 0x1F;
+    sdefl_put(dst, s, (int)codes[sym], lens[sym]);
+    if (sym < 16) continue;
+    if (sym == 16) sdefl_put(dst, s, items[i] >> 5, 2);
+    else if(sym == 17) sdefl_put(dst, s, items[i] >> 5, 3);
+    else sdefl_put(dst, s, items[i] >> 5, 7);
+  }
+  /* block sequences */
+  for (i = 0; i < s->seq_cnt; ++i) {
+    if (s->seq[i].off >= 0) {
+      for (j = 0; j < s->seq[i].len; ++j) {
+        int c = in[s->seq[i].off + j];
+        sdefl_put(dst, s, (int)s->cod.word.lit[c], s->cod.len.lit[c]);
       }
+    } else {
+      sdefl_match(dst, s, -s->seq[i].off, s->seq[i].len);
     }
-    sdefl_put(dst, s, (int)(s)->cod.word.lit[SDEFL_EOB], (s)->cod.len.lit[SDEFL_EOB]);
-  } break;}
+  }
+  sdefl_put(dst, s, (int)(s)->cod.word.lit[SDEFL_EOB], (s)->cod.len.lit[SDEFL_EOB]);
   memset(&s->freq, 0, sizeof(s->freq));
   s->seq_cnt = 0;
 }
@@ -614,12 +550,8 @@ sdefl_seq(struct sdefl *s, int off, int len) {
 }
 static void
 sdefl_reg_match(struct sdefl *s, int off, int len) {
-  struct sdefl_match_codest cod;
+  struct sdefl_match_codes cod;
   sdefl_match_codes(&cod, off, len);
-
-  assert(cod.lc < SDEFL_SYM_MAX);
-  assert(cod.dc < SDEFL_OFF_MAX);
-
   s->freq.lit[cod.lc]++;
   s->freq.off[cod.dc]++;
 }
@@ -628,35 +560,22 @@ struct sdefl_match {
   int len;
 };
 static void
-sdefl_fnd(struct sdefl_match *m, const struct sdefl *s, int chain_len,
-          int max_match, const unsigned char *in, int p, int e) {
-  int i = s->tbl[sdefl_hash32(in + p)];
-  int limit = ((p - SDEFL_WIN_SIZ) < SDEFL_NIL) ? SDEFL_NIL : (p-SDEFL_WIN_SIZ);
-
-  assert(p < e);
-  assert(p + max_match <= e);
+sdefl_fnd(struct sdefl_match *m, const struct sdefl *s,
+          int chain_len, int max_match, const unsigned char *in, int p) {
+  int i = s->tbl[sdefl_hash32(&in[p])];
+  int limit = ((p-SDEFL_WIN_SIZ)<SDEFL_NIL)?SDEFL_NIL:(p-SDEFL_WIN_SIZ);
   while (i > limit) {
-    assert(i + m->len < e);
-    assert(p + m->len < e);
-    assert(i + SDEFL_MIN_MATCH < e);
-    assert(p + SDEFL_MIN_MATCH < e);
-
-    if (in[i + m->len] == in[p + m->len] &&
-      (sdefl_uload32(&in[i]) == sdefl_uload32(&in[p]))) {
+    if (in[i+m->len] == in[p+m->len] &&
+        (sdefl_uload32(&in[i]) == sdefl_uload32(&in[p]))){
       int n = SDEFL_MIN_MATCH;
-      while (n < max_match && in[i + n] == in[p + n]) {
-        assert(i + n < e);
-        assert(p + n < e);
-        n++;
-      }
+      while (n < max_match && in[i+n] == in[p+n]) n++;
       if (n > m->len) {
         m->len = n, m->off = p - i;
-        if (n == max_match)
-          break;
+        if (n == max_match) break;
       }
     }
     if (!(--chain_len)) break;
-    i = s->prv[i & SDEFL_WIN_MSK];
+    i = s->prv[i&SDEFL_WIN_MSK];
   }
 }
 static int
@@ -669,20 +588,19 @@ sdefl_compr(struct sdefl *s, unsigned char *out, const unsigned char *in,
   for (n = 0; n < SDEFL_HASH_SIZ; ++n) {
     s->tbl[n] = SDEFL_NIL;
   }
-  do {int blk_begin = i;
-    int blk_end = ((i + SDEFL_BLK_MAX) < in_len) ? (i + SDEFL_BLK_MAX) : in_len;
+  do {int blk_end = ((i + SDEFL_BLK_MAX) < in_len) ? (i + SDEFL_BLK_MAX) : in_len;
     while (i < blk_end) {
       struct sdefl_match m = {0};
       int left = blk_end - i;
-      int max_match = (left > SDEFL_MAX_MATCH) ? SDEFL_MAX_MATCH : left;
+      int max_match = (left >= SDEFL_MAX_MATCH) ? SDEFL_MAX_MATCH : left;
       int nice_match = pref[lvl] < max_match ? pref[lvl] : max_match;
       int run = 1, inc = 1, run_inc = 0;
       if (max_match > SDEFL_MIN_MATCH) {
-        sdefl_fnd(&m, s, max_chain, max_match, in, i, in_len);
+        sdefl_fnd(&m, s, max_chain, max_match, in, i);
       }
-      if (lvl >= 5 && m.len >= SDEFL_MIN_MATCH && m.len + 1 < nice_match){
+      if (lvl >= 5 && m.len >= SDEFL_MIN_MATCH && m.len < nice_match){
         struct sdefl_match m2 = {0};
-        sdefl_fnd(&m2, s, max_chain, m.len + 1, in, i + 1, in_len);
+        sdefl_fnd(&m2, s, max_chain, m.len+1, in, i+1);
         m.len = (m2.len > m.len) ? 0 : m.len;
       }
       if (m.len >= SDEFL_MIN_MATCH) {
@@ -718,12 +636,12 @@ sdefl_compr(struct sdefl *s, unsigned char *out, const unsigned char *in,
       sdefl_seq(s, i - litlen, litlen);
       litlen = 0;
     }
-    sdefl_flush(&q, s, blk_end == in_len, in, blk_begin, blk_end);
+    sdefl_flush(&q, s, blk_end == in_len, in);
   } while (i < in_len);
-  if (s->bitcnt) {
+
+  if (s->bitcnt > 0)
     sdefl_put(&q, s, 0x00, 8 - s->bitcnt);
-  }
-  assert(s->bitcnt == 0);
+
   return (int)(q - out);
 }
 extern int
@@ -783,8 +701,9 @@ zsdeflate(struct sdefl *s, void *out, const void *in, int n, int lvl) {
 }
 extern int
 sdefl_bound(int len) {
-  int max_blocks = 1 + sdefl_div_round_up(len, SDEFL_RAW_BLK_SIZE);
-  int bound = 5 * max_blocks + len + 1 + 4 + 8;
-  return bound;
+  int a = 128 + (len * 110) / 100;
+  int b = 128 + len + ((len / (31 * 1024)) + 1) * 5;
+  return (a > b) ? a : b;
 }
 #endif /* SDEFL_IMPLEMENTATION */
+

+ 11 - 17
src/external/sinfl.h

@@ -72,7 +72,7 @@ Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index
 This software is available under 2 licenses -- choose whichever you prefer.
 ------------------------------------------------------------------------------
 ALTERNATIVE A - MIT License
-Copyright (c) 2020-2023 Micha Mettke
+Copyright (c) 2020 Micha Mettke
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
 the Software without restriction, including without limitation the rights to
@@ -400,21 +400,17 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
     } break;
     case stored: {
       /* uncompressed block */
-      unsigned len, nlen;
+      int len, nlen;
+      sinfl_refill(&s);
       sinfl__get(&s,s.bitcnt & 7);
-      len = (unsigned short)sinfl__get(&s,16);
-      nlen = (unsigned short)sinfl__get(&s,16);
-      s.bitptr -= s.bitcnt / 8;
-      s.bitbuf = s.bitcnt = 0;
+      len = sinfl__get(&s,16);
+      nlen = sinfl__get(&s,16);
+      in -= 2; s.bitcnt = 0;
 
-      if ((unsigned short)len != (unsigned short)~nlen)
-        return (int)(out-o);
-      if (len > (e - s.bitptr) || !len)
+      if (len > (e-in) || !len)
         return (int)(out-o);
-
-      memcpy(out, s.bitptr, (size_t)len);
-      s.bitptr += len, out += len;
-      if (last) return (int)(out-o);
+      memcpy(out, in, (size_t)len);
+      in += len, out += len;
       state = hdr;
     } break;
     case fixed: {
@@ -447,9 +443,8 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
 
       /* decode code lengths */
       for (n = 0; n < nlit + ndist;) {
-        int sym = 0;
         sinfl_refill(&s);
-        sym = sinfl_decode(&s, hlens, 7);
+        int sym = sinfl_decode(&s, hlens, 7);
         switch (sym) {default: lens[n++] = (unsigned char)sym; break;
         case 16: for (i=3+sinfl_get(&s,2);i;i--,n++) lens[n]=lens[n-1]; break;
         case 17: for (i=3+sinfl_get(&s,3);i;i--,n++) lens[n]=0; break;
@@ -463,9 +458,8 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
     case blk: {
       /* decompress block */
       while (1) {
-        int sym;
         sinfl_refill(&s);
-        sym = sinfl_decode(&s, s.lits, 10);
+        int sym = sinfl_decode(&s, s.lits, 10);
         if (sym < 256) {
           /* literal */
           if (sinfl_unlikely(out >= oe)) {