Parcourir la source

Revert "UPDATE: `DEFLATE` algorithm"

This reverts commit d786af83fc0364e6a773254a9ebc8cc7f1e621d1.
Ray il y a 3 ans
Parent
commit
6259dc4121
2 fichiers modifiés avec 100 ajouts et 119 suppressions
  1. 18 18
      src/external/sdefl.h
  2. 82 101
      src/external/sinfl.h

+ 18 - 18
src/external/sdefl.h

@@ -38,10 +38,10 @@ this file implementation in *one* C or C++ file to prevent collisions.
 | zlib 1.2.11 -1          |    72 MB/s |   307 MB/s |    42298774 | 42.30 |
 | zlib 1.2.11 -6          |    24 MB/s |   313 MB/s |    36548921 | 36.55 |
 | zlib 1.2.11 -9          |    20 MB/s |   314 MB/s |    36475792 | 36.48 |
-| sdefl 1.0 -0            |   127 MB/s |   355 MB/s |    40004116 | 39.88 |
-| sdefl 1.0 -1            |   111 MB/s |   413 MB/s |    38940674 | 38.82 |
-| sdefl 1.0 -5            |    45 MB/s |   436 MB/s |    36577183 | 36.46 |
-| sdefl 1.0 -7            |    38 MB/s |   432 MB/s |    36523781 | 36.41 |
+| sdefl 1.0 -0            |   127 MB/s |   371 MB/s |    40004116 | 39.88 |
+| sdefl 1.0 -1            |   111 MB/s |   398 MB/s |    38940674 | 38.82 |
+| sdefl 1.0 -5            |    45 MB/s |   420 MB/s |    36577183 | 36.46 |
+| sdefl 1.0 -7            |    38 MB/s |   423 MB/s |    36523781 | 36.41 |
 | libdeflate 1.3 -1       |   147 MB/s |   667 MB/s |    39597378 | 39.60 |
 | libdeflate 1.3 -6       |    69 MB/s |   689 MB/s |    36648318 | 36.65 |
 | libdeflate 1.3 -9       |    13 MB/s |   672 MB/s |    35197141 | 35.20 |
@@ -50,20 +50,20 @@ this file implementation in *one* C or C++ file to prevent collisions.
 ### Compression
 Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia):
 
-| File    |   Original | `sdefl 0`    | `sdefl 5`  | `sdefl 7`   |
-| --------| -----------| -------------| ---------- | ------------|
-| dickens | 10.192.446 | 4,260,187    |  3,845,261 |   3,833,657 |
-| mozilla | 51.220.480 | 20,774,706   | 19,607,009 |  19,565,867 |
-| mr      |  9.970.564 | 3,860,531    |  3,673,460 |   3,665,627 |
-| nci     | 33.553.445 | 4,030,283    |  3,094,526 |   3,006,075 |
-| ooffice |  6.152.192 | 3,320,063    |  3,186,373 |   3,183,815 |
-| osdb    | 10.085.684 | 3,919,646    |  3,649,510 |   3,649,477 |
-| reymont |  6.627.202 | 2,263,378    |  1,857,588 |   1,827,237 |
-| samba   | 21.606.400 | 6,121,797    |  5,462,670 |   5,450,762 |
-| sao     |  7.251.944 | 5,612,421    |  5,485,380 |   5,481,765 |
-| webster | 41.458.703 | 13,972,648   | 12,059,432 |  11,991,421 |
-| xml     |  5.345.280 | 886,620      |    674,009 |     662,141 |
-| x-ray   |  8.474.240 | 6,304,655    |  6,244,779 |   6,244,779 |
+| File    |   Original | `sdefl 0`  	| `sdefl 5` 	| `sdefl 7` |
+| :------ | ---------: | -----------------: | ---------: | ----------: |
+| dickens | 10.192.446 |  4,260,187|  3,845,261|   3,833,657 |
+| mozilla | 51.220.480 | 20,774,706 | 19,607,009 |  19,565,867 |
+| mr      |  9.970.564 | 3,860,531 |  3,673,460 |   3,665,627 |
+| nci     | 33.553.445 | 4,030,283 |  3,094,526 |   3,006,075 |
+| ooffice |  6.152.192 | 3,320,063 |  3,186,373 |   3,183,815 |
+| osdb    | 10.085.684 | 3,919,646 |  3,649,510 |   3,649,477 |
+| reymont |  6.627.202 | 2,263,378 |  1,857,588 |   1,827,237 |
+| samba   | 21.606.400 | 6,121,797 |  5,462,670 |   5,450,762 |
+| sao     |  7.251.944 | 5,612,421 |  5,485,380 |   5,481,765 |
+| webster | 41.458.703 | 13,972,648 | 12,059,432 |  11,991,421 |
+| xml     |  5.345.280 | 886,620|    674,009 |     662,141 |
+| x-ray   |  8.474.240 | 6,304,655 |  6,244,779 |   6,244,779 |
 
 ## License
 ```

+ 82 - 101
src/external/sinfl.h

@@ -10,7 +10,7 @@ as needed to keep the implementation as concise as possible.
 - Dual license with either MIT or public domain
 - Small implementation
     - Deflate: 525 LoC
-    - Inflate: 500 LoC
+    - Inflate: 320 LoC
 - Webassembly:
     - Deflate ~3.7 KB (~2.2KB compressed)
     - Inflate ~3.6 KB (~2.2KB compressed)
@@ -39,10 +39,10 @@ this file implementation in *one* C or C++ file to prevent collisions.
 | zlib 1.2.11 -1          |    72 MB/s |   307 MB/s |    42298774 | 42.30 |
 | zlib 1.2.11 -6          |    24 MB/s |   313 MB/s |    36548921 | 36.55 |
 | zlib 1.2.11 -9          |    20 MB/s |   314 MB/s |    36475792 | 36.48 |
-| sdefl 1.0 -0            |   127 MB/s |   355 MB/s |    40004116 | 39.88 |
-| sdefl 1.0 -1            |   111 MB/s |   413 MB/s |    38940674 | 38.82 |
-| sdefl 1.0 -5            |    45 MB/s |   436 MB/s |    36577183 | 36.46 |
-| sdefl 1.0 -7            |    38 MB/s |   432 MB/s |    36523781 | 36.41 |
+| sdefl 1.0 -0            |   127 MB/s |   371 MB/s |    40004116 | 39.88 |
+| sdefl 1.0 -1            |   111 MB/s |   398 MB/s |    38940674 | 38.82 |
+| sdefl 1.0 -5            |    45 MB/s |   420 MB/s |    36577183 | 36.46 |
+| sdefl 1.0 -7            |    38 MB/s |   423 MB/s |    36523781 | 36.41 |
 | libdeflate 1.3 -1       |   147 MB/s |   667 MB/s |    39597378 | 39.60 |
 | libdeflate 1.3 -6       |    69 MB/s |   689 MB/s |    36648318 | 36.65 |
 | libdeflate 1.3 -9       |    13 MB/s |   672 MB/s |    35197141 | 35.20 |
@@ -51,20 +51,20 @@ this file implementation in *one* C or C++ file to prevent collisions.
 ### Compression
 Results on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia):
 
-| File    |   Original | `sdefl 0`    | `sdefl 5`  | `sdefl 7`   |
-| --------| -----------| -------------| ---------- | ------------|
-| dickens | 10.192.446 | 4,260,187    |  3,845,261 |   3,833,657 |
-| mozilla | 51.220.480 | 20,774,706   | 19,607,009 |  19,565,867 |
-| mr      |  9.970.564 | 3,860,531    |  3,673,460 |   3,665,627 |
-| nci     | 33.553.445 | 4,030,283    |  3,094,526 |   3,006,075 |
-| ooffice |  6.152.192 | 3,320,063    |  3,186,373 |   3,183,815 |
-| osdb    | 10.085.684 | 3,919,646    |  3,649,510 |   3,649,477 |
-| reymont |  6.627.202 | 2,263,378    |  1,857,588 |   1,827,237 |
-| samba   | 21.606.400 | 6,121,797    |  5,462,670 |   5,450,762 |
-| sao     |  7.251.944 | 5,612,421    |  5,485,380 |   5,481,765 |
-| webster | 41.458.703 | 13,972,648   | 12,059,432 |  11,991,421 |
-| xml     |  5.345.280 | 886,620      |    674,009 |     662,141 |
-| x-ray   |  8.474.240 | 6,304,655    |  6,244,779 |   6,244,779 |
+| File    |   Original | `sdefl 0`    | `sdefl 5`   | `sdefl 7` |
+| :------ | ---------: | -----------------: | ---------: | ----------: |
+| dickens | 10.192.446 |  4,260,187|  3,845,261|   3,833,657 |
+| mozilla | 51.220.480 | 20,774,706 | 19,607,009 |  19,565,867 |
+| mr      |  9.970.564 | 3,860,531 |  3,673,460 |   3,665,627 |
+| nci     | 33.553.445 | 4,030,283 |  3,094,526 |   3,006,075 |
+| ooffice |  6.152.192 | 3,320,063 |  3,186,373 |   3,183,815 |
+| osdb    | 10.085.684 | 3,919,646 |  3,649,510 |   3,649,477 |
+| reymont |  6.627.202 | 2,263,378 |  1,857,588 |   1,827,237 |
+| samba   | 21.606.400 | 6,121,797 |  5,462,670 |   5,450,762 |
+| sao     |  7.251.944 | 5,612,421 |  5,485,380 |   5,481,765 |
+| webster | 41.458.703 | 13,972,648 | 12,059,432 |  11,991,421 |
+| xml     |  5.345.280 | 886,620|    674,009 |     662,141 |
+| x-ray   |  8.474.240 | 6,304,655 |  6,244,779 |   6,244,779 |
 
 ## License
 ```
@@ -151,7 +151,7 @@ extern int zsinflate(void *out, int cap, const void *in, int size);
 #endif
 
 #ifndef SINFL_NO_SIMD
-#if defined(__x86_64__) || defined(_WIN32) || defined(_WIN64)
+#if __x86_64__ || defined(_WIN32) || defined(_WIN64)
   #include <emmintrin.h>
   #define sinfl_char16 __m128i
   #define sinfl_char16_ld(p) _mm_loadu_si128((const __m128i *)(void*)(p))
@@ -183,18 +183,6 @@ sinfl_read64(const void *p) {
   memcpy(&n, p, 8);
   return n;
 }
-static void
-sinfl_copy64(unsigned char **dst, unsigned char **src) {
-  unsigned long long n;
-  memcpy(&n, *src, 8);
-  memcpy(*dst, &n, 8);
-  *dst += 8, *src += 8;
-}
-static unsigned char*
-sinfl_write64(unsigned char *dst, unsigned long long w) {
-  memcpy(dst, &w, 8);
-  return dst + 8;
-}
 #ifndef SINFL_NO_SIMD
 static unsigned char*
 sinfl_write128(unsigned char *dst, sinfl_char16 w) {
@@ -207,12 +195,25 @@ sinfl_copy128(unsigned char **dst, unsigned char **src) {
   sinfl_char16_str(*dst, n);
   *dst += 16, *src += 16;
 }
+#else
+static unsigned char*
+sinfl_write64(unsigned char *dst, unsigned long long w) {
+  memcpy(dst, &w, 8);
+  return dst + 8;
+}
+static void
+sinfl_copy64(unsigned char **dst, unsigned char **src) {
+  unsigned long long n;
+  memcpy(&n, *src, 8);
+  memcpy(*dst, &n, 8);
+  *dst += 8, *src += 8;
+}
 #endif
 static void
 sinfl_refill(struct sinfl *s) {
   s->bitbuf |= sinfl_read64(s->bitptr) << s->bitcnt;
   s->bitptr += (63 - s->bitcnt) >> 3;
-  s->bitcnt |= 56; /* bitcount in range [56,63] */
+  s->bitcnt |= 56; /* bitcount is in range [56,63] */
 }
 static int
 sinfl_peek(struct sinfl *s, int cnt) {
@@ -221,7 +222,7 @@ sinfl_peek(struct sinfl *s, int cnt) {
   return s->bitbuf & ((1ull << cnt) - 1);
 }
 static void
-sinfl_eat(struct sinfl *s, int cnt) {
+sinfl_consume(struct sinfl *s, int cnt) {
   assert(cnt <= s->bitcnt);
   s->bitbuf >>= cnt;
   s->bitcnt -= cnt;
@@ -229,7 +230,7 @@ sinfl_eat(struct sinfl *s, int cnt) {
 static int
 sinfl__get(struct sinfl *s, int cnt) {
   int res = sinfl_peek(s, cnt);
-  sinfl_eat(s, cnt);
+  sinfl_consume(s, cnt);
   return res;
 }
 static int
@@ -284,7 +285,7 @@ sinfl_build_subtbl(struct sinfl_gen *gen, unsigned *tbl, int tbl_bits,
   while (1) {
     unsigned entry;
     int bit, stride, i;
-    /* start new sub-table */
+    /* start new subtable */
     if ((gen->word & ((1 << tbl_bits)-1)) != sub_prefix) {
       int used = 0;
       sub_prefix = gen->word & ((1 << tbl_bits)-1);
@@ -298,7 +299,7 @@ sinfl_build_subtbl(struct sinfl_gen *gen, unsigned *tbl, int tbl_bits,
       tbl_end = sub_start + (1 << sub_bits);
       tbl[sub_prefix] = (sub_start << 16) | 0x10 | (sub_bits & 0xf);
     }
-    /* fill sub-table */
+    /* fill subtable */
     entry = (*gen->sorted << 16) | ((gen->len - tbl_bits) & 0xf);
     gen->sorted++;
     i = sub_start + (gen->word >> tbl_bits);
@@ -352,17 +353,18 @@ sinfl_build(unsigned *tbl, unsigned char *lens, int tbl_bits, int maxlen,
 }
 static int
 sinfl_decode(struct sinfl *s, const unsigned *tbl, int bit_len) {
-  int idx = sinfl_peek(s, bit_len);
+  sinfl_refill(s);
+  {int idx = sinfl_peek(s, bit_len);
   unsigned key = tbl[idx];
   if (key & 0x10) {
     /* sub-table lookup */
     int len = key & 0x0f;
-    sinfl_eat(s, bit_len);
+    sinfl_consume(s, bit_len);
     idx = sinfl_peek(s, len);
     key = tbl[((key >> 16) & 0xffff) + (unsigned)idx];
   }
-  sinfl_eat(s, key & 0x0f);
-  return (key >> 16) & 0x0fff;
+  sinfl_consume(s, key & 0x0f);
+  return (key >> 16) & 0x0fff;}
 }
 static int
 sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size) {
@@ -400,11 +402,11 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
     } break;
     case stored: {
       /* uncompressed block */
-      int len, nlen;
+      int len;
       sinfl_refill(&s);
       sinfl__get(&s,s.bitcnt & 7);
       len = sinfl__get(&s,16);
-      nlen = sinfl__get(&s,16);
+      //int nlen = sinfl__get(&s,16);   // @raysan5: Unused variable?
       in -= 2; s.bitcnt = 0;
 
       if (len > (e-in) || !len)
@@ -428,58 +430,40 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
       state = blk;
     } break;
     case dyn: {
-      /* dynamic huffman codes */
-      int n, i;
-      unsigned hlens[SINFL_PRE_TBL_SIZE];
-      unsigned char nlens[19] = {0}, lens[288+32];
+        /* dynamic huffman codes */
+        int n, i;
+        unsigned hlens[SINFL_PRE_TBL_SIZE];
+        unsigned char nlens[19] = {0}, lens[288+32];
 
-      sinfl_refill(&s);
-      {int nlit = 257 + sinfl__get(&s,5);
-      int ndist = 1 + sinfl__get(&s,5);
-      int nlen = 4 + sinfl__get(&s,4);
-      for (n = 0; n < nlen; n++)
-        nlens[order[n]] = (unsigned char)sinfl_get(&s,3);
-      sinfl_build(hlens, nlens, 7, 7, 19);
-
-      /* decode code lengths */
-      for (n = 0; n < nlit + ndist;) {
         sinfl_refill(&s);
-        int sym = sinfl_decode(&s, hlens, 7);
-        switch (sym) {default: lens[n++] = (unsigned char)sym; break;
-        case 16: for (i=3+sinfl_get(&s,2);i;i--,n++) lens[n]=lens[n-1]; break;
-        case 17: for (i=3+sinfl_get(&s,3);i;i--,n++) lens[n]=0; break;
-        case 18: for (i=11+sinfl_get(&s,7);i;i--,n++) lens[n]=0; break;}
-      }
-      /* build lit/dist tables */
-      sinfl_build(s.lits, lens, 10, 15, nlit);
-      sinfl_build(s.dsts, lens + nlit, 8, 15, ndist);
-      state = blk;}
+        {int nlit = 257 + sinfl__get(&s,5);
+        int ndist = 1 + sinfl__get(&s,5);
+        int nlen = 4 + sinfl__get(&s,4);
+        for (n = 0; n < nlen; n++)
+          nlens[order[n]] = (unsigned char)sinfl_get(&s,3);
+        sinfl_build(hlens, nlens, 7, 7, 19);
+
+        /* decode code lengths */
+        for (n = 0; n < nlit + ndist;) {
+          int sym = sinfl_decode(&s, hlens, 7);
+          switch (sym) {default: lens[n++] = (unsigned char)sym; break;
+          case 16: for (i=3+sinfl_get(&s,2);i;i--,n++) lens[n]=lens[n-1]; break;
+          case 17: for (i=3+sinfl_get(&s,3);i;i--,n++) lens[n]=0; break;
+          case 18: for (i=11+sinfl_get(&s,7);i;i--,n++) lens[n]=0; break;}
+        }
+        /* build lit/dist tables */
+        sinfl_build(s.lits, lens, 10, 15, nlit);
+        sinfl_build(s.dsts, lens + nlit, 8, 15, ndist);
+        state = blk;}
     } break;
     case blk: {
       /* decompress block */
-      while (1) {
+      int sym = sinfl_decode(&s, s.lits, 10);
+      if (sym < 256) {
+        /* literal */
+        *out++ = (unsigned char)sym;
+      } else if (sym > 256) {sym -= 257; /* match symbol */
         sinfl_refill(&s);
-        int sym = sinfl_decode(&s, s.lits, 10);
-        if (sym < 256) {
-          /* literal */
-          if (sinfl_unlikely(out >= oe)) {
-            return (int)(out-o);
-          }
-          *out++ = (unsigned char)sym;
-          sym = sinfl_decode(&s, s.lits, 10);
-          if (sym < 256) {
-            *out++ = (unsigned char)sym;
-            continue;
-          }
-        }
-        if (sinfl_unlikely(sym == 256)) {
-          /* end of block */
-          if (last) return (int)(out-o);
-          state = hdr;
-          break;
-        }
-        /* match */
-        sym -= 257;
         {int len = sinfl__get(&s, lbits[sym]) + lbase[sym];
         int dsym = sinfl_decode(&s, s.dsts, 8);
         int offs = sinfl__get(&s, dbits[dsym]) + dbase[dsym];
@@ -492,17 +476,11 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
 #ifndef SINFL_NO_SIMD
         if (sinfl_likely(oe - out >= 16 * 3)) {
           if (offs >= 16) {
-            /* simd copy match */
+            /* copy match */
             sinfl_copy128(&dst, &src);
             sinfl_copy128(&dst, &src);
             do sinfl_copy128(&dst, &src);
             while (dst < out);
-          } else if (offs >= 8) {
-            /* word copy match */
-            sinfl_copy64(&dst, &src);
-            sinfl_copy64(&dst, &src);
-            do sinfl_copy64(&dst, &src);
-            while (dst < out);
           } else if (offs == 1) {
             /* rle match copying */
             sinfl_char16 w = sinfl_char16_char(src[0]);
@@ -511,7 +489,6 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
             do dst = sinfl_write128(dst, w);
             while (dst < out);
           } else {
-            /* byte copy match */
             *dst++ = *src++;
             *dst++ = *src++;
             do *dst++ = *src++;
@@ -521,7 +498,7 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
 #else
         if (sinfl_likely(oe - out >= 3 * 8 - 3)) {
           if (offs >= 8) {
-            /* word copy match */
+            /* copy match */
             sinfl_copy64(&dst, &src);
             sinfl_copy64(&dst, &src);
             do sinfl_copy64(&dst, &src);
@@ -536,7 +513,6 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
             do dst = sinfl_write64(dst, w);
             while (dst < out);
           } else {
-            /* byte copy match */
             *dst++ = *src++;
             *dst++ = *src++;
             do *dst++ = *src++;
@@ -548,8 +524,13 @@ sinfl_decompress(unsigned char *out, int cap, const unsigned char *in, int size)
           *dst++ = *src++;
           *dst++ = *src++;
           do *dst++ = *src++;
-          while (dst < out);
-        }}
+          while (dst < out);}
+        }
+      } else {
+        /* end of block */
+        if (last) return (int)(out-o);
+        state = hdr;
+        break;
       }
     } break;}
   }