소스 검색

stb_image: New Paeth filter

This formulation is equivalent to the original (reference)
implementation but runs _significantly_ faster - this speeds up
the filtering portion of a Paeth-heavy 8192x8192 16-bit/channel
image by a factor of more than 2 on a Zen2 CPU.

I'm investigating doing a more thorough restructuring of this pass,
but this seems like a good first step.
Fabian Giesen 2 년 전
부모
커밋
3aa1744a29
2개의 파일 변경됨: 56개의 추가, 7개의 삭제
  1. 9 7
      stb_image.h
  2. 47 0
      tests/test_png_paeth.c

+ 9 - 7
stb_image.h

@@ -4654,13 +4654,15 @@ static stbi_uc first_row_filter[5] =
 
 static int stbi__paeth(int a, int b, int c)
 {
-   int p = a + b - c;
-   int pa = abs(p-a);
-   int pb = abs(p-b);
-   int pc = abs(p-c);
-   if (pa <= pb && pa <= pc) return a;
-   if (pb <= pc) return b;
-   return c;
+   // This formulation looks very different from the reference in the PNG spec, but is
+   // actually equivalent and has favorable data dependencies and admits straightforward
+   // generation of branch-free code, which helps performance significantly.
+   int thresh = c*3 - (a + b);          // single threshold shared by both comparisons below
+   int lo = a < b ? a : b;              // smaller of a and b
+   int hi = a < b ? b : a;              // larger of a and b
+   int t0 = (hi <= thresh) ? lo : c;    // lo when hi <= thresh, otherwise c
+   int t1 = (thresh <= lo) ? hi : t0;   // hi when thresh <= lo; this test overrides t0
+   return t1;
 }
 
 static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; // nonzero only at indices 1, 2, 4, 8; presumably indexed by bit depth - TODO confirm at use sites

+ 47 - 0
tests/test_png_paeth.c

@@ -0,0 +1,47 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+// Reference Paeth filter as per the PNG spec: returns whichever of a, b, c is nearest to p = a + b - c.
+static int ref_paeth(int a, int b, int c)
+{
+   int p = a + b - c;                    // initial estimate
+   int pa = abs(p-a);                    // distance from the estimate to a
+   int pb = abs(p-b);                    // distance from the estimate to b
+   int pc = abs(p-c);                    // distance from the estimate to c
+   if (pa <= pb && pa <= pc) return a;   // ties are resolved in favor of a first...
+   if (pb <= pc) return b;               // ...then b...
+   return c;                             // ...and c only when it is strictly nearest
+}
+
+// Optimized Paeth filter; main() below checks it against ref_paeth for every byte-valued (a, b, c).
+static int opt_paeth(int a, int b, int c)
+{
+   int thresh = c*3 - (a + b);          // single threshold shared by both comparisons below
+   int lo = a < b ? a : b;              // smaller of a and b
+   int hi = a < b ? b : a;              // larger of a and b
+   int t0 = (hi <= thresh) ? lo : c;    // lo when hi <= thresh, otherwise c
+   int t1 = (thresh <= lo) ? hi : t0;   // hi when thresh <= lo; this test overrides t0
+   return t1;
+}
+
+int main()
+{
+   // Exhaustively check that both functions agree for all byte inputs a, b, c in [0,255].
+   for (int i = 0; i < (1 << 24); ++i) {   // i packs a, b and c as three 8-bit fields
+      int a = i & 0xff;                    // bits 0-7
+      int b = (i >> 8) & 0xff;             // bits 8-15
+      int c = (i >> 16) & 0xff;            // bits 16-23
+
+      int ref = ref_paeth(a, b, c);
+      int opt = opt_paeth(a, b, c);
+      if (ref != opt) {
+         fprintf(stderr, "mismatch at a=%3d b=%3d c=%3d: ref=%3d opt=%3d\n", a, b, c, ref, opt);
+         return 1;   // stop at the first mismatch with a nonzero exit status
+      }
+   }
+
+   printf("all ok!\n");
+   return 0;   // every combination matched
+}
+
+// vim:sw=3:sts=3:et