Browse Source

Optimization attempts; no meaningful performance change

Sean Barrett 11 years ago
parent
commit
0fc13e997b
2 changed files with 18 additions and 16 deletions
  1. 12 9
      stb_image_resize.h
  2. 6 7
      tests/resample_test.cpp

+ 12 - 9
stb_image_resize.h

@@ -1903,6 +1903,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in
 
     memset(encode_buffer, 0, output_w * sizeof(float) * channels);
 
+    // I tried reblocking this for better cache usage of encode_buffer
+    // (using x_outer, k, x_inner), but it lost speed. -- stb
+
     coefficient_counter = 0;
     switch (channels) {
         case 1:
@@ -1911,9 +1914,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in
                 int coefficient_index = coefficient_counter++;
                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
-                for (x = 0; x < output_w; x++)
+                for (x = 0; x < output_w; ++x)
                 {
-                    int in_pixel_index = x * channels;
+                    int in_pixel_index = x * 1;
                     encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
                 }
             }
@@ -1924,9 +1927,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in
                 int coefficient_index = coefficient_counter++;
                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
-                for (x = 0; x < output_w; x++)
+                for (x = 0; x < output_w; ++x)
                 {
-                    int in_pixel_index = x * channels;
+                    int in_pixel_index = x * 2;
                     encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
                     encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
                 }
@@ -1938,9 +1941,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in
                 int coefficient_index = coefficient_counter++;
                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
-                for (x = 0; x < output_w; x++)
+                for (x = 0; x < output_w; ++x)
                 {
-                    int in_pixel_index = x * channels;
+                    int in_pixel_index = x * 3;
                     encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
                     encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
                     encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
@@ -1953,9 +1956,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in
                 int coefficient_index = coefficient_counter++;
                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
-                for (x = 0; x < output_w; x++)
+                for (x = 0; x < output_w; ++x)
                 {
-                    int in_pixel_index = x * channels;
+                    int in_pixel_index = x * 4;
                     encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient;
                     encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient;
                     encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient;
@@ -1969,7 +1972,7 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in
                 int coefficient_index = coefficient_counter++;
                 float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
                 float coefficient = vertical_coefficients[coefficient_group + coefficient_index];
-                for (x = 0; x < output_w; x++)
+                for (x = 0; x < output_w; ++x)
                 {
                     int in_pixel_index = x * channels;
                     int c;

+ 6 - 7
tests/resample_test.cpp

@@ -132,8 +132,8 @@ static void resizer(int argc, char **argv)
 	int n;
 	int out_w, out_h;
 	input_pixels = stbi_load(argv[1], &w, &h, &n, 0);
-	out_w = w/4;
-	out_h = h/4;
+	out_w = w*3;
+	out_h = h*3;
 	output_pixels = (unsigned char*) malloc(out_w*out_h*n);
 	//stbir_resize_uint8_srgb(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, n, -1,0);
 	stbir_resize_uint8(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, n);
@@ -148,8 +148,7 @@ static void performance(int argc, char **argv)
 	int w, h, count;
 	int n, i;
 	int out_w, out_h, srgb=1;
-	input_pixels = stbi_load(argv[1], &w, &h, &n, 4);
-    n=4;
+	input_pixels = stbi_load(argv[1], &w, &h, &n, 0);
     #if 0
     out_w = w/4; out_h = h/4; count=100; // 1
     #elif 0
@@ -159,15 +158,15 @@ static void performance(int argc, char **argv)
     #elif 0
     out_w = w*3; out_h = h*3; count=2; srgb=0; // 4
     #else
-    out_w = w*3; out_h = h*3; count=1; // 5   // this is dominated by linear->sRGB conversion
+    out_w = w*3; out_h = h*3; count=2; // 5   // this is dominated by linear->sRGB conversion
     #endif
 
 	output_pixels = (unsigned char*) malloc(out_w*out_h*n);
     for (i=0; i < count; ++i)
         if (srgb)
-	        stbir_resize_uint8_srgb(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, n, 3,0);
+	        stbir_resize_uint8_srgb(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, n,-1,0);
         else
-	        stbir_resize(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, STBIR_TYPE_UINT8, n, 3, 0, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, STBIR_COLORSPACE_LINEAR, NULL);
+	        stbir_resize(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, STBIR_TYPE_UINT8, n,-1, 0, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, STBIR_COLORSPACE_LINEAR, NULL);
 	exit(0);
 }