Browse Source

replace int(floor()) with int(), since that produces same results for non-negative values and is much faster on 32-bit x86

Sean Barrett 11 years ago
parent
commit
9a6af9a8d3
3 changed files with 98 additions and 95 deletions
  1. 94 92
      stb_image_resize.h
  2. 1 1
      tests/resample_test.cpp
  3. 3 2
      tests/resize.dsp

+ 94 - 92
stb_image_resize.h

@@ -1607,128 +1607,130 @@ static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void
         }
     }
 
+    #define STBIR__ROUND_INT(f)    ((int) ((f)+0.5)) //#define STBIR__ROUND_INT(f)  (floor((f)+0.5))
+
     switch (decode)
     {
-    case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
-        for (x=0; x < num_pixels; ++x)
-        {
-            int pixel_index = x*channels;
-
-            for (n = 0; n < channels; n++)
+        case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
+            for (x=0; x < num_pixels; ++x)
             {
-                int index = pixel_index + n;
-                ((unsigned char*)output_buffer)[index] = (unsigned char)(floor(stbir__saturate(encode_buffer[index]) * 255 + 0.5f));
-            }
-        }
-        break;
+                int pixel_index = x*channels;
 
-    case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
-        for (x=0; x < num_pixels; ++x)
-        {
-            int pixel_index = x*channels;
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((unsigned char*)output_buffer)[index] = (unsigned char) STBIR__ROUND_INT(stbir__saturate(encode_buffer[index]) * 255);
+                }
+            }
+            break;
 
-            for (n = 0; n < channels; n++)
+        case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
+            for (x=0; x < num_pixels; ++x)
             {
-                int index = pixel_index + n;
-                ((unsigned char*)output_buffer)[index] = stbir__linear_to_srgb_uchar(encode_buffer[index]);
-            }
+                int pixel_index = x*channels;
 
-            if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
-                ((unsigned char*)output_buffer)[pixel_index + alpha_channel] = (unsigned char)(floor(stbir__saturate(encode_buffer[pixel_index + alpha_channel]) * 255+0.5f));
-        }
-        break;
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((unsigned char*)output_buffer)[index] = stbir__linear_to_srgb_uchar(encode_buffer[index]);
+                }
 
-    case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
-        for (x=0; x < num_pixels; ++x)
-        {
-            int pixel_index = x*channels;
+                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+                    ((unsigned char*)output_buffer)[pixel_index + alpha_channel] = (unsigned char) STBIR__ROUND_INT(stbir__saturate(encode_buffer[pixel_index + alpha_channel]) * 255);
+            }
+            break;
 
-            for (n = 0; n < channels; n++)
+        case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
+            for (x=0; x < num_pixels; ++x)
             {
-                int index = pixel_index + n;
-                ((unsigned short*)output_buffer)[index] = (unsigned short)(floor(stbir__saturate(encode_buffer[index]) * 65535+0.5f));
-            }
-        }
-        break;
+                int pixel_index = x*channels;
 
-    case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
-        for (x=0; x < num_pixels; ++x)
-        {
-            int pixel_index = x*channels;
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((unsigned short*)output_buffer)[index] = (unsigned short)STBIR__ROUND_INT(stbir__saturate(encode_buffer[index]) * 65535);
+                }
+            }
+            break;
 
-            for (n = 0; n < channels; n++)
+        case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
+            for (x=0; x < num_pixels; ++x)
             {
-                int index = pixel_index + n;
-                ((unsigned short*)output_buffer)[index] = (unsigned short)(floor(stbir__linear_to_srgb(stbir__saturate(encode_buffer[index])) * 65535 + 0.5f));
-            }
+                int pixel_index = x*channels;
 
-            if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
-                ((unsigned short*)output_buffer)[pixel_index + alpha_channel] = (unsigned short)(floor(stbir__saturate(encode_buffer[pixel_index + alpha_channel]) * 65535 + 0.5f));
-        }
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((unsigned short*)output_buffer)[index] = (unsigned short)STBIR__ROUND_INT(stbir__linear_to_srgb(stbir__saturate(encode_buffer[index])) * 65535);
+                }
 
-        break;
+                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+                    ((unsigned short*)output_buffer)[pixel_index + alpha_channel] = (unsigned short)STBIR__ROUND_INT(stbir__saturate(encode_buffer[pixel_index + alpha_channel]) * 65535);
+            }
 
-    case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
-        for (x=0; x < num_pixels; ++x)
-        {
-            int pixel_index = x*channels;
+            break;
 
-            for (n = 0; n < channels; n++)
+        case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
+            for (x=0; x < num_pixels; ++x)
             {
-                int index = pixel_index + n;
-                ((unsigned int*)output_buffer)[index] = (unsigned int)(floor(((double)stbir__saturate(encode_buffer[index])) * 4294967295 + 0.5f));
-            }
-        }
-        break;
+                int pixel_index = x*channels;
 
-    case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
-        for (x=0; x < num_pixels; ++x)
-        {
-            int pixel_index = x*channels;
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_INT(((double)stbir__saturate(encode_buffer[index])) * 4294967295);
+                }
+            }
+            break;
 
-            for (n = 0; n < channels; n++)
+        case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
+            for (x=0; x < num_pixels; ++x)
             {
-                int index = pixel_index + n;
-                ((unsigned int*)output_buffer)[index] = (unsigned int)(floor(((double)stbir__linear_to_srgb(stbir__saturate(encode_buffer[index]))) * 4294967295 + 0.5f));
-            }
+                int pixel_index = x*channels;
 
-            if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
-                ((unsigned int*)output_buffer)[pixel_index + alpha_channel] = (unsigned int)(floor(((double)stbir__saturate(encode_buffer[pixel_index + alpha_channel])) * 4294967295 + 0.5f));
-        }
-        break;
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((unsigned int*)output_buffer)[index] = (unsigned int)STBIR__ROUND_INT(((double)stbir__linear_to_srgb(stbir__saturate(encode_buffer[index]))) * 4294967295);
+                }
 
-    case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
-        for (x=0; x < num_pixels; ++x)
-        {
-            int pixel_index = x*channels;
+                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+                    ((unsigned int*)output_buffer)[pixel_index + alpha_channel] = (unsigned int)STBIR__ROUND_INT(((double)stbir__saturate(encode_buffer[pixel_index + alpha_channel])) * 4294967295);
+            }
+            break;
 
-            for (n = 0; n < channels; n++)
+        case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
+            for (x=0; x < num_pixels; ++x)
             {
-                int index = pixel_index + n;
-                ((float*)output_buffer)[index] = encode_buffer[index];
-            }
-        }
-        break;
+                int pixel_index = x*channels;
 
-    case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
-        for (x=0; x < num_pixels; ++x)
-        {
-            int pixel_index = x*channels;
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((float*)output_buffer)[index] = encode_buffer[index];
+                }
+            }
+            break;
 
-            for (n = 0; n < channels; n++)
+        case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
+            for (x=0; x < num_pixels; ++x)
             {
-                int index = pixel_index + n;
-                ((float*)output_buffer)[index] = stbir__linear_to_srgb(encode_buffer[index]);
-            }
+                int pixel_index = x*channels;
 
-            if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
-                ((float*)output_buffer)[pixel_index + alpha_channel] = encode_buffer[pixel_index + alpha_channel];
-        }
-        break;
+                for (n = 0; n < channels; n++)
+                {
+                    int index = pixel_index + n;
+                    ((float*)output_buffer)[index] = stbir__linear_to_srgb(encode_buffer[index]);
+                }
 
-    default:
-        STBIR__UNIMPLEMENTED("Unknown type/colorspace/channels combination.");
-        break;
+                if (!(stbir_info->flags&STBIR_FLAG_ALPHA_USES_COLORSPACE))
+                    ((float*)output_buffer)[pixel_index + alpha_channel] = encode_buffer[pixel_index + alpha_channel];
+            }
+            break;
+
+        default:
+            STBIR__UNIMPLEMENTED("Unknown type/colorspace/channels combination.");
+            break;
     }
 }
 

+ 1 - 1
tests/resample_test.cpp

@@ -181,7 +181,7 @@ int main(int argc, char** argv)
 	int out_w, out_h, out_stride;
 
 	//resizer(argc, argv);
-    //performance(argc, argv);
+    performance(argc, argv);
 
 #if 1
 	test_suite(argc, argv);

+ 3 - 2
tests/resize.dsp

@@ -39,9 +39,10 @@ RSC=rc.exe
 # PROP Use_Debug_Libraries 0
 # PROP Output_Dir "Release"
 # PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /W3 /GX /Z7 /O2 /I ".." /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
 # ADD BASE RSC /l 0x409 /d "NDEBUG"
 # ADD RSC /l 0x409 /d "NDEBUG"
 BSC32=bscmake.exe
@@ -49,7 +50,7 @@ BSC32=bscmake.exe
 # ADD BSC32 /nologo
 LINK32=link.exe
 # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386
 
 !ELSEIF  "$(CFG)" == "resize - Win32 Debug"