Browse Source

image resize 2.11 - fix harmless asan, fix point sample crash

Jeff Roberts (Bellevue) 11 months ago
parent
commit
af07f9d6e9
1 changed files with 78 additions and 50 deletions
  1. 78 50
      stb_image_resize2.h

+ 78 - 50
stb_image_resize2.h

@@ -1,4 +1,4 @@
-/* stb_image_resize2 - v2.10 - public domain image resizing
+/* stb_image_resize2 - v2.11 - public domain image resizing
 
 
    by Jeff Roberts (v2) and Jorge L Rodriguez
    by Jeff Roberts (v2) and Jorge L Rodriguez
    http://github.com/nothings/stb
    http://github.com/nothings/stb
@@ -11,35 +11,6 @@
          #define STB_IMAGE_RESIZE_IMPLEMENTATION
          #define STB_IMAGE_RESIZE_IMPLEMENTATION
       before the #include. That will create the implementation in that file.
       before the #include. That will create the implementation in that file.
 
 
-   PORTING FROM VERSION 1
-
-      The API has changed. You can continue to use the old version of stb_image_resize.h,
-      which is available in the "deprecated/" directory.
-
-      If you're using the old simple-to-use API, porting is straightforward.
-      (For more advanced APIs, read the documentation.)
-
-        stbir_resize_uint8():
-          - call `stbir_resize_uint8_linear`, cast channel count to `stbir_pixel_layout`
-
-        stbir_resize_float():
-          - call `stbir_resize_float_linear`, cast channel count to `stbir_pixel_layout`
-
-        stbir_resize_uint8_srgb():
-          - function name is unchanged
-          - cast channel count to `stbir_pixel_layout`
-          - above is sufficient unless your image has alpha and it's not RGBA/BGRA
-            - in that case, follow the below instructions for stbir_resize_uint8_srgb_edgemode
-
-        stbir_resize_uint8_srgb_edgemode()
-          - switch to the "medium complexity" API
-          - stbir_resize(), very similar API but a few more parameters:
-            - pixel_layout: cast channel count to `stbir_pixel_layout`
-            - data_type:    STBIR_TYPE_UINT8_SRGB
-            - edge:         unchanged (STBIR_EDGE_WRAP, etc.)
-            - filter:       STBIR_FILTER_DEFAULT
-          - which channel is alpha is specified in stbir_pixel_layout, see enum for details
-
    EASY API CALLS:
    EASY API CALLS:
      Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation, clamps to edge.
      Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation, clamps to edge.
 
 
@@ -296,6 +267,34 @@
       ASSERT
       ASSERT
          Define STBIR_ASSERT(boolval) to override assert() and not use assert.h
          Define STBIR_ASSERT(boolval) to override assert() and not use assert.h
 
 
+     PORTING FROM VERSION 1
+        The API has changed. You can continue to use the old version of stb_image_resize.h,
+        which is available in the "deprecated/" directory.
+
+        If you're using the old simple-to-use API, porting is straightforward.
+        (For more advanced APIs, read the documentation.)
+
+          stbir_resize_uint8():
+            - call `stbir_resize_uint8_linear`, cast channel count to `stbir_pixel_layout`
+
+          stbir_resize_float():
+            - call `stbir_resize_float_linear`, cast channel count to `stbir_pixel_layout`
+
+          stbir_resize_uint8_srgb():
+            - function name is unchanged
+            - cast channel count to `stbir_pixel_layout`
+            - above is sufficient unless your image has alpha and it's not RGBA/BGRA
+              - in that case, follow the below instructions for stbir_resize_uint8_srgb_edgemode
+
+          stbir_resize_uint8_srgb_edgemode()
+            - switch to the "medium complexity" API
+            - stbir_resize(), very similar API but a few more parameters:
+              - pixel_layout: cast channel count to `stbir_pixel_layout`
+              - data_type:    STBIR_TYPE_UINT8_SRGB
+              - edge:         unchanged (STBIR_EDGE_WRAP, etc.)
+              - filter:       STBIR_FILTER_DEFAULT
+            - which channel is alpha is specified in stbir_pixel_layout, see enum for details
+
       FUTURE TODOS
       FUTURE TODOS
         *  For polyphase integral filters, we just memcpy the coeffs to dupe
         *  For polyphase integral filters, we just memcpy the coeffs to dupe
            them, but we should indirect and use the same coeff memory.
            them, but we should indirect and use the same coeff memory.
@@ -328,6 +327,9 @@
       Nathan Reed: warning fixes for 1.0
       Nathan Reed: warning fixes for 1.0
 
 
    REVISIONS
    REVISIONS
+      2.11 (2024-09-08) fix harmless asan warnings in 2-channel and 3-channel mode
+                          with AVX-2, fix some weird scaling edge conditions with
+                          point sample mode.
       2.10 (2024-07-27) fix the defines GCC and mingw for loop unroll control,
       2.10 (2024-07-27) fix the defines GCC and mingw for loop unroll control,
                           fix MSVC 32-bit arm half float routines.
                           fix MSVC 32-bit arm half float routines.
       2.09 (2024-06-19) fix the defines for 32-bit ARM GCC builds (was selecting
       2.09 (2024-06-19) fix the defines for 32-bit ARM GCC builds (was selecting
@@ -3247,6 +3249,7 @@ static void stbir__calculate_in_pixel_range( int * first_pixel, int * last_pixel
 
 
   first = (int)(STBIR_FLOORF(in_pixel_influence_lowerbound + 0.5f));
   first = (int)(STBIR_FLOORF(in_pixel_influence_lowerbound + 0.5f));
   last = (int)(STBIR_FLOORF(in_pixel_influence_upperbound - 0.5f));
   last = (int)(STBIR_FLOORF(in_pixel_influence_upperbound - 0.5f));
+  if ( last < first ) last = first; // point sample mode can span a value *right* at 0.5, and cause these to cross
 
 
   if ( edge == STBIR_EDGE_WRAP )
   if ( edge == STBIR_EDGE_WRAP )
   {
   {
@@ -3282,6 +3285,11 @@ static void stbir__calculate_coefficients_for_gather_upsample( float out_filter_
 
 
     stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, out_pixel_center, out_filter_radius, inv_scale, out_shift, input_size, edge );
     stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, out_pixel_center, out_filter_radius, inv_scale, out_shift, input_size, edge );
 
 
+    // make sure we never generate a range larger than our precalculated coeff width
+    //   this only happens in point sample mode, but it's a good safe thing to do anyway
+    if ( ( in_last_pixel - in_first_pixel + 1 ) > coefficient_width )
+      in_last_pixel = in_first_pixel + coefficient_width - 1;
+
     last_non_zero = -1;
     last_non_zero = -1;
     for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
     for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
     {
     {
@@ -3317,19 +3325,22 @@ static void stbir__calculate_coefficients_for_gather_upsample( float out_filter_
   }
   }
 }
 }
 
 
-static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs, int new_pixel, float new_coeff )
+static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs, int new_pixel, float new_coeff, int max_width )
 {
 {
   if ( new_pixel <= contribs->n1 )  // before the end
   if ( new_pixel <= contribs->n1 )  // before the end
   {
   {
     if ( new_pixel < contribs->n0 ) // before the front?
     if ( new_pixel < contribs->n0 ) // before the front?
     {
     {
-      int j, o = contribs->n0 - new_pixel;
-      for ( j = contribs->n1 - contribs->n0 ; j <= 0 ; j-- )
-        coeffs[ j + o ] = coeffs[ j ];
-      for ( j = 1 ; j < o ; j-- )
-        coeffs[ j ] = coeffs[ 0 ];
-      coeffs[ 0 ] = new_coeff;
-      contribs->n0 = new_pixel;
+      if ( ( contribs->n1 - new_pixel + 1 ) <= max_width )
+      { 
+        int j, o = contribs->n0 - new_pixel;
+        for ( j = contribs->n1 - contribs->n0 ; j <= 0 ; j-- )
+          coeffs[ j + o ] = coeffs[ j ];
+        for ( j = 1 ; j < o ; j-- )
+          coeffs[ j ] = coeffs[ 0 ];
+        coeffs[ 0 ] = new_coeff;
+        contribs->n0 = new_pixel;
+      }
     }
     }
     else
     else
     {
     {
@@ -3338,12 +3349,15 @@ static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs,
   }
   }
   else
   else
   {
   {
-    int j, e = new_pixel - contribs->n0;
-    for( j = ( contribs->n1 - contribs->n0 ) + 1 ; j < e ; j++ ) // clear in-betweens coeffs if there are any
-      coeffs[j] = 0;
+    if ( ( new_pixel - contribs->n0 + 1 ) <= max_width )
+    {
+      int j, e = new_pixel - contribs->n0;
+      for( j = ( contribs->n1 - contribs->n0 ) + 1 ; j < e ; j++ ) // clear in-betweens coeffs if there are any
+        coeffs[j] = 0;
 
 
-    coeffs[ e ] = new_coeff;
-    contribs->n1 = new_pixel;
+      coeffs[ e ] = new_coeff;
+      contribs->n1 = new_pixel;
+    }
   }
   }
 }
 }
 
 
@@ -3522,6 +3536,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
 
 
   coeffs = coefficient_group;
   coeffs = coefficient_group;
   contribs = contributors;
   contribs = contributors;
+
   for (n = 0; n < num_contributors; n++)
   for (n = 0; n < num_contributors; n++)
   {
   {
     int i;
     int i;
@@ -3561,7 +3576,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
         int endi = contribs->n1;
         int endi = contribs->n1;
         contribs->n1 = input_last_n1;
         contribs->n1 = input_last_n1;
         for( i = input_size; i <= endi; i++ )
         for( i = input_size; i <= endi; i++ )
-          stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), coeffs[i-start] );
+          stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), coeffs[i-start], coefficient_width );
       }
       }
 
 
       // now check left hand edge
       // now check left hand edge
@@ -3573,7 +3588,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
 
 
         // reinsert the coeffs with it reflected or clamped (insert accumulates, if the coeffs exist)
         // reinsert the coeffs with it reflected or clamped (insert accumulates, if the coeffs exist)
         for( i = -1 ; i > contribs->n0 ; i-- )
         for( i = -1 ; i > contribs->n0 ; i-- )
-          stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), *c-- );
+          stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), *c--, coefficient_width );
         save_n0 = contribs->n0;
         save_n0 = contribs->n0;
         save_n0_coeff = c[0]; // save it, since we didn't do the final one (i==n0), because there might be too many coeffs to hold (before we resize)!
         save_n0_coeff = c[0]; // save it, since we didn't do the final one (i==n0), because there might be too many coeffs to hold (before we resize)!
 
 
@@ -3583,7 +3598,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
           coeffs[i] = coeffs[i-save_n0];
           coeffs[i] = coeffs[i-save_n0];
 
 
         // now that we have shrunk down the contribs, we insert the first one safely
         // now that we have shrunk down the contribs, we insert the first one safely
-        stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( save_n0, input_size ), save_n0_coeff );
+        stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( save_n0, input_size ), save_n0_coeff, coefficient_width );
       }
       }
     }
     }
 
 
@@ -3592,6 +3607,7 @@ static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter
       int diff = contribs->n1 - contribs->n0 + 1;
       int diff = contribs->n1 - contribs->n0 + 1;
       while ( diff && ( coeffs[ diff-1 ] == 0.0f ) )
       while ( diff && ( coeffs[ diff-1 ] == 0.0f ) )
         --diff;
         --diff;
+
       contribs->n1 = contribs->n0 + diff - 1;
       contribs->n1 = contribs->n0 + diff - 1;
 
 
       if ( contribs->n0 <= contribs->n1 )
       if ( contribs->n0 <= contribs->n1 )
@@ -3964,7 +3980,7 @@ static void stbir__calculate_filters( stbir__sampler * samp, stbir__sampler * ot
               }
               }
               else
               else
               {
               {
-                stbir__insert_coeff( scatter_contributors, scatter_coeffs, n, gc );
+                stbir__insert_coeff( scatter_contributors, scatter_coeffs, n, gc, scatter_coefficient_width );
               }
               }
               STBIR_ASSERT( ( scatter_contributors->n1 - scatter_contributors->n0 + 1 ) <= scatter_coefficient_width );
               STBIR_ASSERT( ( scatter_contributors->n1 - scatter_contributors->n0 + 1 ) <= scatter_coefficient_width );
             }
             }
@@ -4810,12 +4826,13 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
     stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*2 );
     stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*2 );
 
 
 #define stbir__1_coeff_remnant( ofs )                \
 #define stbir__1_coeff_remnant( ofs )                \
-    { stbir__simdf t;                                \
+    { stbir__simdf t,d;                              \
     stbir__simdf_load1z( t, hc + (ofs) );            \
     stbir__simdf_load1z( t, hc + (ofs) );            \
+    stbir__simdf_load2( d, decode + (ofs) * 2 );     \
     stbir__simdf_0123to0011( t, t );                 \
     stbir__simdf_0123to0011( t, t );                 \
-    stbir__simdf_mult_mem( t, t, decode+(ofs)*2 );   \
+    stbir__simdf_mult( t, t, d );                    \
     stbir__simdf8_add4( tot0, tot0, t ); }
     stbir__simdf8_add4( tot0, tot0, t ); }
-
+ 
 #define stbir__2_coeff_remnant( ofs )                \
 #define stbir__2_coeff_remnant( ofs )                \
     { stbir__simdf t;                                \
     { stbir__simdf t;                                \
     stbir__simdf_load2( t, hc + (ofs) );             \
     stbir__simdf_load2( t, hc + (ofs) );             \
@@ -7112,6 +7129,11 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
 
 
 #ifdef STBIR__SEPARATE_ALLOCATIONS
 #ifdef STBIR__SEPARATE_ALLOCATIONS
       temp_mem_amt = decode_buffer_size;
       temp_mem_amt = decode_buffer_size;
+
+      #ifdef STBIR_SIMD8
+      if ( effective_channels == 3 )
+        --temp_mem_amt; // avx in 3 channel mode needs one float at the start of the buffer
+      #endif
 #else
 #else
       temp_mem_amt = ( decode_buffer_size + ring_buffer_size + vertical_buffer_size ) * splits;
       temp_mem_amt = ( decode_buffer_size + ring_buffer_size + vertical_buffer_size ) * splits;
 #endif
 #endif
@@ -7217,6 +7239,12 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
         int t, ofs, start;
         int t, ofs, start;
 
 
         ofs = decode_buffer_size / 4;
         ofs = decode_buffer_size / 4;
+
+        #if defined( STBIR__SEPARATE_ALLOCATIONS ) && defined(STBIR_SIMD8)
+        if ( effective_channels == 3 ) 
+          --ofs; // avx in 3 channel mode needs one float at the start of the buffer, so we snap back for clearing
+        #endif
+
         start = ofs - 4;
         start = ofs - 4;
         if ( start < 0 ) start = 0;
         if ( start < 0 ) start = 0;