/// @param bmType This is a file extension to describe the type of the data [i.e. "png" for PNG file, etc]
/// @param ioStream The stream to read from
- /// @param compressionLevel Image format-specific compression level. If set to U32_MAX, we use the default compression defined when the format was registered.
+STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
+STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
+static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
+{
+ int i;
+ for (i=0; i < ncomp; ++i) {
+ if (z->img_comp[i].raw_data) {
+ STBI_FREE(z->img_comp[i].raw_data);
+ z->img_comp[i].raw_data = NULL;
+ z->img_comp[i].data = NULL;
+ }
+ if (z->img_comp[i].raw_coeff) {
+ STBI_FREE(z->img_comp[i].raw_coeff);
+ z->img_comp[i].raw_coeff = 0;
+ z->img_comp[i].coeff = 0;
+ }
+ if (z->img_comp[i].linebuf) {
+ STBI_FREE(z->img_comp[i].linebuf);
+ z->img_comp[i].linebuf = NULL;
+ }
+ }
+ return why;
+}
+
+static int stbi__process_frame_header(stbi__jpeg *z, int scan)
+{
+ stbi__context *s = z->s;
+ int Lf,p,i,q, h_max=1,v_max=1,c;
+ Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
+ p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
+ s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
+static int stbi__parse_huffman_block(stbi__zbuf *a)
+{
+ char *zout = a->zout;
+ for(;;) {
+ int z = stbi__zhuffman_decode(a, &a->z_length);
+ if (z < 256) {
+ if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
+ if (zout >= a->zout_end) {
+ if (!stbi__zexpand(a, zout, 1)) return 0;
+ zout = a->zout;
+ }
+ *zout++ = (char) z;
+ } else {
+ stbi_uc *p;
+ int len,dist;
+ if (z == 256) {
+ a->zout = zout;
+ return 1;
+ }
+ if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
+ z -= 257;
+ len = stbi__zlength_base[z];
+ if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
+ z = stbi__zhuffman_decode(a, &a->z_distance);
+ if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
+ dist = stbi__zdist_base[z];
+ if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
+static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
+{
+ int bytes = (depth == 16? 2 : 1);
+ stbi__context *s = a->s;
+ stbi__uint32 i,j,stride = x*out_n*bytes;
+ stbi__uint32 img_len, img_width_bytes;
+ int k;
+ int img_n = s->img_n; // copy it into a local for later
+STBIRDEF int stbir_set_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ); // sets both sub-regions (full regions by default)
+STBIRDEF int stbir_set_input_subrect( STBIR_RESIZE * resize, double s0, double t0, double s1, double t1 ); // sets input sub-region (full region by default)
+STBIRDEF int stbir_set_output_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ); // sets output sub-region (full region by default)
+
+// when inputting AND outputting non-premultiplied alpha pixels, we use a slower but higher quality technique
+// that fills the zero alpha pixel's RGB values with something plausible. If you don't care about areas of
+// zero alpha, you can call this function to get about a 25% speed improvement for STBIR_RGBA to STBIR_RGBA
+// types of resizes.
+STBIRDEF int stbir_set_non_pm_alpha_speed_over_quality( STBIR_RESIZE * resize, int non_pma_alpha_speed_over_quality );
+ int ring_buffer_length_bytes; // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter)
+ int ring_buffer_num_entries; // Total number of entries in the ring buffer.
+
+ stbir_datatype input_type;
+ stbir_datatype output_type;
+
+ stbir_input_callback * in_pixels_cb;
+ void * user_data;
+ stbir_output_callback * out_pixels_cb;
+
+ stbir__extents scanline_extents;
+
+ void * alloced_mem;
+ stbir__per_split_info * split_info; // by default 1, but there will be N of these allocated based on the thread init you did
+#define STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT 32 // when downsampling and <= 32 scanlines of buffering, use gather. gather used down to 1/8th scaling for 25% win.
+#endif
+
+// restrict pointers for the output pointers
+#if defined( _MSC_VER ) && !defined(__clang__)
+ #define STBIR_STREAMOUT_PTR( star ) star __restrict
+ #define STBIR_NO_UNROLL( ptr ) __assume(ptr) // this oddly keeps msvc from unrolling a loop
+#elif defined( __clang__ )
+ #define STBIR_STREAMOUT_PTR( star ) star __restrict__
+ #define stbir__simdf_load2( out, ptr ) (out) = _mm_castsi128_ps( _mm_loadl_epi64( (__m128i*)(ptr)) ) // top values can be random (not denormal or nan for perf)
+ #define stbir__simdf_load2z( out, ptr ) (out) = _mm_castsi128_ps( _mm_loadl_epi64( (__m128i*)(ptr)) ) // top values must be zero
+ a = _mm256_permute4x64_epi64( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64(_mm256_castsi128_si256(ireg),(0<<0)+(2<<2)+(1<<4)+(3<<6)), zero ),(0<<0)+(2<<2)+(1<<4)+(3<<6)); \
+ #define stbir__simdf8_load2( out, ptr ) (out) = _mm256_castsi256_ps(_mm256_castsi128_si256( _mm_loadl_epi64( (__m128i*)(ptr)) )) // top values can be random (not denormal or nan for perf)
+ #define stbir__simdf_load2( out, ptr ) (out) = vcombine_f32( vld1_f32( (float const*)(ptr) ), vcreate_f32(0) ) // top values can be random (not denormal or nan for perf)
+ #define stbir__simdf_load2z( out, ptr ) (out) = vcombine_f32( vld1_f32( (float const*)(ptr) ), vcreate_f32(0) ) // top values must be zero
+ #define stbir__simdf_load1( out, ptr ) (out) = wasm_v128_load32_splat( (void const*)(ptr) ) // top values can be random (not denormal or nan for perf)
+ #define stbir__simdf_load2( out, ptr ) (out) = wasm_v128_load64_splat( (void const*)(ptr) ) // top values can be random (not denormal or nan for perf)
+ #define stbir__simdf_load2z( out, ptr ) (out) = wasm_v128_load64_zero( (void const*)(ptr) ) // top values must be zero
+static int stbir__pack_coefficients( int num_contributors, stbir__contributors* contributors, float * coefficents, int coefficient_width, int widest, int row_width )
+ // convert the pixels info the float decode_buffer, (we index from end_decode, so that when channels<effective_channels, we are right justified in the buffer)
+static void stbir__resample_vertical_gather(stbir__info const * stbir_info, stbir__per_split_info* split_info, int n, int contrib_n0, int contrib_n1, float const * vertical_coefficients )
+ // for non-edge-wrap modes, we never read over the edge, so clamp
+ if ( range->n0 < 0 )
+ range->n0 = 0;
+ if ( range->n1 >= input_full_size )
+ range->n1 = input_full_size - 1;
+ }
+}
+
+static void stbir__get_split_info( stbir__per_split_info* split_info, int splits, int output_height, int vertical_pixel_margin, int input_full_height )
+{
+ int i, cur;
+ int left = output_height;
+
+ cur = 0;
+ for( i = 0 ; i < splits ; i++ )
+ {
+ int each;
+ split_info[i].start_output_y = cur;
+ each = left / ( splits - i );
+ split_info[i].end_output_y = cur + each;
+ cur += each;
+ left -= each;
+
+ // scatter range (updated to minimum as you run it)
+// Figure out whether to scale along the horizontal or vertical first.
+// This only *super* important when you are scaling by a massively
+// different amount in the vertical vs the horizontal (for example, if
+// you are scaling by 2x in the width, and 0.5x in the height, then you
+// want to do the vertical scale first, because it's around 3x faster
+// in that order.
+//
+// In more normal circumstances, this makes a 20-40% differences, so
+// it's good to get right, but not critical. The normal way that you
+// decide which direction goes first is just figuring out which
+// direction does more multiplies. But with modern CPUs with their
+// fancy caches and SIMD and high IPC abilities, so there's just a lot
+// more that goes into it.
+//
+// My handwavy sort of solution is to have an app that does a whole
+// bunch of timing for both vertical and horizontal first modes,
+// and then another app that can read lots of these timing files
+// and try to search for the best weights to use. Dotimings.c
+// is the app that does a bunch of timings, and vf_train.c is the
+// app that solves for the best weights (and shows how well it
+// does currently).
+
+static int stbir__should_do_vertical_first( float weights_table[STBIR_RESIZE_CLASSIFICATIONS][4], int horizontal_filter_pixel_width, float horizontal_scale, int horizontal_output_size, int vertical_filter_pixel_width, float vertical_scale, int vertical_output_size, int is_gather, STBIR__V_FIRST_INFO * info )
+ // but if the number of coeffs <= 12, use another set of special cases. <=12 coeffs is any enlarging resize, or shrinking resize down to about 1/3 size
+ // a few of the horizontal gather functions read one dword past the end (but mask it out), so put in a normal value so no snans or denormals accidentally sneak in
+ for( i = 0 ; i < splits ; i++ )
+ {
+ int width, ofs;
+
+ // find the right most span
+ if ( info->scanline_extents.spans[0].n1 > info->scanline_extents.spans[1].n1 )
+static void stbir__clip( int * outx, int * outsubw, int outw, double * u0, double * u1 )
+{
+ double per, adj;
+ int over;
+
+ // do left/top edge
+ if ( *outx < 0 )
+ {
+ per = ( (double)*outx ) / ( (double)*outsubw ); // is negative
+ adj = per * ( *u1 - *u0 );
+ *u0 -= adj; // increases u0
+ *outx = 0;
+ }
+
+ // do right/bot edge
+ over = outw - ( *outx + *outsubw );
+ if ( over < 0 )
+ {
+ per = ( (double)over ) / ( (double)*outsubw ); // is negative
+ adj = per * ( *u1 - *u0 );
+ *u1 += adj; // decrease u1
+ *outsubw = outw - *outx;
+ }
+}
+
+// converts a double to a rational that has less than one float bit of error (returns 0 if unable to do so)
+static int stbir__double_to_rational(double f, stbir_uint32 limit, stbir_uint32 *numer, stbir_uint32 *denom, int limit_denom ) // limit_denom (1) or limit numer (0)
+{
+ double err;
+ stbir_uint64 top, bot;
+ stbir_uint64 numer_last = 0;
+ stbir_uint64 denom_last = 1;
+ stbir_uint64 numer_estimate = 1;
+ stbir_uint64 denom_estimate = 0;
+
+ // scale to past float error range
+ top = (stbir_uint64)( f * (double)(1 << 25) );
+ bot = 1 << 25;
+
+ // keep refining, but usually stops in a few loops - usually 5 for bad cases
+ for(;;)
+ {
+ stbir_uint64 est, temp;
+
+ // hit limit, break out and do best full range estimate
+static int stbir__calculate_region_transform( stbir__scale_info * scale_info, int output_full_range, int * output_offset, int output_sub_range, int input_full_range, double input_s0, double input_s1 )
+STBIRDEF int stbir_set_output_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ) // sets input region (full region by default)
+static int stbir__check_output_stuff( void ** ret_ptr, int * ret_pitch, void * output_pixels, int type_size, int output_w, int output_h, int output_stride_in_bytes, stbir_internal_pixel_layout pixel_layout )
+STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
+STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
+
+STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
+static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono)
+static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...)
+static void stbiw__wpcrc(unsigned char **data, int len)
+{
+ unsigned int crc = stbiw__crc32(*data - len - 4, len+4);
+ stbiw__wp32(*data, crc);
+}
+
+static unsigned char stbiw__paeth(int a, int b, int c)
+{
+ int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c);
+ if (pa <= pb && pa <= pc) return STBIW_UCHAR(a);
+ if (pb <= pc) return STBIW_UCHAR(b);
+ return STBIW_UCHAR(c);
+}
+
+// @OPTIMIZE: provide an option that always forces left-predict or paeth predict
+static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer)
+ // The rotator is modified from fig 4-8 to avoid extra negations.
+ z5 = (tmp10 - tmp12) * 0.382683433f; // c6
+ z2 = tmp10 * 0.541196100f + z5; // c2-c6
+ z4 = tmp12 * 1.306562965f + z5; // c2+c6
+ z3 = tmp11 * 0.707106781f; // c4
+
+ z11 = tmp7 + z3; // phase 5
+ z13 = tmp7 - z3;
+
+ *d5p = z13 + z2; // phase 6
+ *d3p = z13 - z2;
+ *d1p = z11 + z4;
+ *d7p = z11 - z4;
+
+ *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6;
+}
+
+static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {
+ int tmp1 = val < 0 ? -val : val;
+ val = val < 0 ? val-1 : val;
+ bits[1] = 1;
+ while(tmp1 >>= 1) {
+ ++bits[1];
+ }
+ bits[0] = val & ((1<<bits[1])-1);
+}
+
+static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt, float *CDU, int du_stride, float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) {
+ const unsigned short EOB[2] = { HTAC[0x00][0], HTAC[0x00][1] };
+ const unsigned short M16zeroes[2] = { HTAC[0xF0][0], HTAC[0xF0][1] };