// stbirtest.c
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. //#define HEAVYTM
  5. #include "tm.h"
  6. #ifdef RADUSETM3
  7. tm_api * g_tm_api;
  8. //#define PROFILE_MODE
  9. #endif
  10. #include <math.h>
  11. #ifdef _MSC_VER
  12. #define stop() __debugbreak()
  13. #include <windows.h>
  14. #define int64 __int64
  15. #define uint64 unsigned __int64
  16. #else
  17. #define stop() __builtin_trap()
  18. #define int64 long long
  19. #define uint64 unsigned long long
  20. #endif
  21. #ifdef _MSC_VER
  22. #pragma warning(disable:4127)
  23. #endif
  24. //#define NOCOMP
  25. //#define PROFILE_NEW_ONLY
  26. //#define PROFILE_MODE
  27. #if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(__SSE2__) || defined(STBIR_SSE) || defined( _M_IX86_FP ) || defined(__i386) || defined( __i386__ ) || defined( _M_IX86 ) || defined( _X86_ )
  28. #ifdef _MSC_VER
  29. uint64 __rdtsc();
  30. #define __cycles() __rdtsc()
  31. #else // non msvc
  32. static inline uint64 __cycles()
  33. {
  34. unsigned int lo, hi;
  35. asm volatile ("rdtsc" : "=a" (lo), "=d" (hi) );
  36. return ( ( (uint64) hi ) << 32 ) | ( (uint64) lo );
  37. }
  38. #endif // msvc
  39. #elif defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || defined(__ARM_NEON__)
  40. #ifdef _MSC_VER
  41. #define __cycles() _ReadStatusReg(ARM64_CNTVCT)
  42. #else
  43. static inline uint64 __cycles()
  44. {
  45. uint64 tsc;
  46. asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
  47. return tsc;
  48. }
  49. #endif
  50. #else // x64, arm
  51. #error Unknown platform for timing.
  52. #endif //x64 and
  53. #ifdef PROFILE_MODE
  54. #define STBIR_ASSERT(cond)
  55. #endif
  56. #ifdef _DEBUG
  57. #undef STBIR_ASSERT
  58. #define STBIR_ASSERT(cond) { if (!(cond)) stop(); }
  59. #endif
  60. #define SHRINKBYW 2
  61. #define ZOOMBYW 2
  62. #define SHRINKBYH 2
  63. #define ZOOMBYH 2
  64. int mem_count = 0;
  65. #ifdef TEST_WITH_VALLOC
  66. #define STBIR__SEPARATE_ALLOCATIONS
  67. #if TEST_WITH_LIMIT_AT_FRONT
  68. void * wmalloc(SIZE_T size)
  69. {
  70. static unsigned int pagesize=0;
  71. void* p;
  72. SIZE_T s;
  73. // get the page size, if we haven't yet
  74. if (pagesize==0)
  75. {
  76. SYSTEM_INFO si;
  77. GetSystemInfo(&si);
  78. pagesize=si.dwPageSize;
  79. }
  80. // we need room for the size, 8 bytes to hide the original pointer and a
  81. // validation dword, and enough data to completely fill one page
  82. s=(size+(pagesize-1))&~(pagesize-1);
  83. // allocate the size plus a page (for the guard)
  84. p=VirtualAlloc(0,(SIZE_T)s,MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE);
  85. return p;
  86. }
  87. void wfree(void * ptr)
  88. {
  89. if (ptr)
  90. {
  91. if ( ((ptrdiff_t)ptr) & 4095 ) stop();
  92. if ( VirtualFree(ptr,0,MEM_RELEASE) == 0 ) stop();
  93. }
  94. }
  95. #else
  96. void * wmalloc(SIZE_T size)
  97. {
  98. static unsigned int pagesize=0;
  99. void* p;
  100. SIZE_T s;
  101. // get the page size, if we haven't yet
  102. if (pagesize==0)
  103. {
  104. SYSTEM_INFO si;
  105. GetSystemInfo(&si);
  106. pagesize=si.dwPageSize;
  107. }
  108. // we need room for the size, 8 bytes to hide the original pointer and a
  109. // validation dword, and enough data to completely fill one page
  110. s=(size+16+(pagesize-1))&~(pagesize-1);
  111. // allocate the size plus a page (for the guard)
  112. p=VirtualAlloc(0,(SIZE_T)(s+pagesize+pagesize),MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE);
  113. if (p)
  114. {
  115. DWORD oldprot;
  116. void* orig=p;
  117. // protect the first page
  118. VirtualProtect(((char*)p),pagesize,PAGE_NOACCESS,&oldprot);
  119. // protect the final page
  120. VirtualProtect(((char*)p)+s+pagesize,pagesize,PAGE_NOACCESS,&oldprot);
  121. // now move the returned pointer so that it bumps right up against the
  122. // the next (protected) page (this may result in unaligned return
  123. // addresses - pre-align the sizes if you always want aligned ptrs)
  124. //#define ERROR_ON_FRONT
  125. #ifdef ERROR_ON_FRONT
  126. p=((char*)p)+pagesize+16;
  127. #else
  128. p=((char*)p)+(s-size)+pagesize;
  129. #endif
  130. // hide the validation value and the original pointer (which we'll
  131. // need used for freeing) right behind the returned pointer
  132. ((unsigned int*)p)[-1]=0x98765432;
  133. ((void**)p)[-2]=orig;
  134. ++mem_count;
  135. //printf("aloc: %p bytes: %d\n",p,(int)size);
  136. return(p);
  137. }
  138. return 0;
  139. }
  140. void wfree(void * ptr)
  141. {
  142. if (ptr)
  143. {
  144. int err=0;
  145. // is this one of our allocations?
  146. if (((((unsigned int*)ptr)[-1])!=0x98765432) || ((((void**)ptr)[-2])==0))
  147. {
  148. err=1;
  149. }
  150. if (err)
  151. {
  152. __debugbreak();
  153. }
  154. else
  155. {
  156. // back up to find the original pointer
  157. void* p=((void**)ptr)[-2];
  158. // clear the validation value and the original pointer
  159. ((unsigned int*)ptr)[-1]=0;
  160. ((void**)ptr)[-2]=0;
  161. //printf("free: %p\n",ptr);
  162. --mem_count;
  163. // now free the pages
  164. if (p)
  165. VirtualFree(p,0,MEM_RELEASE);
  166. }
  167. }
  168. }
  169. #endif
  170. #define STBIR_MALLOC(size,user_data) ((void)(user_data), wmalloc(size))
  171. #define STBIR_FREE(ptr,user_data) ((void)(user_data), wfree(ptr))
  172. #endif
  173. #define STBIR_PROFILE
  174. //#define STBIR_NO_SIMD
  175. //#define STBIR_AVX
  176. //#define STBIR_AVX2
  177. #define STB_IMAGE_RESIZE_IMPLEMENTATION
  178. #include "stb_image_resize2.h" // new one!
  179. #define STB_IMAGE_WRITE_IMPLEMENTATION
  180. #include "stb_image_write.h"
  181. int tsizes[5] = { 1, 1, 2, 4, 2 };
  182. int ttypes[5] = { STBIR_TYPE_UINT8, STBIR_TYPE_UINT8_SRGB, STBIR_TYPE_UINT16, STBIR_TYPE_FLOAT, STBIR_TYPE_HALF_FLOAT };
  183. int cedges[4] = { STBIR_EDGE_CLAMP, STBIR_EDGE_REFLECT, STBIR_EDGE_ZERO, STBIR_EDGE_WRAP };
  184. int flts[5] = { STBIR_FILTER_BOX, STBIR_FILTER_TRIANGLE, STBIR_FILTER_CUBICBSPLINE, STBIR_FILTER_CATMULLROM, STBIR_FILTER_MITCHELL };
  185. int buffers[20] = { STBIR_1CHANNEL, STBIR_2CHANNEL, STBIR_RGB, STBIR_4CHANNEL,
  186. STBIR_BGRA, STBIR_ARGB, STBIR_RA, STBIR_AR,
  187. STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
  188. STBIR_RGBA, STBIR_ARGB, STBIR_RA, STBIR_AR,
  189. STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
  190. };
  191. int obuffers[20] = { STBIR_1CHANNEL, STBIR_2CHANNEL, STBIR_RGB, STBIR_4CHANNEL,
  192. STBIR_BGRA, STBIR_ARGB, STBIR_RA, STBIR_AR,
  193. STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
  194. STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
  195. STBIR_RGBA, STBIR_ARGB, STBIR_RA, STBIR_AR,
  196. };
  197. int bchannels[20] = { 1, 2, 3, 4, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2 };
  198. int alphapos[20] = { -1, -1, -1, -1, 3,0, 1,0, 3,0, 1,0, 3,0, 1,0,3,0, 1,0 };
  199. char const * buffstrs[20] = { "1ch", "2ch", "3ch", "4ch", "RGBA", "ARGB", "RA", "AR", "RGBA_both_pre", "ARGB_both_pre", "RA_both_pre", "AR_both_pre", "RGBA_out_pre", "ARGB_out_pre", "RA_out_pre", "AR_out_pre", "RGBA_in_pre", "ARGB_in_pre", "RA_in_pre", "AR_in_pre" };
  200. char const * typestrs[5] = { "Bytes", "BytesSRGB", "Shorts", "Floats", "Half Floats"};
  201. char const * edgestrs[4] = { "Clamp", "Reflect", "Zero", "Wrap" };
  202. char const * fltstrs[5] = { "Box", "Triangle", "Cubic", "Catmullrom", "Mitchell" };
  203. #ifdef STBIR_PROFILE
  204. static void do_acc_zones( STBIR_PROFILE_INFO * profile )
  205. {
  206. stbir_uint32 j;
  207. stbir_uint64 start = tmGetAccumulationStart( tm_mask ); start=start;
  208. for( j = 0 ; j < profile->count ; j++ )
  209. {
  210. if ( profile->clocks[j] )
  211. tmEmitAccumulationZone( 0, 0, (tm_uint64*)&start, 0, profile->clocks[j], profile->descriptions[j] );
  212. }
  213. }
  214. #else
  215. #define do_acc_zones(...)
  216. #endif
  217. int64 vert;
  218. //#define WINTHREADTEST
  219. #ifdef WINTHREADTEST
  220. static STBIR_RESIZE * thread_resize;
  221. static LONG which;
  222. static int threads_started = 0;
  223. static HANDLE threads[32];
  224. static HANDLE starts,stops;
  225. static DWORD resize_shim( LPVOID p )
  226. {
  227. for(;;)
  228. {
  229. LONG wh;
  230. WaitForSingleObject( starts, INFINITE );
  231. wh = InterlockedAdd( &which, 1 ) - 1;
  232. ENTER( "Split %d", wh );
  233. stbir_resize_split( thread_resize, wh, 1 );
  234. #ifdef STBIR_PROFILE
  235. { STBIR_PROFILE_INFO profile; stbir_resize_split_profile_info( &profile, thread_resize, wh, 1 ); do_acc_zones( &profile ); vert = profile.clocks[1]; }
  236. #endif
  237. LEAVE();
  238. ReleaseSemaphore( stops, 1, 0 );
  239. }
  240. }
  241. #endif
  242. void nresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt )
  243. {
  244. STBIR_RESIZE resize;
  245. stbir_resize_init( &resize, i, ix, iy, ip, o, ox, oy, op, buffers[buf], ttypes[type] );
  246. stbir_set_pixel_layouts( &resize, buffers[buf], obuffers[buf] );
  247. stbir_set_edgemodes( &resize, cedges[edg], cedges[edg] );
  248. stbir_set_filters( &resize, flts[flt], /*STBIR_FILTER_POINT_SAMPLE */ flts[flt] );
  249. //stbir_set_input_subrect( &resize, 0.55f,0.333f,0.75f,0.50f);
  250. //stbir_set_output_pixel_subrect( &resize, 00, 00, ox/2,oy/2);
  251. //stbir_set_pixel_subrect(&resize, 1430,1361,30,30);
  252. ENTER( "Resize" );
  253. #ifndef WINTHREADTEST
  254. ENTER( "Filters" );
  255. stbir_build_samplers_with_splits( &resize, 1 );
  256. #ifdef STBIR_PROFILE
  257. { STBIR_PROFILE_INFO profile; stbir_resize_build_profile_info( &profile, &resize ); do_acc_zones( &profile ); }
  258. #endif
  259. LEAVE();
  260. ENTER( "Resize" );
  261. if(!stbir_resize_extended( &resize ) )
  262. stop();
  263. #ifdef STBIR_PROFILE
  264. { STBIR_PROFILE_INFO profile; stbir_resize_extended_profile_info( &profile, &resize ); do_acc_zones( &profile ); vert = profile.clocks[1]; }
  265. #endif
  266. LEAVE();
  267. #else
  268. {
  269. int c, cnt;
  270. ENTER( "Filters" );
  271. cnt = stbir_build_samplers_with_splits( &resize, 4 );
  272. #ifdef STBIR_PROFILE
  273. { STBIR_PROFILE_INFO profile; stbir_resize_build_profile_info( &profile, &resize ); do_acc_zones( &profile ); }
  274. #endif
  275. LEAVE();
  276. ENTER( "Thread start" );
  277. if ( threads_started == 0 )
  278. {
  279. starts = CreateSemaphore( 0, 0, 32, 0 );
  280. stops = CreateSemaphore( 0, 0, 32, 0 );
  281. }
  282. for( c = threads_started ; c < cnt ; c++ )
  283. threads[ c ] = CreateThread( 0, 2048*1024, resize_shim, 0, 0, 0 );
  284. threads_started = cnt;
  285. thread_resize = &resize;
  286. which = 0;
  287. LEAVE();
  288. // starts the threads
  289. ReleaseSemaphore( starts, cnt, 0 );
  290. ENTER( "Wait" );
  291. for( c = 0 ; c < cnt; c++ )
  292. WaitForSingleObject( stops, INFINITE );
  293. LEAVE();
  294. }
  295. #endif
  296. ENTER( "Free" );
  297. stbir_free_samplers( &resize );
  298. LEAVE();
  299. LEAVE();
  300. }
  301. #define STB_IMAGE_IMPLEMENTATION
  302. #include "stb_image.h"
  303. extern void oresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt );
  304. #define TYPESTART 0
  305. #define TYPEEND 4
  306. #define LAYOUTSTART 0
  307. #define LAYOUTEND 19
  308. #define SIZEWSTART 0
  309. #define SIZEWEND 2
  310. #define SIZEHSTART 0
  311. #define SIZEHEND 2
  312. #define EDGESTART 0
  313. #define EDGEEND 3
  314. #define FILTERSTART 0
  315. #define FILTEREND 4
  316. #define HEIGHTSTART 0
  317. #define HEIGHTEND 2
  318. #define WIDTHSTART 0
  319. #define WIDTHEND 2
  320. static void * convert8to16( unsigned char * i, int w, int h, int c )
  321. {
  322. unsigned short * ret;
  323. int p;
  324. ret = malloc( w*h*c*sizeof(short) );
  325. for(p = 0 ; p < (w*h*c) ; p++ )
  326. {
  327. ret[p]=(short)((((int)i[p])<<8)+i[p]);
  328. }
  329. return ret;
  330. }
  331. static void * convert8tof( unsigned char * i, int w, int h, int c )
  332. {
  333. float * ret;
  334. int p;
  335. ret = malloc( w*h*c*sizeof(float) );
  336. for(p = 0 ; p < (w*h*c) ; p++ )
  337. {
  338. ret[p]=((float)i[p])*(1.0f/255.0f);
  339. }
  340. return ret;
  341. }
  342. static void * convert8tohf( unsigned char * i, int w, int h, int c )
  343. {
  344. stbir__FP16 * ret;
  345. int p;
  346. ret = malloc( w*h*c*sizeof(stbir__FP16) );
  347. for(p = 0 ; p < (w*h*c) ; p++ )
  348. {
  349. ret[p]=stbir__float_to_half(((float)i[p])*(1.0f/255.0f));
  350. }
  351. return ret;
  352. }
  353. static void * convert8tohff( unsigned char * i, int w, int h, int c )
  354. {
  355. float * ret;
  356. int p;
  357. ret = malloc( w*h*c*sizeof(float) );
  358. for(p = 0 ; p < (w*h*c) ; p++ )
  359. {
  360. ret[p]=stbir__half_to_float(stbir__float_to_half(((float)i[p])*(1.0f/255.0f)));
  361. }
  362. return ret;
  363. }
  364. static int isprime( int v )
  365. {
  366. int i;
  367. if ( v <= 3 )
  368. return ( v > 1 );
  369. if ( ( v & 1 ) == 0 )
  370. return 0;
  371. if ( ( v % 3 ) == 0 )
  372. return 0;
  373. i = 5;
  374. while ( (i*i) <= v )
  375. {
  376. if ( ( v % i ) == 0 )
  377. return 0;
  378. if ( ( v % ( i + 2 ) ) == 0 )
  379. return 0;
  380. i += 6;
  381. }
  382. return 1;
  383. }
  384. static int getprime( int v )
  385. {
  386. int i;
  387. i = 0;
  388. for(;;)
  389. {
  390. if ( i >= v )
  391. return v; // can't find any, just return orig
  392. if (isprime(v - i))
  393. return v - i;
  394. if (isprime(v + i))
  395. return v + i;
  396. ++i;
  397. }
  398. }
  399. int main( int argc, char ** argv )
  400. {
  401. int ix, iy, ic;
  402. unsigned char * input[6];
  403. char * ir1;
  404. char * ir2;
  405. int szhs[3];
  406. int szws[3];
  407. int aw, ah, ac;
  408. unsigned char * correctalpha;
  409. int layouts, types, heights, widths, edges, filters;
  410. if ( argc != 2 )
  411. {
  412. printf("command: stbirtest [imagefile]\n");
  413. exit(1);
  414. }
  415. SetupTM( "127.0.0.1" );
  416. correctalpha = stbi_load( "correctalpha.png", &aw, &ah, &ac, 0 );
  417. input[0] = stbi_load( argv[1], &ix, &iy, &ic, 0 );
  418. input[1] = input[0];
  419. input[2] = convert8to16( input[0], ix, iy, ic );
  420. input[3] = convert8tof( input[0], ix, iy, ic );
  421. input[4] = convert8tohf( input[0], ix, iy, ic );
  422. input[5] = convert8tohff( input[0], ix, iy, ic );
  423. printf("Input %dx%d (%d channels)\n",ix,iy,ic);
  424. ir1 = malloc( 4 * 4 * 3000 * 3000ULL );
  425. ir2 = malloc( 4 * 4 * 3000 * 3000ULL );
  426. szhs[0] = getprime( iy/SHRINKBYH );
  427. szhs[1] = iy;
  428. szhs[2] = getprime( iy*ZOOMBYH );
  429. szws[0] = getprime( ix/SHRINKBYW );
  430. szws[1] = ix;
  431. szws[2] = getprime( ix*ZOOMBYW );
  432. #if 1
  433. for( types = TYPESTART ; types <= TYPEEND ; types++ )
  434. #else
  435. for( types = 1 ; types <= 1 ; types++ )
  436. #endif
  437. {
  438. ENTER( "Test type: %s",typestrs[types]);
  439. #if 1
  440. for( layouts = LAYOUTSTART ; layouts <= LAYOUTEND ; layouts++ )
  441. #else
  442. for( layouts = 16; layouts <= 16 ; layouts++ )
  443. #endif
  444. {
  445. ENTER( "Test layout: %s",buffstrs[layouts]);
  446. #if 0
  447. for( heights = HEIGHTSTART ; heights <= HEIGHTEND ; heights++ )
  448. {
  449. int w, h = szhs[heights];
  450. #else
  451. for( heights = 0 ; heights <= 11 ; heights++ )
  452. {
  453. static int szhsz[12]={32, 200, 350, 400, 450, 509, 532, 624, 700, 824, 1023, 2053 };
  454. int w, h = szhsz[heights];
  455. #endif
  456. ENTER( "Test height: %d %s %d",iy,(h<iy)?"Down":((h>iy)?"Up":"Same"),h);
  457. #if 0
  458. for( widths = WIDTHSTART ; widths <= WIDTHEND ; widths++ )
  459. {
  460. w = szws[widths];
  461. #else
  462. for( widths = 0 ; widths <= 12 ; widths++ )
  463. {
  464. static int szwsz[13]={2, 32, 200, 350, 400, 450, 509, 532, 624, 700, 824, 1023, 2053 };
  465. w = szwsz[widths];
  466. #endif
  467. ENTER( "Test width: %d %s %d",ix, (w<ix)?"Down":((w>ix)?"Up":"Same"), w);
  468. #if 0
  469. for( edges = EDGESTART ; edges <= EDGEEND ; edges++ )
  470. #else
  471. for( edges = 0 ; edges <= 0 ; edges++ )
  472. #endif
  473. {
  474. ENTER( "Test edge: %s",edgestrs[edges]);
  475. #if 0
  476. for( filters = FILTERSTART ; filters <= FILTEREND ; filters++ )
  477. #else
  478. for( filters = 3 ; filters <= 3 ; filters++ )
  479. #endif
  480. {
  481. int op, opw, np,npw, c, a;
  482. #ifdef COMPARE_SAME
  483. int oldtypes = types;
  484. #else
  485. int oldtypes = (types==4)?3:types;
  486. #endif
  487. ENTER( "Test filter: %s",fltstrs[filters]);
  488. {
  489. c = bchannels[layouts];
  490. a = alphapos[layouts];
  491. op = w*tsizes[oldtypes]*c + 60;
  492. opw = w*tsizes[oldtypes]*c;
  493. np = w*tsizes[types]*c + 60;
  494. npw = w*tsizes[types]*c;
  495. printf( "%s:layout: %s w: %d h: %d edge: %s filt: %s\n", typestrs[types],buffstrs[layouts], w, h, edgestrs[edges], fltstrs[filters] );
  496. // clear pixel area to different, right edge to zero
  497. #ifndef NOCLEAR
  498. ENTER( "Test clear padding" );
  499. {
  500. int d;
  501. for( d = 0 ; d < h ; d++ )
  502. {
  503. int oofs = d * op;
  504. int nofs = d * np;
  505. memset( ir1 + oofs, 192, opw );
  506. memset( ir1 + oofs+opw, 79, op-opw );
  507. memset( ir2 + nofs, 255, npw );
  508. memset( ir2 + nofs+npw, 79, np-npw );
  509. }
  510. }
  511. LEAVE();
  512. #endif
  513. #ifdef COMPARE_SAME
  514. #define TIMINGS 1
  515. #else
  516. #define TIMINGS 1
  517. #endif
  518. ENTER( "Test both" );
  519. {
  520. #ifndef PROFILE_NEW_ONLY
  521. {
  522. int ttt, max = 0x7fffffff;
  523. ENTER( "Test old" );
  524. for( ttt = 0 ; ttt < TIMINGS ; ttt++ )
  525. {
  526. int64 m = __cycles();
  527. oresize( ir1, w, h, op,
  528. #ifdef COMPARE_SAME
  529. input[types],
  530. #else
  531. input[(types==4)?5:types],
  532. #endif
  533. ix, iy, ix*ic*tsizes[oldtypes], layouts, oldtypes, edges, filters );
  534. m = __cycles() - m;
  535. if ( ( (int)m ) < max )
  536. max = (int) m;
  537. }
  538. LEAVE();
  539. printf("old: %d\n", max );
  540. }
  541. #endif
  542. {
  543. int ttt, max = 0x7fffffff, maxv = 0x7fffffff;
  544. ENTER( "Test new" );
  545. for( ttt = 0 ; ttt < TIMINGS ; ttt++ )
  546. {
  547. int64 m = __cycles();
  548. nresize( ir2, w, h, np, input[types], ix, iy, ix*ic*tsizes[types], layouts, types, edges, filters );
  549. m = __cycles() - m;
  550. if ( ( (int)m ) < max )
  551. max = (int) m;
  552. if ( ( (int)vert ) < maxv )
  553. maxv = (int) vert;
  554. }
  555. LEAVE(); // test new
  556. printf("new: %d (v: %d)\n", max, maxv );
  557. }
  558. }
  559. LEAVE(); // test both
  560. if ( mem_count!= 0 )
  561. stop();
  562. #ifndef NOCOMP
  563. ENTER( "Test compare" );
  564. {
  565. int x,y,ch;
  566. int nums = 0;
  567. for( y = 0 ; y < h ; y++ )
  568. {
  569. for( x = 0 ; x < w ; x++ )
  570. {
  571. switch(types)
  572. {
  573. case 0:
  574. case 1: //SRGB
  575. {
  576. unsigned char * p1 = (unsigned char *)&ir1[y*op+x*c];
  577. unsigned char * p2 = (unsigned char *)&ir2[y*np+x*c];
  578. for( ch = 0 ; ch < c ; ch++ )
  579. {
  580. float pp1,pp2,d;
  581. float av = (a==-1)?1.0f:((float)p1[a]/255.0f);
  582. pp1 = p1[ch];
  583. pp2 = p2[ch];
  584. // compare in premult space
  585. #ifndef COMPARE_SAME
  586. if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >=16 ) && ( layouts <= 19 ) ) )
  587. {
  588. pp1 *= av;
  589. pp2 *= av;
  590. }
  591. #endif
  592. d = pp1 - pp2;
  593. if ( d < 0 ) d = -d;
  594. #ifdef COMPARE_SAME
  595. if ( d > 0 )
  596. #else
  597. if ( d > 1 )
  598. #endif
  599. {
  600. printf("Error at %d x %d (chan %d) (d: %g a: %g) [%d %d %d %d] [%d %d %d %d]\n",x,y,ch, d,av, p1[0],p1[1],p1[2],p1[3], p2[0],p2[1],p2[2],p2[3]);
  601. ++nums;
  602. if ( nums > 16 ) goto ex;
  603. //if (d) exit(1);
  604. //goto ex;
  605. }
  606. }
  607. }
  608. break;
  609. case 2:
  610. {
  611. unsigned short * p1 = (unsigned short *)&ir1[y*op+x*c*sizeof(short)];
  612. unsigned short * p2 = (unsigned short *)&ir2[y*np+x*c*sizeof(short)];
  613. for( ch = 0 ; ch < c ; ch++ )
  614. {
  615. float thres,pp1,pp2,d;
  616. float av = (a==-1)?1.0f:((float)p1[a]/65535.0f);
  617. pp1 = p1[ch];
  618. pp2 = p2[ch];
  619. // compare in premult space
  620. #ifndef COMPARE_SAME
  621. if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) )
  622. {
  623. pp1 *= av;
  624. pp2 *= av;
  625. }
  626. #endif
  627. d = pp1 - pp2;
  628. if ( d < 0 ) d = -d;
  629. thres=((float)p1[ch]*0.007f)+2.0f;
  630. if (thres<4) thres = 4;
  631. #ifdef COMPARE_SAME
  632. if ( d > 0 )
  633. #else
  634. if ( d > thres)
  635. #endif
  636. {
  637. printf("Error at %d x %d (chan %d) %d %d [df: %g th: %g al: %g] (%d %d %d %d) (%d %d %d %d)\n",x,y,ch, p1[ch],p2[ch],d,thres,av,p1[0],p1[1],p1[2],p1[3],p2[0],p2[1],p2[2],p2[3]);
  638. ++nums;
  639. if ( nums > 16 ) goto ex;
  640. //if (d) exit(1);
  641. //goto ex;
  642. }
  643. }
  644. }
  645. break;
  646. case 3:
  647. {
  648. float * p1 = (float *)&ir1[y*op+x*c*sizeof(float)];
  649. float * p2 = (float *)&ir2[y*np+x*c*sizeof(float)];
  650. for( ch = 0 ; ch < c ; ch++ )
  651. {
  652. float pp1 = p1[ch], pp2 = p2[ch];
  653. float av = (a==-1)?1.0f:p1[a];
  654. float thres, d;
  655. // clamp
  656. if (pp1<=0.0f) pp1 = 0;
  657. if (pp2<=0.0f) pp2 = 0;
  658. if (av<=0.0f) av = 0;
  659. if (pp1>1.0f) pp1 = 1.0f;
  660. if (pp2>1.0f) pp2 = 1.0f;
  661. if (av>1.0f) av = 1.0f;
  662. // compare in premult space
  663. #ifndef COMPARE_SAME
  664. if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) )
  665. {
  666. pp1 *= av;
  667. pp2 *= av;
  668. }
  669. #endif
  670. d = pp1 - pp2;
  671. if ( d < 0 ) d = -d;
  672. thres=(p1[ch]*0.002f)+0.0002f;
  673. if ( thres < 0 ) thres = -thres;
  674. #ifdef COMPARE_SAME
  675. if ( d != 0.0f )
  676. #else
  677. if ( d > thres )
  678. #endif
  679. {
  680. printf("Error at %d x %d (chan %d) %g %g [df: %g th: %g al: %g] (%g %g %g %g) (%g %g %g %g)\n",x,y,ch, p1[ch],p2[ch],d,thres,av,p1[0],p1[1],p1[2],p1[3],p2[0],p2[1],p2[2],p2[3]);
  681. ++nums;
  682. if ( nums > 16 ) goto ex;
  683. //if (d) exit(1);
  684. //goto ex;
  685. }
  686. }
  687. }
  688. break;
  689. case 4:
  690. {
  691. #ifdef COMPARE_SAME
  692. stbir__FP16 * p1 = (stbir__FP16 *)&ir1[y*op+x*c*sizeof(stbir__FP16)];
  693. #else
  694. float * p1 = (float *)&ir1[y*op+x*c*sizeof(float)];
  695. #endif
  696. stbir__FP16 * p2 = (stbir__FP16 *)&ir2[y*np+x*c*sizeof(stbir__FP16)];
  697. for( ch = 0 ; ch < c ; ch++ )
  698. {
  699. #ifdef COMPARE_SAME
  700. float pp1 = stbir__half_to_float(p1[ch]);
  701. float av = (a==-1)?1.0f:stbir__half_to_float(p1[a]);
  702. #else
  703. float pp1 = stbir__half_to_float(stbir__float_to_half(p1[ch]));
  704. float av = (a==-1)?1.0f:stbir__half_to_float(stbir__float_to_half(p1[a]));
  705. #endif
  706. float pp2 = stbir__half_to_float(p2[ch]);
  707. float d, thres;
  708. // clamp
  709. if (pp1<=0.0f) pp1 = 0;
  710. if (pp2<=0.0f) pp2 = 0;
  711. if (av<=0.0f) av = 0;
  712. if (pp1>1.0f) pp1 = 1.0f;
  713. if (pp2>1.0f) pp2 = 1.0f;
  714. if (av>1.0f) av = 1.0f;
  715. thres=(pp1*0.002f)+0.0002f;
  716. // compare in premult space
  717. #ifndef COMPARE_SAME
  718. if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) )
  719. {
  720. pp1 *= av;
  721. pp2 *= av;
  722. }
  723. #endif
  724. d = pp1 - pp2;
  725. if ( d < 0 ) d = -d;
  726. #ifdef COMPARE_SAME
  727. if ( d != 0.0f )
  728. #else
  729. if ( d > thres )
  730. #endif
  731. {
  732. printf("Error at %d x %d (chan %d) %g %g [df: %g th: %g al: %g] (%g %g %g %g) (%g %g %g %g)\n",x,y,ch,
  733. #ifdef COMPARE_SAME
  734. stbir__half_to_float(p1[ch]),
  735. #else
  736. p1[ch],
  737. #endif
  738. stbir__half_to_float(p2[ch]),
  739. d,thres,av,
  740. #ifdef COMPARE_SAME
  741. stbir__half_to_float(p1[0]),stbir__half_to_float(p1[1]),stbir__half_to_float(p1[2]),stbir__half_to_float(p1[3]),
  742. #else
  743. p1[0],p1[1],p1[2],p1[3],
  744. #endif
  745. stbir__half_to_float(p2[0]),stbir__half_to_float(p2[1]),stbir__half_to_float(p2[2]),stbir__half_to_float(p2[3]) );
  746. ++nums;
  747. if ( nums > 16 ) goto ex;
  748. //if (d) exit(1);
  749. //goto ex;
  750. }
  751. }
  752. }
  753. break;
  754. }
  755. }
  756. for( x = (w*c)*tsizes[oldtypes]; x < op; x++ )
  757. {
  758. if ( ir1[y*op+x] != 79 )
  759. {
  760. printf("Margin error at %d x %d %d (should be 79) OLD!\n",x,y,(unsigned char)ir1[y*op+x]);
  761. goto ex;
  762. }
  763. }
  764. for( x = (w*c)*tsizes[types]; x < np; x++ )
  765. {
  766. if ( ir2[y*np+x] != 79 )
  767. {
  768. printf("Margin error at %d x %d %d (should be 79) NEW\n",x,y,(unsigned char)ir2[y*np+x]);
  769. goto ex;
  770. }
  771. }
  772. }
  773. ex:
  774. ENTER( "OUTPUT IMAGES" );
  775. printf(" tot pix: %d, errs: %d\n", w*h*c,nums );
  776. if (nums)
  777. {
  778. stbi_write_png("old.png", w, h, c, ir1, op);
  779. stbi_write_png("new.png", w, h, c, ir2, np);
  780. exit(1);
  781. }
  782. LEAVE(); // output images
  783. }
  784. LEAVE(); //test compare
  785. #endif
  786. }
  787. LEAVE(); // test filter
  788. }
  789. LEAVE(); // test edge
  790. }
  791. LEAVE(); // test width
  792. }
  793. LEAVE(); // test height
  794. }
  795. LEAVE(); // test type
  796. }
  797. LEAVE(); // test layout
  798. }
  799. CloseTM();
  800. return 0;
  801. }