jcdctmgr.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. /*
  2. * jcdctmgr.c
  3. *
  4. * This file was part of the Independent JPEG Group's software:
  5. * Copyright (C) 1994-1996, Thomas G. Lane.
  6. * libjpeg-turbo Modifications:
  7. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  8. * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
  9. * Copyright (C) 2011 D. R. Commander
  10. * For conditions of distribution and use, see the accompanying README file.
  11. *
  12. * This file contains the forward-DCT management logic.
  13. * This code selects a particular DCT implementation to be used,
  14. * and it performs related housekeeping chores including coefficient
  15. * quantization.
  16. */
  17. #define JPEG_INTERNALS
  18. #include "jinclude.h"
  19. #include "jpeglib.h"
  20. #include "jdct.h" /* Private declarations for DCT subsystem */
  21. #include "jsimddct.h"
  22. /* Private subobject for this module */
  23. typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
  24. typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
  25. typedef JMETHOD(void, convsamp_method_ptr,
  26. (JSAMPARRAY sample_data, JDIMENSION start_col,
  27. DCTELEM * workspace));
  28. typedef JMETHOD(void, float_convsamp_method_ptr,
  29. (JSAMPARRAY sample_data, JDIMENSION start_col,
  30. FAST_FLOAT *workspace));
  31. typedef JMETHOD(void, quantize_method_ptr,
  32. (JCOEFPTR coef_block, DCTELEM * divisors,
  33. DCTELEM * workspace));
  34. typedef JMETHOD(void, float_quantize_method_ptr,
  35. (JCOEFPTR coef_block, FAST_FLOAT * divisors,
  36. FAST_FLOAT * workspace));
  37. METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
  38. typedef struct {
  39. struct jpeg_forward_dct pub; /* public fields */
  40. /* Pointer to the DCT routine actually in use */
  41. forward_DCT_method_ptr dct;
  42. convsamp_method_ptr convsamp;
  43. quantize_method_ptr quantize;
  44. /* The actual post-DCT divisors --- not identical to the quant table
  45. * entries, because of scaling (especially for an unnormalized DCT).
  46. * Each table is given in normal array order.
  47. */
  48. DCTELEM * divisors[NUM_QUANT_TBLS];
  49. /* work area for FDCT subroutine */
  50. DCTELEM * workspace;
  51. #ifdef DCT_FLOAT_SUPPORTED
  52. /* Same as above for the floating-point case. */
  53. float_DCT_method_ptr float_dct;
  54. float_convsamp_method_ptr float_convsamp;
  55. float_quantize_method_ptr float_quantize;
  56. FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
  57. FAST_FLOAT * float_workspace;
  58. #endif
  59. } my_fdct_controller;
  60. typedef my_fdct_controller * my_fdct_ptr;
  61. /*
  62. * Find the highest bit in an integer through binary search.
  63. */
  64. LOCAL(int)
  65. flss (UINT16 val)
  66. {
  67. int bit;
  68. bit = 16;
  69. if (!val)
  70. return 0;
  71. if (!(val & 0xff00)) {
  72. bit -= 8;
  73. val <<= 8;
  74. }
  75. if (!(val & 0xf000)) {
  76. bit -= 4;
  77. val <<= 4;
  78. }
  79. if (!(val & 0xc000)) {
  80. bit -= 2;
  81. val <<= 2;
  82. }
  83. if (!(val & 0x8000)) {
  84. bit -= 1;
  85. val <<= 1;
  86. }
  87. return bit;
  88. }
  89. /*
  90. * Compute values to do a division using reciprocal.
  91. *
  92. * This implementation is based on an algorithm described in
  93. * "How to optimize for the Pentium family of microprocessors"
  94. * (http://www.agner.org/assem/).
  95. * More information about the basic algorithm can be found in
  96. * the paper "Integer Division Using Reciprocals" by Robert Alverson.
  97. *
  98. * The basic idea is to replace x/d by x * d^-1. In order to store
  99. * d^-1 with enough precision we shift it left a few places. It turns
  100. * out that this algorithm gives just enough precision, and also fits
  101. * into DCTELEM:
  102. *
  103. * b = (the number of significant bits in divisor) - 1
  104. * r = (word size) + b
  105. * f = 2^r / divisor
  106. *
  107. * f will not be an integer for most cases, so we need to compensate
  108. * for the rounding error introduced:
  109. *
  110. * no fractional part:
  111. *
  112. * result = input >> r
  113. *
  114. * fractional part of f < 0.5:
  115. *
  116. * round f down to nearest integer
  117. * result = ((input + 1) * f) >> r
  118. *
  119. * fractional part of f > 0.5:
  120. *
  121. * round f up to nearest integer
  122. * result = (input * f) >> r
  123. *
  124. * This is the original algorithm that gives truncated results. But we
  125. * want properly rounded results, so we replace "input" with
  126. * "input + divisor/2".
  127. *
  128. * In order to allow SIMD implementations we also tweak the values to
  129. * allow the same calculation to be made at all times:
  130. *
  131. * dctbl[0] = f rounded to nearest integer
  132. * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
  133. * dctbl[2] = 1 << ((word size) * 2 - r)
  134. * dctbl[3] = r - (word size)
  135. *
  136. * dctbl[2] is for stupid instruction sets where the shift operation
  137. * isn't member wise (e.g. MMX).
  138. *
  139. * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
  140. * is that most SIMD implementations have a "multiply and store top
  141. * half" operation.
  142. *
  143. * Lastly, we store each of the values in their own table instead
  144. * of in a consecutive manner, yet again in order to allow SIMD
  145. * routines.
  146. */
  147. LOCAL(int)
  148. compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
  149. {
  150. UDCTELEM2 fq, fr;
  151. UDCTELEM c;
  152. int b, r;
  153. b = flss(divisor) - 1;
  154. r = sizeof(DCTELEM) * 8 + b;
  155. fq = ((UDCTELEM2)1 << r) / divisor;
  156. fr = ((UDCTELEM2)1 << r) % divisor;
  157. c = divisor / 2; /* for rounding */
  158. if (fr == 0) { /* divisor is power of two */
  159. /* fq will be one bit too large to fit in DCTELEM, so adjust */
  160. fq >>= 1;
  161. r--;
  162. } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
  163. c++;
  164. } else { /* fractional part is > 0.5 */
  165. fq++;
  166. }
  167. dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
  168. dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
  169. dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
  170. dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
  171. if(r <= 16) return 0;
  172. else return 1;
  173. }
  174. /*
  175. * Initialize for a processing pass.
  176. * Verify that all referenced Q-tables are present, and set up
  177. * the divisor table for each one.
  178. * In the current implementation, DCT of all components is done during
  179. * the first pass, even if only some components will be output in the
  180. * first scan. Hence all components should be examined here.
  181. */
  182. METHODDEF(void)
  183. start_pass_fdctmgr (j_compress_ptr cinfo)
  184. {
  185. my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  186. int ci, qtblno, i;
  187. jpeg_component_info *compptr;
  188. JQUANT_TBL * qtbl;
  189. DCTELEM * dtbl;
  190. for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
  191. ci++, compptr++) {
  192. qtblno = compptr->quant_tbl_no;
  193. /* Make sure specified quantization table is present */
  194. if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
  195. cinfo->quant_tbl_ptrs[qtblno] == NULL)
  196. ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
  197. qtbl = cinfo->quant_tbl_ptrs[qtblno];
  198. /* Compute divisors for this quant table */
  199. /* We may do this more than once for same table, but it's not a big deal */
  200. switch (cinfo->dct_method) {
  201. #ifdef DCT_ISLOW_SUPPORTED
  202. case JDCT_ISLOW:
  203. /* For LL&M IDCT method, divisors are equal to raw quantization
  204. * coefficients multiplied by 8 (to counteract scaling).
  205. */
  206. if (fdct->divisors[qtblno] == NULL) {
  207. fdct->divisors[qtblno] = (DCTELEM *)
  208. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  209. (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
  210. }
  211. dtbl = fdct->divisors[qtblno];
  212. for (i = 0; i < DCTSIZE2; i++) {
  213. if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
  214. && fdct->quantize == jsimd_quantize)
  215. fdct->quantize = quantize;
  216. }
  217. break;
  218. #endif
  219. #ifdef DCT_IFAST_SUPPORTED
  220. case JDCT_IFAST:
  221. {
  222. /* For AA&N IDCT method, divisors are equal to quantization
  223. * coefficients scaled by scalefactor[row]*scalefactor[col], where
  224. * scalefactor[0] = 1
  225. * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
  226. * We apply a further scale factor of 8.
  227. */
  228. #define CONST_BITS 14
  229. static const INT16 aanscales[DCTSIZE2] = {
  230. /* precomputed values scaled up by 14 bits */
  231. 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
  232. 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
  233. 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
  234. 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
  235. 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
  236. 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
  237. 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
  238. 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
  239. };
  240. SHIFT_TEMPS
  241. if (fdct->divisors[qtblno] == NULL) {
  242. fdct->divisors[qtblno] = (DCTELEM *)
  243. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  244. (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
  245. }
  246. dtbl = fdct->divisors[qtblno];
  247. for (i = 0; i < DCTSIZE2; i++) {
  248. if(!compute_reciprocal(
  249. DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
  250. (INT32) aanscales[i]),
  251. CONST_BITS-3), &dtbl[i])
  252. && fdct->quantize == jsimd_quantize)
  253. fdct->quantize = quantize;
  254. }
  255. }
  256. break;
  257. #endif
  258. #ifdef DCT_FLOAT_SUPPORTED
  259. case JDCT_FLOAT:
  260. {
  261. /* For float AA&N IDCT method, divisors are equal to quantization
  262. * coefficients scaled by scalefactor[row]*scalefactor[col], where
  263. * scalefactor[0] = 1
  264. * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
  265. * We apply a further scale factor of 8.
  266. * What's actually stored is 1/divisor so that the inner loop can
  267. * use a multiplication rather than a division.
  268. */
  269. FAST_FLOAT * fdtbl;
  270. int row, col;
  271. static const double aanscalefactor[DCTSIZE] = {
  272. 1.0, 1.387039845, 1.306562965, 1.175875602,
  273. 1.0, 0.785694958, 0.541196100, 0.275899379
  274. };
  275. if (fdct->float_divisors[qtblno] == NULL) {
  276. fdct->float_divisors[qtblno] = (FAST_FLOAT *)
  277. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  278. DCTSIZE2 * SIZEOF(FAST_FLOAT));
  279. }
  280. fdtbl = fdct->float_divisors[qtblno];
  281. i = 0;
  282. for (row = 0; row < DCTSIZE; row++) {
  283. for (col = 0; col < DCTSIZE; col++) {
  284. fdtbl[i] = (FAST_FLOAT)
  285. (1.0 / (((double) qtbl->quantval[i] *
  286. aanscalefactor[row] * aanscalefactor[col] * 8.0)));
  287. i++;
  288. }
  289. }
  290. }
  291. break;
  292. #endif
  293. default:
  294. ERREXIT(cinfo, JERR_NOT_COMPILED);
  295. break;
  296. }
  297. }
  298. }
  299. /*
  300. * Load data into workspace, applying unsigned->signed conversion.
  301. */
  302. METHODDEF(void)
  303. convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
  304. {
  305. register DCTELEM *workspaceptr;
  306. register JSAMPROW elemptr;
  307. register int elemr;
  308. workspaceptr = workspace;
  309. for (elemr = 0; elemr < DCTSIZE; elemr++) {
  310. elemptr = sample_data[elemr] + start_col;
  311. #if DCTSIZE == 8 /* unroll the inner loop */
  312. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  313. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  314. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  315. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  316. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  317. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  318. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  319. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  320. #else
  321. {
  322. register int elemc;
  323. for (elemc = DCTSIZE; elemc > 0; elemc--)
  324. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  325. }
  326. #endif
  327. }
  328. }
  329. /*
  330. * Quantize/descale the coefficients, and store into coef_blocks[].
  331. */
  332. METHODDEF(void)
  333. quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
  334. {
  335. int i;
  336. DCTELEM temp;
  337. UDCTELEM recip, corr, shift;
  338. UDCTELEM2 product;
  339. JCOEFPTR output_ptr = coef_block;
  340. for (i = 0; i < DCTSIZE2; i++) {
  341. temp = workspace[i];
  342. recip = divisors[i + DCTSIZE2 * 0];
  343. corr = divisors[i + DCTSIZE2 * 1];
  344. shift = divisors[i + DCTSIZE2 * 3];
  345. if (temp < 0) {
  346. temp = -temp;
  347. product = (UDCTELEM2)(temp + corr) * recip;
  348. product >>= shift + sizeof(DCTELEM)*8;
  349. temp = product;
  350. temp = -temp;
  351. } else {
  352. product = (UDCTELEM2)(temp + corr) * recip;
  353. product >>= shift + sizeof(DCTELEM)*8;
  354. temp = product;
  355. }
  356. output_ptr[i] = (JCOEF) temp;
  357. }
  358. }
  359. /*
  360. * Perform forward DCT on one or more blocks of a component.
  361. *
  362. * The input samples are taken from the sample_data[] array starting at
  363. * position start_row/start_col, and moving to the right for any additional
  364. * blocks. The quantized coefficients are returned in coef_blocks[].
  365. */
  366. METHODDEF(void)
  367. forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
  368. JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
  369. JDIMENSION start_row, JDIMENSION start_col,
  370. JDIMENSION num_blocks)
  371. /* This version is used for integer DCT implementations. */
  372. {
  373. /* This routine is heavily used, so it's worth coding it tightly. */
  374. my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  375. DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
  376. DCTELEM * workspace;
  377. JDIMENSION bi;
  378. /* Make sure the compiler doesn't look up these every pass */
  379. forward_DCT_method_ptr do_dct = fdct->dct;
  380. convsamp_method_ptr do_convsamp = fdct->convsamp;
  381. quantize_method_ptr do_quantize = fdct->quantize;
  382. workspace = fdct->workspace;
  383. sample_data += start_row; /* fold in the vertical offset once */
  384. for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
  385. /* Load data into workspace, applying unsigned->signed conversion */
  386. (*do_convsamp) (sample_data, start_col, workspace);
  387. /* Perform the DCT */
  388. (*do_dct) (workspace);
  389. /* Quantize/descale the coefficients, and store into coef_blocks[] */
  390. (*do_quantize) (coef_blocks[bi], divisors, workspace);
  391. }
  392. }
  393. #ifdef DCT_FLOAT_SUPPORTED
  394. METHODDEF(void)
  395. convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
  396. {
  397. register FAST_FLOAT *workspaceptr;
  398. register JSAMPROW elemptr;
  399. register int elemr;
  400. workspaceptr = workspace;
  401. for (elemr = 0; elemr < DCTSIZE; elemr++) {
  402. elemptr = sample_data[elemr] + start_col;
  403. #if DCTSIZE == 8 /* unroll the inner loop */
  404. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  405. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  406. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  407. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  408. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  409. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  410. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  411. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  412. #else
  413. {
  414. register int elemc;
  415. for (elemc = DCTSIZE; elemc > 0; elemc--)
  416. *workspaceptr++ = (FAST_FLOAT)
  417. (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  418. }
  419. #endif
  420. }
  421. }
  422. METHODDEF(void)
  423. quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
  424. {
  425. register FAST_FLOAT temp;
  426. register int i;
  427. register JCOEFPTR output_ptr = coef_block;
  428. for (i = 0; i < DCTSIZE2; i++) {
  429. /* Apply the quantization and scaling factor */
  430. temp = workspace[i] * divisors[i];
  431. /* Round to nearest integer.
  432. * Since C does not specify the direction of rounding for negative
  433. * quotients, we have to force the dividend positive for portability.
  434. * The maximum coefficient size is +-16K (for 12-bit data), so this
  435. * code should work for either 16-bit or 32-bit ints.
  436. */
  437. output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
  438. }
  439. }
  440. METHODDEF(void)
  441. forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
  442. JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
  443. JDIMENSION start_row, JDIMENSION start_col,
  444. JDIMENSION num_blocks)
  445. /* This version is used for floating-point DCT implementations. */
  446. {
  447. /* This routine is heavily used, so it's worth coding it tightly. */
  448. my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  449. FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
  450. FAST_FLOAT * workspace;
  451. JDIMENSION bi;
  452. /* Make sure the compiler doesn't look up these every pass */
  453. float_DCT_method_ptr do_dct = fdct->float_dct;
  454. float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
  455. float_quantize_method_ptr do_quantize = fdct->float_quantize;
  456. workspace = fdct->float_workspace;
  457. sample_data += start_row; /* fold in the vertical offset once */
  458. for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
  459. /* Load data into workspace, applying unsigned->signed conversion */
  460. (*do_convsamp) (sample_data, start_col, workspace);
  461. /* Perform the DCT */
  462. (*do_dct) (workspace);
  463. /* Quantize/descale the coefficients, and store into coef_blocks[] */
  464. (*do_quantize) (coef_blocks[bi], divisors, workspace);
  465. }
  466. }
  467. #endif /* DCT_FLOAT_SUPPORTED */
  468. /*
  469. * Initialize FDCT manager.
  470. */
  471. GLOBAL(void)
  472. jinit_forward_dct (j_compress_ptr cinfo)
  473. {
  474. my_fdct_ptr fdct;
  475. int i;
  476. fdct = (my_fdct_ptr)
  477. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  478. SIZEOF(my_fdct_controller));
  479. cinfo->fdct = (struct jpeg_forward_dct *) fdct;
  480. fdct->pub.start_pass = start_pass_fdctmgr;
  481. /* First determine the DCT... */
  482. switch (cinfo->dct_method) {
  483. #ifdef DCT_ISLOW_SUPPORTED
  484. case JDCT_ISLOW:
  485. fdct->pub.forward_DCT = forward_DCT;
  486. if (jsimd_can_fdct_islow())
  487. fdct->dct = jsimd_fdct_islow;
  488. else
  489. fdct->dct = jpeg_fdct_islow;
  490. break;
  491. #endif
  492. #ifdef DCT_IFAST_SUPPORTED
  493. case JDCT_IFAST:
  494. fdct->pub.forward_DCT = forward_DCT;
  495. if (jsimd_can_fdct_ifast())
  496. fdct->dct = jsimd_fdct_ifast;
  497. else
  498. fdct->dct = jpeg_fdct_ifast;
  499. break;
  500. #endif
  501. #ifdef DCT_FLOAT_SUPPORTED
  502. case JDCT_FLOAT:
  503. fdct->pub.forward_DCT = forward_DCT_float;
  504. if (jsimd_can_fdct_float())
  505. fdct->float_dct = jsimd_fdct_float;
  506. else
  507. fdct->float_dct = jpeg_fdct_float;
  508. break;
  509. #endif
  510. default:
  511. ERREXIT(cinfo, JERR_NOT_COMPILED);
  512. break;
  513. }
  514. /* ...then the supporting stages. */
  515. switch (cinfo->dct_method) {
  516. #ifdef DCT_ISLOW_SUPPORTED
  517. case JDCT_ISLOW:
  518. #endif
  519. #ifdef DCT_IFAST_SUPPORTED
  520. case JDCT_IFAST:
  521. #endif
  522. #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
  523. if (jsimd_can_convsamp())
  524. fdct->convsamp = jsimd_convsamp;
  525. else
  526. fdct->convsamp = convsamp;
  527. if (jsimd_can_quantize())
  528. fdct->quantize = jsimd_quantize;
  529. else
  530. fdct->quantize = quantize;
  531. break;
  532. #endif
  533. #ifdef DCT_FLOAT_SUPPORTED
  534. case JDCT_FLOAT:
  535. if (jsimd_can_convsamp_float())
  536. fdct->float_convsamp = jsimd_convsamp_float;
  537. else
  538. fdct->float_convsamp = convsamp_float;
  539. if (jsimd_can_quantize_float())
  540. fdct->float_quantize = jsimd_quantize_float;
  541. else
  542. fdct->float_quantize = quantize_float;
  543. break;
  544. #endif
  545. default:
  546. ERREXIT(cinfo, JERR_NOT_COMPILED);
  547. break;
  548. }
  549. /* Allocate workspace memory */
  550. #ifdef DCT_FLOAT_SUPPORTED
  551. if (cinfo->dct_method == JDCT_FLOAT)
  552. fdct->float_workspace = (FAST_FLOAT *)
  553. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  554. SIZEOF(FAST_FLOAT) * DCTSIZE2);
  555. else
  556. #endif
  557. fdct->workspace = (DCTELEM *)
  558. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  559. SIZEOF(DCTELEM) * DCTSIZE2);
  560. /* Mark divisor tables unallocated */
  561. for (i = 0; i < NUM_QUANT_TBLS; i++) {
  562. fdct->divisors[i] = NULL;
  563. #ifdef DCT_FLOAT_SUPPORTED
  564. fdct->float_divisors[i] = NULL;
  565. #endif
  566. }
  567. }