jcdctmgr.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716
  1. /*
  2. * jcdctmgr.c
  3. *
  4. * This file was part of the Independent JPEG Group's software:
  5. * Copyright (C) 1994-1996, Thomas G. Lane.
  6. * libjpeg-turbo Modifications:
  7. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  8. * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
  9. * Copyright (C) 2011, 2014-2015 D. R. Commander
  10. * For conditions of distribution and use, see the accompanying README file.
  11. *
  12. * This file contains the forward-DCT management logic.
  13. * This code selects a particular DCT implementation to be used,
  14. * and it performs related housekeeping chores including coefficient
  15. * quantization.
  16. */
  17. #define JPEG_INTERNALS
  18. #include "jinclude.h"
  19. #include "jpeglib.h"
  20. #include "jdct.h" /* Private declarations for DCT subsystem */
  21. #include "jsimddct.h"
  22. /* Private subobject for this module */
  23. typedef void (*forward_DCT_method_ptr) (DCTELEM * data);
  24. typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data);
  25. typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
  26. JDIMENSION start_col,
  27. DCTELEM * workspace);
  28. typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
  29. JDIMENSION start_col,
  30. FAST_FLOAT *workspace);
  31. typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM * divisors,
  32. DCTELEM * workspace);
  33. typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
  34. FAST_FLOAT * divisors,
  35. FAST_FLOAT * workspace);
  36. METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
  37. typedef struct {
  38. struct jpeg_forward_dct pub; /* public fields */
  39. /* Pointer to the DCT routine actually in use */
  40. forward_DCT_method_ptr dct;
  41. convsamp_method_ptr convsamp;
  42. quantize_method_ptr quantize;
  43. /* The actual post-DCT divisors --- not identical to the quant table
  44. * entries, because of scaling (especially for an unnormalized DCT).
  45. * Each table is given in normal array order.
  46. */
  47. DCTELEM * divisors[NUM_QUANT_TBLS];
  48. /* work area for FDCT subroutine */
  49. DCTELEM * workspace;
  50. #ifdef DCT_FLOAT_SUPPORTED
  51. /* Same as above for the floating-point case. */
  52. float_DCT_method_ptr float_dct;
  53. float_convsamp_method_ptr float_convsamp;
  54. float_quantize_method_ptr float_quantize;
  55. FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
  56. FAST_FLOAT * float_workspace;
  57. #endif
  58. } my_fdct_controller;
  59. typedef my_fdct_controller * my_fdct_ptr;
  60. #if BITS_IN_JSAMPLE == 8
  61. /*
  62. * Find the highest bit in an integer through binary search.
  63. */
  64. LOCAL(int)
  65. flss (UINT16 val)
  66. {
  67. int bit;
  68. bit = 16;
  69. if (!val)
  70. return 0;
  71. if (!(val & 0xff00)) {
  72. bit -= 8;
  73. val <<= 8;
  74. }
  75. if (!(val & 0xf000)) {
  76. bit -= 4;
  77. val <<= 4;
  78. }
  79. if (!(val & 0xc000)) {
  80. bit -= 2;
  81. val <<= 2;
  82. }
  83. if (!(val & 0x8000)) {
  84. bit -= 1;
  85. val <<= 1;
  86. }
  87. return bit;
  88. }
  89. /*
  90. * Compute values to do a division using reciprocal.
  91. *
  92. * This implementation is based on an algorithm described in
  93. * "How to optimize for the Pentium family of microprocessors"
  94. * (http://www.agner.org/assem/).
  95. * More information about the basic algorithm can be found in
  96. * the paper "Integer Division Using Reciprocals" by Robert Alverson.
  97. *
  98. * The basic idea is to replace x/d by x * d^-1. In order to store
  99. * d^-1 with enough precision we shift it left a few places. It turns
  100. * out that this algorithm gives just enough precision, and also fits
  101. * into DCTELEM:
  102. *
  103. * b = (the number of significant bits in divisor) - 1
  104. * r = (word size) + b
  105. * f = 2^r / divisor
  106. *
  107. * f will not be an integer for most cases, so we need to compensate
  108. * for the rounding error introduced:
  109. *
  110. * no fractional part:
  111. *
  112. * result = input >> r
  113. *
  114. * fractional part of f < 0.5:
  115. *
  116. * round f down to nearest integer
  117. * result = ((input + 1) * f) >> r
  118. *
  119. * fractional part of f > 0.5:
  120. *
  121. * round f up to nearest integer
  122. * result = (input * f) >> r
  123. *
  124. * This is the original algorithm that gives truncated results. But we
  125. * want properly rounded results, so we replace "input" with
  126. * "input + divisor/2".
  127. *
  128. * In order to allow SIMD implementations we also tweak the values to
  129. * allow the same calculation to be made at all times:
  130. *
  131. * dctbl[0] = f rounded to nearest integer
  132. * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
  133. * dctbl[2] = 1 << ((word size) * 2 - r)
  134. * dctbl[3] = r - (word size)
  135. *
  136. * dctbl[2] is for stupid instruction sets where the shift operation
  137. * isn't member wise (e.g. MMX).
  138. *
  139. * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
  140. * is that most SIMD implementations have a "multiply and store top
  141. * half" operation.
  142. *
  143. * Lastly, we store each of the values in their own table instead
  144. * of in a consecutive manner, yet again in order to allow SIMD
  145. * routines.
  146. */
  147. LOCAL(int)
  148. compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
  149. {
  150. UDCTELEM2 fq, fr;
  151. UDCTELEM c;
  152. int b, r;
  153. if (divisor == 1) {
  154. /* divisor == 1 means unquantized, so these reciprocal/correction/shift
  155. * values will cause the C quantization algorithm to act like the
  156. * identity function. Since only the C quantization algorithm is used in
  157. * these cases, the scale value is irrelevant.
  158. */
  159. dtbl[DCTSIZE2 * 0] = (DCTELEM) 1; /* reciprocal */
  160. dtbl[DCTSIZE2 * 1] = (DCTELEM) 0; /* correction */
  161. dtbl[DCTSIZE2 * 2] = (DCTELEM) 1; /* scale */
  162. dtbl[DCTSIZE2 * 3] = (DCTELEM) (-sizeof(DCTELEM) * 8); /* shift */
  163. return 0;
  164. }
  165. b = flss(divisor) - 1;
  166. r = sizeof(DCTELEM) * 8 + b;
  167. fq = ((UDCTELEM2)1 << r) / divisor;
  168. fr = ((UDCTELEM2)1 << r) % divisor;
  169. c = divisor / 2; /* for rounding */
  170. if (fr == 0) { /* divisor is power of two */
  171. /* fq will be one bit too large to fit in DCTELEM, so adjust */
  172. fq >>= 1;
  173. r--;
  174. } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
  175. c++;
  176. } else { /* fractional part is > 0.5 */
  177. fq++;
  178. }
  179. dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
  180. dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
  181. dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
  182. dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
  183. if(r <= 16) return 0;
  184. else return 1;
  185. }
  186. #endif
  187. /*
  188. * Initialize for a processing pass.
  189. * Verify that all referenced Q-tables are present, and set up
  190. * the divisor table for each one.
  191. * In the current implementation, DCT of all components is done during
  192. * the first pass, even if only some components will be output in the
  193. * first scan. Hence all components should be examined here.
  194. */
  195. METHODDEF(void)
  196. start_pass_fdctmgr (j_compress_ptr cinfo)
  197. {
  198. my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  199. int ci, qtblno, i;
  200. jpeg_component_info *compptr;
  201. JQUANT_TBL * qtbl;
  202. DCTELEM * dtbl;
  203. for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
  204. ci++, compptr++) {
  205. qtblno = compptr->quant_tbl_no;
  206. /* Make sure specified quantization table is present */
  207. if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
  208. cinfo->quant_tbl_ptrs[qtblno] == NULL)
  209. ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
  210. qtbl = cinfo->quant_tbl_ptrs[qtblno];
  211. /* Compute divisors for this quant table */
  212. /* We may do this more than once for same table, but it's not a big deal */
  213. switch (cinfo->dct_method) {
  214. #ifdef DCT_ISLOW_SUPPORTED
  215. case JDCT_ISLOW:
  216. /* For LL&M IDCT method, divisors are equal to raw quantization
  217. * coefficients multiplied by 8 (to counteract scaling).
  218. */
  219. if (fdct->divisors[qtblno] == NULL) {
  220. fdct->divisors[qtblno] = (DCTELEM *)
  221. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  222. (DCTSIZE2 * 4) * sizeof(DCTELEM));
  223. }
  224. dtbl = fdct->divisors[qtblno];
  225. for (i = 0; i < DCTSIZE2; i++) {
  226. #if BITS_IN_JSAMPLE == 8
  227. if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
  228. && fdct->quantize == jsimd_quantize)
  229. fdct->quantize = quantize;
  230. #else
  231. dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
  232. #endif
  233. }
  234. break;
  235. #endif
  236. #ifdef DCT_IFAST_SUPPORTED
  237. case JDCT_IFAST:
  238. {
  239. /* For AA&N IDCT method, divisors are equal to quantization
  240. * coefficients scaled by scalefactor[row]*scalefactor[col], where
  241. * scalefactor[0] = 1
  242. * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
  243. * We apply a further scale factor of 8.
  244. */
  245. #define CONST_BITS 14
  246. static const INT16 aanscales[DCTSIZE2] = {
  247. /* precomputed values scaled up by 14 bits */
  248. 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
  249. 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
  250. 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
  251. 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
  252. 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
  253. 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
  254. 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
  255. 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
  256. };
  257. SHIFT_TEMPS
  258. if (fdct->divisors[qtblno] == NULL) {
  259. fdct->divisors[qtblno] = (DCTELEM *)
  260. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  261. (DCTSIZE2 * 4) * sizeof(DCTELEM));
  262. }
  263. dtbl = fdct->divisors[qtblno];
  264. for (i = 0; i < DCTSIZE2; i++) {
  265. #if BITS_IN_JSAMPLE == 8
  266. if(!compute_reciprocal(
  267. DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
  268. (INT32) aanscales[i]),
  269. CONST_BITS-3), &dtbl[i])
  270. && fdct->quantize == jsimd_quantize)
  271. fdct->quantize = quantize;
  272. #else
  273. dtbl[i] = (DCTELEM)
  274. DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
  275. (INT32) aanscales[i]),
  276. CONST_BITS-3);
  277. #endif
  278. }
  279. }
  280. break;
  281. #endif
  282. #ifdef DCT_FLOAT_SUPPORTED
  283. case JDCT_FLOAT:
  284. {
  285. /* For float AA&N IDCT method, divisors are equal to quantization
  286. * coefficients scaled by scalefactor[row]*scalefactor[col], where
  287. * scalefactor[0] = 1
  288. * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
  289. * We apply a further scale factor of 8.
  290. * What's actually stored is 1/divisor so that the inner loop can
  291. * use a multiplication rather than a division.
  292. */
  293. FAST_FLOAT * fdtbl;
  294. int row, col;
  295. static const double aanscalefactor[DCTSIZE] = {
  296. 1.0, 1.387039845, 1.306562965, 1.175875602,
  297. 1.0, 0.785694958, 0.541196100, 0.275899379
  298. };
  299. if (fdct->float_divisors[qtblno] == NULL) {
  300. fdct->float_divisors[qtblno] = (FAST_FLOAT *)
  301. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  302. DCTSIZE2 * sizeof(FAST_FLOAT));
  303. }
  304. fdtbl = fdct->float_divisors[qtblno];
  305. i = 0;
  306. for (row = 0; row < DCTSIZE; row++) {
  307. for (col = 0; col < DCTSIZE; col++) {
  308. fdtbl[i] = (FAST_FLOAT)
  309. (1.0 / (((double) qtbl->quantval[i] *
  310. aanscalefactor[row] * aanscalefactor[col] * 8.0)));
  311. i++;
  312. }
  313. }
  314. }
  315. break;
  316. #endif
  317. default:
  318. ERREXIT(cinfo, JERR_NOT_COMPILED);
  319. break;
  320. }
  321. }
  322. }
  323. /*
  324. * Load data into workspace, applying unsigned->signed conversion.
  325. */
  326. METHODDEF(void)
  327. convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
  328. {
  329. register DCTELEM *workspaceptr;
  330. register JSAMPROW elemptr;
  331. register int elemr;
  332. workspaceptr = workspace;
  333. for (elemr = 0; elemr < DCTSIZE; elemr++) {
  334. elemptr = sample_data[elemr] + start_col;
  335. #if DCTSIZE == 8 /* unroll the inner loop */
  336. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  337. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  338. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  339. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  340. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  341. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  342. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  343. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  344. #else
  345. {
  346. register int elemc;
  347. for (elemc = DCTSIZE; elemc > 0; elemc--)
  348. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  349. }
  350. #endif
  351. }
  352. }
  353. /*
  354. * Quantize/descale the coefficients, and store into coef_blocks[].
  355. */
  356. METHODDEF(void)
  357. quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
  358. {
  359. int i;
  360. DCTELEM temp;
  361. JCOEFPTR output_ptr = coef_block;
  362. #if BITS_IN_JSAMPLE == 8
  363. UDCTELEM recip, corr;
  364. int shift;
  365. UDCTELEM2 product;
  366. for (i = 0; i < DCTSIZE2; i++) {
  367. temp = workspace[i];
  368. recip = divisors[i + DCTSIZE2 * 0];
  369. corr = divisors[i + DCTSIZE2 * 1];
  370. shift = divisors[i + DCTSIZE2 * 3];
  371. if (temp < 0) {
  372. temp = -temp;
  373. product = (UDCTELEM2)(temp + corr) * recip;
  374. product >>= shift + sizeof(DCTELEM)*8;
  375. temp = product;
  376. temp = -temp;
  377. } else {
  378. product = (UDCTELEM2)(temp + corr) * recip;
  379. product >>= shift + sizeof(DCTELEM)*8;
  380. temp = product;
  381. }
  382. output_ptr[i] = (JCOEF) temp;
  383. }
  384. #else
  385. register DCTELEM qval;
  386. for (i = 0; i < DCTSIZE2; i++) {
  387. qval = divisors[i];
  388. temp = workspace[i];
  389. /* Divide the coefficient value by qval, ensuring proper rounding.
  390. * Since C does not specify the direction of rounding for negative
  391. * quotients, we have to force the dividend positive for portability.
  392. *
  393. * In most files, at least half of the output values will be zero
  394. * (at default quantization settings, more like three-quarters...)
  395. * so we should ensure that this case is fast. On many machines,
  396. * a comparison is enough cheaper than a divide to make a special test
  397. * a win. Since both inputs will be nonnegative, we need only test
  398. * for a < b to discover whether a/b is 0.
  399. * If your machine's division is fast enough, define FAST_DIVIDE.
  400. */
  401. #ifdef FAST_DIVIDE
  402. #define DIVIDE_BY(a,b) a /= b
  403. #else
  404. #define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
  405. #endif
  406. if (temp < 0) {
  407. temp = -temp;
  408. temp += qval>>1; /* for rounding */
  409. DIVIDE_BY(temp, qval);
  410. temp = -temp;
  411. } else {
  412. temp += qval>>1; /* for rounding */
  413. DIVIDE_BY(temp, qval);
  414. }
  415. output_ptr[i] = (JCOEF) temp;
  416. }
  417. #endif
  418. }
  419. /*
  420. * Perform forward DCT on one or more blocks of a component.
  421. *
  422. * The input samples are taken from the sample_data[] array starting at
  423. * position start_row/start_col, and moving to the right for any additional
  424. * blocks. The quantized coefficients are returned in coef_blocks[].
  425. */
  426. METHODDEF(void)
  427. forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
  428. JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
  429. JDIMENSION start_row, JDIMENSION start_col,
  430. JDIMENSION num_blocks)
  431. /* This version is used for integer DCT implementations. */
  432. {
  433. /* This routine is heavily used, so it's worth coding it tightly. */
  434. my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  435. DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
  436. DCTELEM * workspace;
  437. JDIMENSION bi;
  438. /* Make sure the compiler doesn't look up these every pass */
  439. forward_DCT_method_ptr do_dct = fdct->dct;
  440. convsamp_method_ptr do_convsamp = fdct->convsamp;
  441. quantize_method_ptr do_quantize = fdct->quantize;
  442. workspace = fdct->workspace;
  443. sample_data += start_row; /* fold in the vertical offset once */
  444. for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
  445. /* Load data into workspace, applying unsigned->signed conversion */
  446. (*do_convsamp) (sample_data, start_col, workspace);
  447. /* Perform the DCT */
  448. (*do_dct) (workspace);
  449. /* Quantize/descale the coefficients, and store into coef_blocks[] */
  450. (*do_quantize) (coef_blocks[bi], divisors, workspace);
  451. }
  452. }
  453. #ifdef DCT_FLOAT_SUPPORTED
  454. METHODDEF(void)
  455. convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
  456. {
  457. register FAST_FLOAT *workspaceptr;
  458. register JSAMPROW elemptr;
  459. register int elemr;
  460. workspaceptr = workspace;
  461. for (elemr = 0; elemr < DCTSIZE; elemr++) {
  462. elemptr = sample_data[elemr] + start_col;
  463. #if DCTSIZE == 8 /* unroll the inner loop */
  464. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  465. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  466. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  467. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  468. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  469. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  470. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  471. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  472. #else
  473. {
  474. register int elemc;
  475. for (elemc = DCTSIZE; elemc > 0; elemc--)
  476. *workspaceptr++ = (FAST_FLOAT)
  477. (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  478. }
  479. #endif
  480. }
  481. }
  482. METHODDEF(void)
  483. quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
  484. {
  485. register FAST_FLOAT temp;
  486. register int i;
  487. register JCOEFPTR output_ptr = coef_block;
  488. for (i = 0; i < DCTSIZE2; i++) {
  489. /* Apply the quantization and scaling factor */
  490. temp = workspace[i] * divisors[i];
  491. /* Round to nearest integer.
  492. * Since C does not specify the direction of rounding for negative
  493. * quotients, we have to force the dividend positive for portability.
  494. * The maximum coefficient size is +-16K (for 12-bit data), so this
  495. * code should work for either 16-bit or 32-bit ints.
  496. */
  497. output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
  498. }
  499. }
  500. METHODDEF(void)
  501. forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
  502. JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
  503. JDIMENSION start_row, JDIMENSION start_col,
  504. JDIMENSION num_blocks)
  505. /* This version is used for floating-point DCT implementations. */
  506. {
  507. /* This routine is heavily used, so it's worth coding it tightly. */
  508. my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  509. FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
  510. FAST_FLOAT * workspace;
  511. JDIMENSION bi;
  512. /* Make sure the compiler doesn't look up these every pass */
  513. float_DCT_method_ptr do_dct = fdct->float_dct;
  514. float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
  515. float_quantize_method_ptr do_quantize = fdct->float_quantize;
  516. workspace = fdct->float_workspace;
  517. sample_data += start_row; /* fold in the vertical offset once */
  518. for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
  519. /* Load data into workspace, applying unsigned->signed conversion */
  520. (*do_convsamp) (sample_data, start_col, workspace);
  521. /* Perform the DCT */
  522. (*do_dct) (workspace);
  523. /* Quantize/descale the coefficients, and store into coef_blocks[] */
  524. (*do_quantize) (coef_blocks[bi], divisors, workspace);
  525. }
  526. }
  527. #endif /* DCT_FLOAT_SUPPORTED */
  528. /*
  529. * Initialize FDCT manager.
  530. */
  531. GLOBAL(void)
  532. jinit_forward_dct (j_compress_ptr cinfo)
  533. {
  534. my_fdct_ptr fdct;
  535. int i;
  536. fdct = (my_fdct_ptr)
  537. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  538. sizeof(my_fdct_controller));
  539. cinfo->fdct = (struct jpeg_forward_dct *) fdct;
  540. fdct->pub.start_pass = start_pass_fdctmgr;
  541. /* First determine the DCT... */
  542. switch (cinfo->dct_method) {
  543. #ifdef DCT_ISLOW_SUPPORTED
  544. case JDCT_ISLOW:
  545. fdct->pub.forward_DCT = forward_DCT;
  546. if (jsimd_can_fdct_islow())
  547. fdct->dct = jsimd_fdct_islow;
  548. else
  549. fdct->dct = jpeg_fdct_islow;
  550. break;
  551. #endif
  552. #ifdef DCT_IFAST_SUPPORTED
  553. case JDCT_IFAST:
  554. fdct->pub.forward_DCT = forward_DCT;
  555. if (jsimd_can_fdct_ifast())
  556. fdct->dct = jsimd_fdct_ifast;
  557. else
  558. fdct->dct = jpeg_fdct_ifast;
  559. break;
  560. #endif
  561. #ifdef DCT_FLOAT_SUPPORTED
  562. case JDCT_FLOAT:
  563. fdct->pub.forward_DCT = forward_DCT_float;
  564. if (jsimd_can_fdct_float())
  565. fdct->float_dct = jsimd_fdct_float;
  566. else
  567. fdct->float_dct = jpeg_fdct_float;
  568. break;
  569. #endif
  570. default:
  571. ERREXIT(cinfo, JERR_NOT_COMPILED);
  572. break;
  573. }
  574. /* ...then the supporting stages. */
  575. switch (cinfo->dct_method) {
  576. #ifdef DCT_ISLOW_SUPPORTED
  577. case JDCT_ISLOW:
  578. #endif
  579. #ifdef DCT_IFAST_SUPPORTED
  580. case JDCT_IFAST:
  581. #endif
  582. #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
  583. if (jsimd_can_convsamp())
  584. fdct->convsamp = jsimd_convsamp;
  585. else
  586. fdct->convsamp = convsamp;
  587. if (jsimd_can_quantize())
  588. fdct->quantize = jsimd_quantize;
  589. else
  590. fdct->quantize = quantize;
  591. break;
  592. #endif
  593. #ifdef DCT_FLOAT_SUPPORTED
  594. case JDCT_FLOAT:
  595. if (jsimd_can_convsamp_float())
  596. fdct->float_convsamp = jsimd_convsamp_float;
  597. else
  598. fdct->float_convsamp = convsamp_float;
  599. if (jsimd_can_quantize_float())
  600. fdct->float_quantize = jsimd_quantize_float;
  601. else
  602. fdct->float_quantize = quantize_float;
  603. break;
  604. #endif
  605. default:
  606. ERREXIT(cinfo, JERR_NOT_COMPILED);
  607. break;
  608. }
  609. /* Allocate workspace memory */
  610. #ifdef DCT_FLOAT_SUPPORTED
  611. if (cinfo->dct_method == JDCT_FLOAT)
  612. fdct->float_workspace = (FAST_FLOAT *)
  613. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  614. sizeof(FAST_FLOAT) * DCTSIZE2);
  615. else
  616. #endif
  617. fdct->workspace = (DCTELEM *)
  618. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  619. sizeof(DCTELEM) * DCTSIZE2);
  620. /* Mark divisor tables unallocated */
  621. for (i = 0; i < NUM_QUANT_TBLS; i++) {
  622. fdct->divisors[i] = NULL;
  623. #ifdef DCT_FLOAT_SUPPORTED
  624. fdct->float_divisors[i] = NULL;
  625. #endif
  626. }
  627. }