encoder_idct.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. /********************************************************************
  2. * *
  3. * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
  4. * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  5. * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  6. * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. * *
  8. * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
  9. * by the Xiph.Org Foundation http://www.xiph.org/ *
  10. * *
  11. ********************************************************************
  12. function: C implementation of the Theora iDCT
  13. last mod: $Id: encoder_idct.c 14714 2008-04-12 01:04:43Z giles $
  14. ********************************************************************/
  15. #include <string.h>
  16. #include "codec_internal.h"
  17. #include "quant_lookup.h"
  18. #define IdctAdjustBeforeShift 8
  19. /* cos(n*pi/16) or sin((8-n)*pi/16), scaled by 65536 */
  20. #define xC1S7 64277
  21. #define xC2S6 60547
  22. #define xC3S5 54491
  23. #define xC4S4 46341
  24. #define xC5S3 36410
  25. #define xC6S2 25080
  26. #define xC7S1 12785
  27. /* compute the 16 bit signed 1D inverse DCT - spec version */
  28. /*
  29. static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
  30. ogg_int32_t t[8], r;
  31. ogg_int16_t *y = InputData;
  32. ogg_int16_t *x = OutputData;
  33. t[0] = y[0] + y[4];
  34. t[0] &= 0xffff;
  35. t[0] = (xC4S4 * t[0]) >> 16;
  36. t[1] = y[0] - y[4];
  37. t[1] &= 0xffff;
  38. t[1] = (xC4S4 * t[1]) >> 16;
  39. t[2] = ((xC6S2 * y[2]) >> 16) - ((xC2S6 * y[6]) >> 16);
  40. t[3] = ((xC2S6 * y[2]) >> 16) + ((xC6S2 * y[6]) >> 16);
  41. t[4] = ((xC7S1 * y[1]) >> 16) - ((xC1S7 * y[7]) >> 16);
  42. t[5] = ((xC3S5 * y[5]) >> 16) - ((xC5S3 * y[3]) >> 16);
  43. t[6] = ((xC5S3 * y[5]) >> 16) + ((xC3S5 * y[3]) >> 16);
  44. t[7] = ((xC1S7 * y[1]) >> 16) + ((xC7S1 * y[7]) >> 16);
  45. r = t[4] + t[5];
  46. t[5] = t[4] - t[5];
  47. t[5] &= 0xffff;
  48. t[5] = (xC4S4 * (-t[5])) >> 16;
  49. t[4] = r;
  50. r = t[7] + t[6];
  51. t[6] = t[7] - t[6];
  52. t[6] &= 0xffff;
  53. t[6] = (xC4S4 * t[6]) >> 16;
  54. t[7] = r;
  55. r = t[0] + t[3];
  56. t[3] = t[0] - t[3];
  57. t[0] = r;
  58. r = t[1] + t[2];
  59. t[2] = t[1] - t[2];
  60. t[1] = r;
  61. r = t[6] + t[5];
  62. t[5] = t[6] - t[5];
  63. t[6] = r;
  64. r = t[0] + t[7];
  65. r &= 0xffff;
  66. x[0] = r;
  67. r = t[1] + t[6];
  68. r &= 0xffff;
  69. x[1] = r;
  70. r = t[2] + t[5];
  71. r &= 0xffff;
  72. x[2] = r;
  73. r = t[3] + t[4];
  74. r &= 0xffff;
  75. x[3] = r;
  76. r = t[3] - t[4];
  77. r &= 0xffff;
  78. x[4] = r;
  79. r = t[2] - t[5];
  80. r &= 0xffff;
  81. x[5] = r;
  82. r = t[1] - t[6];
  83. r &= 0xffff;
  84. x[6] = r;
  85. r = t[0] - t[7];
  86. r &= 0xffff;
  87. x[7] = r;
  88. }
  89. */
  90. static void dequant_slow( ogg_int16_t * dequant_coeffs,
  91. ogg_int16_t * quantized_list,
  92. ogg_int32_t * DCT_block) {
  93. int i;
  94. for(i=0;i<64;i++)
  95. DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
  96. }
  97. void IDctSlow__c( Q_LIST_ENTRY * InputData,
  98. ogg_int16_t *QuantMatrix,
  99. ogg_int16_t * OutputData ) {
  100. ogg_int32_t IntermediateData[64];
  101. ogg_int32_t * ip = IntermediateData;
  102. ogg_int16_t * op = OutputData;
  103. ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
  104. ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
  105. ogg_int32_t t1, t2;
  106. int loop;
  107. dequant_slow( QuantMatrix, InputData, IntermediateData);
  108. /* Inverse DCT on the rows now */
  109. for ( loop = 0; loop < 8; loop++){
  110. /* Check for non-zero values */
  111. if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
  112. t1 = (xC1S7 * ip[1]);
  113. t2 = (xC7S1 * ip[7]);
  114. t1 >>= 16;
  115. t2 >>= 16;
  116. _A = t1 + t2;
  117. t1 = (xC7S1 * ip[1]);
  118. t2 = (xC1S7 * ip[7]);
  119. t1 >>= 16;
  120. t2 >>= 16;
  121. _B = t1 - t2;
  122. t1 = (xC3S5 * ip[3]);
  123. t2 = (xC5S3 * ip[5]);
  124. t1 >>= 16;
  125. t2 >>= 16;
  126. _C = t1 + t2;
  127. t1 = (xC3S5 * ip[5]);
  128. t2 = (xC5S3 * ip[3]);
  129. t1 >>= 16;
  130. t2 >>= 16;
  131. _D = t1 - t2;
  132. t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
  133. t1 >>= 16;
  134. _Ad = t1;
  135. t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
  136. t1 >>= 16;
  137. _Bd = t1;
  138. _Cd = _A + _C;
  139. _Dd = _B + _D;
  140. t1 = (xC4S4 * (ogg_int16_t)(ip[0] + ip[4]));
  141. t1 >>= 16;
  142. _E = t1;
  143. t1 = (xC4S4 * (ogg_int16_t)(ip[0] - ip[4]));
  144. t1 >>= 16;
  145. _F = t1;
  146. t1 = (xC2S6 * ip[2]);
  147. t2 = (xC6S2 * ip[6]);
  148. t1 >>= 16;
  149. t2 >>= 16;
  150. _G = t1 + t2;
  151. t1 = (xC6S2 * ip[2]);
  152. t2 = (xC2S6 * ip[6]);
  153. t1 >>= 16;
  154. t2 >>= 16;
  155. _H = t1 - t2;
  156. _Ed = _E - _G;
  157. _Gd = _E + _G;
  158. _Add = _F + _Ad;
  159. _Bdd = _Bd - _H;
  160. _Fd = _F - _Ad;
  161. _Hd = _Bd + _H;
  162. /* Final sequence of operations over-write original inputs. */
  163. ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0);
  164. ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0);
  165. ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0);
  166. ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0);
  167. ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0);
  168. ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0);
  169. ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0);
  170. ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0);
  171. }
  172. ip += 8; /* next row */
  173. }
  174. ip = IntermediateData;
  175. for ( loop = 0; loop < 8; loop++){
  176. /* Check for non-zero values (bitwise or faster than ||) */
  177. if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
  178. ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
  179. t1 = (xC1S7 * ip[1*8]);
  180. t2 = (xC7S1 * ip[7*8]);
  181. t1 >>= 16;
  182. t2 >>= 16;
  183. _A = t1 + t2;
  184. t1 = (xC7S1 * ip[1*8]);
  185. t2 = (xC1S7 * ip[7*8]);
  186. t1 >>= 16;
  187. t2 >>= 16;
  188. _B = t1 - t2;
  189. t1 = (xC3S5 * ip[3*8]);
  190. t2 = (xC5S3 * ip[5*8]);
  191. t1 >>= 16;
  192. t2 >>= 16;
  193. _C = t1 + t2;
  194. t1 = (xC3S5 * ip[5*8]);
  195. t2 = (xC5S3 * ip[3*8]);
  196. t1 >>= 16;
  197. t2 >>= 16;
  198. _D = t1 - t2;
  199. t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
  200. t1 >>= 16;
  201. _Ad = t1;
  202. t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
  203. t1 >>= 16;
  204. _Bd = t1;
  205. _Cd = _A + _C;
  206. _Dd = _B + _D;
  207. t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] + ip[4*8]));
  208. t1 >>= 16;
  209. _E = t1;
  210. t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] - ip[4*8]));
  211. t1 >>= 16;
  212. _F = t1;
  213. t1 = (xC2S6 * ip[2*8]);
  214. t2 = (xC6S2 * ip[6*8]);
  215. t1 >>= 16;
  216. t2 >>= 16;
  217. _G = t1 + t2;
  218. t1 = (xC6S2 * ip[2*8]);
  219. t2 = (xC2S6 * ip[6*8]);
  220. t1 >>= 16;
  221. t2 >>= 16;
  222. _H = t1 - t2;
  223. _Ed = _E - _G;
  224. _Gd = _E + _G;
  225. _Add = _F + _Ad;
  226. _Bdd = _Bd - _H;
  227. _Fd = _F - _Ad;
  228. _Hd = _Bd + _H;
  229. _Gd += IdctAdjustBeforeShift;
  230. _Add += IdctAdjustBeforeShift;
  231. _Ed += IdctAdjustBeforeShift;
  232. _Fd += IdctAdjustBeforeShift;
  233. /* Final sequence of operations over-write original inputs. */
  234. op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4);
  235. op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4);
  236. op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4);
  237. op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4);
  238. op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4);
  239. op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4);
  240. op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4);
  241. op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4);
  242. }else{
  243. op[0*8] = 0;
  244. op[7*8] = 0;
  245. op[1*8] = 0;
  246. op[2*8] = 0;
  247. op[3*8] = 0;
  248. op[4*8] = 0;
  249. op[5*8] = 0;
  250. op[6*8] = 0;
  251. }
  252. ip++; /* next column */
  253. op++;
  254. }
  255. }
  256. /************************
  257. x x x x 0 0 0 0
  258. x x x 0 0 0 0 0
  259. x x 0 0 0 0 0 0
  260. x 0 0 0 0 0 0 0
  261. 0 0 0 0 0 0 0 0
  262. 0 0 0 0 0 0 0 0
  263. 0 0 0 0 0 0 0 0
  264. 0 0 0 0 0 0 0 0
  265. *************************/
  266. static void dequant_slow10( ogg_int16_t * dequant_coeffs,
  267. ogg_int16_t * quantized_list,
  268. ogg_int32_t * DCT_block){
  269. int i;
  270. memset(DCT_block,0, 128);
  271. for(i=0;i<10;i++)
  272. DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
  273. }
  274. void IDct10__c( Q_LIST_ENTRY * InputData,
  275. ogg_int16_t *QuantMatrix,
  276. ogg_int16_t * OutputData ){
  277. ogg_int32_t IntermediateData[64];
  278. ogg_int32_t * ip = IntermediateData;
  279. ogg_int16_t * op = OutputData;
  280. ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
  281. ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
  282. ogg_int32_t t1, t2;
  283. int loop;
  284. dequant_slow10( QuantMatrix, InputData, IntermediateData);
  285. /* Inverse DCT on the rows now */
  286. for ( loop = 0; loop < 4; loop++){
  287. /* Check for non-zero values */
  288. if ( ip[0] | ip[1] | ip[2] | ip[3] ){
  289. t1 = (xC1S7 * ip[1]);
  290. t1 >>= 16;
  291. _A = t1;
  292. t1 = (xC7S1 * ip[1]);
  293. t1 >>= 16;
  294. _B = t1 ;
  295. t1 = (xC3S5 * ip[3]);
  296. t1 >>= 16;
  297. _C = t1;
  298. t2 = (xC5S3 * ip[3]);
  299. t2 >>= 16;
  300. _D = -t2;
  301. t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
  302. t1 >>= 16;
  303. _Ad = t1;
  304. t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
  305. t1 >>= 16;
  306. _Bd = t1;
  307. _Cd = _A + _C;
  308. _Dd = _B + _D;
  309. t1 = (xC4S4 * ip[0] );
  310. t1 >>= 16;
  311. _E = t1;
  312. _F = t1;
  313. t1 = (xC2S6 * ip[2]);
  314. t1 >>= 16;
  315. _G = t1;
  316. t1 = (xC6S2 * ip[2]);
  317. t1 >>= 16;
  318. _H = t1 ;
  319. _Ed = _E - _G;
  320. _Gd = _E + _G;
  321. _Add = _F + _Ad;
  322. _Bdd = _Bd - _H;
  323. _Fd = _F - _Ad;
  324. _Hd = _Bd + _H;
  325. /* Final sequence of operations over-write original inputs. */
  326. ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0);
  327. ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0);
  328. ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0);
  329. ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0);
  330. ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0);
  331. ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0);
  332. ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0);
  333. ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0);
  334. }
  335. ip += 8; /* next row */
  336. }
  337. ip = IntermediateData;
  338. for ( loop = 0; loop < 8; loop++) {
  339. /* Check for non-zero values (bitwise or faster than ||) */
  340. if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] ) {
  341. t1 = (xC1S7 * ip[1*8]);
  342. t1 >>= 16;
  343. _A = t1 ;
  344. t1 = (xC7S1 * ip[1*8]);
  345. t1 >>= 16;
  346. _B = t1 ;
  347. t1 = (xC3S5 * ip[3*8]);
  348. t1 >>= 16;
  349. _C = t1 ;
  350. t2 = (xC5S3 * ip[3*8]);
  351. t2 >>= 16;
  352. _D = - t2;
  353. t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
  354. t1 >>= 16;
  355. _Ad = t1;
  356. t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
  357. t1 >>= 16;
  358. _Bd = t1;
  359. _Cd = _A + _C;
  360. _Dd = _B + _D;
  361. t1 = (xC4S4 * ip[0*8]);
  362. t1 >>= 16;
  363. _E = t1;
  364. _F = t1;
  365. t1 = (xC2S6 * ip[2*8]);
  366. t1 >>= 16;
  367. _G = t1;
  368. t1 = (xC6S2 * ip[2*8]);
  369. t1 >>= 16;
  370. _H = t1;
  371. _Ed = _E - _G;
  372. _Gd = _E + _G;
  373. _Add = _F + _Ad;
  374. _Bdd = _Bd - _H;
  375. _Fd = _F - _Ad;
  376. _Hd = _Bd + _H;
  377. _Gd += IdctAdjustBeforeShift;
  378. _Add += IdctAdjustBeforeShift;
  379. _Ed += IdctAdjustBeforeShift;
  380. _Fd += IdctAdjustBeforeShift;
  381. /* Final sequence of operations over-write original inputs. */
  382. op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4);
  383. op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4);
  384. op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4);
  385. op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4);
  386. op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4);
  387. op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4);
  388. op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4);
  389. op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4);
  390. }else{
  391. op[0*8] = 0;
  392. op[7*8] = 0;
  393. op[1*8] = 0;
  394. op[2*8] = 0;
  395. op[3*8] = 0;
  396. op[4*8] = 0;
  397. op[5*8] = 0;
  398. op[6*8] = 0;
  399. }
  400. ip++; /* next column */
  401. op++;
  402. }
  403. }
  404. /***************************
  405. x 0 0 0 0 0 0 0
  406. 0 0 0 0 0 0 0 0
  407. 0 0 0 0 0 0 0 0
  408. 0 0 0 0 0 0 0 0
  409. 0 0 0 0 0 0 0 0
  410. 0 0 0 0 0 0 0 0
  411. 0 0 0 0 0 0 0 0
  412. 0 0 0 0 0 0 0 0
  413. **************************/
  414. void IDct1( Q_LIST_ENTRY * InputData,
  415. ogg_int16_t *QuantMatrix,
  416. ogg_int16_t * OutputData ){
  417. int loop;
  418. ogg_int16_t OutD;
  419. OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5);
  420. for(loop=0;loop<64;loop++)
  421. OutputData[loop]=OutD;
  422. }
  423. void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
  424. {
  425. funcs->IDctSlow = IDctSlow__c;
  426. funcs->IDct10 = IDct10__c;
  427. funcs->IDct3 = IDct10__c;
  428. #if defined(USE_ASM)
  429. // todo: make mmx encoder idct for MSC one day...
  430. #if !defined (_MSC_VER)
  431. if (cpu_flags & OC_CPU_X86_MMX) {
  432. dsp_mmx_idct_init(funcs);
  433. }
  434. #endif
  435. #endif
  436. }