c64xdec.c 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. /********************************************************************
  2. * *
  3. * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
  4. * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  5. * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  6. * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. * *
  8. * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
  9. * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  10. * *
  11. ********************************************************************
  12. function:
  13. last mod: $Id$
  14. ********************************************************************/
  15. #include "c64xdec.h"
  16. #if defined(OC_C64X_ASM)
  17. void oc_dec_accel_init_c64x(oc_dec_ctx *_dec){
  18. # if defined(OC_DEC_USE_VTABLE)
  19. _dec->opt_vtable.dc_unpredict_mcu_plane=oc_dec_dc_unpredict_mcu_plane_c64x;
  20. # endif
  21. }
/*Undo the DC prediction in a single plane of an MCU (one or two super block
   rows).
  As a side effect, the number of coded and uncoded fragments in this plane of
   the MCU is also computed.
  _dec:  The decoding context.
  _pipe: The decoder pipeline state tracking the current MCU.
  _pli:  The color plane index.*/
void oc_dec_dc_unpredict_mcu_plane_c64x(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  const oc_fragment_plane *fplane;
  oc_fragment             *frags;
  int                     *pred_last;
  ptrdiff_t                ncoded_fragis;
  ptrdiff_t                fragi;
  int                      fragx;
  int                      fragy;
  int                      fragy0;
  int                      fragy_end;
  int                      nhfrags;
  /*Compute the first and last fragment row of the current MCU for this
     plane.*/
  fplane=_dec->state.fplanes+_pli;
  fragy0=_pipe->fragy0[_pli];
  fragy_end=_pipe->fragy_end[_pli];
  nhfrags=fplane->nhfrags;
  /*pred_last[] holds the last predicted DC value per reference frame.*/
  pred_last=_pipe->pred_last[_pli];
  frags=_dec->state.frags;
  ncoded_fragis=0;
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    if(fragy==0){
      /*For the first row, all of the cases reduce to just using the previous
         predictor for the same reference frame.*/
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        int coded;
        int refi;
        /*The TI compiler refuses to pipeline this if we put it in an
           if(coded) block.
          We can do the loads unconditionally, which helps move them earlier.
          We do the store unconditionally too, because if we use a conditional
           store, the compiler propagates the condition back to the operations
           the store depended on, presumably to reduce cache pressure by
           eliminating dead loads.
          However, these loads are "free" in the cache sense, since reading
           the coded flag brings in all four bytes anyway, and starting the
           loads before we know the coded flag saves 6 cycles.*/
        refi=frags[fragi].refi;
        coded=frags[fragi].coded;
        /*coded is 0 or 1, so dc&-coded is dc for coded fragments and 0 for
           uncoded ones; uncoded fragments thus leave pred_last[refi] and
           their own dc (set to the predictor) untouched in effect.*/
        frags[fragi].dc=pred_last[refi]+=frags[fragi].dc&-coded;
        ncoded_fragis+=coded;
      }
    }
    else{
      oc_fragment *u_frags;
      int          l_ref;
      int          ul_ref;
      int          u_ref;
      /*u_frags points at the row of fragments above the current one.*/
      u_frags=frags-nhfrags;
      /*Reference indices of the left, up-left, and up neighbors; -1 means
         "no neighbor" (off the edge), which can never match refi.*/
      l_ref=-1;
      ul_ref=-1;
      u_ref=u_frags[fragi].refi;
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        int ur_ref;
        int refi;
        if(fragx+1>=nhfrags)ur_ref=-1;
        else ur_ref=u_frags[fragi+1].refi;
        refi=frags[fragi].refi;
        if(frags[fragi].coded){
          /*Prediction weights indexed by which neighbors use the same
             reference frame (see pflags below).
            Each int packs two signed 16-bit weights for _dotp2: [0] applies
             to (UL,U) and [1] to (UR,L).
            The weights in each row sum to 128, matching the /128 below;
             e.g. row 7 (L|UL|U) is (-104*UL+116*U+116*L)/128 and row 14
             (UL|U|UR) is (24*UL+80*U+24*UR)/128, per the Theora spec.*/
          static const int OC_PRED_SCALE[16][2]={
            {0x00000000,0x00000000},
            {0x00000000,0x00000080},
            {0x00800000,0x00000000},
            {0x00000000,0x00000080},
            {0x00000080,0x00000000},
            {0x00000040,0x00000040},
            {0x00000080,0x00000000},
            {0xFF980074,0x00000074},
            {0x00000000,0x00800000},
            {0x00000000,0x0035004B},
            {0x00400000,0x00400000},
            {0x00000000,0x0035004B},
            {0x00000080,0x00000000},
            {0x00000000,0x0035004B},
            {0x00180050,0x00180000},
            {0xFF980074,0x00000074},
          };
          ogg_int16_t p0;
          ogg_int16_t p1;
          ogg_int16_t p2;
          ogg_int16_t p3;
          int         pred;
          int         pflags;
          /*29 cycles.*/
          /*HACK: This p0 reference could potentially be out of bounds (the
             up-left neighbor of the first fragment in a row), but because we
             know what allocator Theora is using, we know it can't segfault;
             an off-the-edge neighbor's ref never matches, so the bogus value
             is never actually used.*/
          p0=u_frags[fragi-1].dc;
          p1=u_frags[fragi].dc;
          p2=u_frags[fragi+1].dc;
          p3=frags[fragi-1].dc;
          /*Build a 4-bit mask of which neighbors share this fragment's
             reference frame: the low bytes of (UR,U,UL,L) are packed into one
             word and compared against refi replicated four times, so bit 0 is
             L, bit 1 UL, bit 2 U, and bit 3 UR — the index convention
             OC_PRED_SCALE expects.*/
          pflags=_cmpeq4(_packl4(_pack2(ur_ref,u_ref),_pack2(ul_ref,l_ref)),
           _packl4(_pack2(refi,refi),_pack2(refi,refi)));
          /*No neighbor matches: fall back to the last DC value decoded for
             this reference frame.*/
          if(pflags==0)pred=pred_last[refi];
          else{
            /*Weighted average of the matching neighbors (non-matching ones
               get weight 0 from the table).*/
            pred=(_dotp2(_pack2(p0,p1),OC_PRED_SCALE[pflags][0])
             +_dotp2(_pack2(p2,p3),OC_PRED_SCALE[pflags][1]))/128;
            /*When L, UL, and U are all available, the spec requires an
               outlier check: if the prediction strays too far from a
               neighbor, use that neighbor directly (U, then L, then UL).*/
            if((pflags&7)==7){
              if(abs(pred-p1)>128)pred=p1;
              else if(abs(pred-p3)>128)pred=p3;
              else if(abs(pred-p0)>128)pred=p0;
            }
          }
          /*The coded DC coefficient is stored as a residual; add the
             prediction and remember it as the last DC for this reference.*/
          pred_last[refi]=frags[fragi].dc+=pred;
          ncoded_fragis++;
          l_ref=refi;
        }
        /*An uncoded fragment breaks the left-neighbor chain.*/
        else l_ref=-1;
        /*Slide the neighbor window right for the next fragment.*/
        ul_ref=u_ref;
        u_ref=ur_ref;
      }
    }
  }
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
}
  146. #endif