armbits.asm 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. #ifdef OC_ARM_ASM
  2. @********************************************************************
  3. @* *
  4. @* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
  5. @* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  6. @* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  7. @* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  8. @* *
  9. @* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 *
  10. @* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  11. @* *
  12. @********************************************************************
  13. @
  14. @ function:
  15. @ last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $
  16. @
  17. @********************************************************************
  @ Code section, aligned to a 4-byte (2^2) boundary.
  .text
  .p2align 2

  @ Entry points exported from this file.
  .globl  _oc_pack_read_arm
  .globl  _oc_pack_read1_arm
  .globl  _oc_huff_token_decode_arm
  22. @ .type oc_pack_read1_arm, %function; oc_pack_read1_arm: @ PROC
  @ _oc_pack_read1_arm: read a single bit from the bit packer.
  @   In:    r0 = oc_pack_buf *_b
  @   Out:   r0 = window>>31 (the top bit of the window)
  @   Roles: r2 = window, r3 = available, ip (r12) = _b+8, the address of the
  @          {window, available} pair.
  @ When fewer than one bit is available, control transfers to
  @  oc_pack_read1_refill with r0 = _b, r2 = window, r3 = available-1 and
  @  ip = _b+8 still live; that path performs the return.
  _oc_pack_read1_arm:
          add     ip, r0, #8              @ ip = address of window/available
          ldmia   ip, {r2, r3}            @ r2 = window, r3 = available
          subs    r3, r3, #1              @ r3 = available-1, available<1 => LT
          blt     oc_pack_read1_refill    @ nothing buffered: take the slow path
          lsr     r0, r2, #31             @ r0 = window>>31
          lsl     r2, r2, #1              @ r2 = window<<=1 (consume the bit)
          stmia   ip, {r2, r3}            @ window = r2, available = r3
          mov     pc, lr                  @ return to caller
  36. @ .size oc_pack_read1_arm, .-oc_pack_read1_arm @ ENDP
  37. @ .type oc_pack_read_arm, %function; oc_pack_read_arm: @ PROC
  @ _oc_pack_read_arm: read _bits bits from the bit packer.
  @   In:    r0 = oc_pack_buf *_b
  @          r1 = int _bits
  @   Out:   r0 = window>>(32-_bits)
  @   Roles: r2 = window, r3 = available, ip (r12) = _b+8.
  @          The refill path also uses r10 = stop, r11 = ptr and lr as a scratch
  @          byte; r10, r11 and lr are pushed on entry to the refill path and
  @          popped on return.
  @ Buffer words as accessed here: [_b] = stop, [_b,#4] = ptr, [_b,#8] = window,
  @  [_b,#12] = available, [_b,#16] = eof.
  _oc_pack_read_arm:
          add     ip, r0, #8              @ ip = address of window/available
          ldmia   ip, {r2, r3}            @ r2 = window, r3 = available
          subs    r3, r3, r1              @ r3 = available-_bits, available<_bits => LT
          blt     oc_pack_read_refill
          rsb     r0, r1, #32             @ r0 = 32-_bits
          lsr     r0, r2, r0              @ r0 = window>>(32-_bits)
          lsl     r2, r2, r1              @ r2 = window<<=_bits
          stmia   ip, {r2, r3}            @ window = r2, available = r3
          mov     pc, lr                  @ return to caller

  @ We need to refill window.
  @ _oc_pack_read1_arm enters at oc_pack_read1_refill with r0 = _b, r2 = window,
  @  r3 = available-1 and ip = _b+8; it only needs _bits forced to 1.
  oc_pack_read1_refill:
          mov     r1, #1                  @ _bits = 1
  oc_pack_read_refill:
          push    {r10, r11, lr}
          ldmia   r0, {r10, r11}          @ r10 = stop, r11 = ptr
          rsb     r0, r1, #32             @ r0 = 32-_bits
          rsb     r3, r3, r0              @ r3 = (32-_bits)-(available-_bits) = 32-available
  @ We can use unsigned compares for both the pointers and for available
  @  (allowing us to chain condition codes) because available will never be
  @  larger than 32 (or we would not be here), and thus 32-available will never
  @  be negative.
  @ Each of the four unrolled steps below loads one byte while ptr<stop and at
  @  least 8 bits of the window are still free; once either test fails the HI
  @  condition is lost and every remaining step is skipped.
          cmp     r10, r11                @ ptr<stop => HI
          cmphi   r3, #7                  @ available<=24 => HI
          ldrbhi  lr, [r11], #1           @ lr = *ptr++
          subhi   r3, r3, #8              @ available += 8
          orrhi   r2, r2, lr, lsl r3      @ window |= lr<<(32-available)
          cmphi   r10, r11                @ ptr<stop => HI
          cmphi   r3, #7                  @ available<=24 => HI
          ldrbhi  lr, [r11], #1           @ lr = *ptr++
          subhi   r3, r3, #8              @ available += 8
          orrhi   r2, r2, lr, lsl r3      @ window |= lr<<(32-available)
          cmphi   r10, r11                @ ptr<stop => HI
          cmphi   r3, #7                  @ available<=24 => HI
          ldrbhi  lr, [r11], #1           @ lr = *ptr++
          subhi   r3, r3, #8              @ available += 8
          orrhi   r2, r2, lr, lsl r3      @ window |= lr<<(32-available)
          cmphi   r10, r11                @ ptr<stop => HI
          cmphi   r3, #7                  @ available<=24 => HI
          ldrbhi  lr, [r11], #1           @ lr = *ptr++
          subhi   r3, r3, #8              @ available += 8
          orrhi   r2, r2, lr, lsl r3      @ window |= lr<<(32-available)
          subs    r3, r0, r3              @ r3 = available-_bits, available<_bits => LT
          blt     oc_pack_read_refill_last
          lsr     r0, r2, r0              @ r0 = window>>(32-_bits)
          lsl     r2, r2, r1              @ r2 = window<<=_bits
          str     r11, [ip, #-4]          @ ptr = r11
          stmia   ip, {r2, r3}            @ window = r2, available = r3
          pop     {r10, r11, pc}

  @ Either we wanted to read more than 24 bits and did not have enough room to
  @  stuff the last byte into the window, or we hit the end of the packet.
  oc_pack_read_refill_last:
          cmp     r11, r10                @ ptr<stop => LO, otherwise HS
  @ If we did not hit the end of the packet, then pull enough of the next byte
  @  to fill up the window (ptr is not advanced for this partial byte).
          ldrblo  lr, [r11]               @ (LO) lr = *ptr
  @ Otherwise, set the EOF flag and pretend we have lots of available bits.
          movhs   lr, #1                  @ (HS) lr = 1
          addlo   r10, r3, r1             @ (LO) r10 = available
          strhs   lr, [ip, #8]            @ (HS) eof = 1
          andlo   r10, r10, #7            @ (LO) r10 = available&7
          movhs   r3, #1<<30              @ (HS) available = OC_LOTS_OF_BITS
  @ The window already holds available (25..31) valid bits at the top, so only
  @  its low 8-(available&7) bits are free.  The byte must therefore be shifted
  @  RIGHT so that its top bits land in those free positions; a left shift
  @  would OR it over bits of the window that are already valid.
          orrlo   r2, r2, lr, lsr r10     @ (LO) window |= *ptr>>(available&7)
          lsr     r0, r2, r0              @ r0 = window>>(32-_bits)
          lsl     r2, r2, r1              @ r2 = window<<=_bits
          str     r11, [ip, #-4]          @ ptr = r11
          stmia   ip, {r2, r3}            @ window = r2, available = r3
          pop     {r10, r11, pc}
  118. @ .size oc_pack_read_arm, .-oc_pack_read_arm @ ENDP
  119. @ .type oc_huff_token_decode_arm, %function; oc_huff_token_decode_arm: @ PROC
  @ _oc_huff_token_decode_arm: decode one token by repeated table lookups.
  @ Starting at _tree[0], each step reads n = _tree[node], indexes the table
  @  with the top n bits of the window, and stops at the first entry that is
  @  not positive.  That entry, negated, carries the final bit count in
  @  (-node)>>8 and the return value in (-node)&255.
  @   In:    r0 = oc_pack_buf *_b
  @          r1 = const ogg_int16_t *_tree
  @   Out:   r0 = (-node)&255
  @   Roles: r2 = stop, r3 = ptr, r4 = window, r5 = available, r10 = n,
  @          ip (r12) = table cursor / node, lr = scratch.
  @          r4, r5, r10 and lr are pushed on entry and popped on return.
  _oc_huff_token_decode_arm:
          push    {r4, r5, r10, lr}
          ldrsh   r10, [r1]               @ r10 = n = _tree[0]
          ldmia   r0, {r2-r5}             @ r2 = stop, r3 = ptr,
                                          @ r4 = window, r5 = available
          cmp     r10, r5                 @ n>available => GT
          bgt     oc_huff_token_decode_refill0
          rsb     lr, r10, #32            @ lr = 32-n
          lsr     lr, r4, lr              @ lr = bits = window>>(32-n)
          add     lr, r1, lr, lsl #1      @ lr = _tree+bits
          ldrsh   ip, [lr, #2]            @ ip = node = _tree[1+bits]
          rsbs    lr, ip, #0              @ lr = -node, node>0 => MI
          bmi     oc_huff_token_decode_continue
          lsr     r10, lr, #8             @ r10 = n = (-node)>>8
          lsl     r4, r4, r10             @ r4 = window<<=n
          sub     r5, r5, r10             @ r5 = available-=n
          stmib   r0, {r3-r5}             @ ptr = r3, window = r4, available = r5
          and     r0, lr, #255            @ r0 = (-node)&255
          pop     {r4, r5, r10, pc}

  @ The first tree node was not enough to reach a leaf, read another.
  oc_huff_token_decode_continue:
          add     ip, r1, ip, lsl #1      @ ip = _tree+node
          lsl     r4, r4, r10             @ r4 = window<<=n
          sub     r5, r5, r10             @ r5 = available-=n
          ldrsh   r10, [ip], #2           @ r10 = n = _tree[node], ip = _tree+node+1
          cmp     r10, r5                 @ n>available => GT
          bgt     oc_huff_token_decode_refill
          rsb     lr, r10, #32            @ lr = 32-n
          lsr     lr, r4, lr              @ lr = bits = window>>(32-n)
          add     ip, ip, lr              @ the index is added twice (here and in
          ldrsh   ip, [ip, lr]            @  the load): ip = node = _tree[node+1+bits]
          rsbs    lr, ip, #0              @ lr = -node, node>0 => MI
          bmi     oc_huff_token_decode_continue
          lsr     r10, lr, #8             @ r10 = n = (-node)>>8
          lsl     r4, r4, r10             @ r4 = window<<=n
          sub     r5, r5, r10             @ r5 = available-=n
          stmib   r0, {r3-r5}             @ ptr = r3, window = r4, available = r5
          and     r0, lr, #255            @ r0 = (-node)&255
          pop     {r4, r5, r10, pc}

  @ Refill entered from the very first lookup: point the cursor at _tree+1 so
  @  that the shared code below sees the same state as on later iterations.
  oc_huff_token_decode_refill0:
          add     ip, r1, #2              @ ip = _tree+1
  oc_huff_token_decode_refill:
  @ We cannot possibly need more than 15 bits, so available must be <= 15.
  @ Therefore we can load at least two bytes without checking it.
  @ From here until the final RSB, r5 holds 32-available rather than available.
          cmp     r2, r3                  @ ptr<stop => HI
          ldrbhi  lr, [r3], #1            @ (HI) lr = *ptr++
          rsbhi   r5, r5, #24             @ (HI) r5 = 32-(available+=8)
          rsbls   r5, r5, #32             @ (LS) r5 = 32-available
          orrhi   r4, r4, lr, lsl r5      @ (HI) window |= lr<<(32-available)
          cmphi   r2, r3                  @ ptr<stop => HI
          ldrbhi  lr, [r3], #1            @ (HI) lr = *ptr++
          subhi   r5, r5, #8              @ (HI) available += 8
          orrhi   r4, r4, lr, lsl r5      @ (HI) window |= lr<<(32-available)
  @ We can use unsigned compares for both the pointers and for available
  @  (allowing us to chain condition codes) because available will never be
  @  larger than 32 (or we would not be here), and thus 32-available will never
  @  be negative.
          cmphi   r2, r3                  @ ptr<stop => HI
          cmphi   r5, #7                  @ available<=24 => HI
          ldrbhi  lr, [r3], #1            @ (HI) lr = *ptr++
          subhi   r5, r5, #8              @ (HI) available += 8
          orrhi   r4, r4, lr, lsl r5      @ (HI) window |= lr<<(32-available)
          cmp     r2, r3                  @ ptr<stop => HI, otherwise LS
          movls   r5, #-1<<30             @ (LS) available = OC_LOTS_OF_BITS+32
          cmphi   r5, #7                  @ (HI) available<=24 => HI
          ldrbhi  lr, [r3], #1            @ (HI) lr = *ptr++
          subhi   r5, r5, #8              @ (HI) available += 8
          orrhi   r4, r4, lr, lsl r5      @ (HI) window |= lr<<(32-available)
          rsb     lr, r10, #32            @ lr = 32-n
          lsr     lr, r4, lr              @ lr = bits = window>>(32-n)
          add     ip, ip, lr              @ index added twice, as above:
          ldrsh   ip, [ip, lr]            @  ip = node = _tree[node+1+bits]
          rsb     r5, r5, #32             @ r5 = available again
          rsbs    lr, ip, #0              @ lr = -node, node>0 => MI
          bmi     oc_huff_token_decode_continue
          lsr     r10, lr, #8             @ r10 = n = (-node)>>8
          lsl     r4, r4, r10             @ r4 = window<<=n
          sub     r5, r5, r10             @ r5 = available-=n
          stmib   r0, {r3-r5}             @ ptr = r3, window = r4, available = r5
          and     r0, lr, #255            @ r0 = (-node)&255
          pop     {r4, r5, r10, pc}
  221. @ .size oc_huff_token_decode_arm, .-oc_huff_token_decode_arm @ ENDP
  222. @ END
  223. @ .section .note.GNU-stack,"",%progbits
  224. #endif