armbits.asm 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. @********************************************************************
  2. @* *
  3. @* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
  4. @* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  5. @* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  6. @* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. @* *
  8. @* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 *
  9. @* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  10. @* *
  11. @********************************************************************
  12. @
  13. @ function:
  14. @ last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $
  15. @
  16. @********************************************************************
  @ Everything below goes into the code section, aligned to 2^2 = 4 bytes.
  17. .text; .p2align 2
  @ Entry points exported from this file.
  18. .global oc_pack_read_arm
  19. .global oc_pack_read1_arm
  20. .global oc_huff_token_decode_arm
  @ oc_pack_read1_arm: read one bit from the top of the bit window.
  @  In:  r0 = oc_pack_buf *_b
  @  Out: r0 = most significant bit of the window
  @  Scratch: r2, r3, ip (r12).
  @  When no bit is available this branches to oc_pack_read1_refill with
  @   r0, r2, r3 and ip still live; that code sets _bits = 1 itself.
  21. .type oc_pack_read1_arm, %function; oc_pack_read1_arm: @ PROC
  22. @ ip = address of the window field, 8 bytes into *_b
  23. add ip,r0,#8
  24. ldmia ip,{r2,r3} @ r2 = window, r3 = available
  25. @ Stall...
  26. @ Stall...
  27. subs r3,r3,#1 @ r3 = available-1; LT when there was no bit left
  28. blt oc_pack_read1_refill
  29. mov r0,r2,lsr #31 @ result = top bit of the window
  30. mov r2,r2,lsl #1 @ discard the bit just consumed
  31. stmia ip,{r2,r3} @ write back: window = r2,
  32. @ available = r3
  33. mov pc,lr
  34. .size oc_pack_read1_arm, .-oc_pack_read1_arm @ ENDP
  @ oc_pack_read_arm: read _bits bits from the top of the bit window,
  @  refilling the window from the packet buffer when it runs short.
  @  In:  r0 = oc_pack_buf *_b, r1 = int _bits
  @  Out: r0 = window>>(32-_bits)
  @  Scratch: r1 (only when entered via oc_pack_read1_refill), r2, r3, r12.
  @   r10, r11 and r14 are saved and restored on the refill path.
  @  Field offsets in *_b as accessed below: stop = 0, ptr = 4, window = 8,
  @   available = 12, eof = 16.
  35. .type oc_pack_read_arm, %function; oc_pack_read_arm: @ PROC
  36. @ r0 = oc_pack_buf *_b
  37. @ r1 = int _bits
  38. ADD r12,r0,#8
  39. LDMIA r12,{r2,r3} @ r2 = window
  40. @ Stall... ; r3 = available
  41. @ Stall...
  42. SUBS r3,r3,r1 @ r3 = available-_bits, available<_bits => LT
  43. BLT oc_pack_read_refill
  44. RSB r0,r1,#32 @ r0 = 32-_bits
  45. MOV r0,r2,LSR r0 @ r0 = window>>32-_bits
  46. MOV r2,r2,LSL r1 @ r2 = window<<=_bits
  47. STMIA r12,{r2,r3} @ window = r2
  48. @ available = r3
  49. MOV PC,r14
  50. @ We need to refill window.
  @ Entry from oc_pack_read1_arm: r0 = _b, r2 = window, r3 = available-1,
  @  r12 = &_b->window.  Only _bits is missing, so set it and fall through.
  51. oc_pack_read1_refill:
  52. MOV r1,#1
  @ Here: r0 = _b, r1 = _bits, r2 = window, r3 = available-_bits (negative),
  @  r12 = &_b->window.
  53. oc_pack_read_refill:
  54. STMFD r13!,{r10,r11,r14}
  55. LDMIA r0,{r10,r11} @ r10 = stop
  56. @ r11 = ptr
  57. RSB r0,r1,#32 @ r0 = 32-_bits
  58. RSB r3,r3,r0 @ r3 = 32-available
  59. @ We can use unsigned compares for both the pointers and for available
  60. @ (allowing us to chain condition codes) because available will never be
  61. @ larger than 32 (or we wouldn't be here), and thus 32-available will never be
  62. @ negative.
  @ Up to four bytes are appended, one per group below.  Each group runs only
  @  while the previous one left HI set, i.e. while ptr<stop and at least 8
  @  bits of the window are still empty.
  63. CMP r10,r11 @ ptr<stop => HI
  64. CMPHI r3,#7 @ available<=24 => HI
  65. LDRHIB r14,[r11],#1 @ r14 = *ptr++
  66. SUBHI r3,#8 @ available += 8
  67. @ (HI) Stall...
  68. ORRHI r2,r14,LSL r3 @ r2 = window|=r14<<32-available
  69. CMPHI r10,r11 @ ptr<stop => HI
  70. CMPHI r3,#7 @ available<=24 => HI
  71. LDRHIB r14,[r11],#1 @ r14 = *ptr++
  72. SUBHI r3,#8 @ available += 8
  73. @ (HI) Stall...
  74. ORRHI r2,r14,LSL r3 @ r2 = window|=r14<<32-available
  75. CMPHI r10,r11 @ ptr<stop => HI
  76. CMPHI r3,#7 @ available<=24 => HI
  77. LDRHIB r14,[r11],#1 @ r14 = *ptr++
  78. SUBHI r3,#8 @ available += 8
  79. @ (HI) Stall...
  80. ORRHI r2,r14,LSL r3 @ r2 = window|=r14<<32-available
  81. CMPHI r10,r11 @ ptr<stop => HI
  82. CMPHI r3,#7 @ available<=24 => HI
  83. LDRHIB r14,[r11],#1 @ r14 = *ptr++
  84. SUBHI r3,#8 @ available += 8
  85. @ (HI) Stall...
  86. ORRHI r2,r14,LSL r3 @ r2 = window|=r14<<32-available
  87. SUBS r3,r0,r3 @ r3 = available-=_bits, available<_bits => LT
  88. BLT oc_pack_read_refill_last
  89. MOV r0,r2,LSR r0 @ r0 = window>>32-_bits
  90. MOV r2,r2,LSL r1 @ r2 = window<<=_bits
  91. STR r11,[r12,#-4] @ ptr = r11
  92. STMIA r12,{r2,r3} @ window = r2
  93. @ available = r3
  94. LDMFD r13!,{r10,r11,PC}
  95. @ Either we wanted to read more than 24 bits and didn't have enough room to
  96. @ stuff the last byte into the window, or we hit the end of the packet.
  97. oc_pack_read_refill_last:
  98. CMP r11,r10 @ ptr<stop => LO
  99. @ If we didn't hit the end of the packet, then pull enough of the next byte
  100. @ to fill up the window.
  101. LDRLOB r14,[r11] @ (LO) r14 = *ptr
  102. @ Otherwise, set the EOF flag and pretend we have lots of available bits.
  103. MOVHS r14,#1 @ (HS) r14 = 1
  @ stop (r10) is no longer needed on the LO path, so r10 is reused as scratch.
  104. ADDLO r10,r3,r1 @ (LO) r10 = available
  105. STRHS r14,[r12,#8] @ (HS) eof = 1
  106. ANDLO r10,r10,#7 @ (LO) r10 = available&7
  107. MOVHS r3,#1<<30 @ (HS) available = OC_LOTS_OF_BITS
  @ The shift must be LSR: only the leading bits of *ptr that fit into the
  @  still-empty low bits of the window may be merged in.  A left shift would
  @  OR the byte over bits of the window that are already valid.  ptr is not
  @  advanced here (the load above has no writeback).
  108. ORRLO r2,r14,LSR r10 @ (LO) r2 = window|=*ptr>>(available&7)
  109. MOV r0,r2,LSR r0 @ r0 = window>>32-_bits
  110. MOV r2,r2,LSL r1 @ r2 = window<<=_bits
  111. STR r11,[r12,#-4] @ ptr = r11
  112. STMIA r12,{r2,r3} @ window = r2
  113. @ available = r3
  114. LDMFD r13!,{r10,r11,PC}
  115. .size oc_pack_read_arm, .-oc_pack_read_arm @ ENDP
  @ oc_huff_token_decode_arm: decode one token by walking a table-driven
  @  Huffman tree against the bits at the top of the window.
  @  In:  r0 = oc_pack_buf *_b, r1 = const ogg_int16_t *_tree
  @  Out: r0 = (-node)&255 for the final (non-positive) node entry reached
  @  Tree layout as used below: _tree[node] holds n, the number of bits to
  @   look up; the next node entry is _tree[node+1+bits], where bits is the top
  @   n bits of the window.  A positive entry is the index of another node; a
  @   non-positive entry ends the walk, and its negation holds the number of
  @   bits to consume in bits 8 and up and the return value in the low 8 bits.
  @  r4, r5, r10 and r14 are saved on entry and restored on return; r2, r3 and
  @   r12 are used as scratch.
  116. .type oc_huff_token_decode_arm, %function; oc_huff_token_decode_arm: @ PROC
  117. @ r0 = oc_pack_buf *_b
  118. @ r1 = const ogg_int16_t *_tree
  119. STMFD r13!,{r4,r5,r10,r14}
  120. LDRSH r10,[r1] @ r10 = n=_tree[0]
  121. LDMIA r0,{r2-r5} @ r2 = stop
  122. @ Stall... ; r3 = ptr
  123. @ Stall... ; r4 = window
  124. @ r5 = available
  125. CMP r10,r5 @ n>available => GT
  126. BGT oc_huff_token_decode_refill0
  127. RSB r14,r10,#32 @ r14 = 32-n
  128. MOV r14,r4,LSR r14 @ r14 = bits=window>>32-n
  129. ADD r14,r1,r14,LSL #1 @ r14 = _tree+bits
  130. LDRSH r12,[r14,#2] @ r12 = node=_tree[1+bits]
  131. @ Stall...
  132. @ Stall...
  133. RSBS r14,r12,#0 @ r14 = -node, node>0 => MI
  134. BMI oc_huff_token_decode_continue
  @ Final node reached: from here on r14 holds the negated node value.
  135. MOV r10,r14,LSR #8 @ r10 = n=(-node)>>8
  136. MOV r4,r4,LSL r10 @ r4 = window<<=n
  137. SUB r5,r10 @ r5 = available-=n
  138. STMIB r0,{r3-r5} @ ptr = r3
  139. @ window = r4
  140. @ available = r5
  141. AND r0,r14,#255 @ r0 = (-node)&255
  142. LDMFD r13!,{r4,r5,r10,pc}
  143. @ The first tree node wasn't enough to reach a leaf, read another
  @ Here: r12 = node (positive index), r10 = n bits just looked up.
  144. oc_huff_token_decode_continue:
  145. ADD r12,r1,r12,LSL #1 @ r12 = _tree+node
  146. MOV r4,r4,LSL r10 @ r4 = window<<=n
  147. SUB r5,r5,r10 @ r5 = available-=n
  148. LDRSH r10,[r12],#2 @ r10 = n=_tree[node]
  149. @ Stall... ; r12 = _tree+node+1
  150. @ Stall...
  151. CMP r10,r5 @ n>available => GT
  152. BGT oc_huff_token_decode_refill
  153. RSB r14,r10,#32 @ r14 = 32-n
  154. MOV r14,r4,LSR r14 @ r14 = bits=window>>32-n
  @ bits is added once here and once more as the load offset, giving the
  @  2*bits byte offset needed for a 16-bit entry.
  155. ADD r12,r12,r14 @
  156. LDRSH r12,[r12,r14] @ r12 = node=_tree[node+1+bits]
  157. @ Stall...
  158. @ Stall...
  159. RSBS r14,r12,#0 @ r14 = -node, node>0 => MI
  160. BMI oc_huff_token_decode_continue
  161. MOV r10,r14,LSR #8 @ r10 = n=(-node)>>8
  162. MOV r4,r4,LSL r10 @ r4 = window<<=n
  163. SUB r5,r10 @ r5 = available-=n
  164. STMIB r0,{r3-r5} @ ptr = r3
  165. @ window = r4
  166. @ available = r5
  167. AND r0,r14,#255 @ r0 = (-node)&255
  168. LDMFD r13!,{r4,r5,r10,pc}
  @ Refill entry for the root node: make r12 point at the first entry after
  @  _tree[0], matching what the continue path leaves in r12.
  169. oc_huff_token_decode_refill0:
  170. ADD r12,r1,#2 @ r12 = _tree+1
  @ Here: r2 = stop, r3 = ptr, r4 = window, r5 = available, r10 = n,
  @  r12 = address of the first entry of the current node.
  @ During the refill r5 holds 32-available (the shift for the next byte); it
  @  is converted back to available by the RSB after the table load below.
  171. oc_huff_token_decode_refill:
  172. @ We can't possibly need more than 15 bits, so available must be <= 15.
  173. @ Therefore we can load at least two bytes without checking it.
  174. CMP r2,r3 @ ptr<stop => HI
  175. LDRHIB r14,[r3],#1 @ r14 = *ptr++
  176. RSBHI r5,r5,#24 @ (HI) available = 32-(available+=8)
  177. RSBLS r5,r5,#32 @ (LS) r5 = 32-available
  178. ORRHI r4,r14,LSL r5 @ r4 = window|=r14<<32-available
  179. CMPHI r2,r3 @ ptr<stop => HI
  180. LDRHIB r14,[r3],#1 @ r14 = *ptr++
  181. SUBHI r5,#8 @ available += 8
  182. @ (HI) Stall...
  183. ORRHI r4,r14,LSL r5 @ r4 = window|=r14<<32-available
  184. @ We can use unsigned compares for both the pointers and for available
  185. @ (allowing us to chain condition codes) because available will never be
  186. @ larger than 32 (or we wouldn't be here), and thus 32-available will never be
  187. @ negative.
  188. CMPHI r2,r3 @ ptr<stop => HI
  189. CMPHI r5,#7 @ available<=24 => HI
  190. LDRHIB r14,[r3],#1 @ r14 = *ptr++
  191. SUBHI r5,#8 @ available += 8
  192. @ (HI) Stall...
  193. ORRHI r4,r14,LSL r5 @ r4 = window|=r14<<32-available
  @ This compare is unconditional, so running out of input is detected no
  @  matter where the chain above stopped.  In that case r5 is set to
  @  -(1<<30), which the final RSB turns into available = (1<<30)+32.
  194. CMP r2,r3 @ ptr<stop => HI
  195. MOVLS r5,#-1<<30 @ (LS) available = OC_LOTS_OF_BITS+32
  196. CMPHI r5,#7 @ (HI) available<=24 => HI
  197. LDRHIB r14,[r3],#1 @ (HI) r14 = *ptr++
  198. SUBHI r5,#8 @ (HI) available += 8
  199. @ (HI) Stall...
  200. ORRHI r4,r14,LSL r5 @ (HI) r4 = window|=r14<<32-available
  201. RSB r14,r10,#32 @ r14 = 32-n
  202. MOV r14,r4,LSR r14 @ r14 = bits=window>>32-n
  203. ADD r12,r12,r14 @
  204. LDRSH r12,[r12,r14] @ r12 = node=_tree[node+1+bits]
  205. RSB r5,r5,#32 @ r5 = available
  206. @ Stall...
  207. RSBS r14,r12,#0 @ r14 = -node, node>0 => MI
  208. BMI oc_huff_token_decode_continue
  209. MOV r10,r14,LSR #8 @ r10 = n=(-node)>>8
  210. MOV r4,r4,LSL r10 @ r4 = window<<=n
  211. SUB r5,r10 @ r5 = available-=n
  212. STMIB r0,{r3-r5} @ ptr = r3
  213. @ window = r4
  214. @ available = r5
  215. AND r0,r14,#255 @ r0 = (-node)&255
  216. LDMFD r13!,{r4,r5,r10,pc}
  217. .size oc_huff_token_decode_arm, .-oc_huff_token_decode_arm @ ENDP
  218. @ END
  219. .section .note.GNU-stack,"",%progbits