as_callfunc_arm64_gcc.S 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. //
  2. // AngelCode Scripting Library
  3. // Copyright (c) 2020-2022 Andreas Jonsson
  4. //
  5. // This software is provided 'as-is', without any express or implied
  6. // warranty. In no event will the authors be held liable for any
  7. // damages arising from the use of this software.
  8. //
  9. // Permission is granted to anyone to use this software for any
  10. // purpose, including commercial applications, and to alter it and
  11. // redistribute it freely, subject to the following restrictions:
  12. //
  13. // 1. The origin of this software must not be misrepresented; you
  14. // must not claim that you wrote the original software. If you use
  15. // this software in a product, an acknowledgment in the product
  16. // documentation would be appreciated but is not required.
  17. //
  18. // 2. Altered source versions must be plainly marked as such, and
  19. // must not be misrepresented as being the original software.
  20. //
  21. // 3. This notice may not be removed or altered from any source
  22. // distribution.
  23. //
  24. // The original version of this library can be located at:
  25. // http://www.angelcode.com/angelscript/
  26. //
  27. // Andreas Jonsson
  28. // andreas@angelcode.com
  29. //
  30. // Assembly routines for the ARM64/AArch64 call convention used for Linux
  31. // Written by Max Waine in July 2020, based on as_callfunc_arm_msvc.asm,
  32. // with assistance & guidance provided by Sir Kane
  33. // Compile with GCC/GAS
  34. #if !defined(AS_MAX_PORTABILITY)
  35. #if defined(__aarch64__)
  // Target baseline ARMv8-A; all routines are emitted into the code section.
      .arch   armv8-a
      .text

  // Exported entry points: the CallARM64 family (native call trampolines)
  // followed by the helpers that spill floating-point return registers.
      .global CallARM64
      .global CallARM64Float
      .global CallARM64Double
      .global CallARM64Ret128
      .global CallARM64RetInMemory
      .global GetHFAReturnFloat
      .global GetHFAReturnDouble

  // The .type annotations are skipped when __MACH__ is defined.
  #if !defined(__MACH__)
      .type   CallARM64,            %function
      .type   CallARM64Float,       %function
      .type   CallARM64Double,      %function
      .type   CallARM64Ret128,      %function
      .type   CallARM64RetInMemory, %function
      .type   GetHFAReturnFloat,    %function
      .type   GetHFAReturnDouble,   %function
  #endif /* __MACH__ */
  //
  // GetHFAReturnDouble
  //
  // Copies the double-precision return registers d0..d3 to memory after a
  // native call has returned.
  //
  // In:
  //   x0 = destination for d0 (offset 0x00); d2 and d3 are also stored
  //        relative to x0 (offsets 0x10 and 0x18)
  //   x1 = destination for d1
  //   x2 = returnSize in bytes (8 per double to store)
  // Clobbers: x9
  //
  // NOTE(review): the C prototype is not visible in this file; presumably
  // (asQWORD *out1, asQWORD *out2, asQWORD returnSize), the same register
  // layout GetHFAReturnFloat uses - confirm against the caller.
  // NOTE(review): d2/d3 land at x0+0x10 / x0+0x18, so a returnSize above 16
  // requires the x0 buffer to be at least 32 bytes - confirm against the caller.
  //
  // Fix: the size used to be read from x1, which is also the store address
  // for d1, so the computed branch target was derived from a pointer. The
  // size is now taken from x2, consistent with GetHFAReturnFloat.
  //
      .align  2
  GetHFAReturnDouble:
      // Each str below is one 4-byte instruction that stores 8 bytes, so
      // branching (returnSize / 2) bytes before the end label executes
      // exactly the last (returnSize / 8) stores.
      adr     x9, populateDoubles
      sub     x9, x9, x2, lsr 1       // x9 -= returnSize >> 1
      br      x9

      str     d3, [x0, #0x18]
      str     d2, [x0, #0x10]
      str     d1, [x1]
      str     d0, [x0]
  populateDoubles:
      ret
  //
  // GetHFAReturnFloat
  //
  // Copies the single-precision return registers s0..s3 to memory.
  //
  // In:
  //   x0 = destination for s0 (offset 0) and s1 (offset 4)
  //   x1 = destination for s2 (offset 0) and s3 (offset 4)
  //   x2 = returnSize in bytes (4 per float to store)
  // Clobbers: x9
  //
      .align  2
  GetHFAReturnFloat:
      // One 4-byte store instruction per 4-byte float: stepping back
      // returnSize bytes from the end label runs the last returnSize / 4
      // stores and skips the rest.
      adr     x9, storeFloatsDone
      sub     x9, x9, x2              // x9 = storeFloatsDone - returnSize
      br      x9

      str     s3, [x1, #4]
      str     s2, [x1]
      str     s1, [x0, #4]
      str     s0, [x0]
  storeFloatsDone:
      ret
  //
  // [returnType] CallARM64[type](
  //     const asQWORD *gpRegArgs,    asQWORD numGPRegArgs,     -> x0, x1
  //     const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,  -> x2, x3
  //     const asQWORD *stackArgs,    asQWORD numStackArgs,     -> x4, x5
  //     asFUNCTION_t   func                                    -> x6
  // )
  //
  // Generic trampoline: copies numStackArgs qwords to the stack, loads up to
  // eight d-registers and up to eight x-registers from the given arrays and
  // calls func. CallARM64, CallARM64Float and CallARM64Double are three names
  // for the same entry point. After the call only sp, x20, fp and lr are
  // touched, so the return registers of func reach the caller unmodified.
  //
  // Scratch registers used: x9, x10, x11, x15. x20 (callee-saved) holds the
  // size of the stack argument area and is saved/restored in the frame.
  //
  // NOTE(review): the stack copy moves 16 bytes per iteration, so an odd
  // numStackArgs reads one qword past the end of stackArgs - confirm the
  // caller buffer tolerates that.
  //
  // Unwind info fixes relative to the previous revision (code is unchanged):
  //   * x20 is saved at sp+0x10 == CFA-0x10, so its CFI offset is -0x10.
  //   * sp is lowered by a runtime amount for stack arguments, so the CFA is
  //     tracked through fp while the frame is live and switched back to sp
  //     (DWARF register 31) once the frame has been popped.
  //
      .align  2
  CallARM64Double:
  CallARM64Float:
  CallARM64:
      .cfi_startproc
      stp     fp, lr, [sp,#-0x20]!
      str     x20, [sp,#0x10]
      .cfi_def_cfa_offset 0x20
      .cfi_offset 20, -0x10
      .cfi_offset fp, -0x20
      .cfi_offset lr, -0x18
      mov     fp, sp
      .cfi_def_cfa_register fp

      mov     x20, #0                 // no stack argument area by default

      cbz     x5, stackArgsLoopEnd

      // Align count to 2, then multiply by 8, resulting in a size aligned to 16
      add     x20, x5,  #1
      lsl     x20, x20, #3
      and     x20, x20, #-0x10
      // Multiply count by 8
      lsl     x10, x5, #3
      // Reserve the area, then fill it bottom-up. The post-indexed stores walk
      // sp back up by exactly x20 bytes, so sp is back at its pre-copy value
      // when the loop ends; it is lowered again right before the call.
      sub     sp, sp, x20
  stackArgsLoopStart:
      ldp     x9,x11, [x4],#16
      stp     x9,x11, [sp],#16
      subs    x10, x10, #16
      bgt     stackArgsLoopStart
  stackArgsLoopEnd:

      // Calculate amount to jump forward, avoiding pointless instructions.
      // Every ldr is 4 bytes, so starting numFloatRegArgs * 4 bytes before the
      // end label loads d0 .. d(numFloatRegArgs - 1) only.
      adr     x9, populateFloatRegisterArgsEnd
      sub     x9, x9, x3, lsl 2       // x9 -= numFloatRegArgs * 4
      br      x9

      ldr     d7, [x2, #0x38]
      ldr     d6, [x2, #0x30]
      ldr     d5, [x2, #0x28]
      ldr     d4, [x2, #0x20]
      ldr     d3, [x2, #0x18]
      ldr     d2, [x2, #0x10]
      ldr     d1, [x2, #0x08]
      ldr     d0, [x2]
  populateFloatRegisterArgsEnd:

      // x6 may be overwritten by the argument loads below, so keep the
      // target address in x15.
      mov     x15, x6
      // Calculate amount to jump forward, avoiding pointless instructions.
      // x0 is loaded last because it is the base register of every load.
      adr     x9, populateGPRegisterArgsEnd
      sub     x9, x9, x1, lsl 2       // x9 -= numGPRegArgs * 4
      br      x9

      ldr     x7, [x0, #0x38]
      ldr     x6, [x0, #0x30]
      ldr     x5, [x0, #0x28]
      ldr     x4, [x0, #0x20]
      ldr     x3, [x0, #0x18]
      ldr     x2, [x0, #0x10]
      ldr     x1, [x0, #0x08]
      ldr     x0, [x0]
  populateGPRegisterArgsEnd:

      // Actually call function: point sp at the copied stack arguments,
      // call, then release the area again.
      sub     sp, sp, x20
      blr     x15
      add     sp, sp, x20

      ldr     x20, [sp,#0x10]
      ldp     fp, lr, [sp],#0x20

      .cfi_restore lr
      .cfi_restore fp
      .cfi_restore 20
      .cfi_def_cfa 31, 0
      ret
      .cfi_endproc
  //
  // CallARM64Ret128
  //
  // Wrapper around CallARM64 for functions whose result comes back in the
  // x0:x1 register pair.
  //
  // In:
  //   x0..x5 = forwarded unchanged to CallARM64
  //   x6     = address that receives the value of x1 after the call
  //   x7     = function to call (moved into x6, where CallARM64 expects it)
  // Out:
  //   x0     = as left by the called function; x1 is written to [x6 on entry]
  //
  // x20 (callee-saved, preserved by CallARM64) keeps the output address
  // across the call.
  //
  // Unwind info fix: x20 is saved at sp+0x10 == CFA-0x10, so its CFI offset
  // is -0x10 (it was recorded as +0x10).
  //
      .align  2
  CallARM64Ret128:
      .cfi_startproc
      stp     fp, lr, [sp,#-0x20]!
      str     x20, [sp,#0x10]
      .cfi_def_cfa_offset 0x20
      .cfi_offset 20, -0x10
      .cfi_offset fp, -0x20
      .cfi_offset lr, -0x18
      mov     fp, sp

      mov     x20, x6                 // remember where the upper half goes
      mov     x6, x7                  // func -> argument slot used by CallARM64
      mov     x7, #0
      bl      CallARM64

      str     x1, [x20]               // store second result register

      ldr     x20, [sp,#0x10]
      ldp     fp, lr, [sp],#0x20

      .cfi_restore lr
      .cfi_restore fp
      .cfi_restore 20
      .cfi_def_cfa_offset 0
      ret
      .cfi_endproc
  //
  // CallARM64RetInMemory
  //
  // Wrapper around CallARM64 for functions that write their result to a
  // caller-provided memory block whose address is passed in x8.
  //
  // In:
  //   x0..x5 = forwarded unchanged to CallARM64
  //   x6     = address of the result memory block
  //   x7     = function to call (moved into x6, where CallARM64 expects it)
  // Out:
  //   x0     = address of the result memory block (value of x6 on entry)
  //
  // Fix: the result address used to be read back from x8 after the call.
  // AAPCS64 does not require a callee to preserve x8, so that value could be
  // arbitrary. The address is now also kept in x20, which is callee-saved
  // and preserved by CallARM64, and returned from there. The frame grows to
  // 0x20 bytes to save and restore x20.
  //
      .align  2
  CallARM64RetInMemory:
      .cfi_startproc
      stp     fp, lr, [sp,#-0x20]!
      str     x20, [sp,#0x10]
      .cfi_def_cfa_offset 0x20
      .cfi_offset 20, -0x10
      .cfi_offset fp, -0x20
      .cfi_offset lr, -0x18
      mov     fp, sp

      mov     x20, x6                 // copy that survives the call
      mov     x8, x6                  // indirect result location for func
      mov     x6, x7                  // func -> argument slot used by CallARM64
      bl      CallARM64

      mov     x0, x20                 // return the result block address

      ldr     x20, [sp,#0x10]
      ldp     fp, lr, [sp],#0x20

      .cfi_restore lr
      .cfi_restore fp
      .cfi_restore 20
      .cfi_def_cfa_offset 0
      ret
      .cfi_endproc
  190. #endif /* __aarch64__ */
  191. #endif /* !AS_MAX_PORTABILITY */