as_callfunc_x64_gcc.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. /*
  2. AngelCode Scripting Library
  3. Copyright (c) 2003-2011 Andreas Jonsson
  4. This software is provided 'as-is', without any express or implied
  5. warranty. In no event will the authors be held liable for any
  6. damages arising from the use of this software.
  7. Permission is granted to anyone to use this software for any
  8. purpose, including commercial applications, and to alter it and
  9. redistribute it freely, subject to the following restrictions:
  10. 1. The origin of this software must not be misrepresented; you
  11. must not claim that you wrote the original software. If you use
  12. this software in a product, an acknowledgment in the product
  13. documentation would be appreciated but is not required.
  14. 2. Altered source versions must be plainly marked as such, and
  15. must not be misrepresented as being the original software.
  16. 3. This notice may not be removed or altered from any source
  17. distribution.
  18. The original version of this library can be located at:
  19. http://www.angelcode.com/angelscript/
  20. Andreas Jonsson
  21. [email protected]
  22. */
  23. /*
  24. * Implements the AMD64 calling convention for gcc-based 64bit Unices
  25. *
  26. * Author: Ionut "gargltk" Leonte <[email protected]>
  27. *
  28. * Initial author: niteice
  29. */
  30. #include "as_config.h"
  31. #ifndef AS_MAX_PORTABILITY
  32. #ifdef AS_X64_GCC
  33. #include "as_scriptengine.h"
  34. #include "as_texts.h"
  35. BEGIN_AS_NAMESPACE
  // Classification of one slot in the argument-type arrays built by
  // CallSystemFunctionNative(). x64ENDARG is 0, so a zero-initialised type array
  // reads as "no argument" in every slot that has not been filled in.
  enum argTypes { x64ENDARG = 0, x64INTARG = 1, x64FLOATARG = 2, x64DOUBLEARG = 3, x64VARIABLE = 4 };
  // Generic function pointer type; used below for the entries of an object's
  // virtual function table.
  typedef asQWORD ( *funcptr_t )( void );
  // Upper bound on the number of arguments handled per call (asserted in
  // CallSystemFunctionNative()).
  #define X64_MAX_ARGS 32
  // Number of slots that X64_CallFunction() loads into general purpose registers
  // (%rdi, %rsi, %rdx, %rcx, %r8, %r9).
  #define MAX_CALL_INT_REGISTERS 6
  // Number of slots that X64_CallFunction() loads into SSE registers (%xmm0 - %xmm7).
  #define MAX_CALL_SSE_REGISTERS 8
  // Number of asDWORDs that make up one 64-bit argument slot.
  #define CALLSTACK_MULTIPLIER 2
  // Number of slots in each of the argument buffers.
  // NOTE(review): the reason for the extra "+ 3" is not evident from this file -
  // presumably headroom for the hidden return/object pointers; confirm.
  #define X64_CALLSTACK_SIZE ( X64_MAX_ARGS + MAX_CALL_SSE_REGISTERS + 3 )
  // Pushes one quadword onto the machine stack. The value is loaded with movq
  // from the memory location of `val`, so a full 64 bits are read starting at
  // `val` regardless of the declared type of the expression. %rax is used as
  // scratch. The change to %rsp is not declared to the compiler and the macro
  // does not restore it.
  // Note to self: Always remember to inform the used registers on the clobber line,
  // so that the gcc optimizer doesn't try to use them for other things
  #define PUSH_LONG( val ) \
  __asm__ __volatile__ ( \
  "movq %0, %%rax\n" \
  "pushq %%rax" \
  : \
  : "m" ( val ) \
  : "%rax" \
  )
  // Pops one quadword off the machine stack into the register named by `reg`
  // (a string literal such as "%rdi"). %rax is used as scratch and is listed,
  // together with the destination, on the clobber line.
  #define POP_LONG( reg ) \
      __asm__ __volatile__ ( \
          "popq %%rax\n" \
          "movq %%rax, %" reg \
          : \
          : \
          : "%rax", reg \
      )

  // Same as POP_LONG(), for an xmm destination register.
  // While movq really should be used to move from a general purpose register
  // to an xmm register, this isn't accepted by older GNUC versions, where movd
  // should be used instead.
  // Reference: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43215
  #if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ <= 2)
  #define POP_LONG_XMM( reg ) \
      __asm__ __volatile__ ( \
          "popq %%rax\n" \
          "movd %%rax, %" reg \
          : \
          : \
          : "%rax", reg \
      )
  #else
  #define POP_LONG_XMM( reg ) POP_LONG( reg )
  #endif
  // Stores the current 64-bit contents of the register named by `name` (a string
  // literal such as "%rdx") into the lvalue `dest`.
  // `dest` is written by the instruction, so it is declared as an output operand;
  // declaring it as an input would allow the compiler to assume it is unchanged.
  // The register stays on the clobber line so that it is not chosen for
  // addressing `dest`.
  #define ASM_GET_REG( name, dest ) \
      __asm__ __volatile__ ( \
          "movq %" name ", %0\n" \
          : "=m" ( dest ) \
          : \
          : name \
      )
  92. static asDWORD GetReturnedFloat()
  93. {
  94. float retval = 0.0f;
  95. asDWORD ret = 0;
  96. __asm__ __volatile__ (
  97. "lea %0, %%rax\n"
  98. "movss %%xmm0, (%%rax)"
  99. : /* no output */
  100. : "m" (retval)
  101. : "%rax"
  102. );
  103. // We need to avoid implicit conversions from float to unsigned - we need
  104. // a bit-wise-correct-and-complete copy of the value
  105. memcpy( &ret, &retval, sizeof( ret ) );
  106. return ( asDWORD )ret;
  107. }
  108. static asQWORD GetReturnedDouble()
  109. {
  110. double retval = 0.0f;
  111. asQWORD ret = 0;
  112. __asm__ __volatile__ (
  113. "lea %0, %%rax\n"
  114. "movlpd %%xmm0, (%%rax)"
  115. : /* no optput */
  116. : "m" (retval)
  117. : "%rax"
  118. );
  119. // We need to avoid implicit conversions from double to unsigned long long - we need
  120. // a bit-wise-correct-and-complete copy of the value
  121. memcpy( &ret, &retval, sizeof( ret ) );
  122. return ret;
  123. }
  124. // Note to self: If there is any trouble with a function when it is optimized, gcc supports
  125. // turning off optimization for individual functions by adding the following to the declaration:
  126. // __attribute__ ((optimize(0)))
  127. static asQWORD __attribute__ ((noinline)) X64_CallFunction( const asDWORD* pArgs, const asBYTE *pArgsType, void *func )
  128. {
  129. asQWORD retval = 0;
  130. asQWORD ( *call )() = (asQWORD (*)())func;
  131. int i = 0;
  132. /* push the stack parameters */
  133. for ( i = MAX_CALL_INT_REGISTERS + MAX_CALL_SSE_REGISTERS; pArgsType[i] != x64ENDARG && ( i < X64_MAX_ARGS + MAX_CALL_SSE_REGISTERS + 3 ); i++ ) {
  134. PUSH_LONG( pArgs[i * CALLSTACK_MULTIPLIER] );
  135. }
  136. /* push integer parameters */
  137. for ( i = 0; i < MAX_CALL_INT_REGISTERS; i++ ) {
  138. PUSH_LONG( pArgs[i * CALLSTACK_MULTIPLIER] );
  139. }
  140. /* push floating point parameters */
  141. for ( i = MAX_CALL_INT_REGISTERS; i < MAX_CALL_INT_REGISTERS + MAX_CALL_SSE_REGISTERS; i++ ) {
  142. PUSH_LONG( pArgs[i * CALLSTACK_MULTIPLIER] );
  143. }
  144. /* now pop the registers in reverse order and make the call */
  145. POP_LONG_XMM( "%xmm7" );
  146. POP_LONG_XMM( "%xmm6" );
  147. POP_LONG_XMM( "%xmm5" );
  148. POP_LONG_XMM( "%xmm4" );
  149. POP_LONG_XMM( "%xmm3" );
  150. POP_LONG_XMM( "%xmm2" );
  151. POP_LONG_XMM( "%xmm1" );
  152. POP_LONG_XMM( "%xmm0" );
  153. POP_LONG( "%r9" );
  154. POP_LONG( "%r8" );
  155. POP_LONG( "%rcx" );
  156. POP_LONG( "%rdx" );
  157. POP_LONG( "%rsi" );
  158. POP_LONG( "%rdi" );
  159. // call the function with the arguments
  160. retval = call();
  161. return retval;
  162. }
  163. // returns true if the given parameter is a 'variable argument'
  164. inline bool IsVariableArgument( asCDataType type )
  165. {
  166. return ( type.GetTokenType() == ttQuestion ) ? true : false;
  167. }
  168. asQWORD CallSystemFunctionNative(asCContext *context, asCScriptFunction *descr, void *obj, asDWORD *args, void *retPointer, asQWORD &retQW2)
  169. {
  170. asSSystemFunctionInterface *sysFunc = descr->sysFuncIntf;
  171. int callConv = sysFunc->callConv;
  172. asQWORD retQW = 0;
  173. void *func = ( void * )sysFunc->func;
  174. asDWORD *stack_pointer = args;
  175. funcptr_t *vftable = NULL;
  176. int totalArgumentCount = 0;
  177. int n = 0;
  178. int base_n = 0;
  179. int a = 0;
  180. int param_pre = 0;
  181. int param_post = 0;
  182. int argIndex = 0;
  183. int argumentCount = 0;
  184. asDWORD tempBuff[CALLSTACK_MULTIPLIER * X64_CALLSTACK_SIZE] = { 0 };
  185. asBYTE tempType[X64_CALLSTACK_SIZE] = { 0 };
  186. asDWORD paramBuffer[CALLSTACK_MULTIPLIER * X64_CALLSTACK_SIZE] = { 0 };
  187. asBYTE argsType[X64_CALLSTACK_SIZE] = { 0 };
  188. asBYTE argsSet[X64_CALLSTACK_SIZE] = { 0 };
  189. if( sysFunc->hostReturnInMemory ) {
  190. // The return is made in memory
  191. callConv++;
  192. }
  193. argumentCount = ( int )descr->parameterTypes.GetLength();
  194. asASSERT( argumentCount <= X64_MAX_ARGS );
  195. // TODO: optimize: argsType should be computed in PrepareSystemFunction
  196. for( a = 0; a < argumentCount; ++a, ++argIndex ) {
  197. // get the base type
  198. argsType[argIndex] = x64INTARG;
  199. if ( descr->parameterTypes[a].IsFloatType() && !descr->parameterTypes[a].IsReference() ) {
  200. argsType[argIndex] = x64FLOATARG;
  201. }
  202. if ( descr->parameterTypes[a].IsDoubleType() && !descr->parameterTypes[a].IsReference() ) {
  203. argsType[argIndex] = x64DOUBLEARG;
  204. }
  205. if ( descr->parameterTypes[a].GetSizeOnStackDWords() == 2 && !descr->parameterTypes[a].IsDoubleType() && !descr->parameterTypes[a].IsReference() ) {
  206. argsType[argIndex] = x64INTARG;
  207. }
  208. if ( IsVariableArgument( descr->parameterTypes[a] ) ) {
  209. argsType[argIndex] = x64VARIABLE;
  210. }
  211. }
  212. asASSERT( argIndex == argumentCount );
  213. for ( a = 0; a < argumentCount && totalArgumentCount <= X64_MAX_ARGS; a++ ) {
  214. switch ( argsType[a] ) {
  215. case x64ENDARG:
  216. case x64INTARG:
  217. case x64FLOATARG:
  218. case x64DOUBLEARG: {
  219. if ( totalArgumentCount < X64_MAX_ARGS )
  220. tempType[totalArgumentCount++] = argsType[a];
  221. break;
  222. }
  223. case x64VARIABLE: {
  224. if ( totalArgumentCount < X64_MAX_ARGS )
  225. tempType[totalArgumentCount++] = x64VARIABLE;
  226. if ( totalArgumentCount < X64_MAX_ARGS )
  227. tempType[totalArgumentCount++] = x64INTARG;
  228. break;
  229. }
  230. }
  231. }
  232. asASSERT( totalArgumentCount <= X64_MAX_ARGS );
  233. if ( totalArgumentCount > argumentCount ) {
  234. memcpy( argsType, tempType, totalArgumentCount );
  235. }
  236. memset( tempType, 0, sizeof( tempType ) );
  237. // TODO: This should be checked in PrepareSystemFunction
  238. #ifndef COMPLEX_OBJS_PASSED_BY_REF
  239. if( sysFunc->takesObjByVal ) {
  240. /* I currently know of no way we can predict register usage for passing complex
  241. objects by value when the compiler does not pass them by reference instead. I
  242. will quote the example from the AMD64 ABI to demonstrate this:
  243. (http://www.x86-64.org/documentation/abi.pdf - page 22)
  244. ------------------------------ BEGIN EXAMPLE -------------------------------
  245. Let us consider the following C code:
  246. typedef struct {
  247. int a, b;
  248. double d;
  249. } structparm;
  250. structparm s;
  251. int e, f, g, h, i, j, k;
  252. long double ld;
  253. double m, n;
  254. extern void func (int e, int f,
  255. structparm s, int g, int h,
  256. long double ld, double m,
  257. double n, int i, int j, int k);
  258. func (e, f, s, g, h, ld, m, n, i, j, k);
  259. Register allocation for the call:
  260. --------------------------+--------------------------+-------------------
  261. General Purpose Registers | Floating Point Registers | Stack Frame Offset
  262. --------------------------+--------------------------+-------------------
  263. %rdi: e | %xmm0: s.d | 0: ld
  264. %rsi: f | %xmm1: m | 16: j
  265. %rdx: s.a,s.b | %xmm2: n | 24: k
  266. %rcx: g | |
  267. %r8: h | |
  268. %r9: i | |
  269. --------------------------+--------------------------+-------------------
  270. */
  271. context->SetInternalException( TXT_INVALID_CALLING_CONVENTION );
  272. return 0;
  273. }
  274. #endif
  275. if ( obj && ( callConv == ICC_VIRTUAL_THISCALL || callConv == ICC_VIRTUAL_THISCALL_RETURNINMEM ) ) {
  276. vftable = *( ( funcptr_t ** )obj );
  277. func = ( void * )vftable[( asQWORD )func >> 3];
  278. }
  279. switch ( callConv ) {
  280. case ICC_CDECL_RETURNINMEM:
  281. case ICC_STDCALL_RETURNINMEM: {
  282. if ( totalArgumentCount ) {
  283. memmove( argsType + 1, argsType, totalArgumentCount );
  284. }
  285. memcpy( paramBuffer, &retPointer, sizeof( retPointer ) );
  286. argsType[0] = x64INTARG;
  287. base_n = 1;
  288. param_pre = 1;
  289. break;
  290. }
  291. case ICC_THISCALL:
  292. case ICC_VIRTUAL_THISCALL:
  293. case ICC_CDECL_OBJFIRST: {
  294. if ( totalArgumentCount ) {
  295. memmove( argsType + 1, argsType, totalArgumentCount );
  296. }
  297. memcpy( paramBuffer, &obj, sizeof( obj ) );
  298. argsType[0] = x64INTARG;
  299. param_pre = 1;
  300. break;
  301. }
  302. case ICC_THISCALL_RETURNINMEM:
  303. case ICC_VIRTUAL_THISCALL_RETURNINMEM:
  304. case ICC_CDECL_OBJFIRST_RETURNINMEM: {
  305. if ( totalArgumentCount ) {
  306. memmove( argsType + 2, argsType, totalArgumentCount );
  307. }
  308. memcpy( paramBuffer, &retPointer, sizeof( retPointer ) );
  309. memcpy( paramBuffer + CALLSTACK_MULTIPLIER, &obj, sizeof( &obj ) );
  310. argsType[0] = x64INTARG;
  311. argsType[1] = x64INTARG;
  312. param_pre = 2;
  313. break;
  314. }
  315. case ICC_CDECL_OBJLAST: {
  316. memcpy( paramBuffer + totalArgumentCount * CALLSTACK_MULTIPLIER, &obj, sizeof( obj ) );
  317. argsType[totalArgumentCount] = x64INTARG;
  318. param_post = 1;
  319. break;
  320. }
  321. case ICC_CDECL_OBJLAST_RETURNINMEM: {
  322. if ( totalArgumentCount ) {
  323. memmove( argsType + 1, argsType, totalArgumentCount );
  324. }
  325. memcpy( paramBuffer, &retPointer, sizeof( retPointer ) );
  326. argsType[0] = x64INTARG;
  327. memcpy( paramBuffer + ( totalArgumentCount + 1 ) * CALLSTACK_MULTIPLIER, &obj, sizeof( obj ) );
  328. argsType[totalArgumentCount + 1] = x64INTARG;
  329. param_pre = 1;
  330. param_post = 1;
  331. break;
  332. }
  333. default: {
  334. base_n = 0;
  335. break;
  336. }
  337. }
  338. int adjust = 0;
  339. for( n = 0; n < ( int )( param_pre + totalArgumentCount + param_post ); n++ ) {
  340. int copy_count = 0;
  341. if ( n >= param_pre && n < ( int )( param_pre + totalArgumentCount ) ) {
  342. copy_count = descr->parameterTypes[n - param_pre - adjust].GetSizeOnStackDWords();
  343. if ( argsType[n] == x64VARIABLE ) {
  344. adjust += 1;
  345. argsType[n] = x64INTARG;
  346. n += 1;
  347. }
  348. }
  349. if ( copy_count > CALLSTACK_MULTIPLIER ) {
  350. if ( copy_count > CALLSTACK_MULTIPLIER + 1 ) {
  351. context->SetInternalException( TXT_INVALID_CALLING_CONVENTION );
  352. return 0;
  353. }
  354. memcpy( paramBuffer + ( n - 1 ) * CALLSTACK_MULTIPLIER, stack_pointer, AS_PTR_SIZE * sizeof( asDWORD ) );
  355. stack_pointer += AS_PTR_SIZE;
  356. memcpy( paramBuffer + n * CALLSTACK_MULTIPLIER, stack_pointer, sizeof( asDWORD ) );
  357. stack_pointer += 1;
  358. } else {
  359. if ( copy_count ) {
  360. memcpy( paramBuffer + n * CALLSTACK_MULTIPLIER, stack_pointer, copy_count * sizeof( asDWORD ) );
  361. stack_pointer += copy_count;
  362. }
  363. }
  364. }
  365. // If we are returning an object not by reference, we need to make the
  366. // pointer to the space allocated to the object the first parameter.
  367. if( descr->returnType.IsObject() && ( descr->returnType.GetObjectType()->flags & asOBJ_APP_CLASS_CA ) == asOBJ_APP_CLASS_CA &&
  368. !descr->returnType.IsReference() && !sysFunc->hostReturnInMemory )
  369. {
  370. if ( totalArgumentCount )
  371. {
  372. memmove( paramBuffer + CALLSTACK_MULTIPLIER, paramBuffer, ( CALLSTACK_MULTIPLIER * ( X64_CALLSTACK_SIZE - 1 ) ) );
  373. memmove( argsType + 1, argsType, X64_CALLSTACK_SIZE - 1 );
  374. }
  375. memcpy( paramBuffer, &retPointer, sizeof( retPointer ) );
  376. argsType[ 0 ] = x64INTARG;
  377. }
  378. /*
  379. * Q: WTF is going on here !?
  380. *
  381. * A: The idea is to pre-arange the parameters so that X64_CallFunction() can do
  382. * it's little magic which must work regardless of how the compiler decides to
  383. * allocate registers. Basically:
  384. * - the first MAX_CALL_INT_REGISTERS entries in tempBuff and tempType will
  385. * contain the values/types of the x64INTARG parameters - that is the ones who
  386. * go into the registers. If the function has less then MAX_CALL_INT_REGISTERS
  387. * integer parameters then the last entries will be set to 0
  388. * - the next MAX_CALL_SSE_REGISTERS entries will contain the float/double arguments
  389. * that go into the floating point registers. If the function has less than
  390. * MAX_CALL_SSE_REGISTERS floating point parameters then the last entries will
  391. * be set to 0
  392. * - index MAX_CALL_INT_REGISTERS + MAX_CALL_SSE_REGISTERS marks the start of the
  393. * parameters which will get passed on the stack. These are added to the array
  394. * in reverse order so that X64_CallFunction() can simply push them to the stack
  395. * without the need to perform further tests
  396. */
  397. int used_int_regs = 0;
  398. int used_sse_regs = 0;
  399. int idx = 0;
  400. base_n = 0;
  401. for ( n = 0; ( n < X64_CALLSTACK_SIZE ) && ( used_int_regs < MAX_CALL_INT_REGISTERS ); n++ ) {
  402. if ( argsType[n] == x64INTARG ) {
  403. idx = base_n;
  404. argsSet[n] = 1;
  405. tempType[idx] = argsType[n];
  406. memcpy( tempBuff + idx * CALLSTACK_MULTIPLIER, paramBuffer + n * CALLSTACK_MULTIPLIER, CALLSTACK_MULTIPLIER * sizeof( asDWORD ) );
  407. base_n++;
  408. used_int_regs++;
  409. }
  410. }
  411. base_n = 0;
  412. for ( n = 0; ( n < X64_CALLSTACK_SIZE ) && ( used_sse_regs < MAX_CALL_SSE_REGISTERS ); n++ ) {
  413. if ( argsType[n] == x64FLOATARG || argsType[n] == x64DOUBLEARG ) {
  414. idx = MAX_CALL_INT_REGISTERS + base_n;
  415. argsSet[n] = 1;
  416. tempType[idx] = argsType[n];
  417. memcpy( tempBuff + idx * CALLSTACK_MULTIPLIER, paramBuffer + n * CALLSTACK_MULTIPLIER, CALLSTACK_MULTIPLIER * sizeof( asDWORD ) );
  418. base_n++;
  419. used_sse_regs++;
  420. }
  421. }
  422. base_n = 0;
  423. for ( n = X64_CALLSTACK_SIZE - 1; n >= 0; n-- ) {
  424. if ( argsType[n] != x64ENDARG && !argsSet[n] ) {
  425. idx = MAX_CALL_INT_REGISTERS + MAX_CALL_SSE_REGISTERS + base_n;
  426. argsSet[n] = 1;
  427. tempType[idx] = argsType[n];
  428. memcpy( tempBuff + idx * CALLSTACK_MULTIPLIER, paramBuffer + n * CALLSTACK_MULTIPLIER, CALLSTACK_MULTIPLIER * sizeof( asDWORD ) );
  429. base_n++;
  430. }
  431. }
  432. context->isCallingSystemFunction = true;
  433. retQW = X64_CallFunction( tempBuff, tempType, ( asDWORD * )func );
  434. ASM_GET_REG( "%rdx", retQW2 );
  435. context->isCallingSystemFunction = false;
  436. // If the return is a float value we need to get the value from the FP register
  437. if( sysFunc->hostReturnFloat )
  438. {
  439. if( sysFunc->hostReturnSize == 1 )
  440. *(asDWORD*)&retQW = GetReturnedFloat();
  441. else
  442. retQW = GetReturnedDouble();
  443. }
  444. return retQW;
  445. }
  446. END_AS_NAMESPACE
  447. #endif // AS_X64_GCC
  448. #endif // AS_MAX_PORTABILITY