2
0

ncpuinl.pas 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generates AArch64 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit ncpuinl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  type
    { AArch64 implementation of the inline (intrinsic) node.  For every
      intrinsic handled here there is a first_* method (first pass: announces
      the expected result location) and a second_* method (second pass: emits
      the instructions). }
    taarch64inlinenode = class(tcgInlineNode)
      { abs() on floating point and integer operands }
      function first_abs_real: tnode; override;
      procedure second_abs_real; override;
      procedure second_abs_long; override;

      { sqr() / sqrt() }
      function first_sqr_real: tnode; override;
      procedure second_sqr_real; override;
      function first_sqrt_real: tnode; override;
      procedure second_sqrt_real; override;

      { round() / trunc(): floating point to integer }
      function first_round_real: tnode; override;
      procedure second_round_real; override;
      function first_trunc_real: tnode; override;
      procedure second_trunc_real; override;

      { int() / frac(): floating point to floating point }
      function first_int_real: tnode; override;
      procedure second_int_real; override;
      function first_frac_real: tnode; override;
      procedure second_frac_real; override;

      { fused multiply-add }
      function first_fma : tnode; override;
      procedure second_fma; override;

      { miscellaneous }
      procedure second_get_frame; override;
      procedure second_prefetch; override;
    private
      { shared prologue of the unary floating point operations }
      procedure load_fpu_location;
    end;
  46. implementation
  47. uses
  48. globtype,verbose,globals,
  49. cpuinfo, defutil,symdef,aasmdata,aasmcpu,
  50. cgbase,cgutils,pass_1,pass_2,
  51. ncal,nutils,
  52. cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
  53. {*****************************************************************************
  54. taarch64inlinenode
  55. *****************************************************************************}
  { Common prologue for the unary floating point intrinsics: generates code
    for the operand, forces it into a scalar mm register and sets up this
    node's location as a newly allocated mm register of the same size. }
  procedure taarch64inlinenode.load_fpu_location;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);

      { start from a copy of the operand's location so that its size carries
        over, then redirect the copy to a register of our own }
      location_copy(location,left.location);
      location.loc:=LOC_MMREGISTER;
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
    end;
  { First pass for abs() on a real: the node is kept as is (nil is returned)
    and its result is expected in an mm register. }
  function taarch64inlinenode.first_abs_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for sqr() on a real: the node is kept as is (nil is returned)
    and its result is expected in an mm register. }
  function taarch64inlinenode.first_sqr_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for sqrt() on a real: the node is kept as is (nil is returned)
    and its result is expected in an mm register. }
  function taarch64inlinenode.first_sqrt_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for round() on a real: the node is kept as is (nil is
    returned); the expected location is set to LOC_MMREGISTER. }
  function taarch64inlinenode.first_round_real: tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for trunc() on a real: the node is kept as is (nil is
    returned); the expected location is set to LOC_MMREGISTER. }
  function taarch64inlinenode.first_trunc_real: tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for int() on a real: the node is kept as is (nil is returned)
    and its result is expected in an mm register. }
  function taarch64inlinenode.first_int_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for frac() on a real: the node is kept as is (nil is returned)
    and its result is expected in an mm register. }
  function taarch64inlinenode.first_frac_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for fma(): single and double precision results are handled by
    this unit (result expected in an mm register); every other result type is
    delegated to the inherited implementation. }
  function taarch64inlinenode.first_fma : tnode;
    begin
      if not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_fma;
          exit;
        end;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { Second pass for abs() on a real: emits FABS from the operand register
    into the result register. }
  procedure taarch64inlinenode.second_abs_real;
    var
      instr : taicpu;
    begin
      load_fpu_location;
      instr:=taicpu.op_reg_reg(A_FABS,location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(instr);
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Second pass for sqr() on a real: emits FMUL with the operand register
    used as both factors. }
  procedure taarch64inlinenode.second_sqr_real;
    var
      instr : taicpu;
    begin
      load_fpu_location;
      instr:=taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(instr);
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Second pass for sqrt() on a real: emits FSQRT from the operand register
    into the result register. }
  procedure taarch64inlinenode.second_sqrt_real;
    var
      instr : taicpu;
    begin
      load_fpu_location;
      instr:=taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(instr);
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Second pass for abs() on an integer.  Emits
      NEGS  res, src
      CSEL  res, res, src, GE
    i.e. the negated value is computed with the flags set, and the
    conditional select then picks either the negated or the original value. }
  procedure taarch64inlinenode.second_abs_long;
    var
      valsize : tcgsize;
      resreg : tregister;
    begin
      secondpass(left);
      valsize:=def_cgsize(left.resultdef);
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);

      { the result has the operand's location, but lives in its own register }
      location:=left.location;
      resreg:=cg.getintregister(current_asmdata.CurrAsmList,valsize);
      location.register:=resreg;

      current_asmdata.CurrAsmList.concat(
        setoppostfix(taicpu.op_reg_reg(A_NEG,resreg,left.location.register),PF_S));
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg_reg_cond(A_CSEL,resreg,resreg,left.location.register,C_GE));
    end;
  { Second pass for round(): the operand is rounded in the floating point
    domain first (FRINTX into a temporary mm register) and the rounded value
    is then converted to an integer register with FCVTZS. }
  procedure taarch64inlinenode.second_round_real;
    var
      roundedreg : tregister;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);

      { integer result register }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);

      { temporary for the rounded floating point value }
      roundedreg:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);

      { round as floating point using current rounding mode }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg(A_FRINTX,roundedreg,left.location.register));
      { convert to signed integer rounding towards zero (there's no "round to
        integer using current rounding mode") }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg(A_FCVTZS,location.register,roundedreg));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Second pass for trunc(): forces the operand into a scalar mm register and
    converts it to a signed integer in a newly allocated integer register
    with FCVTZS. }
  procedure taarch64inlinenode.second_trunc_real;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
      { convert to signed integer rounding towards zero }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,left.location.register));
      { same handling as second_round_real, which issues this call after the
        same FCVTZS conversion }
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Second pass for int(): forces the operand into a scalar mm register and
    emits FRINTZ into a newly allocated mm register, so the result stays a
    floating point value. }
  procedure taarch64inlinenode.second_int_real;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTZ,location.register,left.location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Second pass for frac(): computes operand - FRINTZ(operand).  The FRINTZ
    result is first placed in the result register and then subtracted from
    the operand, again into the result register. }
  procedure taarch64inlinenode.second_frac_real;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      { result := FRINTZ(operand) }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTZ,location.register,left.location.register));
      { result := operand - result }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FSUB,location.register,left.location.register,location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Second pass for get_frame: no instruction is emitted, the result location
    is simply the frame pointer register. }
  procedure taarch64inlinenode.second_get_frame;
    begin
      location_reset(location,LOC_CREGISTER,OS_ADDR);

      { This routine serves backtracing, which needs the real frame pointer.
        current_procinfo.framepointer cannot be used for that: it is set to
        SP because SP is what temps are addressed with.  On most platforms
        the two coincide, but on AArch64 they do not. }
      location.register:=NR_FRAME_POINTER_REG;
    end;
  { Second pass for fma(a,b,c).  Unary minus nodes directly on the three
    arguments are not code generated; instead they are folded into the choice
    of instruction (FMADD / FNMSUB / FMSUB / FNMADD). }
  procedure taarch64inlinenode.second_fma;
    const
      { indexed by [product is negated, third operand is negated] }
      fma_op : array[false..true,false..true] of TAsmOp =
        (
         { positive product }
         (
          { positive third operand }
          A_FMADD,
          { negative third operand }
          A_FNMSUB
         ),
         { negative product }
         (
          { positive third operand }
          A_FMSUB,
          { negative third operand }
          A_FNMADD
         )
        );
    var
      paraarray : array[1..3] of tnode;
      para : tcallparanode;
      i : integer;
      negop3,
      negproduct : boolean;
    begin
      negop3:=false;
      negproduct:=false;

      { the parameter list is walked starting at the third argument }
      para:=tcallparanode(parameters);
      paraarray[3]:=para.paravalue;
      para:=tcallparanode(para.nextpara);
      paraarray[2]:=para.paravalue;
      para:=tcallparanode(para.nextpara);
      paraarray[1]:=para.paravalue;

      { Strip a unary minus from each argument and remember its effect: a
        minus on either factor flips the sign of the product, a minus on the
        third argument negates the addend.  The skipped unary minus nodes are
        not released here; they stay in the tree and are released together
        with the other nodes, only no code is generated for them. }
      for i:=1 to 3 do
        if paraarray[i].nodetype=unaryminusn then
          begin
            paraarray[i]:=tunarynode(paraarray[i]).left;
            if i=3 then
              negop3:=true
            else
              negproduct:=not(negproduct);
          end;

      for i:=1 to 3 do
        secondpass(paraarray[i]);

      { no memory operand is allowed }
      for i:=1 to 3 do
        if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);

      location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg_reg_reg(fma_op[negproduct,negop3],
          location.register,
          paraarray[1].location.register,
          paraarray[2].location.register,
          paraarray[3].location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Second pass for prefetch(): if the operand ends up as a memory reference,
    its address is loaded into a register and a PRFM instruction is emitted
    for it; for any other operand location nothing is generated. }
  procedure taarch64inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
      { do not call Checkpointer for left node }
      checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
      if checkpointer_used then
        node_change_local_switch(left,cs_checkpointer,false);
      secondpass(left);
      { restore the switch that was turned off above; passing false here again
        would leave pointer checking disabled on the left subtree }
      if checkpointer_used then
        node_change_local_switch(left,cs_checkpointer,true);
      case left.location.loc of
        LOC_CREFERENCE,
        LOC_REFERENCE:
          begin
            r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
            cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
            { NOTE(review): temppos and volatility are read from this node's
              own location while the alignment comes from left.location;
              confirm that this mix is intended }
            reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_ref(A_PRFM,0,ref));
          end;
        else
          { nothing to prefetch };
      end;
    end;
  { unit initialization: make taarch64inlinenode the class referenced by
    cinlinenode }
  begin
    cinlinenode:=taarch64inlinenode;
  end.