ncpuinl.pas 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generates AArch64 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit ncpuinl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { AArch64 specialisation of the code generator inline node (tcgInlineNode).
    The first_* methods are the first pass handlers and the second_* methods
    are the matching code generation handlers for the same intrinsics. }
  taarch64inlinenode = class(tcgInlineNode)
    { --- first pass handlers --- }
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_int_real: tnode; override;
    function first_frac_real: tnode; override;
    function first_fma : tnode; override;
    function first_minmax : tnode; override;

    { --- second pass (code generation) handlers --- }
    procedure second_abs_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_abs_long; override;
    procedure second_round_real; override;
    procedure second_trunc_real; override;
    procedure second_int_real; override;
    procedure second_frac_real; override;
    procedure second_get_frame; override;
    procedure second_fma; override;
    procedure second_prefetch; override;
    procedure second_minmax; override;
  private
    { shared operand/result set-up used by the abs/sqr/sqrt real handlers }
    procedure load_fpu_location;
  end;
  48. implementation
  49. uses
  50. globtype,verbose,globals,
  51. compinnr,
  52. cpuinfo, defutil,symdef,aasmbase,aasmdata,aasmcpu,
  53. cgbase,cgutils,pass_1,pass_2,
  54. procinfo,
  55. ncal,nutils,
  56. cpubase,ncgutil,cgobj,cgcpu,hlcgobj;
  57. {*****************************************************************************
  58. taarch64inlinenode
  59. *****************************************************************************}
  { Runs the second pass on the operand (left), forces it into a scalar MM
    register and initialises this node's location as a copy of the operand's
    location that refers to a newly allocated MM register of the same size.
    The caller is expected to emit the instruction that computes
    location.register from left.location.register. }
  procedure taarch64inlinenode.load_fpu_location;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(
        current_asmdata.CurrAsmList,
        left.location,
        left.resultdef,
        true);

      { start from the operand's location (keeps its size) ... }
      location_copy(location,left.location);
      { ... but deliver the result in a separate, fresh MM register }
      location.loc:=LOC_MMREGISTER;
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
    end;
  { First pass for abs() on a real operand.  No replacement node is returned
    (nil); the result is expected in an MM register.  When FPU exception
    checking is required, the procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_abs_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for sqr() on a real operand.  No replacement node is returned
    (nil); the result is expected in an MM register.  When FPU exception
    checking is required, the procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_sqr_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for sqrt() on a real operand.  No replacement node is returned
    (nil); the result is expected in an MM register.  When FPU exception
    checking is required, the procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_sqrt_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for round() on a real operand.  No replacement node is
    returned (nil).  second_round_real delivers its result in an integer
    register (LOC_REGISTER), so that is the expected location announced
    here.  When FPU exception checking is required, the procedure is flagged
    with pi_do_call because the check emitted in the second pass may call a
    helper. }
  function taarch64inlinenode.first_round_real: tnode;
    begin
      { the converted value ends up in an integer register, not an MM one }
      expectloc:=LOC_REGISTER;
      result:=nil;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for trunc() on a real operand.  No replacement node is
    returned (nil).  second_trunc_real delivers its result in an integer
    register (LOC_REGISTER), so that is the expected location announced
    here.  When FPU exception checking is required, the procedure is flagged
    with pi_do_call. }
  function taarch64inlinenode.first_trunc_real: tnode;
    begin
      { the converted value ends up in an integer register, not an MM one }
      expectloc:=LOC_REGISTER;
      result:=nil;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for int() on a real operand.  No replacement node is returned
    (nil); the result is expected in an MM register.  When FPU exception
    checking is required, the procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_int_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for frac() on a real operand.  No replacement node is returned
    (nil); the result is expected in an MM register.  When FPU exception
    checking is required, the procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_frac_real : tnode;
    begin
      result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for the fused multiply-add intrinsic.  Single and double
    precision results are handled natively: nil is returned and the result is
    expected in an MM register.  Any other result type is delegated to the
    inherited implementation.

    second_fma ends with cg.maybe_check_for_fpu_exception, so, like the other
    floating point handlers in this unit, the procedure is flagged with
    pi_do_call when FPU exception checking is required. }
  function taarch64inlinenode.first_fma : tnode;
    begin
      if is_double(resultdef) or is_single(resultdef) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          if needs_check_for_fpu_exceptions then
            Include(current_procinfo.flags,pi_do_call);
        end
      else
        Result:=inherited first_fma;
    end;
  { Code generation for abs() on a real: FABS result, operand, followed by the
    optional FPU exception check. }
  procedure taarch64inlinenode.second_abs_real;
    var
      instr : taicpu;
    begin
      load_fpu_location;
      instr:=taicpu.op_reg_reg(A_FABS,location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(instr);
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for sqr() on a real: the operand is multiplied by itself
    with FMUL, followed by the optional FPU exception check. }
  procedure taarch64inlinenode.second_sqr_real;
    var
      instr : taicpu;
    begin
      load_fpu_location;
      instr:=taicpu.op_reg_reg_reg(A_FMUL,
        location.register,
        left.location.register,
        left.location.register);
      current_asmdata.CurrAsmList.concat(instr);
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for sqrt() on a real: FSQRT result, operand, followed by
    the optional FPU exception check. }
  procedure taarch64inlinenode.second_sqrt_real;
    var
      instr : taicpu;
    begin
      load_fpu_location;
      instr:=taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(instr);
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for abs() on an integer operand.

    The operand is forced into an integer register and the result gets a new
    integer register of the operand's size.  With overflow checking enabled,
    the operand is first compared against the lowest value of the result type
    and fpc_overflow is called when they are equal.  The absolute value itself
    is computed branch-free: NEGS produces the negated value and sets the
    flags, CSEL then keeps the negated value on GE and the original operand
    otherwise. }
  procedure taarch64inlinenode.second_abs_long;
    var
      operandsize : tcgsize;
      skipoverflow : TAsmLabel;
      negate,
      select : taicpu;
    begin
      secondpass(left);
      operandsize:=def_cgsize(left.resultdef);
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);

      location:=left.location;
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,operandsize);

      if cs_check_overflow in current_settings.localswitches then
        begin
          { jump over the overflow call unless the operand is the lowest
            value of the result type }
          current_asmdata.getjumplabel(skipoverflow);
          hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,resultdef,OC_NE,
            torddef(resultdef).low.svalue,left.location.register,skipoverflow);
          hlcg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          hlcg.g_call_system_proc(current_asmdata.CurrAsmList,'fpc_overflow',[],nil).resetiftemp;
          hlcg.a_label(current_asmdata.CurrAsmList,skipoverflow);
        end;

      { result := -operand, setting the flags }
      negate:=setoppostfix(taicpu.op_reg_reg(A_NEG,location.register,left.location.register),PF_S);
      current_asmdata.CurrAsmList.concat(negate);
      { keep the negated value on GE, otherwise take the operand unchanged }
      select:=taicpu.op_reg_reg_reg_cond(A_CSEL,location.register,location.register,left.location.register,C_GE);
      current_asmdata.CurrAsmList.concat(select);
    end;
  { Code generation for round() on a real operand.  The operand is forced
    into a scalar MM register and the result is delivered in a new integer
    register.  Rounding is done in two steps: FRINTX into a temporary MM
    register, then FCVTZS into the integer result register, followed by the
    optional FPU exception check. }
  procedure taarch64inlinenode.second_round_real;
    var
      roundedreg: tregister;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);

      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
      roundedreg:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);

      { round as floating point using current rounding mode }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg(A_FRINTX,roundedreg,left.location.register));
      { convert to signed integer rounding towards zero (there's no "round to
        integer using current rounding mode") }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg(A_FCVTZS,location.register,roundedreg));

      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for trunc() on a real operand.  The operand is forced
    into a scalar MM register and converted with FCVTZS into a new integer
    register.

    The conversion is followed by the optional FPU exception check, matching
    second_round_real (which uses the same FCVTZS conversion) and
    first_trunc_real, which flags the procedure with pi_do_call for exactly
    this check. }
  procedure taarch64inlinenode.second_trunc_real;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
      { convert to signed integer rounding towards zero }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,left.location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for int() on a real operand.  The operand is forced into
    a scalar MM register and FRINTZ writes the result into a new MM register
    sized according to resultdef, followed by the optional FPU exception
    check. }
  procedure taarch64inlinenode.second_int_real;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTZ,location.register,left.location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for frac() on a real operand.  The operand is forced into
    a scalar MM register; FRINTZ places its integral part in the result
    register and FSUB then computes operand - integral part into that same
    register, followed by the optional FPU exception check. }
  procedure taarch64inlinenode.second_frac_real;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      { result := integral part of the operand }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTZ,location.register,left.location.register));
      { result := operand - result }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FSUB,location.register,left.location.register,location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for get_frame: the result is simply the frame pointer
    register, exposed as a constant register location of address size. }
  procedure taarch64inlinenode.second_get_frame;
    begin
      location_reset(location,LOC_CREGISTER,OS_ADDR);
      { This routine serves backtracing, which needs the real frame pointer.
        current_procinfo.framepointer is set to SP because SP is what temps
        are addressed through.  The two coincide on most platforms, but not
        on AArch64, hence the explicit register here. }
      location.register:=NR_FRAME_POINTER_REG;
    end;
  { Code generation for the fused multiply-add intrinsic.

    The three operands are taken from the parameter list (which stores them
    in reverse order).  Unary minus nodes directly on an operand are not
    evaluated; instead their effect is folded into the choice of instruction:
    a minus on either factor toggles "negative product", a minus on the third
    operand selects "negative third operand".  All operands are then forced
    into MM registers and a single four-register instruction is emitted,
    followed by the optional FPU exception check. }
  procedure taarch64inlinenode.second_fma;
    const
      { indexed as [negative product, negative third operand] }
      op : array[false..true,false..true] of TAsmOp =
        (
          { positive product: positive / negative third operand }
          (A_FMADD, A_FNMSUB),
          { negative product: positive / negative third operand }
          (A_FMSUB, A_FNMADD)
        );
    var
      paraarray : array[1..3] of tnode;
      idx : integer;
      negproduct,
      negop3 : boolean;
    begin
      negproduct:=false;
      negop3:=false;

      paraarray[3]:=tcallparanode(parameters).paravalue;
      paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
      paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;

      { Check whether a negation node can be removed.  This is possible
        because changing the sign of a floating point number does not affect
        its absolute value in any way.
        The unused unary minus node is not released here: it is kept and
        released together with the other nodes, only no code is generated
        for it. }
      for idx:=1 to 3 do
        if paraarray[idx].nodetype=unaryminusn then
          begin
            paraarray[idx]:=tunarynode(paraarray[idx]).left;
            if idx=3 then
              negop3:=true
            else
              negproduct:=not(negproduct);
          end;

      for idx:=1 to 3 do
        secondpass(paraarray[idx]);

      { no memory operand is allowed }
      for idx:=1 to 3 do
        if not(paraarray[idx].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,
            paraarray[idx].location,paraarray[idx].resultdef,true);

      location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg_reg_reg(op[negproduct,negop3],
          location.register,
          paraarray[1].location.register,
          paraarray[2].location.register,
          paraarray[3].location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for the prefetch intrinsic.

    The operand is evaluated with pointer checking temporarily disabled for
    its node tree.  If it ends up as a memory reference, its address is
    loaded into a register and a PRFM (with operation constant 0) is emitted
    for that address; operands in any other location generate no code.

    The register based reference takes its temp position, alignment and
    volatility from the operand's reference: this node's own location is
    never initialised in this procedure, so it must not be read here. }
  procedure taarch64inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
      { do not call Checkpointer for left node }
      checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
      if checkpointer_used then
        node_change_local_switch(left,cs_checkpointer,false);
      secondpass(left);
      { restore the switch that was cleared above }
      if checkpointer_used then
        node_change_local_switch(left,cs_checkpointer,true);
      case left.location.loc of
        LOC_CREFERENCE,
        LOC_REFERENCE:
          begin
            r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
            cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
            reference_reset_base(ref,r,0,
              left.location.reference.temppos,
              left.location.reference.alignment,
              left.location.reference.volatility);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_ref(A_PRFM,0,ref));
          end;
        else
          { nothing to prefetch };
      end;
    end;
  { First pass for the min/max intrinsics.
    - single/double results: handled natively, result expected in an MM
      register; pi_do_call is set when FPU exception checking is required.
    - 32/64 bit integer results: handled natively, result expected in an
      integer register.
    - anything else: delegated to the inherited implementation. }
  function taarch64inlinenode.first_minmax : tnode;
    begin
      Result:=nil;
      if is_single(resultdef) or is_double(resultdef) then
        begin
          expectloc:=LOC_MMREGISTER;
          if needs_check_for_fpu_exceptions then
            Include(current_procinfo.flags,pi_do_call);
        end
      else if is_32bitint(resultdef) or is_64bitint(resultdef) then
        expectloc:=LOC_REGISTER
      else
        Result:=inherited first_minmax;
    end;
  { Code generation for the min/max intrinsics.

    Both operands are evaluated and forced into registers (MM registers for
    single/double results, integer registers for 32/64 bit integer results).
    The operands are compared (FCMP resp. CMP) and a conditional select
    (FCSEL resp. CSEL) picks the first operand when the condition holds and
    the second operand otherwise.  The condition depends on the intrinsic:
      floating point: min -> MI, max -> GT
      signed integer: min -> LT, max -> GT
      unsigned integer: min -> LO, max -> HI
    The floating point variant is followed by the optional FPU exception
    check.  An internal error is raised for an unexpected intrinsic number or
    result type. }
  procedure taarch64inlinenode.second_minmax;
    var
      paraarray : array[1..2] of tnode;
      i: Integer;
      cond: TAsmCond;
    begin
       { the parameter list stores the operands in reverse order }
       paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
       paraarray[2]:=tcallparanode(parameters).paravalue;

       for i:=low(paraarray) to high(paraarray) do
         secondpass(paraarray[i]);

       if is_single(resultdef) or is_double(resultdef) then
         begin
           { no memory operand is allowed }
           for i:=low(paraarray) to high(paraarray) do
             begin
               if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                 hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,
                   paraarray[i].resultdef,true);
             end;

           location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
           location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMP,
             paraarray[1].location.register,paraarray[2].location.register));

           case inlinenumber of
             in_min_single,
             in_min_double:
               cond := C_MI;
             in_max_single,
             in_max_double:
               cond := C_GT;
             else
               Internalerror(2021121802);
           end;

           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_FCSEL,
             location.register,paraarray[1].location.register,paraarray[2].location.register,cond));

           cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
         end
       else if is_32bitint(resultdef) or is_64bitint(resultdef) then
         begin
           { no memory operand is allowed }
           for i:=low(paraarray) to high(paraarray) do
             begin
               if not(paraarray[i].location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
                 hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[i].location,
                   paraarray[i].resultdef,paraarray[i].resultdef,true);
             end;

           location_reset(location,LOC_REGISTER,paraarray[1].location.size);
           location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);

           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,
             paraarray[1].location.register,paraarray[2].location.register));

           case inlinenumber of
             in_min_longint,
             in_min_int64:
               cond := C_LT;
             in_min_dword,
             in_min_qword:
               cond := C_LO;
             in_max_longint,
             in_max_int64:
               cond := C_GT;
             in_max_dword,
             in_max_qword:
               cond := C_HI;
             else
               Internalerror(2021121901);
           end;

           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_CSEL,
             location.register,paraarray[1].location.register,paraarray[2].location.register,cond));
         end
       else
         internalerror(2021121801);
    end;
  { unit initialisation: register the AArch64 inline node class as the class
    used for inline nodes }
begin
  cinlinenode:=taarch64inlinenode;
end.