ncpuinl.pas 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generates AArch64 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit ncpuinl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { AArch64 flavour of the inline (intrinsic) node. It overrides the
    first_* hooks and the second_* code generation hooks of tcgInlineNode
    for the intrinsics implemented in this unit. }
  taarch64inlinenode = class(tcgInlineNode)
    { first_* hooks for the floating point intrinsics }
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_int_real: tnode; override;
    function first_frac_real: tnode; override;
    function first_fma: tnode; override;
    function first_minmax: tnode; override;

    { second_* hooks: emit the instructions }
    procedure second_abs_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_abs_long; override;
    procedure second_round_real; override;
    procedure second_trunc_real; override;
    procedure second_int_real; override;
    procedure second_frac_real; override;
    procedure second_get_frame; override;
    procedure second_fma; override;
    procedure second_prefetch; override;
    procedure second_minmax; override;

    { CPU specific intrinsics (in_a64_yield) }
    procedure pass_generate_code_cpu; override;
    function pass_typecheck_cpu: tnode; override;
    function first_cpu: tnode; override;
  private
    { evaluates left, forces it into an MM register and allocates a fresh
      MM register for the result of this node }
    procedure load_fpu_location;
  end;
  51. implementation
  52. uses
  53. globtype,verbose,globals,
  54. compinnr,
  55. cpuinfo, defutil,symdef,aasmbase,aasmdata,aasmcpu,
  56. cgbase,cgutils,pass_1,pass_2,
  57. procinfo,
  58. ncal,nutils,
  59. cpubase,ncgutil,cgobj,cgcpu,hlcgobj;
  60. {*****************************************************************************
  61. taarch64inlinenode
  62. *****************************************************************************}
  { Type check of the CPU specific intrinsics.
    in_a64_yield gets a void result type and nil is returned; every other
    inline number is handed to the inherited implementation. }
  function taarch64inlinenode.pass_typecheck_cpu: tnode;
    begin
      if inlinenumber=in_a64_yield then
        begin
          resultdef:=voidtype;
          Result:=nil;
        end
      else
        Result:=inherited pass_typecheck_cpu;
    end;
  { First pass of the CPU specific intrinsics.
    in_a64_yield produces no value: its expected location is LOC_VOID and
    its result type is void. Anything else is delegated to the inherited
    first_cpu. }
  function taarch64inlinenode.first_cpu : tnode;
    begin
      if inlinenumber<>in_a64_yield then
        Result:=inherited first_cpu
      else
        begin
          Result:=nil;
          resultdef:=voidtype;
          expectloc:=LOC_VOID;
        end;
    end;
  { Code generation for the CPU specific intrinsics.
    in_a64_yield is emitted as a single operand-less YIELD instruction;
    all other inline numbers go to the inherited implementation. }
  procedure taarch64inlinenode.pass_generate_code_cpu;
    begin
      if inlinenumber<>in_a64_yield then
        inherited pass_generate_code_cpu
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_none(A_YIELD));
    end;
  { Shared prologue of the one-operand floating point intrinsics:
    generates code for left, forces its value into a scalar MM register,
    and makes this node's location a copy of left's location that lives
    in a newly allocated MM register of the same size. }
  procedure taarch64inlinenode.load_fpu_location;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,
        left.location,left.resultdef,true);

      { start from the operand's location (this supplies the size) ... }
      location_copy(location,left.location);
      { ... but the result always lives in its own, writable MM register }
      location.loc:=LOC_MMREGISTER;
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
    end;
  { First pass for abs() on a real: the result is expected in an MM
    register and nil is returned. When FPU exception checking is required,
    the current procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_abs_real : tnode;
    begin
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for sqr() on a real: the result is expected in an MM
    register and nil is returned. When FPU exception checking is required,
    the current procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_sqr_real : tnode;
    begin
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for sqrt() on a real: the result is expected in an MM
    register and nil is returned. When FPU exception checking is required,
    the current procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_sqrt_real : tnode;
    begin
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for round() on a real: announces LOC_MMREGISTER as the
    expected location and returns nil. When FPU exception checking is
    required, the current procedure is flagged with pi_do_call.
    NOTE(review): second_round_real delivers its result in an integer
    register (LOC_REGISTER); confirm that announcing LOC_MMREGISTER here
    is intended. }
  function taarch64inlinenode.first_round_real: tnode;
    begin
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for trunc() on a real: announces LOC_MMREGISTER as the
    expected location and returns nil. When FPU exception checking is
    required, the current procedure is flagged with pi_do_call.
    NOTE(review): second_trunc_real delivers its result in an integer
    register (LOC_REGISTER); confirm that announcing LOC_MMREGISTER here
    is intended. }
  function taarch64inlinenode.first_trunc_real: tnode;
    begin
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for int() on a real: the result is expected in an MM
    register and nil is returned. When FPU exception checking is required,
    the current procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_int_real : tnode;
    begin
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for frac() on a real: the result is expected in an MM
    register and nil is returned. When FPU exception checking is required,
    the current procedure is flagged with pi_do_call. }
  function taarch64inlinenode.first_frac_real : tnode;
    begin
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for the fused multiply-add intrinsic.
    For single and double results the operation is done inline: the result
    is expected in an MM register and nil is returned. Other result types
    are left to the inherited implementation. }
  function taarch64inlinenode.first_fma : tnode;
    begin
      if is_double(resultdef) or is_single(resultdef) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          { second_fma ends with cg.maybe_check_for_fpu_exception, so flag
            the procedure with pi_do_call exactly like the other floating
            point first_* methods of this class do }
          if needs_check_for_fpu_exceptions then
            Include(current_procinfo.flags,pi_do_call);
        end
      else
        Result:=inherited first_fma;
    end;
  { Code generation for abs() on a real: FABS result, operand, followed by
    the optional FPU exception check. }
  procedure taarch64inlinenode.second_abs_real;
    var
      instr : taicpu;
    begin
      { operand into an MM register, result register allocated }
      load_fpu_location;
      instr:=taicpu.op_reg_reg(A_FABS,location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(instr);
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for sqr() on a real: the operand is multiplied by
    itself (FMUL result, operand, operand), followed by the optional FPU
    exception check. }
  procedure taarch64inlinenode.second_sqr_real;
    var
      instr : taicpu;
    begin
      { operand into an MM register, result register allocated }
      load_fpu_location;
      instr:=taicpu.op_reg_reg_reg(A_FMUL,location.register,
        left.location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(instr);
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for sqrt() on a real: FSQRT result, operand, followed
    by the optional FPU exception check. }
  procedure taarch64inlinenode.second_sqrt_real;
    var
      instr : taicpu;
    begin
      { operand into an MM register, result register allocated }
      load_fpu_location;
      instr:=taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(instr);
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for abs() on an integer.
    The operand is loaded into an integer register and a second register
    is allocated for the result. With overflow checking enabled, the
    operand is first compared against the lowest value of the result type
    and fpc_overflow is called when they are equal. The absolute value is
    then formed by a flag-setting negate (NEG with the S postfix) followed
    by a CSEL between the negated and the original value on condition GE. }
  procedure taarch64inlinenode.second_abs_long;
    var
      regsize : tcgsize;
      nooverflow : TAsmLabel;
      negate : taicpu;
    begin
      secondpass(left);
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
        left.resultdef,left.resultdef,true);

      { the result shares everything with the operand except the register }
      regsize:=def_cgsize(left.resultdef);
      location:=left.location;
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,regsize);

      if cs_check_overflow in current_settings.localswitches then
        begin
          current_asmdata.getjumplabel(nooverflow);
          { skip the overflow call unless the operand equals low(type) }
          hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,resultdef,OC_NE,
            torddef(resultdef).low.svalue,left.location.register,nooverflow);
          hlcg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          hlcg.g_call_system_proc(current_asmdata.CurrAsmList,'fpc_overflow',[],nil).resetiftemp;
          hlcg.a_label(current_asmdata.CurrAsmList,nooverflow);
        end;

      { result:=-operand, setting the flags }
      negate:=taicpu.op_reg_reg(A_NEG,location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(setoppostfix(negate,PF_S));
      { keep the negated value on GE, otherwise take the original operand }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_CSEL,
        location.register,location.register,left.location.register,C_GE));
    end;
  { Code generation for round() on a real.
    The operand is rounded to an integral floating point value with FRINTX
    into a temporary MM register and then converted to a signed integer
    with FCVTZS into an integer result register; the optional FPU
    exception check follows. }
  procedure taarch64inlinenode.second_round_real;
    var
      rounded : tregister;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,
        left.location,left.resultdef,true);

      { integer result register first, then the floating point temporary }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
      rounded:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);

      { step 1: round in floating point, honouring the current rounding mode }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg(A_FRINTX,rounded,left.location.register));
      { step 2: convert to a signed integer, rounding towards zero (there is
        no "convert to integer using the current rounding mode") }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg(A_FCVTZS,location.register,rounded));

      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for trunc() on a real.
    The operand is forced into a scalar MM register and converted to a
    signed integer, rounding towards zero, with FCVTZS into a newly
    allocated integer register. }
  procedure taarch64inlinenode.second_trunc_real;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
      { convert to signed integer rounding towards zero }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,left.location.register));
      { first_trunc_real sets pi_do_call for this check and
        second_round_real performs it after the same FCVTZS conversion;
        do the same here so trunc() does not skip the exception check }
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for int() on a real.
    The operand is forced into a scalar MM register and rounded towards
    zero to an integral floating point value with FRINTZ into a newly
    allocated MM register; the optional FPU exception check follows. }
  procedure taarch64inlinenode.second_int_real;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTZ,location.register,left.location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for frac() on a real.
    The integral part of the operand is computed with FRINTZ into the
    result register, and then subtracted from the operand with FSUB
    (result:=operand-result); the optional FPU exception check follows. }
  procedure taarch64inlinenode.second_frac_real;
    begin
      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      { result:=integral part of the operand }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTZ,location.register,left.location.register));
      { result:=operand-result }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FSUB,location.register,left.location.register,location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for get_frame: no instruction is emitted, the result
    simply is the frame pointer register, as an address-sized constant
    register location. }
  procedure taarch64inlinenode.second_get_frame;
    begin
      location_reset(location,LOC_CREGISTER,OS_ADDR);
      { get_frame serves backtracing, so the real frame pointer register is
        returned. current_procinfo.framepointer cannot be used for this: it
        is set to SP because that register is used to address temps. Most
        platforms use one register for both purposes, AArch64 does not. }
      location.register:=NR_FRAME_POINTER_REG;
    end;
  { Code generation for the fused multiply-add intrinsic.
    Unary minus nodes directly on top of the three arguments are not
    evaluated; instead their signs are folded into the choice between
    FMADD, FNMSUB, FMSUB and FNMADD. All three operands are forced into MM
    registers and the result goes into a newly allocated MM register. }
  procedure taarch64inlinenode.second_fma;
    const
      { opcode indexed by [product negated, third operand negated] }
      fmaop : array[boolean,boolean] of TAsmOp =
        (
          { product positive: third operand positive / negative }
          (A_FMADD,A_FNMSUB),
          { product negative: third operand positive / negative }
          (A_FMSUB,A_FNMADD)
        );
    var
      operands : array[1..3] of tnode;
      para : tcallparanode;
      idx : integer;
      flipproduct,
      flipaddend : boolean;
    begin
      { "parameters" is the head of the list and holds the third argument,
        its successors hold the second and the first one }
      para:=tcallparanode(parameters);
      operands[3]:=para.paravalue;
      para:=tcallparanode(para.nextpara);
      operands[2]:=para.paravalue;
      para:=tcallparanode(para.nextpara);
      operands[1]:=para.paravalue;

      { check whether negation nodes can be skipped: this is possible
        because changing the sign of a floating point number does not
        affect its absolute value in any way.
        A skipped unary minus node is not released here; it is kept and
        released together with the other nodes, only no code is generated
        for it. }
      flipproduct:=false;
      flipaddend:=false;
      for idx:=1 to 3 do
        if operands[idx].nodetype=unaryminusn then
          begin
            operands[idx]:=tunarynode(operands[idx]).left;
            if idx=3 then
              flipaddend:=true
            else
              { each negated factor flips the sign of the product }
              flipproduct:=not(flipproduct);
          end;

      for idx:=1 to 3 do
        secondpass(operands[idx]);

      { no memory operand is allowed }
      for idx:=1 to 3 do
        if not(operands[idx].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,
            operands[idx].location,operands[idx].resultdef,true);

      location_reset(location,LOC_MMREGISTER,operands[1].location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(
        fmaop[flipproduct,flipaddend],
        location.register,
        operands[1].location.register,
        operands[2].location.register,
        operands[3].location.register));

      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Code generation for the prefetch intrinsic.
    The operand is evaluated with pointer checking switched off. If it
    ends up as a memory reference, its address is loaded into a register
    and a PRFM instruction (with constant operand 0) is emitted for that
    address. For any other location nothing is generated. }
  procedure taarch64inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
      { do not call Checkpointer for left node }
      checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
      if checkpointer_used then
        node_change_local_switch(left,cs_checkpointer,false);
      secondpass(left);
      { switch pointer checking back on for left; the original passed
        "false" a second time, which never undid the change above }
      if checkpointer_used then
        node_change_local_switch(left,cs_checkpointer,true);
      case left.location.loc of
        LOC_CREFERENCE,
        LOC_REFERENCE:
          begin
            r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
            cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
            { NOTE(review): temppos and volatility are read from this node's
              own location, which this method never sets up; only the
              alignment comes from left. Confirm whether
              left.location.reference was meant for all three. }
            reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_ref(A_PRFM,0,ref));
          end;
        else
          { nothing to prefetch };
      end;
    end;
  { First pass for the min/max intrinsics.
    Single and double results are expected in an MM register (and the
    procedure is flagged with pi_do_call when FPU exception checking is
    required); 32 and 64 bit integer results are expected in an integer
    register. In both cases nil is returned. All other result types are
    left to the inherited implementation. }
  function taarch64inlinenode.first_minmax : tnode;
    var
      isfloat,
      isint : boolean;
    begin
      isfloat:=is_single(resultdef) or is_double(resultdef);
      isint:=not(isfloat) and (is_32bitint(resultdef) or is_64bitint(resultdef));

      if not(isfloat or isint) then
        begin
          Result:=inherited first_minmax;
          exit;
        end;

      Result:=nil;
      if isfloat then
        begin
          expectloc:=LOC_MMREGISTER;
          if needs_check_for_fpu_exceptions then
            Include(current_procinfo.flags,pi_do_call);
        end
      else
        expectloc:=LOC_REGISTER;
    end;
  { Code generation for the min/max intrinsics.
    Both arguments are evaluated and forced into registers, compared
    (FCMP for single/double, CMP for 32/64 bit integers) and the result is
    picked with a conditional select (FCSEL resp. CSEL) that takes the
    first argument when the condition holds and the second otherwise.
    Raises internal error 2021121802 or 2021121901 for an inline number
    that does not match the float resp. integer path, and 2021121801 for
    any other result type. }
  procedure taarch64inlinenode.second_minmax;
    var
      paraarray : array[1..2] of tnode;
      i: Integer;
      cond: TAsmCond;
    begin
      { "parameters" holds the second argument, its successor the first }
      paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
      paraarray[2]:=tcallparanode(parameters).paravalue;

      for i:=low(paraarray) to high(paraarray) do
        secondpass(paraarray[i]);

      if is_single(resultdef) or is_double(resultdef) then
        begin
          { no memory operand is allowed }
          for i:=low(paraarray) to high(paraarray) do
            begin
              if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,
                  paraarray[i].resultdef,true);
            end;

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMP,
            paraarray[1].location.register,paraarray[2].location.register));

          { condition under which the first argument is selected }
          case inlinenumber of
            in_min_single,
            in_min_double:
              cond := C_MI;
            in_max_single,
            in_max_double:
              cond := C_GT;
            else
              Internalerror(2021121802);
          end;

          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_FCSEL,
            location.register,paraarray[1].location.register,paraarray[2].location.register,cond));

          cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
        end
      else if is_32bitint(resultdef) or is_64bitint(resultdef) then
        begin
          { no memory operand is allowed }
          for i:=low(paraarray) to high(paraarray) do
            begin
              if not(paraarray[i].location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[i].location,
                  paraarray[i].resultdef,paraarray[i].resultdef,true);
            end;

          location_reset(location,LOC_REGISTER,paraarray[1].location.size);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);

          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,
            paraarray[1].location.register,paraarray[2].location.register));

          { condition under which the first argument is selected; signed
            and unsigned types need different condition codes }
          case inlinenumber of
            in_min_longint,
            in_min_int64:
              cond := C_LT;
            in_min_dword,
            in_min_qword:
              cond := C_LO;
            in_max_longint,
            in_max_int64:
              cond := C_GT;
            in_max_dword,
            in_max_qword:
              cond := C_HI;
            else
              Internalerror(2021121901);
          end;

          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_CSEL,
            location.register,paraarray[1].location.register,paraarray[2].location.register,cond));
        end
      else
        internalerror(2021121801);
    end;
  { unit initialization: make taarch64inlinenode the class referenced by
    cinlinenode }
  begin
    cinlinenode:=taarch64inlinenode;
  end.