narminl.pas 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generates ARM inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit narminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { ARM-specific inline node. It derives from tcgInlineNode and overrides a
    subset of its first_* (first pass) and second_* (code generation)
    methods; the implementations are in the implementation section of this
    unit. }
  tarminlinenode = class(tcgInlineNode)
    { first pass handlers; each returns a tnode }
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_fma : tnode; override;
    { atn, sin, cos and lgn are not supported by the Linux FPE
    function first_arctan_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    }
    { code generation handlers }
    procedure second_abs_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    { atn, sin, cos and lgn are not supported by the Linux FPE
    procedure second_arctan_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    }
    procedure second_prefetch; override;
    procedure second_abs_long; override;
    procedure second_fma; override;
  private
    { shared operand set-up used by the second_*_real methods; reports through
      singleprec whether the operand is of type s32real }
    procedure load_fpu_location(out singleprec: boolean);
  end;
  49. implementation
  50. uses
  51. globtype,verbose,globals,
  52. procinfo,
  53. cpuinfo, defutil,symdef,aasmdata,aasmcpu,
  54. cgbase,cgutils,pass_1,pass_2,
  55. cpubase,ncgutil,cgobj,cgcpu, hlcgobj,
  56. nutils,ncal;
  57. {*****************************************************************************
  58. tarminlinenode
  59. *****************************************************************************}
  { Generates code for the operand (left), forces it into the kind of
    register that matches the selected FPU type and initialises the node's
    own location as a copy of the operand location.

    When the operand ends up in a constant FPU/MM register, a fresh register
    is allocated for the result and the location is marked as a regular
    (non-constant) register location.

    singleprec is set to true if the operand's float type is s32real.
    Raises internal error 2009111801 for an FPU type that is handled by none
    of the branches. }
  procedure tarminlinenode.load_fpu_location(out singleprec: boolean);
    begin
      secondpass(left);
      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        begin
          { FPA: operand goes into an FPU register }
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_copy(location,left.location);
          if left.location.loc=LOC_CFPUREGISTER then
            begin
              location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
              location.loc:=LOC_FPUREGISTER;
            end;
        end
      else if current_settings.fputype=fpu_soft then
        begin
          { soft float: operand is kept in integer register(s) }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location_copy(location,left.location);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        begin
          { VFP: operand goes into an MM register as a scalar }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_copy(location,left.location);
          if left.location.loc=LOC_CMMREGISTER then
            begin
              location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
              location.loc:=LOC_MMREGISTER;
            end;
        end
      else
        internalerror(2009111801);
      singleprec:=(tfloatdef(left.resultdef).floattype=s32real);
    end;
  { First pass for abs() on a real operand.

    With floating point emulation enabled, the operand is first-passed and
    the result is expected in an integer register. Otherwise the expected
    location depends on the FPU type: an FPU register for FPA, an MM register
    for VFP. A VFP unit without double support only handles s32real here;
    other float types are delegated to the inherited implementation.

    Returns nil when the node is handled here, otherwise whatever the
    inherited method returns. Raises internal error 2009112401 for an FPU
    type that is handled by none of the branches. }
  function tarminlinenode.first_abs_real : tnode;
    begin
      result:=nil;
      if cs_fp_emulation in current_settings.moduleswitches then
        begin
          firstpass(left);
          expectloc:=LOC_REGISTER;
          exit;
        end;

      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        expectloc:=LOC_FPUREGISTER
      else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
        expectloc:=LOC_MMREGISTER
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        begin
          { single precision only VFP: anything but s32real is left to the ancestor }
          if tfloatdef(left.resultdef).floattype<>s32real then
            exit(inherited first_abs_real);
          expectloc:=LOC_MMREGISTER;
        end
      else
        internalerror(2009112401);

      { on VFP targets the procedure is flagged with pi_do_call when FPU
        exception checks are needed }
      if ((FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) or
          (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype])) and
         needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for sqr() on a real operand.

    With floating point emulation enabled the inherited implementation is
    used. Otherwise the expected location depends on the FPU type: an FPU
    register for FPA, an MM register for VFP. A VFP unit without double
    support only handles s32real here; other float types are delegated to the
    inherited implementation.

    Returns nil when the node is handled here, otherwise whatever the
    inherited method returns. Raises internal error 2009112402 for an FPU
    type that is handled by none of the branches. }
  function tarminlinenode.first_sqr_real : tnode;
    begin
      if cs_fp_emulation in current_settings.moduleswitches then
        exit(inherited first_sqr_real);

      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        expectloc:=LOC_FPUREGISTER
      else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
        expectloc:=LOC_MMREGISTER
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        begin
          { single precision only VFP: anything but s32real is left to the ancestor }
          if tfloatdef(left.resultdef).floattype<>s32real then
            exit(inherited first_sqr_real);
          expectloc:=LOC_MMREGISTER;
        end
      else
        internalerror(2009112402);

      { on VFP targets the procedure is flagged with pi_do_call when FPU
        exception checks are needed }
      if ((FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) or
          (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype])) and
         needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
      result:=nil;
    end;
  { First pass for sqrt() on a real operand.

    With floating point emulation enabled the inherited implementation is
    used. Otherwise the expected location depends on the FPU type: an FPU
    register for FPA, an MM register for VFP. A VFP unit without double
    support only handles s32real here; other float types are delegated to the
    inherited implementation.

    Returns nil when the node is handled here, otherwise whatever the
    inherited method returns. Raises internal error 2009112403 for an FPU
    type that is handled by none of the branches. }
  function tarminlinenode.first_sqrt_real : tnode;
    begin
      if cs_fp_emulation in current_settings.moduleswitches then
        exit(inherited first_sqrt_real);

      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        expectloc:=LOC_FPUREGISTER
      else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
        expectloc:=LOC_MMREGISTER
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        begin
          { single precision only VFP: anything but s32real is left to the ancestor }
          if tfloatdef(left.resultdef).floattype<>s32real then
            exit(inherited first_sqrt_real);
          expectloc:=LOC_MMREGISTER;
        end
      else
        internalerror(2009112403);

      { on VFP targets the procedure is flagged with pi_do_call when FPU
        exception checks are needed }
      if ((FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) or
          (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype])) and
         needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
      result:=nil;
    end;
  { First pass for the fused multiply-add intrinsic.

    Only single and double results are handled here: the result is expected
    in an MM register and nil is returned. Every other result type is
    delegated to the inherited implementation. }
  function tarminlinenode.first_fma : tnode;
    begin
      if not(is_double(resultdef) or is_single(resultdef)) then
        exit(inherited first_fma);

      expectloc:=LOC_MMREGISTER;
      result:=nil;
      { on VFP targets the procedure is flagged with pi_do_call when FPU
        exception checks are needed }
      if ((FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) or
          (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype])) and
         needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { atn, sin, cos and lgn are not supported by the Linux FPE
  201. function tarminlinenode.first_arctan_real: tnode;
  202. begin
  203. expectloc:=LOC_FPUREGISTER;
  204. result:=nil;
  205. end;
  206. function tarminlinenode.first_ln_real: tnode;
  207. begin
  208. expectloc:=LOC_FPUREGISTER;
  209. result:=nil;
  210. end;
  211. function tarminlinenode.first_cos_real: tnode;
  212. begin
  213. expectloc:=LOC_FPUREGISTER;
  214. result:=nil;
  215. end;
  216. function tarminlinenode.first_sin_real: tnode;
  217. begin
  218. expectloc:=LOC_FPUREGISTER;
  219. result:=nil;
  220. end;
  221. }
  { Code generation for abs() on a real operand.

    FPA:        emits ABS with the postfix derived from resultdef.
    soft float: masks the value with $7fffffff using a 32 bit AND; for
                s32real this is done on location.register, otherwise on
                location.registerhi.
    VFP:        emits VABS; F64 is only selected when the FPU has double
                support and the operand is not single precision, otherwise
                F32 is used. Afterwards an FPU exception check is emitted
                if required.

    Raises internal error 2009111402 for any other FPU type. }
  procedure tarminlinenode.second_abs_real;
    var
      singleprec: boolean;
      pf: TOpPostfix;
    begin
      load_fpu_location(singleprec);
      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ABS,location.register,left.location.register),get_fpu_postfix(resultdef)))
      else if current_settings.fputype=fpu_soft then
        begin
          if singleprec then
            cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.register)
          else
            cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.registerhi);
        end
      else if [FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VFP_EXTENSION]*fpu_capabilities[current_settings.fputype]<>[] then
        begin
          if (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) and not(singleprec) then
            pf:=PF_F64
          else
            pf:=PF_F32;
          current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),pf));
          cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
        end
      else
        internalerror(2009111402);
    end;
  { Code generation for sqr() on a real operand: the operand register is
    multiplied with itself.

    FPA: emits MUF with the postfix derived from resultdef.
    VFP: emits VMUL; F64 is only selected when the FPU has double support
         and the operand is not single precision, otherwise F32 is used.
         Afterwards an FPU exception check is emitted if required.

    Raises internal error 2009111403 for any other FPU type. }
  procedure tarminlinenode.second_sqr_real;
    var
      singleprec: boolean;
      pf: TOpPostfix;
    begin
      load_fpu_location(singleprec);
      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register),get_fpu_postfix(resultdef)))
      else if [FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VFP_EXTENSION]*fpu_capabilities[current_settings.fputype]<>[] then
        begin
          if (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) and not(singleprec) then
            pf:=PF_F64
          else
            pf:=PF_F32;
          current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),pf));
          cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
        end
      else
        internalerror(2009111403);
    end;
  { Code generation for sqrt() on a real operand.

    FPA: emits SQT with the postfix derived from resultdef.
    VFP: emits VSQRT; F64 is only selected when the FPU has double support
         and the operand is not single precision, otherwise F32 is used.
         Afterwards an FPU exception check is emitted if required.

    Raises internal error 2009111405 for any other FPU type. }
  procedure tarminlinenode.second_sqrt_real;
    var
      singleprec: boolean;
      pf: TOpPostfix;
    begin
      load_fpu_location(singleprec);
      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)))
      else if [FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VFP_EXTENSION]*fpu_capabilities[current_settings.fputype]<>[] then
        begin
          if (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) and not(singleprec) then
            pf:=PF_F64
          else
            pf:=PF_F32;
          current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),pf));
          cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
        end
      else
        internalerror(2009111405);
    end;
  { atn, sin, cos and lgn are not supported by the Linux FPE
  317. procedure tarminlinenode.second_arctan_real;
  318. begin
  319. load_fpu_location;
  320. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ATN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  321. end;
  322. procedure tarminlinenode.second_ln_real;
  323. begin
  324. load_fpu_location;
  325. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_LGN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  326. end;
  327. procedure tarminlinenode.second_cos_real;
  328. begin
  329. load_fpu_location;
  330. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_COS,location.register,left.location.register),get_fpu_postfix(resultdef)));
  331. end;
  332. procedure tarminlinenode.second_sin_real;
  333. begin
  334. load_fpu_location;
  335. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SIN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  336. end;
  337. }
  { Code generation for the prefetch intrinsic.

    Code is only generated when not producing Thumb code and the selected CPU
    has the EDSP capability; otherwise the procedure does nothing. If the
    operand evaluates to a memory reference, its address is loaded into an
    integer register and a PLD on that register is emitted. Operands in any
    other location are ignored.

    While the operand is processed, cs_checkpointer is switched off for the
    operand tree and switched back on afterwards if it was active. }
  procedure tarminlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
      if not(GenerateThumbCode) and (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) then
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { restore the switch; the original code passed false here as well,
            which left the switch cleared on the operand tree }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                { NOTE(review): temppos and volatility are taken from this node's own
                  location rather than from left.location - confirm this is intended }
                reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
                { since the address might be nil we can't use ldr for older cpus }
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PLD,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Code generation for abs() on an integer operand.

    When generating Thumb code the inherited implementation is used.
    Otherwise the operand is forced into a register and the following
    sequence is emitted into a newly allocated result register:

      MOVS  result, operand      (sets the flags)
      IT    MI                   (only when generating Thumb-2 code)
      RSBMI result, result, #0   (negates the value if it was negative)

    The default flags register is allocated around the sequence. }
  procedure tarminlinenode.second_abs_long;
    var
      opsize : tcgsize;
    begin
      if GenerateThumbCode then
        begin
          inherited second_abs_long;
          exit;
        end;
      secondpass(left);
      opsize:=def_cgsize(left.resultdef);
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
      location:=left.location;
      { the result lives in a freshly allocated temporary, so it must not
        inherit a constant register location (LOC_CREGISTER) from the operand;
        this mirrors what load_fpu_location does for FPU/MM registers }
      location.loc:=LOC_REGISTER;
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
      current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_MOV,location.register,left.location.register), PF_S));
      if GenerateThumb2Code then
        current_asmdata.CurrAsmList.concat(taicpu.op_cond(A_IT,C_MI));
      current_asmdata.CurrAsmList.concat(setcondition(taicpu.op_reg_reg_const(A_RSB,location.register,location.register, 0), C_MI));
      cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
    end;
  { Code generation for the fused multiply-add intrinsic.

    The three call parameters are collected into paraarray, where
    paraarray[1] and paraarray[2] are the operands passed as the two source
    registers of the instruction and paraarray[3] is the operand that is
    first copied into the result register. Unary minus nodes directly on top
    of an operand are not code-generated; instead they select one of the
    VFMA/VFMS/VFNMA/VFNMS opcodes.

    Raises internal error 2014032301 if the selected FPU lacks the
    FPUARM_HAS_FMA capability. }
  procedure tarminlinenode.second_fma;
    const
      { indexed as op[product is negated, third operand is negated] }
      op : array[false..true,false..true] of TAsmOp =
        (
          { positive product: positive, negative third operand }
          (A_VFMA,A_VFNMS),
          { negative product: positive, negative third operand }
          (A_VFMS,A_VFNMA)
        );
    var
      paraarray : array[1..3] of tnode;
      para : tcallparanode;
      i : integer;
      negop3,
      negproduct : boolean;
      oppostfix : TOpPostfix;
    begin
      if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
        begin
          negop3:=false;
          negproduct:=false;

          { the head of the parameter chain becomes paraarray[3], the
            following entries paraarray[2] and paraarray[1] }
          para:=tcallparanode(parameters);
          paraarray[3]:=para.paravalue;
          para:=tcallparanode(para.nextpara);
          paraarray[2]:=para.paravalue;
          paraarray[1]:=tcallparanode(para.nextpara).paravalue;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            the unused unary minus nodes are not released here, they are kept
            and released together with the other nodes, only no code is
            generated for them }
          for i:=1 to 3 do
            if paraarray[i].nodetype=unaryminusn then
              begin
                paraarray[i]:=tunarynode(paraarray[i]).left;
                if i=3 then
                  negop3:=true
                else
                  { a minus on either factor flips the sign of the product }
                  negproduct:=not(negproduct);
              end;

          for i:=1 to 3 do
            secondpass(paraarray[i]);

          { no memory operand is allowed }
          for i:=1 to 3 do
            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          { the third operand is copied into the result register before the
            fused instruction is emitted with that register as destination }
          hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
            paraarray[3].location.register,location.register,mms_movescalar);

          oppostfix:=PF_F32;
          if is_double(resultdef) then
            oppostfix:=PF_F64;

          current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op[negproduct,negop3],
            location.register,paraarray[1].location.register,paraarray[2].location.register),oppostfix));
          cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
        end
      else
        internalerror(2014032301);
    end;
  { unit initialization: assigns the tarminlinenode class to cinlinenode }
  begin
    cinlinenode:=tarminlinenode;
  end.