narminl.pas 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generates ARM inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit narminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { ARM-specific inline node.  For each intrinsic handled here the first_*
    method is the override that picks the expected location and the second_*
    method is the override that emits the instructions. }
  tarminlinenode = class(tcgInlineNode)
    { abs() on floating point operands }
    function first_abs_real: tnode; override;
    procedure second_abs_real; override;
    { sqr() on floating point operands }
    function first_sqr_real: tnode; override;
    procedure second_sqr_real; override;
    { sqrt() on floating point operands }
    function first_sqrt_real: tnode; override;
    procedure second_sqrt_real; override;
    { fused multiply-add }
    function first_fma : tnode; override;
    procedure second_fma; override;
    { intrinsics for which only the code generating half is overridden }
    procedure second_prefetch; override;
    procedure second_abs_long; override;
    { atn, sin, cos and lgn are not supported by the Linux FPE, so the
      following overrides stay disabled:
    function first_arctan_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    procedure second_arctan_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    }
  private
    { operand loading shared by second_abs_real, second_sqr_real and
      second_sqrt_real }
    procedure load_fpu_location(out singleprec: boolean);
  end;
  49. implementation
  50. uses
  51. globtype,verbose,globals,
  52. cpuinfo, defutil,symdef,aasmdata,aasmcpu,
  53. cgbase,cgutils,pass_1,pass_2,
  54. cpubase,ncgutil,cgobj,cgcpu, hlcgobj,
  55. nutils,ncal;
  56. {*****************************************************************************
  57. tarminlinenode
  58. *****************************************************************************}
  { Generates code for the operand (left), forces it into the kind of register
    that belongs to the selected FPU type and copies the operand's location
    into the node's own location.
      - FPA:        FPU register
      - VFP:        scalar MM register
      - soft float: integer register
    If the operand ended up in LOC_CFPUREGISTER/LOC_CMMREGISTER, a new register
    is allocated for the node and its location becomes LOC_FPUREGISTER or
    LOC_MMREGISTER respectively.
    singleprec: set to true when the operand's float type is s32real.
    Any other FPU type raises internalerror 2009111801. }
  procedure tarminlinenode.load_fpu_location(out singleprec: boolean);
    var
      curlist: TAsmList;
    begin
      secondpass(left);
      curlist:=current_asmdata.CurrAsmList;
      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            hlcg.location_force_fpureg(curlist,left.location,left.resultdef,true);
            location_copy(location,left.location);
            if left.location.loc=LOC_CFPUREGISTER then
              begin
                location.register:=cg.getfpuregister(curlist,location.size);
                location.loc:=LOC_FPUREGISTER;
              end;
          end;
        fpu_vfp_first..fpu_vfp_last:
          begin
            hlcg.location_force_mmregscalar(curlist,left.location,left.resultdef,true);
            location_copy(location,left.location);
            if left.location.loc=LOC_CMMREGISTER then
              begin
                location.register:=cg.getmmregister(curlist,location.size);
                location.loc:=LOC_MMREGISTER;
              end;
          end;
        fpu_soft:
          begin
            { here the node shares the operand's register: no separate
              destination register is allocated }
            hlcg.location_force_reg(curlist,left.location,left.resultdef,left.resultdef,false);
            location_copy(location,left.location);
          end
        else
          internalerror(2009111801);
      end;
      singleprec:=(tfloatdef(left.resultdef).floattype=s32real);
    end;
  { Pass 1 for abs() on a floating point operand: selects the expected
    location of the result.
    Returns nil when the node is handled here, otherwise whatever the
    inherited handler returns (VFP unit without double support and an operand
    that is not s32real). }
  function tarminlinenode.first_abs_real : tnode;
    begin
      result:=nil;
      { with FP emulation the value lives in integer registers }
      if cs_fp_emulation in current_settings.moduleswitches then
        begin
          firstpass(left);
          expectloc:=LOC_REGISTER;
          exit;
        end;
      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          expectloc:=LOC_FPUREGISTER;
        else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
          expectloc:=LOC_MMREGISTER
        else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
          begin
            { single-only VFP: only s32real is handled inline }
            if tfloatdef(left.resultdef).floattype<>s32real then
              result:=inherited first_abs_real
            else
              expectloc:=LOC_MMREGISTER;
          end
        else
          internalerror(2009112401);
      end;
    end;
  { Pass 1 for sqr() on a floating point operand: selects the expected
    location of the result.
    With FP emulation, or on a single-only VFP unit with an operand that is
    not s32real, the inherited handler decides; otherwise nil is returned. }
  function tarminlinenode.first_sqr_real : tnode;
    begin
      if cs_fp_emulation in current_settings.moduleswitches then
        exit(inherited first_sqr_real);
      result:=nil;
      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          expectloc:=LOC_FPUREGISTER;
        else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
          expectloc:=LOC_MMREGISTER
        else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
          begin
            { single-only VFP: only s32real is handled inline }
            if tfloatdef(left.resultdef).floattype<>s32real then
              result:=inherited first_sqr_real
            else
              expectloc:=LOC_MMREGISTER;
          end
        else
          internalerror(2009112402);
      end;
    end;
  { Pass 1 for sqrt() on a floating point operand: selects the expected
    location of the result.
    With FP emulation, or on a single-only VFP unit with an operand that is
    not s32real, the inherited handler decides; otherwise nil is returned. }
  function tarminlinenode.first_sqrt_real : tnode;
    begin
      if cs_fp_emulation in current_settings.moduleswitches then
        exit(inherited first_sqrt_real);
      result:=nil;
      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          expectloc:=LOC_FPUREGISTER;
        else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
          expectloc:=LOC_MMREGISTER
        else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
          begin
            { single-only VFP: only s32real is handled inline }
            if tfloatdef(left.resultdef).floattype<>s32real then
              result:=inherited first_sqrt_real
            else
              expectloc:=LOC_MMREGISTER;
          end
        else
          internalerror(2009112403);
      end;
    end;
  { Pass 1 for the fused multiply-add intrinsic.
    The node is handled inline (result expected in an MM register, nil
    returned) only when the selected FPU has FPUARM_HAS_FMA and the result
    type is single or double.  This is the same capability second_fma tests
    before emitting code; without it second_fma can only raise an internal
    error, so in every other case the decision is left to the inherited
    handler and its result is returned. }
  function tarminlinenode.first_fma : tnode;
    begin
      if (FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
        end
      else
        Result:=inherited first_fma;
    end;
  { atn, sin, cos and lgn are not supported by the Linux FPE, so these pass-1 handlers stay disabled
  189. function tarminlinenode.first_arctan_real: tnode;
  190. begin
  191. expectloc:=LOC_FPUREGISTER;
  192. result:=nil;
  193. end;
  194. function tarminlinenode.first_ln_real: tnode;
  195. begin
  196. expectloc:=LOC_FPUREGISTER;
  197. result:=nil;
  198. end;
  199. function tarminlinenode.first_cos_real: tnode;
  200. begin
  201. expectloc:=LOC_FPUREGISTER;
  202. result:=nil;
  203. end;
  204. function tarminlinenode.first_sin_real: tnode;
  205. begin
  206. expectloc:=LOC_FPUREGISTER;
  207. result:=nil;
  208. end;
  209. }
  { Pass 2 for abs() on a floating point operand.
    load_fpu_location brings the operand into a register and sets up the
    node's location; afterwards the absolute value is produced by
      - ABS on FPA units,
      - an AND with $7fffffff on the integer register for soft float,
      - VABS with an F32/F64 postfix on VFP units.
    NOTE(review): internal error number 2009111402 is also used by
    second_sqrt_real, so the number alone does not identify the failing
    routine. }
  procedure tarminlinenode.second_abs_real;
    var
      is_single_op: boolean;
      postfix: TOpPostfix;
      curlist: TAsmList;
      instr: taicpu;
    begin
      load_fpu_location(is_single_op);
      curlist:=current_asmdata.CurrAsmList;
      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            instr:=taicpu.op_reg_reg(A_ABS,location.register,left.location.register);
            curlist.concat(setoppostfix(instr,get_fpu_postfix(resultdef)));
          end;
        fpu_soft:
          begin
            { clear bit 31 of the register (s32real) or of registerhi
              (any other float type) }
            if not is_single_op then
              cg.a_op_const_reg(curlist,OP_AND,OS_32,tcgint($7fffffff),location.registerhi)
            else
              cg.a_op_const_reg(curlist,OP_AND,OS_32,tcgint($7fffffff),location.register);
          end
        else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
          begin
            postfix:=PF_F64;
            if is_single_op then
              postfix:=PF_F32;
            instr:=taicpu.op_reg_reg(A_VABS,location.register,left.location.register);
            curlist.concat(setoppostfix(instr,postfix));
            cg.maybe_check_for_fpu_exception(curlist);
          end
        else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
          begin
            { single-only unit: always the F32 form }
            instr:=taicpu.op_reg_reg(A_VABS,location.register,left.location.register);
            curlist.concat(setoppostfix(instr,PF_F32));
            cg.maybe_check_for_fpu_exception(curlist);
          end
        else
          internalerror(2009111402);
      end;
    end;
  { Pass 2 for sqr() on a floating point operand: after load_fpu_location has
    placed the operand in a register, the operand is multiplied with itself
    (MUF on FPA units, VMUL with an F32/F64 postfix on VFP units).
    Every other FPU type, soft float included, raises internalerror
    2009111403. }
  procedure tarminlinenode.second_sqr_real;
    var
      is_single_op: boolean;
      postfix: TOpPostfix;
      curlist: TAsmList;
      instr: taicpu;
    begin
      load_fpu_location(is_single_op);
      curlist:=current_asmdata.CurrAsmList;
      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            instr:=taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register);
            curlist.concat(setoppostfix(instr,get_fpu_postfix(resultdef)));
          end
        else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
          begin
            postfix:=PF_F64;
            if is_single_op then
              postfix:=PF_F32;
            instr:=taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register);
            curlist.concat(setoppostfix(instr,postfix));
            cg.maybe_check_for_fpu_exception(curlist);
          end
        else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
          begin
            { single-only unit: always the F32 form }
            instr:=taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register);
            curlist.concat(setoppostfix(instr,PF_F32));
            cg.maybe_check_for_fpu_exception(curlist);
          end
        else
          internalerror(2009111403);
      end;
    end;
  { Pass 2 for sqrt() on a floating point operand: after load_fpu_location has
    placed the operand in a register, SQT (FPA units) or VSQRT with an F32/F64
    postfix (VFP units) is emitted.
    Every other FPU type, soft float included, raises an internal error.
    NOTE(review): internal error number 2009111402 is also used by
    second_abs_real, so the number alone does not identify the failing
    routine. }
  procedure tarminlinenode.second_sqrt_real;
    var
      is_single_op: boolean;
      postfix: TOpPostfix;
      curlist: TAsmList;
      instr: taicpu;
    begin
      load_fpu_location(is_single_op);
      curlist:=current_asmdata.CurrAsmList;
      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            instr:=taicpu.op_reg_reg(A_SQT,location.register,left.location.register);
            curlist.concat(setoppostfix(instr,get_fpu_postfix(resultdef)));
          end
        else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
          begin
            postfix:=PF_F64;
            if is_single_op then
              postfix:=PF_F32;
            instr:=taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register);
            curlist.concat(setoppostfix(instr,postfix));
            cg.maybe_check_for_fpu_exception(curlist);
          end
        else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
          begin
            { single-only unit: always the F32 form }
            instr:=taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register);
            curlist.concat(setoppostfix(instr,PF_F32));
            cg.maybe_check_for_fpu_exception(curlist);
          end
        else
          internalerror(2009111402);
      end;
    end;
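  { atn, sin, cos and lgn are not supported by the Linux FPE, so these pass-2 handlers stay disabled.
    NOTE: they still call load_fpu_location without its singleprec argument and have to be
    adapted before they can be enabled again.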
  305. procedure tarminlinenode.second_arctan_real;
  306. begin
  307. load_fpu_location;
  308. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ATN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  309. end;
  310. procedure tarminlinenode.second_ln_real;
  311. begin
  312. load_fpu_location;
  313. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_LGN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  314. end;
  315. procedure tarminlinenode.second_cos_real;
  316. begin
  317. load_fpu_location;
  318. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_COS,location.register,left.location.register),get_fpu_postfix(resultdef)));
  319. end;
  320. procedure tarminlinenode.second_sin_real;
  321. begin
  322. load_fpu_location;
  323. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SIN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  324. end;
  325. }
  { Pass 2 for the prefetch intrinsic.
    Code is only emitted when not generating Thumb code and the selected CPU
    has CPUARM_HAS_EDSP; otherwise the operand is not even evaluated.
    The operand is generated with the cs_checkpointer local switch turned off;
    if the operand ends up as a memory reference its address is loaded into a
    register and a PLD on that address is emitted.  Operands in any other
    location are ignored. }
  procedure tarminlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
      if not(GenerateThumbCode) and (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) then
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { turn the switch back on for the operand; passing false here a
            second time would leave it disabled and make this call a no-op }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                { NOTE(review): temppos and volatility are read from the node's
                  own location, which this routine never sets, while the
                  alignment comes from left.location - confirm this is intended }
                reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
                { since the address might be nil we can't use ldr for older cpus }
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PLD,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Pass 2 for abs() on an integer operand.
    Thumb code is left to the inherited implementation.  Otherwise the operand
    is forced into a register and the result is built in a new register with
      MOVS  dst, src
      IT MI            (only when generating Thumb-2 code)
      RSBMI dst, dst, #0
    The default flags register is marked as allocated around the sequence. }
  procedure tarminlinenode.second_abs_long;
    var
      regsize : tcgsize;
      curlist : TAsmList;
      instr : taicpu;
    begin
      if GenerateThumbCode then
        inherited second_abs_long
      else
        begin
          secondpass(left);
          curlist:=current_asmdata.CurrAsmList;
          regsize:=def_cgsize(left.resultdef);
          hlcg.location_force_reg(curlist,left.location,left.resultdef,left.resultdef,true);
          { start from the operand's location, but with a destination
            register of its own }
          location:=left.location;
          location.register:=cg.getintregister(curlist,regsize);

          cg.a_reg_alloc(curlist,NR_DEFAULTFLAGS);

          { copy the operand and set the flags from it }
          instr:=taicpu.op_reg_reg(A_MOV,location.register,left.location.register);
          curlist.concat(setoppostfix(instr,PF_S));

          if GenerateThumb2Code then
            curlist.concat(taicpu.op_cond(A_IT,C_MI));

          { negate only if the copied value was negative }
          instr:=taicpu.op_reg_reg_const(A_RSB,location.register,location.register,0);
          curlist.concat(setcondition(instr,C_MI));

          cg.a_reg_dealloc(curlist,NR_DEFAULTFLAGS);
        end;
    end;
  { Pass 2 for the fused multiply-add intrinsic.
    Requires FPUARM_HAS_FMA on the selected FPU, otherwise internalerror
    2014032301 is raised.
    The three operands are taken from the parameter chain, unary minus nodes
    directly on top of them are folded into the choice of the instruction, all
    operands are forced into MM registers, the third operand is copied into
    the destination register and one VFMA/VFMS/VFNMA/VFNMS with the first two
    operands is emitted. }
  procedure tarminlinenode.second_fma;
    const
      { instruction indexed by [product negated, third operand negated] }
      fma_opcode : array[boolean,boolean] of TAsmOp =
        (
          { positive product: positive, negative third operand }
          (A_VFMA,A_VFNMS),
          { negative product: positive, negative third operand }
          (A_VFMS,A_VFNMA)
        );
    var
      args : array[1..3] of tnode;
      para : tcallparanode;
      idx : integer;
      negate_addend,
      negate_product : boolean;
      postfix : TOpPostfix;
      curlist : TAsmList;
    begin
      if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
        begin
          negate_addend:=false;
          negate_product:=false;

          { the head of the parameter chain is operand 3, its successors are
            operand 2 and operand 1 }
          para:=tcallparanode(parameters);
          args[3]:=para.paravalue;
          para:=tcallparanode(para.nextpara);
          args[2]:=para.paravalue;
          para:=tcallparanode(para.nextpara);
          args[1]:=para.paravalue;

          { A unary minus on top of an operand can be dropped: changing the
            sign of a floating point number does not affect its absolute
            value, so the sign is handled by picking the matching instruction.
            The skipped unary minus node is not released here; it is kept and
            released together with the other nodes, only no code is generated
            for it. }
          for idx:=1 to 3 do
            if args[idx].nodetype=unaryminusn then
              begin
                args[idx]:=tunarynode(args[idx]).left;
                if idx=3 then
                  negate_addend:=true
                else
                  { a minus on either factor flips the sign of the product }
                  negate_product:=not(negate_product);
              end;

          for idx:=1 to 3 do
            secondpass(args[idx]);
          curlist:=current_asmdata.CurrAsmList;

          { no memory operand is allowed }
          for idx:=1 to 3 do
            if not(args[idx].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              hlcg.location_force_mmregscalar(curlist,args[idx].location,args[idx].resultdef,true);

          if is_double(resultdef) then
            postfix:=PF_F64
          else
            postfix:=PF_F32;

          location_reset(location,LOC_MMREGISTER,args[1].location.size);
          location.register:=cg.getmmregister(curlist,location.size);

          { the destination register starts out holding the third operand }
          hlcg.a_loadmm_reg_reg(curlist,args[3].resultdef,resultdef,
            args[3].location.register,location.register,mms_movescalar);

          curlist.concat(setoppostfix(
            taicpu.op_reg_reg_reg(fma_opcode[negate_product,negate_addend],
              location.register,args[1].location.register,args[2].location.register),
            postfix));
          cg.maybe_check_for_fpu_exception(curlist);
        end
      else
        internalerror(2014032301);
    end;
  { Unit initialisation: store the ARM node class in cinlinenode.
    cinlinenode is declared outside this unit; presumably it is the class
    reference used when inline nodes are created - confirm against ninl. }
initialization
  cinlinenode:=tarminlinenode;
end.