narminl.pas 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generates ARM inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit narminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { ARM flavour of the code generator inline node.  It overrides the first
    pass and second pass handlers listed below; load_fpu_location is a
    private helper shared by the second pass handlers for real operands. }
  tarminlinenode = class(tcgInlineNode)
    { --- first pass handlers --- }
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_fma: tnode; override;
    { atn, sin, cos and lgn aren't supported by the linux fpe
    function first_arctan_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    }

    { --- second pass handlers --- }
    procedure second_abs_real; override;
    procedure second_abs_long; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_fma; override;
    procedure second_prefetch; override;
    { atn, sin, cos and lgn aren't supported by the linux fpe
    procedure second_arctan_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    }
  private
    { second-passes the operand, forces it into a register and reports
      through singleprec whether its float type is s32real }
    procedure load_fpu_location(out singleprec: boolean);
  end;
  49. implementation
  50. uses
  51. globtype,verbose,globals,
  52. cpuinfo, defutil,symdef,aasmdata,aasmcpu,
  53. cgbase,cgutils,pass_1,pass_2,
  54. cpubase,ncgutil,cgobj,cgcpu, hlcgobj,
  55. nutils,ncal;
  56. {*****************************************************************************
  57. tarminlinenode
  58. *****************************************************************************}
  { Runs the second pass on the operand (left), forces it into the register
    kind selected by current_settings.fputype and copies the operand location
    into this node's location.  When the operand ends up in a constant
    (LOC_C...) FPU/MM register, a fresh register is allocated for the result
    so the operand register is not overwritten.

    singleprec: set to true when the operand's float type is s32real.
    Raises internalerror(2009111801) for an FPU type not listed here. }
  procedure tarminlinenode.load_fpu_location(out singleprec: boolean);
    var
      list: TAsmList;
    begin
      secondpass(left);
      list:=current_asmdata.CurrAsmList;
      singleprec:=(tfloatdef(left.resultdef).floattype=s32real);

      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            hlcg.location_force_fpureg(list,left.location,left.resultdef,true);
            location_copy(location,left.location);
            if left.location.loc=LOC_CFPUREGISTER then
              begin
                { result must not live in the operand's register }
                location.register:=cg.getfpuregister(list,location.size);
                location.loc:=LOC_FPUREGISTER;
              end;
          end;

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16,
        fpu_fpv4_s16:
          begin
            hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
            location_copy(location,left.location);
            if left.location.loc=LOC_CMMREGISTER then
              begin
                { result must not live in the operand's register }
                location.register:=cg.getmmregister(list,location.size);
                location.loc:=LOC_MMREGISTER;
              end;
          end;

        fpu_soft:
          begin
            { soft float: the value is handled in integer register(s) }
            hlcg.location_force_reg(list,left.location,left.resultdef,left.resultdef,false);
            location_copy(location,left.location);
          end
        else
          internalerror(2009111801);
      end;
    end;
  { First pass for abs() on a real operand: selects the expected result
    location from the floating point configuration.

    Returns nil when the node is kept as is; for fpu_fpv4_s16 with an operand
    that is not s32real the result of the inherited handler is returned.
    Raises internalerror(2009112401) for an FPU type not handled here. }
  function tarminlinenode.first_abs_real : tnode;
    begin
      result:=nil;
      if cs_fp_emulation in current_settings.moduleswitches then
        begin
          firstpass(left);
          expectloc:=LOC_REGISTER;
        end
      else if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        expectloc:=LOC_FPUREGISTER
      else if current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16] then
        expectloc:=LOC_MMREGISTER
      else if current_settings.fputype=fpu_fpv4_s16 then
        begin
          { only s32real is handled inline for this FPU type }
          if tfloatdef(left.resultdef).floattype=s32real then
            expectloc:=LOC_MMREGISTER
          else
            result:=inherited first_abs_real;
        end
      else
        internalerror(2009112401);
    end;
  { First pass for sqr() on a real operand.

    With cs_fp_emulation active, or for fpu_fpv4_s16 with an operand that is
    not s32real, the inherited handler decides and its result is returned.
    Otherwise the expected location is chosen from the FPU type and nil is
    returned.  Raises internalerror(2009112402) for an FPU type not handled
    here. }
  function tarminlinenode.first_sqr_real : tnode;
    begin
      result:=nil;
      if cs_fp_emulation in current_settings.moduleswitches then
        result:=inherited first_sqr_real
      else if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        expectloc:=LOC_FPUREGISTER
      else if current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16] then
        expectloc:=LOC_MMREGISTER
      else if current_settings.fputype=fpu_fpv4_s16 then
        begin
          { only s32real is handled inline for this FPU type }
          if tfloatdef(left.resultdef).floattype=s32real then
            expectloc:=LOC_MMREGISTER
          else
            result:=inherited first_sqr_real;
        end
      else
        internalerror(2009112402);
    end;
  { First pass for sqrt() on a real operand.

    With cs_fp_emulation active, or for fpu_fpv4_s16 with an operand that is
    not s32real, the inherited handler decides and its result is returned.
    Otherwise the expected location is chosen from the FPU type and nil is
    returned.  Raises internalerror(2009112403) for an FPU type not handled
    here. }
  function tarminlinenode.first_sqrt_real : tnode;
    begin
      result:=nil;
      if cs_fp_emulation in current_settings.moduleswitches then
        result:=inherited first_sqrt_real
      else if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        expectloc:=LOC_FPUREGISTER
      else if current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16] then
        expectloc:=LOC_MMREGISTER
      else if current_settings.fputype=fpu_fpv4_s16 then
        begin
          { only s32real is handled inline for this FPU type }
          if tfloatdef(left.resultdef).floattype=s32real then
            expectloc:=LOC_MMREGISTER
          else
            result:=inherited first_sqrt_real;
        end
      else
        internalerror(2009112403);
    end;
  { First pass for the fused multiply-add intrinsic.

    The node is only kept for inline code generation when the selected FPU is
    one that second_fma can emit code for (currently fpu_vfpv4) and the result
    type is single or double; in that case the expected location is an MM
    register and nil is returned.  Every other combination is passed on to
    the inherited handler, so it never reaches second_fma, which would stop
    with internalerror(2014032301). }
  function tarminlinenode.first_fma : tnode;
    begin
      { keep this FPU check in sync with the one in second_fma }
      if (current_settings.fputype in [fpu_vfpv4]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
        end
      else
        Result:=inherited first_fma;
    end;
  { atn, sin, cos and lgn aren't supported by the linux fpe
  202. function tarminlinenode.first_arctan_real: tnode;
  203. begin
  204. expectloc:=LOC_FPUREGISTER;
  205. result:=nil;
  206. end;
  207. function tarminlinenode.first_ln_real: tnode;
  208. begin
  209. expectloc:=LOC_FPUREGISTER;
  210. result:=nil;
  211. end;
  212. function tarminlinenode.first_cos_real: tnode;
  213. begin
  214. expectloc:=LOC_FPUREGISTER;
  215. result:=nil;
  216. end;
  217. function tarminlinenode.first_sin_real: tnode;
  218. begin
  219. expectloc:=LOC_FPUREGISTER;
  220. result:=nil;
  221. end;
  222. }
  { Second pass for abs() on a real operand.

    After load_fpu_location has placed the operand in a register:
      - FPA types emit ABS with the postfix from get_fpu_postfix(resultdef);
      - VFP types emit VABS (F32 or F64, always F32 for fpu_fpv4_s16)
        followed by the optional FPU exception check;
      - fpu_soft masks the top bit ($7fffffff) of the single register, or of
        the high register for non-single operands.
    Raises internalerror(2009111402) for any other FPU type. }
  procedure tarminlinenode.second_abs_real;
    var
      is_single_prec: boolean;
      postfix: TOpPostfix;
      signreg: tregister;
      list: TAsmList;
    begin
      load_fpu_location(is_single_prec);
      list:=current_asmdata.CurrAsmList;

      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_ABS,location.register,left.location.register),get_fpu_postfix(resultdef)));

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16,
        fpu_fpv4_s16:
          begin
            { fpu_fpv4_s16 is always emitted with the F32 postfix }
            if is_single_prec or (current_settings.fputype=fpu_fpv4_s16) then
              postfix:=PF_F32
            else
              postfix:=PF_F64;
            list.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),postfix));
            cg.maybe_check_for_fpu_exception(list);
          end;

        fpu_soft:
          begin
            { pick the register whose top bit gets cleared }
            if is_single_prec then
              signreg:=location.register
            else
              signreg:=location.registerhi;
            cg.a_op_const_reg(list,OP_AND,OS_32,tcgint($7fffffff),signreg);
          end
        else
          internalerror(2009111402);
      end;
    end;
  { Second pass for sqr() on a real operand: multiplies the operand register
    with itself into the result register.

    FPA types emit MUF with the postfix from get_fpu_postfix(resultdef); VFP
    types emit VMUL (F32 or F64, always F32 for fpu_fpv4_s16) followed by the
    optional FPU exception check.
    Raises internalerror(2009111403) for any other FPU type. }
  procedure tarminlinenode.second_sqr_real;
    var
      is_single_prec: boolean;
      postfix: TOpPostfix;
      list: TAsmList;
    begin
      load_fpu_location(is_single_prec);
      list:=current_asmdata.CurrAsmList;

      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register),get_fpu_postfix(resultdef)));

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16,
        fpu_fpv4_s16:
          begin
            { fpu_fpv4_s16 is always emitted with the F32 postfix }
            if is_single_prec or (current_settings.fputype=fpu_fpv4_s16) then
              postfix:=PF_F32
            else
              postfix:=PF_F64;
            list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),postfix));
            cg.maybe_check_for_fpu_exception(list);
          end;
        else
          internalerror(2009111403);
      end;
    end;
  { Second pass for sqrt() on a real operand.

    FPA types emit SQT with the postfix from get_fpu_postfix(resultdef); VFP
    types emit VSQRT (F32 or F64, always F32 for fpu_fpv4_s16) followed by
    the optional FPU exception check.
    Raises internalerror(2009111402) for any other FPU type.

    NOTE(review): the internal error number 2009111402 is also used by
    second_abs_real in this unit; consider assigning a number of its own
    here so the two failure sites can be told apart. }
  procedure tarminlinenode.second_sqrt_real;
    var
      is_single_prec: boolean;
      postfix: TOpPostfix;
      list: TAsmList;
    begin
      load_fpu_location(is_single_prec);
      list:=current_asmdata.CurrAsmList;

      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          list.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)));

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16,
        fpu_fpv4_s16:
          begin
            { fpu_fpv4_s16 is always emitted with the F32 postfix }
            if is_single_prec or (current_settings.fputype=fpu_fpv4_s16) then
              postfix:=PF_F32
            else
              postfix:=PF_F64;
            list.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),postfix));
            cg.maybe_check_for_fpu_exception(list);
          end;
        else
          internalerror(2009111402);
      end;
    end;
  { atn, sin, cos and lgn aren't supported by the linux fpe
  327. procedure tarminlinenode.second_arctan_real;
  328. begin
  329. load_fpu_location;
  330. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ATN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  331. end;
  332. procedure tarminlinenode.second_ln_real;
  333. begin
  334. load_fpu_location;
  335. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_LGN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  336. end;
  337. procedure tarminlinenode.second_cos_real;
  338. begin
  339. load_fpu_location;
  340. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_COS,location.register,left.location.register),get_fpu_postfix(resultdef)));
  341. end;
  342. procedure tarminlinenode.second_sin_real;
  343. begin
  344. load_fpu_location;
  345. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SIN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  346. end;
  347. }
  { Second pass for the prefetch intrinsic.

    Code is only generated when not producing Thumb code and the selected CPU
    has the CPUARM_HAS_EDSP capability.  The operand is second-passed with
    pointer checking switched off; if it ends up as a memory reference, its
    address is loaded into an integer register and a PLD on that register is
    emitted.  For every other operand location nothing is emitted. }
  procedure tarminlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
      if not(GenerateThumbCode) and (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) then
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { restore the switch that was cleared above; the original code
            passed false here a second time, so it was never re-enabled }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                { NOTE(review): temppos and volatility are read from this
                  node's own location, which this procedure never sets up,
                  while alignment comes from left.location - confirm that
                  this is intended }
                reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
                { since the address might be nil we can't use ldr for older cpus }
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PLD,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Second pass for abs() on an integer operand.

    When generating Thumb code the inherited implementation is used.
    Otherwise the operand is forced into a register and the sequence
        MOVS  result, operand
        [IT MI]                 (only when generating Thumb-2 code)
        RSBMI result, result, #0
    is emitted, with the flags register marked as allocated around it. }
  procedure tarminlinenode.second_abs_long;
    var
      regsize : tcgsize;
      list : TAsmList;
    begin
      if GenerateThumbCode then
        inherited second_abs_long
      else
        begin
          secondpass(left);
          list:=current_asmdata.CurrAsmList;
          regsize:=def_cgsize(left.resultdef);
          hlcg.location_force_reg(list,left.location,left.resultdef,left.resultdef,true);

          { the result gets a register of its own }
          location:=left.location;
          location.register:=cg.getintregister(list,regsize);

          cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
          { copy the operand and set the flags from it }
          list.concat(setoppostfix(taicpu.op_reg_reg(A_MOV,location.register,left.location.register),PF_S));
          if GenerateThumb2Code then
            list.concat(taicpu.op_cond(A_IT,C_MI));
          { negate the copy when the MI condition holds }
          list.concat(setcondition(taicpu.op_reg_reg_const(A_RSB,location.register,location.register,0),C_MI));
          cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
        end;
    end;
  { Second pass for the fused multiply-add intrinsic (fpu_vfpv4 only).

    The three arguments are collected from the parameter list, which holds
    them in reverse order.  A unary minus directly on an argument is not
    evaluated; it is folded into the choice of instruction instead.  All
    arguments are forced into MM registers, the third one is moved into the
    result register and the selected instruction combines it with the product
    of the first two.
    Raises internalerror(2014032301) for any FPU type other than fpu_vfpv4. }
  procedure tarminlinenode.second_fma;
    const
      { indexed as [product is negated, third operand is negated] }
      fma_ops : array[boolean,boolean] of TAsmOp =
        (
          { positive product: positive / negative third operand }
          (A_VFMA,A_VFNMS),
          { negative product: positive / negative third operand }
          (A_VFMS,A_VFNMA)
        );
    var
      args : array[1..3] of tnode;
      para : tcallparanode;
      idx : integer;
      negop3,
      negproduct : boolean;
      postfix : TOpPostfix;
      list : TAsmList;
    begin
      if not(current_settings.fputype in [fpu_vfpv4]) then
        internalerror(2014032301)
      else
        begin
          negop3:=false;
          negproduct:=false;

          { the first parameter node carries the third argument, the next one
            the second argument and the last one the first argument }
          para:=tcallparanode(parameters);
          for idx:=3 downto 1 do
            begin
              args[idx]:=para.paravalue;
              if idx>1 then
                para:=tcallparanode(para.nextpara);
            end;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            do not release the unused unary minus node, it is kept and
            released together with the other nodes, only no code is
            generated for it }
          for idx:=1 to 3 do
            if args[idx].nodetype=unaryminusn then
              begin
                args[idx]:=tunarynode(args[idx]).left;
                if idx=3 then
                  negop3:=true
                else
                  negproduct:=not(negproduct);
              end;

          for idx:=1 to 3 do
            secondpass(args[idx]);

          list:=current_asmdata.CurrAsmList;

          { no memory operand is allowed }
          for idx:=1 to 3 do
            if not(args[idx].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              hlcg.location_force_mmregscalar(list,args[idx].location,args[idx].resultdef,true);

          location_reset(location,LOC_MMREGISTER,args[1].location.size);
          location.register:=cg.getmmregister(list,location.size);

          { the instruction works on the destination register, so the third
            operand is moved there first }
          hlcg.a_loadmm_reg_reg(list,args[3].resultdef,resultdef,
            args[3].location.register,location.register,mms_movescalar);

          if is_double(resultdef) then
            postfix:=PF_F64
          else
            postfix:=PF_F32;

          list.concat(setoppostfix(taicpu.op_reg_reg_reg(fma_ops[negproduct,negop3],
            location.register,args[1].location.register,args[2].location.register),postfix));
          cg.maybe_check_for_fpu_exception(list);
        end;
    end;
  { unit initialisation: make tarminlinenode the class referenced by
    cinlinenode }
  initialization
    cinlinenode:=tarminlinenode;
  end.