narminl.pas 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generates ARM inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit narminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  tarminlinenode = class(tcgInlineNode)
    { pass 1 handlers: pick the expected result location of the floating
      point intrinsics or hand the node over to the inherited handler }
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_fma : tnode; override;
    { atn, sin, cos and lgn aren't supported by the linux fpe
    function first_arctan_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    }

    { pass 2 handlers: emit the instructions for the intrinsics }
    procedure second_abs_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    { atn, sin, cos and lgn aren't supported by the linux fpe
    procedure second_arctan_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    }
    procedure second_prefetch; override;
    procedure second_abs_long; override;
    procedure second_fma; override;
  private
    { runs pass 2 on left, forces its location into a register that matches
      the selected fpu type and initialises location from it; singleprec
      returns whether left is of float type s32real }
    procedure load_fpu_location(out singleprec: boolean);
  end;
  49. implementation
  50. uses
  51. globtype,verbose,globals,
  52. cpuinfo, defutil,symdef,aasmdata,aasmcpu,
  53. cgbase,cgutils,pass_1,pass_2,
  54. cpubase,ncgutil,cgobj,cgcpu, hlcgobj,
  55. nutils,ncal;
  56. {*****************************************************************************
  57. tarminlinenode
  58. *****************************************************************************}
  { Generates code for the operand (left), forces it into a register of the
    kind that belongs to the selected fpu type and copies the operand location
    into the node location. If the operand sits in a constant fpu/mm register,
    a fresh register is allocated for the result instead.

    singleprec: set to true when left.resultdef has float type s32real.
    Raises internalerror(2009111801) for an fpu type that is not handled. }
  procedure tarminlinenode.load_fpu_location(out singleprec: boolean);
    begin
      secondpass(left);

      case current_settings.fputype of
        fpu_soft:
          begin
            { software floating point: the value lives in integer register(s) }
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
            location_copy(location,left.location);
          end;

        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
            location_copy(location,left.location);
            { do not write the result into a constant register }
            if left.location.loc=LOC_CFPUREGISTER then
              begin
                location.loc:=LOC_FPUREGISTER;
                location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
              end;
          end;

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16,
        fpu_fpv4_s16:
          begin
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
            location_copy(location,left.location);
            { do not write the result into a constant register }
            if left.location.loc=LOC_CMMREGISTER then
              begin
                location.loc:=LOC_MMREGISTER;
                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
              end;
          end;

        else
          internalerror(2009111801);
      end;

      singleprec:=(tfloatdef(left.resultdef).floattype=s32real);
    end;
  { Pass 1 for abs() on a real operand: selects the expected location of the
    result. Returns nil when the node is handled here, otherwise the result of
    the inherited handler (fpu_fpv4_s16 with a non-single operand).
    Raises internalerror(2009112401) for an fpu type that is not handled. }
  function tarminlinenode.first_abs_real : tnode;
    begin
      result:=nil;

      { with fp emulation the value is kept in integer registers }
      if cs_fp_emulation in current_settings.moduleswitches then
        begin
          firstpass(left);
          expectloc:=LOC_REGISTER;
          exit;
        end;

      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          expectloc:=LOC_FPUREGISTER;

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16:
          expectloc:=LOC_MMREGISTER;

        fpu_fpv4_s16:
          begin
            { only s32real operands are handled inline for this fpu type }
            if tfloatdef(left.resultdef).floattype<>s32real then
              result:=inherited first_abs_real
            else
              expectloc:=LOC_MMREGISTER;
          end;

        else
          internalerror(2009112401);
      end;
    end;
  { Pass 1 for sqr() on a real operand: selects the expected location of the
    result. Returns nil when the node is handled here, otherwise the result of
    the inherited handler (fp emulation, or fpu_fpv4_s16 with a non-single
    operand). Raises internalerror(2009112402) for an unhandled fpu type. }
  function tarminlinenode.first_sqr_real : tnode;
    begin
      if cs_fp_emulation in current_settings.moduleswitches then
        begin
          result:=inherited first_sqr_real;
          exit;
        end;

      result:=nil;
      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          expectloc:=LOC_FPUREGISTER;

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16:
          expectloc:=LOC_MMREGISTER;

        fpu_fpv4_s16:
          begin
            { only s32real operands are handled inline for this fpu type }
            if tfloatdef(left.resultdef).floattype<>s32real then
              result:=inherited first_sqr_real
            else
              expectloc:=LOC_MMREGISTER;
          end;

        else
          internalerror(2009112402);
      end;
    end;
  { Pass 1 for sqrt() on a real operand: selects the expected location of the
    result. Returns nil when the node is handled here, otherwise the result of
    the inherited handler (fp emulation, or fpu_fpv4_s16 with a non-single
    operand). Raises internalerror(2009112403) for an unhandled fpu type. }
  function tarminlinenode.first_sqrt_real : tnode;
    begin
      if cs_fp_emulation in current_settings.moduleswitches then
        begin
          result:=inherited first_sqrt_real;
          exit;
        end;

      result:=nil;
      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          expectloc:=LOC_FPUREGISTER;

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16:
          expectloc:=LOC_MMREGISTER;

        fpu_fpv4_s16:
          begin
            { only s32real operands are handled inline for this fpu type }
            if tfloatdef(left.resultdef).floattype<>s32real then
              result:=inherited first_sqrt_real
            else
              expectloc:=LOC_MMREGISTER;
          end;

        else
          internalerror(2009112403);
      end;
    end;
  { Pass 1 for the fma intrinsic. Returns nil and expects the result in an
    mm register when the node can be handled inline, otherwise returns the
    result of the inherited handler. }
  function tarminlinenode.first_fma : tnode;
    begin
      { second_fma only generates code when the selected fpu type is
        fpu_vfpv4 and raises an internal error for every other fpu type, so
        the node is claimed here under the same condition; all other cases
        are left to the inherited handler }
      if (current_settings.fputype in [fpu_vfpv4]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
        end
      else
        Result:=inherited first_fma;
    end;
  201. { atn, sin, cos and lgn aren't supported by the linux fpe
  202. function tarminlinenode.first_arctan_real: tnode;
  203. begin
  204. expectloc:=LOC_FPUREGISTER;
  205. result:=nil;
  206. end;
  207. function tarminlinenode.first_ln_real: tnode;
  208. begin
  209. expectloc:=LOC_FPUREGISTER;
  210. result:=nil;
  211. end;
  212. function tarminlinenode.first_cos_real: tnode;
  213. begin
  214. expectloc:=LOC_FPUREGISTER;
  215. result:=nil;
  216. end;
  217. function tarminlinenode.first_sin_real: tnode;
  218. begin
  219. expectloc:=LOC_FPUREGISTER;
  220. result:=nil;
  221. end;
  222. }
  { Pass 2 for abs() on a real operand: loads the operand via
    load_fpu_location and emits the absolute value operation for the selected
    fpu type. Raises internalerror(2009111402) for an unhandled fpu type. }
  procedure tarminlinenode.second_abs_real;
    var
      is_single_prec: boolean;
      postfix: TOpPostfix;
      instr: taicpu;
      topreg: tregister;
    begin
      load_fpu_location(is_single_prec);

      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            instr:=taicpu.op_reg_reg(A_ABS,location.register,left.location.register);
            current_asmdata.CurrAsmList.concat(setoppostfix(instr,get_fpu_postfix(resultdef)));
          end;

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16:
          begin
            postfix:=PF_F64;
            if is_single_prec then
              postfix:=PF_F32;
            instr:=taicpu.op_reg_reg(A_VABS,location.register,left.location.register);
            current_asmdata.CurrAsmList.concat(setoppostfix(instr,postfix));
          end;

        fpu_fpv4_s16:
          begin
            { always emitted with the F32 postfix for this fpu type }
            instr:=taicpu.op_reg_reg(A_VABS,location.register,left.location.register);
            current_asmdata.CurrAsmList.Concat(setoppostfix(instr,PF_F32));
          end;

        fpu_soft:
          begin
            { clear the top bit of the 32 bit register that holds it: the
              value register itself for single precision, the high register
              otherwise }
            if is_single_prec then
              topreg:=location.register
            else
              topreg:=location.registerhi;
            cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),topreg);
          end;

        else
          internalerror(2009111402);
      end;
    end;
  { Pass 2 for sqr() on a real operand: loads the operand via
    load_fpu_location and emits a multiplication of the operand register with
    itself. Raises internalerror(2009111403) for an unhandled fpu type. }
  procedure tarminlinenode.second_sqr_real;
    var
      is_single_prec: boolean;
      postfix: TOpPostfix;
      instr: taicpu;
    begin
      load_fpu_location(is_single_prec);

      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            instr:=taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register);
            current_asmdata.CurrAsmList.concat(setoppostfix(instr,get_fpu_postfix(resultdef)));
          end;

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16:
          begin
            postfix:=PF_F64;
            if is_single_prec then
              postfix:=PF_F32;
            instr:=taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register);
            current_asmdata.CurrAsmList.concat(setoppostfix(instr,postfix));
          end;

        fpu_fpv4_s16:
          begin
            { always emitted with the F32 postfix for this fpu type }
            instr:=taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register);
            current_asmdata.CurrAsmList.Concat(setoppostfix(instr,PF_F32));
          end;

        else
          internalerror(2009111403);
      end;
    end;
  { Pass 2 for sqrt() on a real operand: loads the operand via
    load_fpu_location and emits the square root instruction for the selected
    fpu type. Raises internalerror(2009111402) for an unhandled fpu type.

    NOTE(review): 2009111402 is also the internal error number used by
    second_abs_real in this unit - confirm whether this one should get a
    number of its own. }
  procedure tarminlinenode.second_sqrt_real;
    var
      is_single_prec: boolean;
      postfix: TOpPostfix;
      instr: taicpu;
    begin
      load_fpu_location(is_single_prec);

      case current_settings.fputype of
        fpu_fpa,
        fpu_fpa10,
        fpu_fpa11:
          begin
            instr:=taicpu.op_reg_reg(A_SQT,location.register,left.location.register);
            current_asmdata.CurrAsmList.concat(setoppostfix(instr,get_fpu_postfix(resultdef)));
          end;

        fpu_vfpv2,
        fpu_vfpv3,
        fpu_vfpv4,
        fpu_vfpv3_d16:
          begin
            postfix:=PF_F64;
            if is_single_prec then
              postfix:=PF_F32;
            instr:=taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register);
            current_asmdata.CurrAsmList.concat(setoppostfix(instr,postfix));
          end;

        fpu_fpv4_s16:
          begin
            { always emitted with the F32 postfix for this fpu type }
            instr:=taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register);
            current_asmdata.CurrAsmList.concat(setoppostfix(instr,PF_F32));
          end;

        else
          internalerror(2009111402);
      end;
    end;
  314. { atn, sin, cos and lgn aren't supported by the linux fpe
  315. procedure tarminlinenode.second_arctan_real;
  316. begin
  317. load_fpu_location;
  318. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ATN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  319. end;
  320. procedure tarminlinenode.second_ln_real;
  321. begin
  322. load_fpu_location;
  323. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_LGN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  324. end;
  325. procedure tarminlinenode.second_cos_real;
  326. begin
  327. load_fpu_location;
  328. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_COS,location.register,left.location.register),get_fpu_postfix(resultdef)));
  329. end;
  330. procedure tarminlinenode.second_sin_real;
  331. begin
  332. load_fpu_location;
  333. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SIN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  334. end;
  335. }
  { Pass 2 for the prefetch intrinsic: emits a PLD for the address of left
    when left ends up as a memory reference. No code at all is generated when
    thumb code is generated or when the selected cpu type lacks the
    CPUARM_HAS_EDSP capability. }
  procedure tarminlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
      if not(GenerateThumbCode) and (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) then
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { switch the check back on once the code for left has been
            generated; the original passed false here a second time, which
            left the switch disabled }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { load the address into a register and prefetch through a
                  plain register based reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
                { since the address might be nil we can't use ldr for older cpus }
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PLD,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Pass 2 for abs() on an integer operand. When thumb code is generated the
    inherited implementation is used. Otherwise the operand is copied into a
    new register with a flag setting MOV and conditionally (C_MI) replaced by
    0 minus itself with RSB; for thumb-2 an IT instruction precedes the
    conditional instruction. }
  procedure tarminlinenode.second_abs_long;
    var
      instr : taicpu;
    begin
      if GenerateThumbCode then
        begin
          inherited second_abs_long;
          exit;
        end;

      secondpass(left);
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);

      { the result gets a register of its own, sized like the operand }
      location:=left.location;
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(left.resultdef));

      { the flags are in use between the MOV with PF_S and the RSB }
      cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);

      instr:=taicpu.op_reg_reg(A_MOV,location.register,left.location.register);
      current_asmdata.CurrAsmList.concat(setoppostfix(instr,PF_S));

      if GenerateThumb2Code then
        current_asmdata.CurrAsmList.concat(taicpu.op_cond(A_IT,C_MI));

      instr:=taicpu.op_reg_reg_const(A_RSB,location.register,location.register,0);
      current_asmdata.CurrAsmList.concat(setcondition(instr,C_MI));

      cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
    end;
  { Pass 2 for the fma intrinsic. Only handled for fpu_vfpv4, every other
    fpu type raises internalerror(2014032301). The three arguments are forced
    into mm registers, the third one is moved into the result register and a
    fused multiply instruction accumulates the product of the first two into
    it. Unary minus nodes directly on the arguments are not evaluated; they
    are folded into the choice of the instruction instead. }
  procedure tarminlinenode.second_fma;
    const
      { indexed by [product is negated, third operand is negated] }
      fma_ops : array[false..true,false..true] of TAsmOp =
        (
          { positive product: positive, negative third operand }
          (A_VFMA,A_VFNMS),
          { negative product: positive, negative third operand }
          (A_VFMS,A_VFNMA)
        );
    var
      args : array[1..3] of tnode;
      para : tcallparanode;
      idx : integer;
      neg_addend,
      neg_product : boolean;
      postfix : TOpPostfix;
    begin
      if current_settings.fputype<>fpu_vfpv4 then
        begin
          internalerror(2014032301);
          exit;
        end;

      { the head of the parameter chain is the third argument, the end of
        the chain is the first one }
      para:=tcallparanode(parameters);
      args[3]:=para.paravalue;
      para:=tcallparanode(para.nextpara);
      args[2]:=para.paravalue;
      para:=tcallparanode(para.nextpara);
      args[1]:=para.paravalue;

      { check if a neg. node can be removed: this is possible because
        changing the sign of a floating point number does not affect its
        absolute value in any way.
        The skipped unary minus nodes are not released here, they stay in
        the tree and are released together with the other nodes; only no
        code is generated for them }
      neg_addend:=false;
      neg_product:=false;
      for idx:=1 to 3 do
        if args[idx].nodetype=unaryminusn then
          begin
            args[idx]:=tunarynode(args[idx]).left;
            if idx=3 then
              neg_addend:=true
            else
              neg_product:=not(neg_product);
          end;

      for idx:=1 to 3 do
        secondpass(args[idx]);

      { no memory operand is allowed }
      for idx:=1 to 3 do
        if not(args[idx].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,args[idx].location,args[idx].resultdef,true);

      { the result register starts out as a copy of the third operand }
      location_reset(location,LOC_MMREGISTER,args[1].location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,args[3].resultdef,resultdef,
        args[3].location.register,location.register,mms_movescalar);

      postfix:=PF_F32;
      if is_double(resultdef) then
        postfix:=PF_F64;

      current_asmdata.CurrAsmList.concat(
        setoppostfix(
          taicpu.op_reg_reg_reg(fma_ops[neg_product,neg_addend],
            location.register,args[1].location.register,args[2].location.register),
          postfix));
    end;
  begin
    { unit initialization: make tarminlinenode the class that cinlinenode
      refers to }
    cinlinenode:=tarminlinenode;
  end.