narminl.pas 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generates ARM inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit narminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { ARM-specific inline node. It overrides the first-pass (first_*) and code
    generation (second_*) hooks of tcgInlineNode that this unit implements
    for the ARM target, plus the *_cpu hooks used for CPU-specific
    intrinsics. }
  tarminlinenode = class(tcgInlineNode)
    { first pass handlers: select the expected location of the result or
      defer to the inherited implementation }
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_fma : tnode; override;
    { atn, sin, cos, lgn aren't supported by the linux fpe
    function first_arctan_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    }
    { second pass handlers: emit the actual instructions }
    procedure second_abs_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    { atn, sin, cos, lgn aren't supported by the linux fpe
    procedure second_arctan_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    }
    procedure second_prefetch; override;
    procedure second_abs_long; override;
    procedure second_fma; override;
    { hooks for CPU-specific inline numbers (in this unit: in_arm_yield) }
    function first_cpu: tnode; override;
    procedure pass_generate_code_cpu; override;
    function pass_typecheck_cpu: tnode; override;
  private
    { runs the second pass on left, forces the operand into a register that
      fits the selected FPU type, prepares location for the result and
      returns in singleprec whether the operand is of type s32real }
    procedure load_fpu_location(out singleprec: boolean);
  end;
  52. implementation
  53. uses
  54. globtype,verbose,globals,
  55. procinfo,
  56. compinnr,cpuinfo,defutil,symdef,
  57. aasmdata,aasmcpu,aasmtai,
  58. cgbase,cgutils,pass_1,pass_2,
  59. cpubase,ncgutil,cgobj,cgcpu, hlcgobj,
  60. nutils,ncal;
  61. {*****************************************************************************
  62. tarminlinenode
  63. *****************************************************************************}
  { Type check of the CPU-specific inline numbers. The ARM yield intrinsic
    yields no value, so its result type is voidtype and no replacement node
    is returned; every other inline number is handled by the ancestor. }
  function tarminlinenode.pass_typecheck_cpu: tnode;
    begin
      if inlinenumber=in_arm_yield then
        begin
          resultdef:=voidtype;
          Result:=nil;
        end
      else
        Result:=inherited pass_typecheck_cpu;
    end;
  { First pass of the CPU-specific inline numbers. For the ARM yield
    intrinsic the node has no result location (LOC_VOID) and a void result
    type; all other inline numbers are passed on to the ancestor. }
  function tarminlinenode.first_cpu : tnode;
    begin
      if inlinenumber<>in_arm_yield then
        begin
          Result:=inherited first_cpu;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_VOID;
      resultdef:=voidtype;
    end;
  { Code generation for the CPU-specific inline numbers. Only in_arm_yield
    is handled here; anything else is delegated to the ancestor. }
  procedure tarminlinenode.pass_generate_code_cpu;
    var
      list : TAsmList;
    begin
      if inlinenumber<>in_arm_yield then
        begin
          inherited pass_generate_code_cpu;
          exit;
        end;
      list:=current_asmdata.CurrAsmList;
      if CPUARM_HAS_MP_INSTRUCTIONS in cpu_capabilities[current_settings.cputype] then
        list.concat(taicpu.op_none(A_YIELD))
      else
        { while yield is a no-op if it is not supported by the cpu, assemblers
          do not accept the mnemonic in that case, so the instruction is
          emitted as a raw 32 bit constant instead }
        list.concat(tai_const.Create_32bit(longint($e320f001)));
    end;
  { Evaluates left and brings its value into a register matching the
    selected FPU type:
      - FPA variants: FPU register
      - soft float:   integer register(s)
      - VFP variants: MM register (scalar)
    location is initialised as a copy of left.location; if the operand ended
    up in a LOC_CFPUREGISTER/LOC_CMMREGISTER, a new register is allocated
    for the result instead of reusing the operand's register.

    singleprec returns true if left is of type s32real. }
  procedure tarminlinenode.load_fpu_location(out singleprec: boolean);
    var
      list : TAsmList;
    begin
      secondpass(left);
      list:=current_asmdata.CurrAsmList;
      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        begin
          hlcg.location_force_fpureg(list,left.location,left.resultdef,true);
          location_copy(location,left.location);
          if left.location.loc=LOC_CFPUREGISTER then
            begin
              location.register:=cg.getfpuregister(list,location.size);
              location.loc:=LOC_FPUREGISTER;
            end;
        end
      else if current_settings.fputype=fpu_soft then
        begin
          hlcg.location_force_reg(list,left.location,left.resultdef,left.resultdef,false);
          location_copy(location,left.location);
        end
      else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
        begin
          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
          location_copy(location,left.location);
          if left.location.loc=LOC_CMMREGISTER then
            begin
              location.register:=cg.getmmregister(list,location.size);
              location.loc:=LOC_MMREGISTER;
            end;
        end
      else
        internalerror(2009111801);
      singleprec:=(tfloatdef(left.resultdef).floattype=s32real);
    end;
  { First pass for abs() on a floating point operand.

    With floating point emulation enabled the operand is first-passed and
    the result is expected in an integer register. Otherwise the expected
    location depends on the FPU type: FPU register for the FPA variants, MM
    register for VFP. A VFP unit without double support only handles
    s32real itself; other float types are passed to the inherited handler.

    Returns nil unless the inherited handler is used. }
  function tarminlinenode.first_abs_real : tnode;
    begin
      Result:=nil;
      if cs_fp_emulation in current_settings.moduleswitches then
        begin
          firstpass(left);
          expectloc:=LOC_REGISTER;
          exit;
        end;

      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        expectloc:=LOC_FPUREGISTER
      else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
        expectloc:=LOC_MMREGISTER
      else if not(FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
        internalerror(2009112401)
      else if tfloatdef(left.resultdef).floattype=s32real then
        expectloc:=LOC_MMREGISTER
      else
        exit(inherited first_abs_real);

      { flag the procedure with pi_do_call when VFP code is generated and
        fpu exception checks are required }
      if [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE]*fpu_capabilities[current_settings.fputype]<>[] then
        if needs_check_for_fpu_exceptions then
          Include(current_procinfo.flags,pi_do_call);
    end;
  { First pass for sqr() on a floating point operand.

    With floating point emulation enabled the inherited handler is used.
    Otherwise the expected location depends on the FPU type: FPU register
    for the FPA variants, MM register for VFP. A VFP unit without double
    support only handles s32real itself; other float types are passed to
    the inherited handler.

    Returns nil unless the inherited handler is used. }
  function tarminlinenode.first_sqr_real : tnode;
    begin
      if cs_fp_emulation in current_settings.moduleswitches then
        exit(inherited first_sqr_real);

      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        expectloc:=LOC_FPUREGISTER
      else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
        expectloc:=LOC_MMREGISTER
      else if not(FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
        internalerror(2009112402)
      else if tfloatdef(left.resultdef).floattype=s32real then
        expectloc:=LOC_MMREGISTER
      else
        exit(inherited first_sqr_real);

      { flag the procedure with pi_do_call when VFP code is generated and
        fpu exception checks are required }
      if [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE]*fpu_capabilities[current_settings.fputype]<>[] then
        if needs_check_for_fpu_exceptions then
          Include(current_procinfo.flags,pi_do_call);
      Result:=nil;
    end;
  { First pass for sqrt() on a floating point operand.

    With floating point emulation enabled the inherited handler is used.
    Otherwise the expected location depends on the FPU type: FPU register
    for the FPA variants, MM register for VFP. A VFP unit without double
    support only handles s32real itself; other float types are passed to
    the inherited handler.

    Returns nil unless the inherited handler is used. }
  function tarminlinenode.first_sqrt_real : tnode;
    begin
      if cs_fp_emulation in current_settings.moduleswitches then
        exit(inherited first_sqrt_real);

      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        expectloc:=LOC_FPUREGISTER
      else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
        expectloc:=LOC_MMREGISTER
      else if not(FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
        internalerror(2009112403)
      else if tfloatdef(left.resultdef).floattype=s32real then
        expectloc:=LOC_MMREGISTER
      else
        exit(inherited first_sqrt_real);

      { flag the procedure with pi_do_call when VFP code is generated and
        fpu exception checks are required }
      if [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE]*fpu_capabilities[current_settings.fputype]<>[] then
        if needs_check_for_fpu_exceptions then
          Include(current_procinfo.flags,pi_do_call);
      Result:=nil;
    end;
  { First pass for the fused multiply-add intrinsic. Single and double
    results are expected in an MM register; any other result type is handled
    by the inherited implementation. }
  function tarminlinenode.first_fma : tnode;
    begin
      if not(is_double(resultdef) or is_single(resultdef)) then
        exit(inherited first_fma);

      expectloc:=LOC_MMREGISTER;
      Result:=nil;
      { flag the procedure with pi_do_call when the FPU is a VFP and fpu
        exception checks are required }
      if [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE]*fpu_capabilities[current_settings.fputype]<>[] then
        if needs_check_for_fpu_exceptions then
          Include(current_procinfo.flags,pi_do_call);
    end;
  241. { atn, sin, cos, lgn aren't supported by the linux fpe
  242. function tarminlinenode.first_arctan_real: tnode;
  243. begin
  244. expectloc:=LOC_FPUREGISTER;
  245. result:=nil;
  246. end;
  247. function tarminlinenode.first_ln_real: tnode;
  248. begin
  249. expectloc:=LOC_FPUREGISTER;
  250. result:=nil;
  251. end;
  252. function tarminlinenode.first_cos_real: tnode;
  253. begin
  254. expectloc:=LOC_FPUREGISTER;
  255. result:=nil;
  256. end;
  257. function tarminlinenode.first_sin_real: tnode;
  258. begin
  259. expectloc:=LOC_FPUREGISTER;
  260. result:=nil;
  261. end;
  262. }
  { Generates code for abs() on a floating point operand.
      - FPA variants: ABS with the postfix matching the result type
      - soft float:   masks the upper 32 bit word with $7fffffff (the only
                      register for single precision, registerhi otherwise)
      - VFP:          VABS; a VFP unit without double support always uses
                      the F32 form }
  procedure tarminlinenode.second_abs_real;
    var
      is_single_op : boolean;
      list : TAsmList;
      postfix : TOpPostfix;
      signreg : tregister;
    begin
      load_fpu_location(is_single_op);
      list:=current_asmdata.CurrAsmList;
      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        list.concat(setoppostfix(taicpu.op_reg_reg(A_ABS,location.register,left.location.register),get_fpu_postfix(resultdef)))
      else if current_settings.fputype=fpu_soft then
        begin
          { pick the register holding the word that is masked }
          if is_single_op then
            signreg:=location.register
          else
            signreg:=location.registerhi;
          cg.a_op_const_reg(list,OP_AND,OS_32,tcgint($7fffffff),signreg);
        end
      else
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            begin
              if is_single_op then
                postfix:=PF_F32
              else
                postfix:=PF_F64;
            end
          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
            postfix:=PF_F32
          else
            internalerror(2009111402);
          list.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),postfix));
          cg.maybe_check_for_fpu_exception(list);
        end;
    end;
  { Generates code for sqr() on a floating point operand by multiplying the
    operand register with itself: MUF for the FPA variants, VMUL for VFP.
    A VFP unit without double support always uses the F32 form. }
  procedure tarminlinenode.second_sqr_real;
    var
      is_single_op : boolean;
      list : TAsmList;
      postfix : TOpPostfix;
    begin
      load_fpu_location(is_single_op);
      list:=current_asmdata.CurrAsmList;
      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register),get_fpu_postfix(resultdef)))
      else
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            begin
              if is_single_op then
                postfix:=PF_F32
              else
                postfix:=PF_F64;
            end
          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
            postfix:=PF_F32
          else
            internalerror(2009111403);
          list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),postfix));
          cg.maybe_check_for_fpu_exception(list);
        end;
    end;
  { Generates code for sqrt() on a floating point operand: SQT for the FPA
    variants, VSQRT for VFP. A VFP unit without double support always uses
    the F32 form. }
  procedure tarminlinenode.second_sqrt_real;
    var
      is_single_op : boolean;
      list : TAsmList;
      postfix : TOpPostfix;
    begin
      load_fpu_location(is_single_op);
      list:=current_asmdata.CurrAsmList;
      if current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11] then
        list.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)))
      else
        begin
          if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            begin
              if is_single_op then
                postfix:=PF_F32
              else
                postfix:=PF_F64;
            end
          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
            postfix:=PF_F32
          else
            internalerror(2009111405);
          list.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),postfix));
          cg.maybe_check_for_fpu_exception(list);
        end;
    end;
  357. { atn, sin, cos, lgn aren't supported by the linux fpe
  358. procedure tarminlinenode.second_arctan_real;
  359. begin
  360. load_fpu_location;
  361. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ATN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  362. end;
  363. procedure tarminlinenode.second_ln_real;
  364. begin
  365. load_fpu_location;
  366. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_LGN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  367. end;
  368. procedure tarminlinenode.second_cos_real;
  369. begin
  370. load_fpu_location;
  371. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_COS,location.register,left.location.register),get_fpu_postfix(resultdef)));
  372. end;
  373. procedure tarminlinenode.second_sin_real;
  374. begin
  375. load_fpu_location;
  376. current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SIN,location.register,left.location.register),get_fpu_postfix(resultdef)));
  377. end;
  378. }
  { Generates code for the prefetch intrinsic.

    A PLD instruction is only emitted when not generating Thumb code and the
    cpu has the EDSP capability; otherwise no code is generated at all.
    If left does not end up in a (C)REFERENCE location there is nothing to
    prefetch and only the code for evaluating left is generated. }
  procedure tarminlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
      if not(GenerateThumbCode) and (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) then
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { re-enable the switch that was turned off above; the original code
            passed false here as well, so the switch was never restored }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { load the effective address into a register and prefetch
                  through a plain register based reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                { NOTE(review): temppos and volatility are taken from this
                  node's own location, which is not assigned in this routine,
                  while the alignment comes from left.location - confirm that
                  this is intended }
                reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
                { since the address might be nil we can't use ldr for older cpus }
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PLD,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Generates code for abs() on an integer operand.

    Thumb code and 64 bit operands are handled by the inherited
    implementation. Otherwise the operand is copied into a new register with
    a flag setting MOV and negated with a conditional (MI) RSB from 0; in
    Thumb-2 mode the RSB is preceded by an IT instruction. With overflow
    checking enabled the RSB also sets the flags and an overflow check is
    emitted afterwards. }
  procedure tarminlinenode.second_abs_long;
    var
      list : TAsmList;
      size : tcgsize;
      negate : taicpu;
      overflowloc : tlocation;
    begin
      if GenerateThumbCode or is_64bitint(left.resultdef) then
        begin
          inherited second_abs_long;
          exit;
        end;

      secondpass(left);
      list:=current_asmdata.CurrAsmList;
      size:=def_cgsize(left.resultdef);
      hlcg.location_force_reg(list,left.location,left.resultdef,left.resultdef,true);
      location:=left.location;
      location.register:=cg.getintregister(list,size);

      { the flags are in use from the MOVS up to the conditional RSB }
      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
      list.concat(setoppostfix(taicpu.op_reg_reg(A_MOV,location.register,left.location.register),PF_S));
      if GenerateThumb2Code then
        list.concat(taicpu.op_cond(A_IT,C_MI));

      negate:=setcondition(taicpu.op_reg_reg_const(A_RSB,location.register,location.register,0),C_MI);
      if cs_check_overflow in current_settings.localswitches then
        begin
          list.concat(setoppostfix(negate,PF_S));
          location_reset(overflowloc,LOC_VOID,size);
          cg.g_overflowCheck_loc(list,overflowloc,resultdef,overflowloc);
        end
      else
        list.concat(negate);
      cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
    end;
  { Generates code for the fused multiply-add intrinsic fma(a,b,c).

    Unary minus nodes directly on the operands are not evaluated; instead
    the instruction variant is selected accordingly. The third operand is
    moved into the result register first, which is then used as the first
    register operand of the instruction, followed by the two factors.
    Requires the FMA capability, otherwise an internal error is raised. }
  procedure tarminlinenode.second_fma;
    const
      { first index: the product is negated, second index: the third
        operand is negated }
      fma_ops : array[boolean,boolean] of TAsmOp =
        (
         { positive product: positive, negative third operand }
         (A_VFMA,A_VFNMS),
         { negative product: positive, negative third operand }
         (A_VFMS,A_VFNMA)
        );
    var
      args : array[1..3] of tnode;
      idx : integer;
      negate_addend,
      negate_product : boolean;
      postfix : TOpPostfix;
      list : TAsmList;
    begin
      if not(FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype]) then
        internalerror(2014032301);

      { the parameter list is linked in reverse order: its head is the third
        operand }
      args[3]:=tcallparanode(parameters).paravalue;
      args[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
      args[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;

      { check if a neg. node can be removed
        this is possible because changing the sign of
        a floating point number does not affect its absolute
        value in any way

        the unused unary minus nodes are not released here, they are kept
        and released together with the other nodes, only no code is
        generated for them }
      negate_product:=false;
      for idx:=1 to 2 do
        if args[idx].nodetype=unaryminusn then
          begin
            args[idx]:=tunarynode(args[idx]).left;
            negate_product:=not(negate_product);
          end;
      negate_addend:=(args[3].nodetype=unaryminusn);
      if negate_addend then
        args[3]:=tunarynode(args[3]).left;

      for idx:=1 to 3 do
        secondpass(args[idx]);
      list:=current_asmdata.CurrAsmList;

      { no memory operand is allowed }
      for idx:=1 to 3 do
        if not(args[idx].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
          hlcg.location_force_mmregscalar(list,args[idx].location,args[idx].resultdef,true);

      location_reset(location,LOC_MMREGISTER,args[1].location.size);
      location.register:=cg.getmmregister(list,location.size);
      hlcg.a_loadmm_reg_reg(list,args[3].resultdef,resultdef,
        args[3].location.register,location.register,mms_movescalar);

      if is_double(resultdef) then
        postfix:=PF_F64
      else
        postfix:=PF_F32;
      list.concat(setoppostfix(taicpu.op_reg_reg_reg(fma_ops[negate_product,negate_addend],
        location.register,args[1].location.register,args[2].location.register),postfix));
      cg.maybe_check_for_fpu_exception(list);
    end;
  { unit initialization: make tarminlinenode the class assigned to
    cinlinenode }
  begin
    cinlinenode:=tarminlinenode;
  end.