ncpuadd.pas 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. {
  2. Copyright (c) 2008 by Florian Klaempfl
  3. Code generation for add nodes on the Xtensa
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit ncpuadd;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,node,ncgadd,cpubase;
  22. type
  { Xtensa flavour of the add node: overrides the code generation hooks of
    tcgaddnode for ordinal, 64 bit, small set and floating point operands. }
  TCPUAddNode = class(tcgaddnode)
  private
    { second pass over both operands, used by second_addfloat }
    procedure pass_left_and_right;
    { 64 bit "<=" and "<" emitted as jumps, used by second_cmp64bit }
    procedure cmp64_le(left_reg, right_reg: TRegister64; unsigned: boolean);
    procedure cmp64_lt(left_reg, right_reg: TRegister64; unsigned: boolean);
  protected
    { first pass / strategy selection }
    function pass_1 : tnode;override;
    function first_addfloat: tnode;override;
    function use_generic_mul32to64: boolean;override;
    function use_generic_mul64bit: boolean;override;

    { second pass / code generation }
    procedure second_addordinal;override;
    procedure second_cmpordinal;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_addfloat;override;

    { shared implementation behind second_cmpordinal }
    procedure second_cmp;

    function use_fma: boolean;override;
  end;
  43. implementation
  44. uses
  45. globtype,systems,
  46. cutils,verbose,globals,
  47. symconst,symdef,paramgr,
  48. aasmbase,aasmtai,aasmdata,aasmcpu,defutil,htypechk,
  49. cgutils,cgcpu,
  50. cpuinfo,pass_1,pass_2,procinfo,
  51. cpupara,
  52. ncon,nset,nadd,
  53. ncgutil,tgobj,rgobj,rgcpu,cgobj,cg64f32,
  54. hlcgobj;
  55. {*****************************************************************************
  56. TCPUAddNode
  57. *****************************************************************************}
  { Returns true when both operands are single precision values and the
    selected FPU type offers FPUXTENSA_SINGLE. }
  function TCPUAddNode.use_fma : boolean;
    var
      both_single: boolean;
    begin
      both_single:=is_single(left.resultdef) and is_single(right.resultdef);
      if both_single then
        Result:=FPUXTENSA_SINGLE in fpu_capabilities[current_settings.fputype]
      else
        Result:=false;
    end;
  { Code generation for ordinal add nodes.

    Only a multiplication with a 64 bit result is handled here (see
    use_generic_mul32to64 for when the compiler lets this node through);
    every other node is passed on to the inherited implementation. }
  procedure TCPUAddNode.second_addordinal;
    var
      list: TAsmList;
      hi_mul: tasmop;
    begin
      if not((nodetype=muln) and is_64bit(resultdef)) then
        begin
          inherited;
          exit;
        end;

      { pick the instruction producing the upper result word: the signed
        variant is only used when both operands are signed }
      if is_signed(left.resultdef) and is_signed(right.resultdef) then
        hi_mul:=A_MULSH
      else
        hi_mul:=A_MULUH;

      pass_left_right;
      list:=current_asmdata.CurrAsmList;

      { both operands have to live in integer registers }
      if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER]) then
        hlcg.location_force_reg(list,left.location,left.resultdef,left.resultdef,true);
      if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) then
        hlcg.location_force_reg(list,right.location,right.resultdef,right.resultdef,true);

      { set up the 64 bit result as a register pair }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register64.reglo:=cg.getintregister(list,OS_INT);
      location.register64.reghi:=cg.getintregister(list,OS_INT);

      { lower word first, then the upper word }
      list.concat(taicpu.op_reg_reg_reg(A_MULL,location.register64.reglo,
        left.location.register,right.location.register));
      list.concat(taicpu.op_reg_reg_reg(hi_mul,location.register64.reghi,
        left.location.register,right.location.register));
    end;
  { Code generation for comparisons of small sets; the result is a
    LOC_JUMP location.

    equaln/unequaln compare both operand registers directly. For lten/gten
    the operands are first arranged so that the test becomes
    "(left and right) = right".

    Raises internalerror 2020082401 for any other node type. }
  procedure TCPUAddNode.second_cmpsmallset;
    var
      tmpreg : tregister;
      truelab, falselab: TAsmLabel;
    begin
      pass_left_right;

      { bring "<=" (taking an earlier operand swap into account) into the
        same operand order as ">=" so that one code sequence serves both }
      if (not(nf_swapped in flags) and
          (nodetype = lten)) or
         ((nf_swapped in flags) and
          (nodetype = gten)) then
        swapleftright;

      current_asmdata.getjumplabel(truelab);
      current_asmdata.getjumplabel(falselab);
      location_reset_jump(location,truelab,falselab);

      force_reg_left_right(false,false);

      case nodetype of
        equaln:
          begin
            cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_EQ,left.location.register,right.location.register,location.truelabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
          end;
        unequaln:
          begin
            cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,left.location.register,right.location.register,location.truelabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
          end;
        lten,
        gten:
          begin
            { location was just reset to a jump location, so its size field
              does not describe an integer; ask for a native sized register
              explicitly }
            tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,left.location.register,right.location.register,tmpreg);
            cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_EQ,tmpreg,right.location.register,location.truelabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
          end;
        else
          internalerror(2020082401);
      end;
    end;
  { Code generation for comparisons of ordinals; the result is a LOC_JUMP
    location: a conditional jump to the true label followed by an
    unconditional jump to the false label.

    The signedness of the comparison is taken from the left operand.
    Raises internalerror 2020030801 (signed) or 2020030803 (unsigned) when
    the node is not a comparison. }
  procedure TCPUAddNode.second_cmp;
    var
      cond: TOpCmp;
      truelab, falselab: TAsmLabel;
    begin
      pass_left_right;

      current_asmdata.getjumplabel(truelab);
      current_asmdata.getjumplabel(falselab);
      location_reset_jump(location,truelab,falselab);

      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(OS_INT),true);

      { translate the node type into a signed or unsigned condition }
      if is_signed(left.resultdef) then
        case nodetype of
          equaln:   cond:=OC_EQ;
          unequaln: cond:=OC_NE;
          ltn:      cond:=OC_LT;
          lten:     cond:=OC_LTE;
          gtn:      cond:=OC_GT;
          gten:     cond:=OC_GTE;
          else
            internalerror(2020030801);
        end
      else
        case nodetype of
          equaln:   cond:=OC_EQ;
          unequaln: cond:=OC_NE;
          ltn:      cond:=OC_B;
          lten:     cond:=OC_BE;
          gtn:      cond:=OC_A;
          gten:     cond:=OC_AE;
          else
            internalerror(2020030803);
        end;

      { a constant right operand can be compared directly, but only if the
        operands still are in source order }
      if (right.nodetype=ordconstn) and not(nf_swapped in flags) then
        cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,OS_INT,cond,right.location.value,left.location.register,location.truelabel)
      else
        begin
          if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,cgsize_orddef(OS_INT),true);

          { the register order passed to the code generator is reversed
            when the operands have been swapped, so cond always applies to
            the operands in their original order }
          if nf_swapped in flags then
            cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,cond,left.location.register,right.location.register,location.truelabel)
          else
            cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,cond,right.location.register,left.location.register,location.truelabel);
        end;

      cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
    end;
  const
    { "less than" condition used by cmp64_lt and cmp64_le for the upper
      words, indexed by their "unsigned" parameter }
    cmpops: array[boolean] of TOpCmp = (
      OC_LT,  { false }
      OC_B    { true }
    );
  { Emits the jump sequence for the 64 bit comparison left_reg < right_reg,
    branching to location.truelabel or location.falselabel.

    The upper words are compared with cmpops[unsigned]; the lower words are
    always compared with OC_B. }
  procedure TCPUAddNode.cmp64_lt(left_reg, right_reg: TRegister64;unsigned: boolean);
    var
      list: TAsmList;
      hi_less: TOpCmp;
    begin
      list:=current_asmdata.CurrAsmList;
      hi_less:=cmpops[unsigned];

      { upper words: "less" settles it as true, any other difference as false }
      cg.a_cmp_reg_reg_label(list,OS_INT,hi_less,right_reg.reghi,left_reg.reghi,location.truelabel);
      cg.a_cmp_reg_reg_label(list,OS_INT,OC_NE,left_reg.reghi,right_reg.reghi,location.falselabel);

      { upper words are equal here: the lower words decide }
      cg.a_cmp_reg_reg_label(list,OS_INT,OC_B,right_reg.reglo,left_reg.reglo,location.truelabel);
      cg.a_jmp_always(list,location.falselabel);
    end;
  { Emits the jump sequence for the 64 bit comparison left_reg <= right_reg,
    branching to location.truelabel or location.falselabel.

    The upper words are compared with cmpops[unsigned]; the lower words are
    always compared with OC_B. }
  procedure TCPUAddNode.cmp64_le(left_reg, right_reg: TRegister64;unsigned: boolean);
    var
      list: TAsmList;
      hi_less: TOpCmp;
    begin
      list:=current_asmdata.CurrAsmList;
      hi_less:=cmpops[unsigned];

      { upper words: right below left settles it as false, any other
        difference as true }
      cg.a_cmp_reg_reg_label(list,OS_INT,hi_less,left_reg.reghi,right_reg.reghi,location.falselabel);
      cg.a_cmp_reg_reg_label(list,OS_INT,OC_NE,left_reg.reghi,right_reg.reghi,location.truelabel);

      { upper words are equal here: the lower words decide }
      cg.a_cmp_reg_reg_label(list,OS_INT,OC_B,left_reg.reglo,right_reg.reglo,location.falselabel);
      cg.a_jmp_always(list,location.truelabel);
    end;
  { Code generation for comparisons of 64 bit ordinals; the result is a
    LOC_JUMP location.

    Equality tests compare both register halves directly. Ordering tests
    are reduced to cmp64_lt/cmp64_le by choosing the operand order.

    Raises internalerror 2020082202 (operands swapped) or 2020082203
    (operands not swapped) when the node is not a comparison. }
  procedure TCPUAddNode.second_cmp64bit;
    var
      lab_true,
      lab_false: tasmlabel;
      is_unsigned,
      reversed: boolean;
      left_reg,right_reg,
      op_a,op_b: TRegister64;
    begin
      current_asmdata.getjumplabel(lab_true);
      current_asmdata.getjumplabel(lab_false);
      location_reset_jump(location,lab_true,lab_false);

      pass_left_right;
      force_reg_left_right(true,true);

      { an unsigned comparison is used as soon as one operand is unsigned }
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));

      left_reg:=left.location.register64;

      { force_reg_left_right might leave right as LOC_CONSTANT, however, we cannot take advantage of this yet }
      if right.location.loc=LOC_CONSTANT then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,false);
      right_reg:=right.location.register64;

      case nodetype of
        equaln:
          begin
            cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,left_reg.reghi,right_reg.reghi,location.falselabel);
            cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,left_reg.reglo,right_reg.reglo,location.falselabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.truelabel);
          end;
        unequaln:
          begin
            cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,left_reg.reghi,right_reg.reghi,location.truelabel);
            cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,left_reg.reglo,right_reg.reglo,location.truelabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
          end;
        ltn,lten,gtn,gten:
          begin
            { ">" and ">=" are "<" and "<=" with exchanged operands; an
              earlier operand swap exchanges them once more }
            reversed:=(nf_swapped in flags) xor (nodetype in [gtn,gten]);
            if reversed then
              begin
                op_a:=right_reg;
                op_b:=left_reg;
              end
            else
              begin
                op_a:=left_reg;
                op_b:=right_reg;
              end;

            if nodetype in [ltn,gtn] then
              cmp64_lt(op_a,op_b,is_unsigned)
            else
              cmp64_le(op_a,op_b,is_unsigned);
          end;
        else
          if nf_swapped in flags then
            internalerror(2020082202)
          else
            internalerror(2020082203);
      end;
    end;
  { First pass: runs the inherited pass_1 and, if that did not return a
    replacement node, sets the expected location of comparisons to LOC_JUMP.
    Comparisons with a single precision left operand keep the location
    chosen by the inherited pass when FPUXTENSA_SINGLE is available. }
  function TCPUAddNode.pass_1 : tnode;
    begin
      result:=inherited pass_1;

      { left is only inspected when no replacement node was returned }
      if not(assigned(result)) and
         (nodetype in [equaln,unequaln,ltn,lten,gtn,gten]) then
        begin
          if not(FPUXTENSA_SINGLE in fpu_capabilities[current_settings.fputype]) or
             not(is_single(left.resultdef)) then
            expectloc:=LOC_JUMP;
        end;

      { the section below is only compiled when "dummy" is defined; note
        that it uses an identifier "unsigned" which this function does not
        declare }
{$ifdef dummy}
      if not(assigned(result)) then
        begin
          unsigned:=not(is_signed(left.resultdef)) or
            not(is_signed(right.resultdef));
          if is_64bit(left.resultdef) and
            ((nodetype in [equaln,unequaln]) or
             (unsigned and (nodetype in [ltn,lten,gtn,gten]))
            ) then
            expectloc:=LOC_FLAGS;
        end;
      { handling boolean expressions }
      if not(assigned(result)) and
         (
           not(is_boolean(left.resultdef)) or
           not(is_boolean(right.resultdef)) or
           is_dynamic_array(left.resultdef)
         ) then
        expectloc:=LOC_FLAGS;
{$endif dummy}
    end;
  { Ordinal comparisons are generated by second_cmp. }
  procedure TCPUAddNode.second_cmpordinal;
    begin
      second_cmp;
    end;
  { Runs the second pass on both operands: firstcomplex is applied to the
    node, a left ordinal constant is swapped with the right operand, then
    left and right are processed in that order. }
  procedure TCPUAddNode.pass_left_and_right;
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      { an ordinal constant on the left is exchanged with the right operand }
      if left.nodetype=ordconstn then
        swapleftright;

      secondpass(left);
      secondpass(right);
    end;
  { First pass for floating point add nodes.

    With FPUXTENSA_SINGLE available, a single precision left operand and
    any operation except division, the node is kept and only its expected
    location is set: LOC_FLAGS for comparisons, LOC_FPUREGISTER otherwise.
    All other cases are delegated to first_addfloat_soft.

    NOTE(review): second_addfloat raises internalerror 2020070402 for
    comparisons when CPUXTENSA_HAS_BOOLEAN_OPTION is missing; that
    capability is not checked here - confirm this combination cannot occur. }
  function TCPUAddNode.first_addfloat: tnode;
    begin
      result:=nil;

      if not(FPUXTENSA_SINGLE in fpu_capabilities[current_settings.fputype]) or
         (tfloatdef(left.resultdef).floattype<>s32real) or
         (nodetype=slashn) then
        begin
          result:=first_addfloat_soft;
          exit;
        end;

      if nodetype in [equaln,unequaln,lten,ltn,gten,gtn] then
        expectloc:=LOC_FLAGS
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { The generic 32x32 -> 64 bit multiplication is used when the CPU lacks
    CPUXTENSA_HAS_MUL32HIGH or when an overflow check is needed. }
  function TCPUAddNode.use_generic_mul32to64: boolean;
    begin
      if CPUXTENSA_HAS_MUL32HIGH in cpu_capabilities[current_settings.cputype] then
        result:=needoverflowcheck
      else
        result:=true;
    end;
  { The generic 64 bit multiplication is used when an overflow check is
    needed, when optimizing for size, or when the CPU lacks
    CPUXTENSA_HAS_MUL32HIGH. }
  function TCPUAddNode.use_generic_mul64bit: boolean;
    begin
      if needoverflowcheck then
        result:=true
      else if cs_opt_size in current_settings.optimizerswitches then
        result:=true
      else
        result:=not(CPUXTENSA_HAS_MUL32HIGH in cpu_capabilities[current_settings.cputype]);
    end;
  { Code generation for floating point add nodes and comparisons.

    Arithmetic (addn, muln, subn) yields a LOC_FPUREGISTER result.
    Comparisons write the boolean register NR_B0 and yield a LOC_FLAGS
    result; unequaln uses the equality test with the inverted flag, gtn and
    gten use the "less" tests with exchanged operands.

    Raises internalerror 2020032601 for unsupported node types and
    internalerror 2020070402 for comparisons when the CPU lacks
    CPUXTENSA_HAS_BOOLEAN_OPTION. }
  procedure TCPUAddNode.second_addfloat;
    var
      op : TAsmOp;
      cmpop,
      inv: boolean;
      ai : taicpu;
    begin
      pass_left_and_right;
      if (nf_swapped in flags) then
        swapleftright;

      hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);

      cmpop:=false;
      inv:=false;
      case nodetype of
        addn :
          op:=A_ADD;
        muln :
          op:=A_MUL;
        subn :
          op:=A_SUB;
        unequaln:
          begin
            { tested as "equal", the flag is inverted further down }
            op:=A_OEQ;
            cmpop:=true;
            inv:=true;
          end;
        equaln:
          begin
            op:=A_OEQ;
            cmpop:=true;
          end;
        ltn:
          begin
            op:=A_OLT;
            cmpop:=true;
          end;
        lten:
          begin
            op:=A_OLE;
            cmpop:=true;
          end;
        gtn:
          begin
            { a > b is evaluated as b < a }
            op:=A_OLT;
            swapleftright;
            cmpop:=true;
          end;
        gten:
          begin
            { a >= b is evaluated as b <= a }
            op:=A_OLE;
            swapleftright;
            cmpop:=true;
          end;
        else
          internalerror(2020032601);
      end;

      { initialize the result }
      if cmpop then
        begin
          if CPUXTENSA_HAS_BOOLEAN_OPTION in cpu_capabilities[current_settings.cputype] then
            begin
              location_reset(location,LOC_FLAGS,OS_NO);
              location.resflags.register:=NR_B0;
              location.resflags.flag:=F_NZ;
            end
          else
            Internalerror(2020070402);
        end
      else
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
        end;

      { emit the actual operation }
      if cmpop then
        begin
          cg.getcpuregister(current_asmdata.CurrAsmList,location.resflags.register);
          ai:=taicpu.op_reg_reg_reg(op,location.resflags.register,left.location.register,right.location.register);
          ai.oppostfix:=PF_S;
          current_asmdata.CurrAsmList.concat(ai);
          cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);

          if inv then
            location.resflags.flag:=F_Z;
        end
      else
        begin
          ai:=taicpu.op_reg_reg_reg(op,location.register,left.location.register,right.location.register);
          ai.oppostfix:=PF_S;
          current_asmdata.CurrAsmList.concat(ai);
          cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
        end;
    end;
  { Floating point comparisons are generated by second_addfloat. }
  procedure TCPUAddNode.second_cmpfloat;
    begin
      second_addfloat;
    end;
  { Code generation for 64 bit add nodes. Only the multiplication is handled
    here; every other node is passed on to the inherited implementation.

    The 64 bit product is assembled from 32 bit multiplications:
      result.lo := MULL(left.lo,right.lo)
      result.hi := MULUH(left.lo,right.lo)
                   + MULL(left.lo,right.hi)
                   + MULL(left.hi,right.lo)
    The same sequence is emitted for signed and unsigned operands. }
  procedure TCPUAddNode.second_add64bit;
    var
      tmpreg: tregister;
    begin
      if nodetype=muln then
        begin
          pass_left_right;
          force_reg_left_right(true,true);

          { force_reg_left_right might leave right as LOC_CONSTANT, however, we cannot take advantage of this yet }
          if right.location.loc=LOC_CONSTANT then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,false);

          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
          location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
          location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
          tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);

          { product of the two lower words: both result halves }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULL,location.register64.reglo,left.location.register64.reglo,right.location.register64.reglo));
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULUH,location.register64.reghi,left.location.register64.reglo,right.location.register64.reglo));

          { the two cross products are added to the upper result word }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULL,tmpreg,left.location.register64.reglo,right.location.register64.reghi));
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ADD,location.register64.reghi,location.register64.reghi,tmpreg));
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULL,tmpreg,left.location.register64.reghi,right.location.register64.reglo));
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ADD,location.register64.reghi,location.register64.reghi,tmpreg));
        end
      else
        Inherited;
    end;
  { unit initialization: make caddnode refer to the Xtensa add node class }
  begin
    caddnode:=TCPUAddNode;
  end.