ncpuadd.pas 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
{
    Copyright (c) 2014 Jonas Maebe

    Code generation for add nodes on AArch64

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  17. unit ncpuadd;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ncgadd,cpubase;
  type
    { Add node for the AArch64 code generator.  It derives from tcgaddnode
      and overrides the second-pass hooks for floating point, boolean,
      small set and ordinal additions and comparisons. }
    taarch64addnode = class(tcgaddnode)
    private
      { Condition flags holding the result of an integer comparison;
        "unsigned" selects the unsigned condition codes. }
      function GetResFlags(unsigned:Boolean):TResFlags;
      { Condition flags holding the result of a floating point comparison. }
      function GetFPUResFlags:TResFlags;
    protected
      { Always answers true in this class. }
      function use_fma : boolean;override;
      { Floating point add/sub/mul/div and compare. }
      procedure second_addfloat;override;
      procedure second_cmpfloat;override;
      { Integer-style comparisons, all of which end in LOC_FLAGS. }
      procedure second_cmpboolean;override;
      procedure second_cmpsmallset;override;
      procedure second_cmpordinal;override;
      { Ordinal arithmetic; the 64 bit variants forward to the ordinal
        implementations. }
      procedure second_addordinal;override;
      procedure second_add64bit; override;
      procedure second_cmp64bit; override;
    public
      { Always answers false in this class. }
      function use_generic_mul32to64: boolean; override;
      function pass_1 : tnode;override;
    end;
  41. implementation
  42. uses
  43. systems,symconst,symtype,symdef,
  44. globals,globtype,
  45. cutils,verbose,
  46. paramgr,procinfo,
  47. aasmtai,aasmdata,aasmcpu,defutil,
  48. cgbase,cgcpu,cgutils,
  49. cpupara,
  50. ncon,nset,nadd,nmat,
  51. hlcgobj, ncgutil,cgobj,pass_2;
{*****************************************************************************
                               taarch64addnode
*****************************************************************************}
  { Override of use_fma that unconditionally answers true for this node
    class. }
  function taarch64addnode.use_fma : boolean;
    begin
      use_fma:=true;
    end;
  { Maps the comparison represented by this node onto the condition flags
    that hold its outcome after the compare instruction emitted by the
    second_cmp* methods.

    unsigned: true selects the unsigned condition codes (LO/LS/HI/HS),
              false the signed ones (LT/LE/GT/GE).

    equaln/unequaln map to EQ/NE regardless of signedness.  When nf_swapped
    is set in the node flags the mirrored condition is returned.  Any node
    type that is not a comparison raises an internal error. }
  function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
    var
      mirrored: boolean;
    begin
      if nodetype=equaln then
        Result:=F_EQ
      else if nodetype=unequaln then
        Result:=F_NE
      else
        begin
          mirrored:=nf_swapped in flags;
          if unsigned and mirrored then
            { unsigned, operands exchanged }
            case nodetype of
              ltn:
                Result:=F_HI;
              lten:
                Result:=F_HS;
              gtn:
                Result:=F_LO;
              gten:
                Result:=F_LS;
              else
                internalerror(2014082012);
            end
          else if unsigned then
            { unsigned, operands in source order }
            case nodetype of
              ltn:
                Result:=F_LO;
              lten:
                Result:=F_LS;
              gtn:
                Result:=F_HI;
              gten:
                Result:=F_HS;
              else
                internalerror(2014082013);
            end
          else if mirrored then
            { signed, operands exchanged }
            case nodetype of
              ltn:
                Result:=F_GT;
              lten:
                Result:=F_GE;
              gtn:
                Result:=F_LT;
              gten:
                Result:=F_LE;
              else
                internalerror(2014082010);
            end
          else
            { signed, operands in source order }
            case nodetype of
              ltn:
                Result:=F_LT;
              lten:
                Result:=F_LE;
              gtn:
                Result:=F_GT;
              gten:
                Result:=F_GE;
              else
                internalerror(2014082011);
            end;
        end;
    end;
  { Maps the floating point comparison represented by this node onto the
    condition flags that hold its outcome after the compare emitted by
    second_cmpfloat.

    equaln/unequaln map to EQ/NE.  The ordering comparisons use LO/LS for
    "less" and GT/GE for "greater"; with nf_swapped set the roles of
    less and greater are exchanged.  Any other node type raises an
    internal error.

    NOTE(review): LO/LS are presumably chosen over LT/LE so that a
    comparison involving a NaN evaluates to false - confirm against the
    architecture's floating point condition flag table. }
  function taarch64addnode.GetFPUResFlags:TResFlags;
    var
      mirrored: boolean;
    begin
      if nodetype=equaln then
        GetFPUResFlags:=F_EQ
      else if nodetype=unequaln then
        GetFPUResFlags:=F_NE
      else
        begin
          mirrored:=nf_swapped in flags;
          if not mirrored then
            case nodetype of
              ltn:
                GetFPUResFlags:=F_LO;
              lten:
                GetFPUResFlags:=F_LS;
              gtn:
                GetFPUResFlags:=F_GT;
              gten:
                GetFPUResFlags:=F_GE;
              else
                internalerror(2014082015);
            end
          else
            case nodetype of
              ltn:
                GetFPUResFlags:=F_GT;
              lten:
                GetFPUResFlags:=F_GE;
              gtn:
                GetFPUResFlags:=F_LO;
              gten:
                GetFPUResFlags:=F_LS;
              else
                internalerror(2014082014);
            end;
        end;
    end;
  { Generates code for a floating point add, subtract, multiply or divide.

    Both operands are evaluated and forced into scalar MM registers, a
    fresh MM register is allocated for the result, and a single
    three-register FADD/FSUB/FMUL/FDIV is emitted, followed by the
    optional FPU exception check.  Any other node type raises an internal
    error. }
  procedure taarch64addnode.second_addfloat;
    var
      fpu_op : TAsmOp;
    begin
      pass_left_right;
      { restore source operand order if the operands were exchanged }
      if nf_swapped in flags then
        swapleftright;

      { both operands end up in an fpu register, so which one is forced
        first does not matter }
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      case nodetype of
        addn:
          fpu_op:=A_FADD;
        muln:
          fpu_op:=A_FMUL;
        subn:
          fpu_op:=A_FSUB;
        slashn:
          fpu_op:=A_FDIV;
        else
          internalerror(200306014);
      end;

      { result := left <op> right }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg_reg(fpu_op,location.register,left.location.register,right.location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Generates code for a floating point comparison.

    Both operands are evaluated and forced into scalar MM registers, the
    node's location becomes LOC_FLAGS with the condition chosen by
    GetFPUResFlags, and an FCMPE is emitted, followed by the optional FPU
    exception check. }
  procedure taarch64addnode.second_cmpfloat;
    begin
      pass_left_right;
      { restore source operand order if the operands were exchanged }
      if nf_swapped in flags then
        swapleftright;

      { both operands end up in an fpu register, so which one is forced
        first does not matter }
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=GetFPUResFlags;

      { FCMPE is the signalling compare, so exceptions can be raised }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg(A_FCMPE,left.location.register,right.location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Generates code for a comparison between two booleans.

    The left operand is compared against the right one with CMP; a
    constant right operand is compared as an immediate, using CMN with
    the negated value when the constant is negative.  The result is left
    in the flags using the unsigned condition codes. }
  procedure taarch64addnode.second_cmpboolean;
    begin
      pass_left_right;
      force_reg_left_right(true,true);

      if right.location.loc<>LOC_CONSTANT then
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register))
      else if right.location.value<0 then
        begin
          { range/overflow checks are switched off for the negation to
            avoid overflow if value=low(int64) }
          {$push}{$r-}{$q-}
          Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false);
          {$pop}
        end
      else
        Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=GetResFlags(true);
    end;
  { Generates code for a comparison between two small sets.

    equaln/unequaln: plain compare of left against right; the condition
      comes from GetResFlags(true).
    lten/gten: the operands are ordered so that the test becomes
      "(left and right) = right", i.e. an AND into a temporary register
      followed by a compare with right; the condition is always F_EQ.

    A constant right operand is compared as an immediate, using CMN on
    its absolute value when it is negative.  Any other node type raises
    an internal error. }
  procedure taarch64addnode.second_cmpsmallset;
    var
      masked_reg : tregister;
      cmp_op : tasmop;
    begin
      pass_left_right;

      location_reset(location,LOC_FLAGS,OS_NO);

      force_reg_left_right(true,true);

      { default keeps DFA happy; cmp_op is only consumed on the paths
        where right is a constant }
      cmp_op:=A_NONE;
      if right.location.loc=LOC_CONSTANT then
        begin
          { when doing a cmp/cmn on 32 bit, we care whether the *lower 32 bit*
            is a positive/negative value -> sign extend }
          if not(right.location.size in [OS_64,OS_S64]) then
            right.location.value:=longint(right.location.value);
          if right.location.value<0 then
            cmp_op:=A_CMN
          else
            cmp_op:=A_CMP;
        end;

      case nodetype of
        equaln,
        unequaln:
          begin
            if right.location.loc<>LOC_CONSTANT then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register))
            else
              tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,cmp_op,def_cgsize(resultdef),left.location.register,abs(right.location.value),NR_XZR,NR_NO,false,false);
            location.resflags:=GetResFlags(true);
          end;
        lten,
        gten:
          begin
            { exchange for an unswapped lten or a swapped gten; only
              lten and gten reach this branch, so this equality is the
              same test as spelling out both combinations }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            { we can't handle left as a constant yet }
            if left.location.loc=LOC_CONSTANT then
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
            masked_reg:=cg.getintregister(current_asmdata.CurrAsmList,left.location.size);
            { right is re-examined here because the exchange above may
              have changed which operand it refers to }
            if right.location.loc<>LOC_CONSTANT then
              begin
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,masked_reg,left.location.register,right.location.register));
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,masked_reg,right.location.register));
              end
            else
              begin
                hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_AND,resultdef,right.location.value,left.location.register,masked_reg);
                tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,cmp_op,def_cgsize(resultdef),masked_reg,abs(right.location.value),NR_XZR,NR_NO,false,false);
              end;
            location.resflags:=F_EQ;
          end;
        else
          internalerror(2012042701);
      end;
    end;
  { Generates code for a comparison between two ordinals.

    The comparison is treated as unsigned as soon as either operand type
    is unsigned.  The left operand is compared against the right one
    with CMP; a constant right operand is compared as an immediate,
    using CMN with the negated value when the constant is negative.  The
    result is left in the flags. }
  procedure taarch64addnode.second_cmpordinal;
    var
      unsigned_cmp : boolean;
    begin
      pass_left_right;
      force_reg_left_right(true,true);

      unsigned_cmp:=not(is_signed(left.resultdef) and is_signed(right.resultdef));

      if right.location.loc<>LOC_CONSTANT then
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register))
      else if right.location.value<0 then
        begin
          { range/overflow checks are switched off for the negation so
            that value=low(int64) does not trigger an overflow }
          {$push}{$r-}{$q-}
          Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false);
          {$pop}
        end
      else
        Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=GetResFlags(unsigned_cmp);
    end;
  { Generates code for ordinal arithmetic, with two AArch64 special cases
    ahead of the inherited implementation:

    1. A multiplication of two 32 bit operands with a 64 bit result is
       emitted as a single SMULL/UMULL (UMULL as soon as either operand
       type is unsigned).
    2. With cs_opt_level2 enabled, "x and (not y)", "x or (not y)" and
       "x xor (not y)" are emitted as BIC, ORN and EON respectively, so
       the "not" node itself generates no code.

    Everything else is forwarded to the inherited second_addordinal. }
  procedure taarch64addnode.second_addordinal;
    const
      widening_mul: array[boolean] of TAsmOp = (A_SMULL,A_UMULL);
    var
      unsigned_mul: boolean;
      inverting_op: TAsmOp;
      inverted: tnode;
    begin
      { 32x32->64 multiplication }
      if (nodetype=muln) and
         is_32bit(left.resultdef) and
         is_32bit(right.resultdef) and
         is_64bit(resultdef) then
        begin
          unsigned_mul:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
          pass_left_right;
          force_reg_left_right(true,true);
          { force_reg_left_right can leave right as a LOC_CONSTANT (we can't
            say "a constant register is okay, but an ordinal constant isn't") }
          if right.location.loc=LOC_CONSTANT then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
          current_asmdata.CurrAsmList.Concat(
            taicpu.op_reg_reg_reg(widening_mul[unsigned_mul],location.register,left.location.register,right.location.register));
          exit;
        end;

      { Can we fold a "not" operand into a BIC/ORN/EON instruction? }
      if (cs_opt_level2 in current_settings.optimizerswitches) and
         (nodetype in [andn, orn, xorn]) and
         (
           ((left.nodetype = notn) and
            (tnotnode(left).left.location.loc <> LOC_CONSTANT)) or
           ((right.nodetype = notn) and
            (tnotnode(right).left.location.loc <> LOC_CONSTANT))
         ) then
        begin
          { BIC/ORN/EON only support the second operand being inverted;
            however, since we're dealing with ordinals, there won't be any
            Boolean shortcutting, so we can safely swap the operands.

            If left is a usable "not" it is moved to the right.  Otherwise
            the test above guarantees that right already is a "not" with
            a non-constant input. }
          if (left.nodetype = notn) and
             (tnotnode(left).left.location.loc <> LOC_CONSTANT) then
            swapleftright;

          secondpass(left);

          { Skip the not node completely and evaluate its operand instead }
          Include(right.flags, nf_do_not_execute);
          secondpass(tnotnode(right).left);
          inverted:=tnotnode(right).left;

          { allocate registers }
          if not (inverted.location.loc in [LOC_REGISTER, LOC_CREGISTER]) then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,inverted.location,inverted.resultdef,inverted.resultdef,false);

          if not (left.location.loc in [LOC_REGISTER, LOC_CREGISTER]) then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);

          set_result_location_reg;

          case nodetype of
            andn:
              inverting_op:=A_BIC;
            orn:
              inverting_op:=A_ORN;
            xorn:
              inverting_op:=A_EON;
            else
              InternalError(2022102130);
          end;

          { result := left <op> (not inverted) }
          current_asmdata.CurrAsmList.Concat(
            taicpu.op_reg_reg_reg(inverting_op,location.register,left.location.register,inverted.location.register));

          { We have to make sure trailing bits are cut off for unsigned
            extensions since it will be full of 1s, so do this by
            downsizing the register from 32-bit to the target size }
          if def_cgsize(resultdef) in [OS_8, OS_16] then
            hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,torddef(u32inttype),resultdef,location.register,location.register);

          { Overflow can't happen with bic/orn/eon }
          exit;
        end;

      { Default behaviour }
      inherited second_addordinal;
    end;
  { 64 bit arithmetic needs no separate handling in this class: the call
    is forwarded unchanged to second_addordinal. }
  procedure taarch64addnode.second_add64bit;
    begin
      second_addordinal;
    end;
  { 64 bit comparisons need no separate handling in this class: the call
    is forwarded unchanged to second_cmpordinal. }
  procedure taarch64addnode.second_cmp64bit;
    begin
      second_cmpordinal;
    end;
  { Override of use_generic_mul32to64 that unconditionally answers false;
    second_addordinal in this class emits the 32x32->64 multiplication
    itself. }
  function taarch64addnode.use_generic_mul32to64: boolean;
    begin
      use_generic_mul32to64:=false;
    end;
  { Runs the inherited first pass.  When this node is kept and its left
    operand is a float, pi_do_call is added to the current procedure's
    flags if FPU exception checks are needed.

    Returns whatever the inherited pass_1 returned: nil when this node is
    kept, otherwise the node that replaces it. }
  function taarch64addnode.pass_1: tnode;
    begin
      Result:=inherited pass_1;
      { a non-nil result means a new node has been generated and the
        current node will be discarded, so there is nothing left to do }
      if Result<>nil then
        exit;
      if left.resultdef.typ<>floatdef then
        exit;
      if needs_check_for_fpu_exceptions then
        Include(current_procinfo.flags,pi_do_call);
    end;
  { Unit initialization: assigns taarch64addnode to caddnode.
    NOTE(review): caddnode is presumably the class reference used to
    instantiate add nodes - confirm in the unit that declares it. }
  begin
    caddnode:=taarch64addnode;
  end.