ncpuadd.pas 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. {
  2. Copyright (c) 2014 Jonas Maebe
  3. Code generation for add nodes on AArch64
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit ncpuadd;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ncgadd,cpubase;
  type
    { AArch64 implementation of the add node. It derives from tcgaddnode and
      overrides the second-pass (code generation) hooks listed below. }
    taarch64addnode = class(tcgaddnode)
    private
      { Map the node's comparison operator to the condition flag that has to
        be tested after a floating point / integer compare instruction. }
      function GetFPUResFlags: TResFlags;
      function GetResFlags(unsigned: Boolean): TResFlags;
    protected
      function use_fma: boolean; override;
      { arithmetic }
      procedure second_addfloat; override;
      procedure second_addordinal; override;
      procedure second_add64bit; override;
      { comparisons }
      procedure second_cmpfloat; override;
      procedure second_cmpboolean; override;
      procedure second_cmpsmallset; override;
      procedure second_cmpordinal; override;
      procedure second_cmp64bit; override;
    public
      function use_generic_mul32to64: boolean; override;
    end;
  40. implementation
  41. uses
  42. systems,symtype,symdef,
  43. globals,globtype,
  44. cutils,verbose,
  45. paramgr,procinfo,
  46. aasmtai,aasmdata,aasmcpu,defutil,
  47. cgbase,cgcpu,cgutils,
  48. cpupara,
  49. ncon,nset,nadd,nmat,
  50. hlcgobj, ncgutil,cgobj,pass_2;
  51. {*****************************************************************************
  52. taarch64addnode
  53. *****************************************************************************}
  { Always answers True for this target. }
  function taarch64addnode.use_fma : boolean;
    begin
      use_fma:=true;
    end;
  { Returns the condition flag to test after an integer compare for the
    comparison operator of this node.

    unsigned: when True the unsigned conditions (HI/HS/LO/LS) are chosen,
              otherwise the signed ones (GT/GE/LT/LE).

    If nf_swapped is present in the node flags, the mirrored condition is
    returned (e.g. "less than" becomes "greater than"). Equality and
    inequality depend on neither signedness nor operand order. Any node type
    other than the six comparison operators raises an internal error. }
  function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
    begin
      if NodeType=equaln then
        GetResFlags:=F_EQ
      else if NodeType=unequaln then
        GetResFlags:=F_NE
      else if unsigned then
        begin
          if nf_swapped in flags then
            case NodeType of
              ltn:
                GetResFlags:=F_HI;
              lten:
                GetResFlags:=F_HS;
              gtn:
                GetResFlags:=F_LO;
              gten:
                GetResFlags:=F_LS;
              else
                internalerror(2014082012);
            end
          else
            case NodeType of
              ltn:
                GetResFlags:=F_LO;
              lten:
                GetResFlags:=F_LS;
              gtn:
                GetResFlags:=F_HI;
              gten:
                GetResFlags:=F_HS;
              else
                internalerror(2014082013);
            end;
        end
      else
        begin
          if nf_swapped in flags then
            case NodeType of
              ltn:
                GetResFlags:=F_GT;
              lten:
                GetResFlags:=F_GE;
              gtn:
                GetResFlags:=F_LT;
              gten:
                GetResFlags:=F_LE;
              else
                internalerror(2014082010);
            end
          else
            case NodeType of
              ltn:
                GetResFlags:=F_LT;
              lten:
                GetResFlags:=F_LE;
              gtn:
                GetResFlags:=F_GT;
              gten:
                GetResFlags:=F_GE;
              else
                internalerror(2014082011);
            end;
        end;
    end;
  { Returns the condition flag to test after a floating point compare for
    the comparison operator of this node.

    "Less than" style operators map to LO/LS and "greater than" style
    operators to GT/GE; with nf_swapped set in the node flags the two
    groups are exchanged. Any node type other than the six comparison
    operators raises an internal error. }
  function taarch64addnode.GetFPUResFlags:TResFlags;
    begin
      if NodeType=equaln then
        result:=F_EQ
      else if NodeType=unequaln then
        result:=F_NE
      else if nf_swapped in Flags then
        begin
          case NodeType of
            ltn:
              result:=F_GT;
            lten:
              result:=F_GE;
            gtn:
              result:=F_LO;
            gten:
              result:=F_LS;
            else
              internalerror(2014082014);
          end;
        end
      else
        begin
          case NodeType of
            ltn:
              result:=F_LO;
            lten:
              result:=F_LS;
            gtn:
              result:=F_GT;
            gten:
              result:=F_GE;
            else
              internalerror(2014082015);
          end;
        end;
    end;
  { Generates code for a floating point +, -, * or / node: both operands are
    forced into scalar mm registers and a single three-register FADD, FSUB,
    FMUL or FDIV is emitted into a newly allocated result register. Other
    node types raise an internal error. }
  procedure taarch64addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      pass_left_right;
      { restore the original operand order if it was swapped earlier }
      if nf_swapped in flags then
        swapleftright;

      { both operands have to live in a register; the order in which they
        are loaded is irrelevant }
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);

      { the result goes into a fresh mm register of the result's size }
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      case nodetype of
        addn:
          fpuop:=A_FADD;
        subn:
          fpuop:=A_FSUB;
        muln:
          fpuop:=A_FMUL;
        slashn:
          fpuop:=A_FDIV;
        else
          internalerror(200306014);
      end;

      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg_reg(fpuop,location.register,
          left.location.register,right.location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Generates code for a floating point comparison: both operands are forced
    into scalar mm registers, an FCMPE is emitted and the node's location
    becomes LOC_FLAGS with the condition chosen by GetFPUResFlags. }
  procedure taarch64addnode.second_cmpfloat;
    begin
      pass_left_right;
      { restore the original operand order if it was swapped earlier }
      if nf_swapped in flags then
        swapleftright;

      { both operands have to live in a register; the order in which they
        are loaded is irrelevant }
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);

      { the outcome is delivered in the condition flags }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;

      { use the signalling variant of the compare so that exceptions can be
        raised }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_reg_reg(A_FCMPE,left.location.register,right.location.register));
      cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
    end;
  { Generates code for a comparison of two booleans. The left operand is
    compared against the right one (register or constant) and the node's
    location becomes LOC_FLAGS, using the unsigned condition codes. }
  procedure taarch64addnode.second_cmpboolean;
    begin
      pass_left_right;
      force_reg_left_right(true,true);

      if right.location.loc<>LOC_CONSTANT then
        { register/register compare }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register))
      else if right.location.value<0 then
        { negative constant: compare-negative against the negated value;
          range/overflow checks are switched off so that negating
          low(int64) cannot trigger an overflow error }
  {$push}{$r-}{$q-}
        Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
  {$pop}
      else
        { non-negative constant: plain compare with immediate }
        Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  { Generates code for comparisons of small sets; the result is delivered in
    the condition flags.

    =, <>  : a direct compare of left against right.
    <=, >= : after possibly exchanging the operands, "left and right" is
             computed into a temporary register which is then compared with
             right; the result condition is F_EQ.

    Any other node type raises an internal error. }
  procedure taarch64addnode.second_cmpsmallset;
    var
      maskedreg : tregister;
      cmpop : tasmop;
    begin
      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);
      force_reg_left_right(true,true);

      { for DFA: cmpop is only used on the constant paths below }
      cmpop:=A_NONE;
      if right.location.loc=LOC_CONSTANT then
        begin
          { for a 32 bit cmp/cmn only the sign of the *lower 32 bit* matters
            -> sign extend them first }
          if not(right.location.size in [OS_64,OS_S64]) then
            right.location.value:=longint(right.location.value);
          { negative constants are compared via cmn with their absolute value }
          if right.location.value<0 then
            cmpop:=A_CMN
          else
            cmpop:=A_CMP;
        end;

      if nodetype in [equaln,unequaln] then
        begin
          if right.location.loc=LOC_CONSTANT then
            tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,cmpop,def_cgsize(resultdef),left.location.register,abs(right.location.value),NR_XZR,NR_NO,false,false)
          else
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
          location.resflags:=getresflags(true);
        end
      else if nodetype in [lten,gten] then
        begin
          { exchange the operands for an unswapped "<=" or a swapped ">=" }
          if (nf_swapped in flags)<>(nodetype=lten) then
            swapleftright;
          { we can't handle left as a constant yet }
          if left.location.loc=LOC_CONSTANT then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
          maskedreg:=cg.getintregister(current_asmdata.CurrAsmList,left.location.size);
          if right.location.loc=LOC_CONSTANT then
            begin
              hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_AND,resultdef,right.location.value,left.location.register,maskedreg);
              tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,cmpop,def_cgsize(resultdef),maskedreg,abs(right.location.value),NR_XZR,NR_NO,false,false)
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,maskedreg,left.location.register,right.location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,maskedreg,right.location.register));
            end;
          location.resflags:=F_EQ;
        end
      else
        internalerror(2012042701);
    end;
  { Generates code for a comparison of two ordinals. The left operand is
    compared against the right one (register or constant) and the node's
    location becomes LOC_FLAGS. The unsigned condition codes are used as
    soon as at least one of the operand types is not signed. }
  procedure taarch64addnode.second_cmpordinal;
    var
      use_unsigned : boolean;
    begin
      pass_left_right;
      force_reg_left_right(true,true);

      use_unsigned:=not(is_signed(left.resultdef) and
                        is_signed(right.resultdef));

      if right.location.loc<>LOC_CONSTANT then
        { register/register compare }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register))
      else if right.location.value<0 then
        { negative constant: compare-negative against the negated value,
          with range/overflow checking disabled for the negation }
  {$push}{$r-}{$q-}
        Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
  {$pop}
      else
        { non-negative constant: plain compare with immediate }
        Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(use_unsigned);
    end;
  { Generates code for ordinal arithmetic/logical nodes. Two cases are
    handled here, everything else is delegated to the inherited method:

    1. a multiplication of two 32 bit operands with a 64 bit result, which
       becomes a single SMULL/UMULL;
    2. (only with optimisation level 2) "and", "or" and "xor" where one
       operand is a "not" node: the "not" is folded into the operation by
       emitting BIC, ORN or EON respectively. }
  procedure taarch64addnode.second_addordinal;
    const
      widening_mul: array[boolean] of TAsmOp = (A_SMULL,A_UMULL);
    var
      is_unsigned,
      left_inverts: boolean;
      invop: TAsmOp;
      notarg: tnode;
    begin
      { 32x32->64 multiplication }
      if (nodetype=muln) and
         is_32bit(left.resultdef) and
         is_32bit(right.resultdef) and
         is_64bit(resultdef) then
        begin
          { UMULL as soon as one of the operands is not signed }
          is_unsigned:=not(is_signed(left.resultdef) and
                           is_signed(right.resultdef));
          pass_left_right;
          force_reg_left_right(true,true);
          { force_reg_left_right can leave right as a LOC_CONSTANT (we can't
            say "a constant register is okay, but an ordinal constant isn't") }
          if right.location.loc=LOC_CONSTANT then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
          current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg_reg(widening_mul[is_unsigned],location.register,left.location.register,right.location.register));
          Exit;
        end;

      { Can "x op (not y)" be turned into a single BIC/ORN/EON? }
      if (cs_opt_level2 in current_settings.optimizerswitches) and
         (nodetype in [andn, orn, xorn]) then
        begin
          { NOTE(review): the location of the not node's operand is inspected
            here before secondpass has been called on it in this method -
            confirm that it is meaningful at this point }
          left_inverts:=(left.nodetype = notn) and
                        (tnotnode(left).left.location.loc <> LOC_CONSTANT);
          if left_inverts or
             (
               (right.nodetype = notn) and
               (tnotnode(right).left.location.loc <> LOC_CONSTANT)
             ) then
            begin
              { The instructions only support the second operand being
                inverted; since we're dealing with ordinals there is no
                Boolean short-circuiting, so the operands can be exchanged
                safely. If the left node is a "not" over a non-constant, it
                is moved to the right; otherwise the right node already is
                such a "not" and nothing has to be exchanged. }
              if left_inverts then
                swapleftright;

              secondpass(left);

              { Skip the not node completely and evaluate its operand
                directly }
              Include(right.flags, nf_do_not_execute);
              notarg:=tnotnode(right).left;
              secondpass(notarg);

              { make sure both inputs are in integer registers }
              if not (notarg.location.loc in [LOC_REGISTER, LOC_CREGISTER]) then
                hlcg.location_force_reg(
                  current_asmdata.CurrAsmList,
                  notarg.location,
                  notarg.resultdef,
                  notarg.resultdef,
                  false
                );

              if not (left.location.loc in [LOC_REGISTER, LOC_CREGISTER]) then
                hlcg.location_force_reg(
                  current_asmdata.CurrAsmList,
                  left.location,
                  left.resultdef,
                  left.resultdef,
                  false
                );

              set_result_location_reg;

              case nodetype of
                andn:
                  invop := A_BIC;
                orn:
                  invop := A_ORN;
                xorn:
                  invop := A_EON;
                else
                  InternalError(2022102130);
              end;

              current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg_reg(invop,location.register,left.location.register,notarg.location.register));

              { The inversion fills the bits above an unsigned 8/16 bit
                value with 1s, so cut them off again by downsizing the
                register from 32 bit to the target size }
              if (def_cgsize(resultdef) in [OS_8, OS_16]) then
                hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,torddef(u32inttype),resultdef,location.register,location.register);

              { Overflow can't happen with bic/orn/eon }
              Exit;
            end;
        end;

      { Default behaviour }
      inherited second_addordinal;
    end;
  { 64 bit arithmetic takes the same path as all other ordinal arithmetic:
    simply forwards to second_addordinal. }
  procedure taarch64addnode.second_add64bit;
    begin
      second_addordinal;
    end;
  { 64 bit comparisons take the same path as all other ordinal comparisons:
    simply forwards to second_cmpordinal. }
  procedure taarch64addnode.second_cmp64bit;
    begin
      second_cmpordinal;
    end;
  { Always answers False; second_addordinal of this class contains its own
    32x32->64 bit multiplication code. }
  function taarch64addnode.use_generic_mul32to64: boolean;
    begin
      use_generic_mul32to64:=false;
    end;
  initialization
    { make taarch64addnode the class referenced by caddnode }
    caddnode:=taarch64addnode;
  end.