nllvminl.pas 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. Generate LLVM bytecode for inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nllvminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,
  22. ncginl;
  type
    { LLVM flavour of the inline (compiler intrinsic) node.

      The first_* hooks either build a replacement node tree and return it,
      or return nil and set expectloc so that the matching second_* routine
      emits the code itself.  The second_* routines generate code through
      hlcg and, for sqr/trunc, append LLVM instructions directly. }
    tllvminlinenode = class(tcginlinenode)
     protected
      { strips an implicit float typeconversion from the trunc()/round()
        argument, see the implementation for the rationale }
      procedure maybe_remove_round_trunc_typeconv;

      { hooks that lower the intrinsic to a call of an llvm_* helper or to
        a plain statement block }
      function first_get_frame: tnode; override;
      function first_abs_real: tnode; override;
      function first_bitscan: tnode; override;
      function first_fma: tnode; override;

      { hooks for the floating point sqr/sqrt/trunc intrinsics }
      function first_sqr_real: tnode; override;
      function first_sqrt_real: tnode; override;
      function first_trunc_real: tnode; override;

      function first_popcnt: tnode; override;
     public
      { code generation for length()/high() of strings and dynamic arrays }
      procedure second_length; override;
      procedure second_high; override;

      { code generation for sqr() (fmul) and trunc() (fptosi) }
      procedure second_sqr_real; override;
      procedure second_trunc_real; override;
    end;
  41. implementation
  42. uses
  43. verbose,globals,globtype,constexp,cutils,
  44. aasmbase, aasmdata,
  45. symconst,symtype,symdef,defutil,
  46. compinnr,
  47. nutils,nadd,nbas,ncal,ncnv,ncon,nflw,ninl,nld,nmat,
  48. pass_2,
  49. cgbase,cgutils,tgobj,hlcgobj,
  50. cpubase,
  51. llvmbase,aasmllvm,aasmllvmmetadata;
  { Removes an implicit type conversion node on top of the argument (left)
    when the converted operand is itself a float.

    Rationale: the prototype of trunc()/round() in the system unit is
    declared with valreal as parameter type, so the argument will always
    have been converted to extended.  This is not undone in ninl, because
    other code generators (e.g. PowerPC) assume that the parameter of trunc
    has been converted to valreal.
    (copy of the code in nx64inl, should be refactored) }
  procedure tllvminlinenode.maybe_remove_round_trunc_typeconv;
    var
      convnode: ttypeconvnode;
      operand: tnode;
    begin
      { only a type conversion node can be stripped... }
      if left.nodetype<>typeconvn then
        exit;
      { ...and only if the programmer did not write it explicitly... }
      if nf_explicit in left.flags then
        exit;
      convnode:=ttypeconvnode(left);
      { ...and only if the value underneath is a float }
      if convnode.left.resultdef.typ<>floatdef then
        exit;
      { get rid of the type conversion, so that use_vectorfpu will be applied
        to the original type; detach the operand first so that freeing the
        conversion node does not free the operand along with it }
      operand:=convnode.left;
      convnode.left:=nil;
      convnode.free;
      left:=operand;
    end;
  { Replaces get_frame by a call to the 'llvm_frameaddress' helper with the
    integer constant 0 as its single argument.
    NOTE(review): 0 presumably selects the frame of the current routine, as
    for the level argument of llvm.frameaddress - confirm against the
    declaration of the helper. }
  function tllvminlinenode.first_get_frame: tnode;
    var
      levelarg: tcallparanode;
    begin
      levelarg:=ccallparanode.create(genintconstnode(0),nil);
      result:=ccallnode.createintern('llvm_frameaddress',levelarg);
    end;
  { In general, generate ordinary expression nodes rather than intrinsics:
    according to LLVM's "Performance Tips for Frontend Authors", "The
    optimizer is quite good at reasoning about general control flow and
    arithmetic, it is not anywhere near as strong at reasoning about the
    various intrinsics. If profitable for code generation purposes, the
    optimizer will likely form the intrinsics itself late in the
    optimization pipeline." }
  { Lowers abs() of a real to a plain statement block instead of an
    intrinsic call:

      argtemp:=left;
      if argtemp>=0 then
        abstemp:=argtemp
      else
        abstemp:=-argtemp;
      <value of the block>:=abstemp

    The argument node is moved into the block, so left is set to nil
    afterwards. }
  function tllvminlinenode.first_abs_real: tnode;
    var
      argtemp,
      abstemp: ttempcreatenode;
      stmts: tstatementnode;
      isnonnegative,
      keepvalue,
      negatevalue: tnode;
    begin
      result:=internalstatements(stmts);

      argtemp:=ctempcreatenode.create(left.resultdef,left.resultdef.size,tt_persistent,true);
      { assigned twice -> will be spilled if put in register }
      abstemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stmts,argtemp);
      addstatement(stmts,abstemp);

      { argtemp:=left }
      addstatement(stmts,cassignmentnode.create(ctemprefnode.create(argtemp),left));

      { argtemp>=0.0 }
      isnonnegative:=
        caddnode.create(gten,
          ctemprefnode.create(argtemp),
          crealconstnode.create(0.0,left.resultdef)
        );
      { abstemp:=argtemp }
      keepvalue:=
        cassignmentnode.create(
          ctemprefnode.create(abstemp),
          ctemprefnode.create(argtemp)
        );
      { abstemp:=-argtemp }
      negatevalue:=
        cassignmentnode.create(
          ctemprefnode.create(abstemp),
          cunaryminusnode.create(ctemprefnode.create(argtemp))
        );
      addstatement(stmts,cifnode.create(isnonnegative,keepvalue,negatevalue));

      addstatement(stmts,ctempdeletenode.create(argtemp));
      addstatement(stmts,ctempdeletenode.create_normal_temp(abstemp));
      { the value of the block is abstemp }
      addstatement(stmts,ctemprefnode.create(abstemp));

      { the argument node has been reused inside the block }
      left:=nil;
    end;
  { Lowers the bit scan intrinsics to a statement block of the form

      if left<>0 then
        restemp:=llvm_ctlz/cttz(unsigned(left),true)
      else
        restemp:=255;

    LLVM_CTLZ is used for in_bsr_x and LLVM_CTTZ for every other inline
    number handled here.  For in_bsr_x the count is additionally xor-ed with
    the bit size of the argument minus one. }
  function tllvminlinenode.first_bitscan: tnode;
    var
      argdef: tdef;
      restemp,
      argtemp: ttempcreatenode;
      stmts: tstatementnode;
      body: tblocknode;
      bitindex,
      nonzerotest: tnode;
      intrinsicname: string[15];
      reversescan: boolean;
    begin
      reversescan:=inlinenumber=in_bsr_x;
      if reversescan then
        intrinsicname:='LLVM_CTLZ'
      else
        intrinsicname:='LLVM_CTTZ';

      { remember the original type: left may be replaced below }
      argdef:=left.resultdef;

      body:=internalstatements(stmts);
      restemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stmts,restemp);

      { left is used twice below (intrinsic argument and zero test);
        maybereplacewithtemp returns the temp it created, if any, so that it
        can be released at the end }
      argtemp:=maybereplacewithtemp(left,body,stmts,argdef.size,true);

      { intrinsicname(unsigned(left),true) }
      bitindex:=
        ccallnode.createintern(
          intrinsicname,
          ccallparanode.create(cordconstnode.create(1,llvmbool1type,false),
            ccallparanode.create(
              ctypeconvnode.create_explicit(left,get_unsigned_inttype(argdef)),nil
            )
          )
        );

      { ctlz returns the number of leading zero bits, while bsr returns the
        bit number of the first non-zero bit (with the least significant bit
        as 0) -> invert result }
      if reversescan then
        bitindex:=
          caddnode.create(xorn,
            bitindex,
            genintconstnode(argdef.size*8-1)
          );

      { left<>0 }
      nonzerotest:=caddnode.create(unequaln,left.getcopy,genintconstnode(0));
      addstatement(stmts,
        cifnode.create(nonzerotest,
          cassignmentnode.create(
            ctemprefnode.create(restemp),
            bitindex
          ),
          cassignmentnode.create(
            ctemprefnode.create(restemp),
            genintconstnode(255)
          )
        )
      );

      if assigned(argtemp) then
        addstatement(stmts,ctempdeletenode.create(argtemp));
      addstatement(stmts,ctempdeletenode.create_normal_temp(restemp));
      { the value of the block is restemp }
      addstatement(stmts,ctemprefnode.create(restemp));

      { the argument node has been reused inside the block }
      left:=nil;
      result:=body;
    end;
  { Replaces the fma intrinsics by a call to an LLVM helper.

    With cs_opt_fastmath the type specific llvm_fma_f* helper is called with
    the original parameter list (left).  Otherwise
    LLVM_EXPERIMENTAL_CONSTRAINED_FMA is called, with two extra metadata
    string parameters chained in front of the original parameter list:
    'round.dynamic' and llvm_constrainedexceptmodestring.

    The parameter list is reused by the call node, so left is set to nil. }
  function tllvminlinenode.first_fma: tnode;
    var
      exceptmode: ansistring;
      procname: string[40];
      roundingarg,
      exceptarg: tnode;
    begin
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          { the case statement only validates the inline number here: all
            supported variants map to the same helper }
          case inlinenumber of
            in_fma_single,
            in_fma_double,
            in_fma_extended,
            in_fma_float128:
              procname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMA';
            else
              internalerror(2019122811);
          end;
          exceptmode:=llvm_constrainedexceptmodestring;
          roundingarg:=
            ccallparanode.create(
              cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
              left
            );
          exceptarg:=
            ccallparanode.create(
              cstringconstnode.createpchar(ansistring2pchar(exceptmode),length(exceptmode),llvm_metadatatype),
              roundingarg
            );
          result:=ccallnode.createintern(procname,exceptarg);
        end
      else
        begin
          case inlinenumber of
            in_fma_single:
              procname:='llvm_fma_f32';
            in_fma_double:
              procname:='llvm_fma_f64';
            in_fma_extended:
              procname:='llvm_fma_f80';
            in_fma_float128:
              procname:='llvm_fma_f128';
            else
              internalerror(2018122101);
          end;
          result:=ccallnode.createintern(procname,left);
        end;
      { the parameter list has been reused by the call node }
      left:=nil;
    end;
  { sqr() of a real is not replaced by another node (result is nil); only
    the expected result location is chosen: an MM register if
    use_vectorfpu holds for the argument type, an FPU register otherwise. }
  function tllvminlinenode.first_sqr_real: tnode;
    begin
      result:=nil;
      expectloc:=LOC_FPUREGISTER;
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_MMREGISTER;
    end;
  { Replaces sqrt() of a real by a call to an LLVM helper.

    With cs_opt_fastmath the helper matching the float type of the argument
    (llvm_sqrt_f32/f64/f80/f128) is called with the argument only.
    Otherwise LLVM_EXPERIMENTAL_CONSTRAINED_SQRT is called, with the
    metadata strings 'round.dynamic' and llvm_constrainedexceptmodestring
    chained in front of the argument.

    Raises an internal error if the argument is not a float or has an
    unsupported float type.  The argument node is reused by the call, so
    left is set to nil. }
  function tllvminlinenode.first_sqrt_real: tnode;
    var
      exceptmode: ansistring;
      intrinsic: string[40];
      argtype: tfloattype;
      valuearg,
      roundingarg,
      exceptarg: tnode;
    begin
      if left.resultdef.typ<>floatdef then
        internalerror(2018121601);
      argtype:=tfloatdef(left.resultdef).floattype;

      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          { the case statement only validates the float type here: all
            supported types map to the same helper }
          case argtype of
            s32real,
            s64real,
            s80real,sc80real,
            s128real:
              intrinsic:='LLVM_EXPERIMENTAL_CONSTRAINED_SQRT';
            else
              internalerror(2019122810);
          end;
          exceptmode:=llvm_constrainedexceptmodestring;
          valuearg:=ccallparanode.create(left,nil);
          roundingarg:=
            ccallparanode.create(
              cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
              valuearg
            );
          exceptarg:=
            ccallparanode.create(
              cstringconstnode.createpchar(ansistring2pchar(exceptmode),length(exceptmode),llvm_metadatatype),
              roundingarg
            );
          result:=ccallnode.createintern(intrinsic,exceptarg);
        end
      else
        begin
          case argtype of
            s32real:
              intrinsic:='llvm_sqrt_f32';
            s64real:
              intrinsic:='llvm_sqrt_f64';
            s80real,sc80real:
              intrinsic:='llvm_sqrt_f80';
            s128real:
              intrinsic:='llvm_sqrt_f128';
            else
              internalerror(2018121602);
          end;
          result:=ccallnode.createintern(intrinsic,ccallparanode.create(left,nil));
        end;
      { the argument node has been reused by the call node }
      left:=nil;
    end;
  { trunc() of a real is only handled by this node when cs_opt_fastmath is
    active; in all other cases the inherited implementation decides.

    In the fastmath case an implicit float typeconversion of the argument is
    stripped, the result is expected in an integer register and nil is
    returned so that second_trunc_real generates the code. }
  function tllvminlinenode.first_trunc_real: tnode;
    begin
      { fptosi is undefined if the value is out of range -> only generate
        in case of fastmath }
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        result:=inherited
      else
        begin
          maybe_remove_round_trunc_typeconv;
          expectloc:=LOC_REGISTER;
          result:=nil;
        end;
    end;
  { Replaces popcnt by a call to the LLVM_CTPOP helper whose result is
    converted to the result type of this node.  The argument node is reused
    by the call, so left is set to nil. }
  function tllvminlinenode.first_popcnt: tnode;
    var
      popcall: tnode;
    begin
      popcall:=ccallnode.createintern('LLVM_CTPOP',ccallparanode.create(left,nil));
      result:=ctypeconvnode.create(popcall,resultdef);
      left:=nil;
    end;
  { Generates the code for length(): the value is obtained via second_high;
    for dynamic arrays one is added to it in a fresh integer register. }
  procedure tllvminlinenode.second_length;
    var
      lenreg: tregister;
    begin
      second_high;
      { Dynamic arrays do not have their length attached but their maximum
        index; for everything else second_high already produced the length }
      if not is_dynamic_array(left.resultdef) then
        exit;
      lenreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_ADD,resultdef,1,location.register,lenreg);
      location.register:=lenreg;
    end;
  { Generates the code for high()/length of strings and dynamic arrays.

    Shortstrings: the result location is a reference to the start of the
    string, retyped to the result type (the length byte).

    Everything else: the argument is loaded as a pointer.  If the pointer is
    nil, the result is -1 for dynamic arrays and 0 otherwise; if it is not
    nil, the value stored lendef.size bytes in front of the data is loaded
    and, for widestrings, shifted right by one. }
  procedure tllvminlinenode.second_high;
    var
      donelab, nilptrlab: tasmlabel;
      resreg: tregister;
      lenref: treference;
      lendef,
      lenptrdef,
      resptrdef: tdef;
      widestr: boolean;
      nilresult: tcgint;
    begin
      secondpass(left);

      if is_shortstring(left.resultdef) then
        begin
          if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
            internalerror(2014080806);
          { typecast the shortstring reference into a length byte reference }
          location_reset_ref(location,left.location.loc,def_cgsize(resultdef),left.location.reference.alignment,left.location.reference.volatility);
          resptrdef:=cpointerdef.getreusable(resultdef);
          resreg:=hlcg.getaddressregister(current_asmdata.CurrAsmList,resptrdef);
          hlcg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.resultdef,resptrdef,left.location.reference,resreg);
          hlcg.reference_reset_base(location.reference,resptrdef,resreg,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
          exit;
        end;

      { length in ansi/wide strings and high in dynamic arrays is at offset
        -sizeof(sizeint), for widestrings it's at -4 }
      widestr:=is_widestring(left.resultdef);
      if widestr then
        lendef:=u32inttype
      else
        lendef:=ossinttype;
      lenptrdef:=cpointerdef.getreusable(lendef);

      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
        left.resultdef,lenptrdef,true);
      current_asmdata.getjumplabel(nilptrlab);
      current_asmdata.getjumplabel(donelab);

      { nil pointer -> no header to read the length from }
      hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,lenptrdef,OC_EQ,0,left.location.register,nilptrlab);

      { volatility of the ansistring/widestring refers to the volatility of
        the string pointer, not of the string data }
      hlcg.reference_reset_base(lenref,lenptrdef,left.location.register,-lendef.size,ctempposinvalid,lendef.alignment,[]);
      resreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,lendef,resultdef,lenref,resreg);
      if widestr then
        hlcg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,resultdef,1,resreg);
      hlcg.a_jmp_always(current_asmdata.CurrAsmList,donelab);

      { nil case: constant result }
      hlcg.a_label(current_asmdata.CurrAsmList,nilptrlab);
      if is_dynamic_array(left.resultdef) then
        nilresult:=-1
      else
        nilresult:=0;
      hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,nilresult,resreg);

      hlcg.a_label(current_asmdata.CurrAsmList,donelab);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=resreg;
    end;
  { Generates the code for sqr() of a real: the argument is forced into the
    register class selected by first_sqr_real (expectloc), a result register
    of the same class is allocated and an fmul of the argument with itself
    is appended to the current assembler list. }
  procedure tllvminlinenode.second_sqr_real;
    var
      squareinstr: taillvm;
    begin
      secondpass(left);
      location.loc:=expectloc;
      if expectloc<>LOC_MMREGISTER then
        begin
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getmmregister(current_asmdata.CurrAsmList,resultdef);
        end;
      { result:=left*left }
      squareinstr:=
        taillvm.op_reg_size_reg_reg(la_fmul,
          location.register,resultdef,
          left.location.register,left.location.register
        );
      current_asmdata.CurrAsmList.concat(squareinstr);
    end;
  { Generates the code for trunc() of a real: the argument is forced into an
    MM or FPU register (depending on use_vectorfpu for its type), a register
    for the result type is allocated and an fptosi instruction converting
    the argument into it is appended to the current assembler list. }
  procedure tllvminlinenode.second_trunc_real;
    var
      convinstr: taillvm;
    begin
      secondpass(left);
      if not use_vectorfpu(left.resultdef) then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true)
      else
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getregisterfordef(current_asmdata.CurrAsmList,resultdef);
      convinstr:=
        taillvm.op_reg_size_reg_size(la_fptosi,
          location.register,left.resultdef,
          left.location.register,resultdef
        );
      current_asmdata.CurrAsmList.concat(convinstr);
    end;
  initialization
    { make tllvminlinenode the class used whenever an inline node is created
      through the cinlinenode class reference }
    cinlinenode:=tllvminlinenode;
  end.