nllvminl.pas 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. Generate LLVM bytecode for inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nllvminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,
  22. ncginl;
  23. type
  { inline node class for the LLVM code generator: a tcginlinenode descendant
    that overrides a number of first pass and second pass handlers }
  tllvminlinenode = class(tcginlinenode)
   protected
    { strips an implicit type conversion whose operand is a floating point
      value from "left" (used by the trunc handling in this unit) }
    procedure maybe_remove_round_trunc_typeconv;

    { first pass handlers: either return a replacement node tree or return
      nil and set expectloc }
    function first_get_frame: tnode; override;
    function first_abs_real: tnode; override;
    function first_bitscan: tnode; override;
    function first_fma: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_trunc_real: tnode; override;
   public
    { second pass handlers: emit the actual code }
    procedure second_length; override;
    procedure second_sqr_real; override;
    procedure second_trunc_real; override;
  end;
  39. implementation
  40. uses
  41. verbose,globals,globtype,constexp,
  42. aasmbase, aasmdata,
  43. symconst,symtype,symdef,defutil,
  44. compinnr,
  45. nutils,nadd,nbas,ncal,ncnv,ncon,nflw,ninl,nld,nmat,
  46. pass_2,
  47. cgbase,cgutils,tgobj,hlcgobj,
  48. cpubase,
  49. llvmbase,aasmllvm;
  { Removes an implicit type conversion node from "left" when the converted
    operand is itself a floating point value; does nothing otherwise. }
  procedure tllvminlinenode.maybe_remove_round_trunc_typeconv;
    var
      convnode: ttypeconvnode;
    begin
      { the prototype of trunc()/round() in the system unit is declared
        with valreal as parameter type, so the argument will always be
        extended -> remove the typeconversion to extended if any; not done
        in ninl, because there are other code generators that assume that
        the parameter to trunc has been converted to valreal (e.g. PowerPC).

        (copy from code in nx64inl, should be refactored)
      }
      if left.nodetype<>typeconvn then
        exit;
      { explicit conversions requested by the programmer are left alone }
      if nf_explicit in left.flags then
        exit;
      convnode:=ttypeconvnode(left);
      if convnode.left.resultdef.typ<>floatdef then
        exit;
      { get rid of the type conversion, so the use_vectorfpu will be
        applied to the original type }
      left:=convnode.left;
      { detach the operand first so that freeing the conversion node does not
        free the operand along with it }
      convnode.left:=nil;
      convnode.free;
    end;
  { Replaces the get_frame inline node by a call to the 'llvm_frameaddress'
    helper with the integer constant 0 as its only argument. }
  function tllvminlinenode.first_get_frame: tnode;
    var
      levelpara: tnode;
    begin
      { NOTE(review): 0 presumably selects the frame of the current routine
        (as for LLVM's llvm.frameaddress intrinsic) -- confirm against the
        declaration of llvm_frameaddress }
      levelpara:=ccallparanode.create(genintconstnode(0),nil);
      result:=ccallnode.createintern('llvm_frameaddress',levelpara);
    end;
  { in general, generate regular expressions (i.e. plain node trees) rather
    than intrinsics: according to the "Performance Tips for Frontend
    Authors", "The optimizer is quite good at reasoning about general control
    flow and arithmetic, it is not anywhere near as strong at reasoning about
    the various intrinsics. If profitable for code generation purposes, the
    optimizer will likely form the intrinsics itself late in the optimization
    pipeline." }
  { Expands abs(<real>) into a statement block equivalent to

      argtemp:=left;
      if argtemp>=0 then
        abstemp:=argtemp
      else
        abstemp:=-argtemp;
      <value of abstemp>

    and returns that block; "left" is moved into the block.

    NOTE(review): with this expansion a negative zero argument takes the
    ">=0" branch and is returned unchanged (sign bit kept) -- confirm that
    this is acceptable. }
  function tllvminlinenode.first_abs_real: tnode;
    var
      argdef: tdef;
      argtemp,
      abstemp: ttempcreatenode;
      stat: tstatementnode;
      nonnegative,
      keepvalue,
      negatevalue: tnode;
    begin
      argdef:=left.resultdef;
      result:=internalstatements(stat);

      argtemp:=ctempcreatenode.create(argdef,argdef.size,tt_persistent,true);
      { assigned twice -> will be spilled if put in register }
      abstemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stat,argtemp);
      addstatement(stat,abstemp);

      { argtemp:=left }
      addstatement(stat,
        cassignmentnode.create(ctemprefnode.create(argtemp),left)
      );

      { argtemp>=0 }
      nonnegative:=
        caddnode.create(gten,
          ctemprefnode.create(argtemp),
          crealconstnode.create(0.0,argdef)
        );
      { abstemp:=argtemp }
      keepvalue:=
        cassignmentnode.create(
          ctemprefnode.create(abstemp),
          ctemprefnode.create(argtemp)
        );
      { abstemp:=-argtemp }
      negatevalue:=
        cassignmentnode.create(
          ctemprefnode.create(abstemp),
          cunaryminusnode.create(ctemprefnode.create(argtemp))
        );
      addstatement(stat,cifnode.create(nonnegative,keepvalue,negatevalue));

      addstatement(stat,ctempdeletenode.create(argtemp));
      addstatement(stat,ctempdeletenode.create_normal_temp(abstemp));
      { the value of the block is abstemp }
      addstatement(stat,ctemprefnode.create(abstemp));
      { left has been reused in the new tree }
      left:=nil;
    end;
  { Expands a bit scan inline node into a statement block equivalent to

      if left<>0 then
        restemp:=llvm_ctlz/cttz(unsigned(left),true)
      else
        restemp:=255;
      <value of restemp>

    LLVM_CTLZ is used for in_bsr_x (with the count converted into a bit
    number), LLVM_CTTZ for every other inline number handled here. "left" is
    moved into the returned block. }
  function tllvminlinenode.first_bitscan: tnode;
    var
      argdef: tdef;
      restemp,
      argtemp: ttempcreatenode;
      stat: tstatementnode;
      block: tblocknode;
      unsignedarg,
      flagarg,
      bitcount,
      nonzero,
      storecount,
      storefallback: tnode;
      procname: string[15];
      reverse: boolean;
    begin
      reverse:=inlinenumber=in_bsr_x;
      if reverse then
        procname:='LLVM_CTLZ'
      else
        procname:='LLVM_CTTZ';
      argdef:=left.resultdef;

      block:=internalstatements(stat);
      restemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stat,restemp);
      { left is used twice below (call argument and zero test);
        maybereplacewithtemp may return nil, in which case there is no temp
        to delete afterwards }
      argtemp:=maybereplacewithtemp(left,block,stat,left.resultdef.size,true);

      { llvm_ctlz/cttz(unsigned(left),true) }
      unsignedarg:=ctypeconvnode.create_explicit(left,get_unsigned_inttype(argdef));
      { NOTE(review): presumably the "result is undefined for zero" flag of
        the LLVM intrinsic, which is why zero is handled separately -- confirm }
      flagarg:=cordconstnode.create(1,llvmbool1type,false);
      bitcount:=
        ccallnode.createintern(
          procname,
          ccallparanode.create(flagarg,
            ccallparanode.create(unsignedarg,nil)
          )
        );
      { ctlz returns the number of leading zero bits, while bsr returns the bit
        number of the first non-zero bit (with the least significant bit as 0)
        -> invert result }
      if reverse then
        bitcount:=
          caddnode.create(xorn,
            bitcount,
            genintconstnode(argdef.size*8-1)
          );

      { left<>0 }
      nonzero:=caddnode.create(unequaln,left.getcopy,genintconstnode(0));
      storecount:=
        cassignmentnode.create(ctemprefnode.create(restemp),bitcount);
      storefallback:=
        cassignmentnode.create(ctemprefnode.create(restemp),genintconstnode(255));
      addstatement(stat,cifnode.create(nonzero,storecount,storefallback));

      if assigned(argtemp) then
        addstatement(stat,ctempdeletenode.create(argtemp));
      addstatement(stat,ctempdeletenode.create_normal_temp(restemp));
      { the value of the block is restemp }
      addstatement(stat,ctemprefnode.create(restemp));
      { left has been reused in the new tree }
      left:=nil;
      result:=block;
    end;
  { Replaces a fused multiply-add inline node by a call to the llvm_fma_*
    helper matching the inline number; raises internal error 2018122101 for
    any other inline number. "left" is handed over unchanged as the parameter
    list of the call. }
  function tllvminlinenode.first_fma: tnode;
    var
      intrinsic: string[15];
    begin
      case inlinenumber of
        in_fma_single:
          intrinsic:='llvm_fma_f32';
        in_fma_double:
          intrinsic:='llvm_fma_f64';
        in_fma_extended:
          intrinsic:='llvm_fma_f80';
        in_fma_float128:
          intrinsic:='llvm_fma_f128';
        else
          internalerror(2018122101);
      end;
      { NOTE(review): left is presumably already a call parameter chain, since
        it is not wrapped in a ccallparanode here -- confirm }
      result:=ccallnode.createintern(intrinsic,left);
      { left has been reused in the call node }
      left:=nil;
    end;
  { First pass for sqr(<real>): keeps the node (returns nil) and only selects
    the expected location, an MM register when use_vectorfpu holds for the
    argument type and an FPU register otherwise. }
  function tllvminlinenode.first_sqr_real: tnode;
    begin
      result:=nil;
      expectloc:=LOC_FPUREGISTER;
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_MMREGISTER;
    end;
  { Replaces sqrt(<real>) by a call to the llvm_sqrt_* helper that matches the
    floating point type of the argument. Raises internal error 2018121601 if
    the argument is not a floatdef and 2018121602 for an unhandled float
    type. "left" is moved into the call. }
  function tllvminlinenode.first_sqrt_real: tnode;
    var
      procname: string[20];
      argpara: tnode;
    begin
      if left.resultdef.typ<>floatdef then
        internalerror(2018121601);
      case tfloatdef(left.resultdef).floattype of
        s32real:
          procname:='llvm_sqrt_f32';
        s64real:
          procname:='llvm_sqrt_f64';
        s80real,sc80real:
          procname:='llvm_sqrt_f80';
        s128real:
          procname:='llvm_sqrt_f128';
        else
          internalerror(2018121602);
      end;
      argpara:=ccallparanode.create(left,nil);
      result:=ccallnode.createintern(procname,argpara);
      { left has been reused in the call node }
      left:=nil;
    end;
  { First pass for trunc(<real>): with the fastmath optimizer switch enabled
    the node is kept and handled by second_trunc_real; otherwise the
    inherited first pass handler is used. }
  function tllvminlinenode.first_trunc_real: tnode;
    begin
      { fptosi is undefined if the value is out of range -> only generate
        in case of fastmath }
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          result:=inherited;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_REGISTER;
      result:=nil;
    end;
  { Second pass for length():
     - shortstrings: the result is a memory location that reuses the address
       of the string itself, retyped as a pointer to the result type
     - everything else: the value stored directly in front of the data is
       loaded when the pointer is not nil, 0 is used when it is nil; the
       result ends up in an integer register }
  procedure tllvminlinenode.second_length;
    var
      donelab, nillab: tasmlabel;
      valreg: tregister;
      lenref, resulttemp: treference;
      lendef,
      lenptrdef,
      resptrdef: tdef;
    begin
      secondpass(left);

      if is_shortstring(left.resultdef) then
        begin
          if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
            internalerror(2014080806);
          resptrdef:=cpointerdef.getreusable(resultdef);
          { typecast the shortstring reference into a length byte reference }
          location_reset_ref(location,left.location.loc,def_cgsize(resultdef),left.location.reference.alignment,left.location.reference.volatility);
          valreg:=hlcg.getaddressregister(current_asmdata.CurrAsmList,resptrdef);
          hlcg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.resultdef,resptrdef,left.location.reference,valreg);
          hlcg.reference_reset_base(location.reference,resptrdef,valreg,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
          exit;
        end;

      { the result is written on two different paths (nil and non-nil), so it
        goes through a temp that is loaded into a register at the end }
      tg.gethltemp(current_asmdata.CurrAsmList,resultdef,resultdef.size,tt_normal,resulttemp);
      { the length of ansi/unicode strings and the high bound of dynamic
        arrays are ossinttype values located at offset -sizeof(ossinttype);
        for widestrings a u32inttype value at offset -4 is used instead }
      if is_widestring(left.resultdef) then
        lendef:=u32inttype
      else
        lendef:=ossinttype;
      lenptrdef:=cpointerdef.getreusable(lendef);
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
        left.resultdef,lenptrdef,true);
      current_asmdata.getjumplabel(nillab);
      current_asmdata.getjumplabel(donelab);
      { nil pointer -> length 0 }
      hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,lenptrdef,OC_EQ,0,left.location.register,nillab);
      { volatility of the ansistring/widestring refers to the volatility of the
        string pointer, not of the string data }
      hlcg.reference_reset_base(lenref,lenptrdef,left.location.register,-lendef.size,ctempposinvalid,lendef.alignment,[]);
      valreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,lendef,resultdef,lenref,valreg);
      { NOTE(review): the stored widestring value is halved, presumably
        because it is a size in bytes rather than in characters -- confirm }
      if is_widestring(left.resultdef) then
        hlcg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,resultdef,1,valreg);
      { Dynamic arrays do not have their length attached but their maximum index }
      if is_dynamic_array(left.resultdef) then
        hlcg.a_op_const_reg(current_asmdata.CurrAsmList,OP_ADD,resultdef,1,valreg);
      hlcg.a_load_reg_ref(current_asmdata.CurrAsmList,resultdef,resultdef,valreg,resulttemp);
      hlcg.a_jmp_always(current_asmdata.CurrAsmList,donelab);

      hlcg.a_label(current_asmdata.CurrAsmList,nillab);
      hlcg.a_load_const_ref(current_asmdata.CurrAsmList,resultdef,0,resulttemp);

      hlcg.a_label(current_asmdata.CurrAsmList,donelab);
      { both paths join here: fetch the result from the temp into a fresh
        register and release the temp }
      valreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,resultdef,resultdef,resulttemp,valreg);
      tg.ungettemp(current_asmdata.CurrAsmList,resulttemp);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=valreg;
    end;
  { Second pass for sqr(<real>): forces the operand into an MM or FPU
    register (depending on the expectloc chosen in the first pass), allocates
    a result register of the same kind and emits an fmul of the operand with
    itself.

    NOTE(review): only location.loc and location.register are assigned here;
    unlike the other second pass handlers in this unit there is no
    location_reset call -- confirm that the remaining location fields are
    already initialised by the caller. }
  procedure tllvminlinenode.second_sqr_real;
    var
      squareinstr: taillvm;
    begin
      secondpass(left);
      location.loc:=expectloc;
      if expectloc<>LOC_MMREGISTER then
        begin
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getmmregister(current_asmdata.CurrAsmList,resultdef);
        end;
      { result:=left*left }
      squareinstr:=
        taillvm.op_reg_size_reg_reg(la_fmul,
          location.register,resultdef,
          left.location.register,left.location.register
        );
      current_asmdata.CurrAsmList.concat(squareinstr);
    end;
  { Second pass for trunc(<real>): forces the operand into an MM or FPU
    register (according to use_vectorfpu for its type) and converts it with
    an fptosi instruction into a newly allocated register of the result
    type. first_trunc_real in this unit only keeps the node when fastmath is
    enabled. }
  procedure tllvminlinenode.second_trunc_real;
    var
      convinstr: taillvm;
    begin
      secondpass(left);
      if not use_vectorfpu(left.resultdef) then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true)
      else
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getregisterfordef(current_asmdata.CurrAsmList,resultdef);
      { result:=fptosi <left type> left to <result type> }
      convinstr:=
        taillvm.op_reg_size_reg_size(la_fptosi,
          location.register,left.resultdef,
          left.location.register,resultdef
        );
      current_asmdata.CurrAsmList.concat(convinstr);
    end;
  { unit initialization: install tllvminlinenode as the class used to create
    inline nodes }
  begin
    cinlinenode:=tllvminlinenode;
  end.