nllvminl.pas 11 KB


  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. Generate LLVM bytecode for inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nllvminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,
  22. ncginl;
  23. type
  { Inline node for the LLVM code generator. It overrides a number of the
    first-pass and code generation hooks of tcginlinenode (get_frame, abs,
    sqr, sqrt, trunc and length). }
  tllvminlinenode = class(tcginlinenode)
   protected
    { drops a non-explicit type conversion around the argument when the
      converted operand is itself a floating point value }
    procedure maybe_remove_round_trunc_typeconv;

    { first pass hooks: either return a replacement node tree or return
      nil after selecting expectloc }
    function first_get_frame: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_trunc_real: tnode; override;
   public
    { code generation hooks }
    procedure second_length; override;
    procedure second_sqr_real; override;
    procedure second_trunc_real; override;
  end;
  37. implementation
  38. uses
  39. verbose,globals,globtype,constexp,
  40. aasmbase, aasmdata,
  41. symconst,symtype,symdef,defutil,
  42. nutils,nadd,nbas,ncal,ncnv,ncon,nflw,ninl,nld,nmat,
  43. pass_2,
  44. cgbase,cgutils,tgobj,hlcgobj,
  45. cpubase,
  46. llvmbase,aasmllvm;
  { Removes a non-explicit type conversion wrapped around the argument of
    trunc()/round() when the converted operand is a floating point value.

    Background: the prototype of trunc()/round() in the system unit is
    declared with valreal as parameter type, so the argument always gets
    converted to extended. This is not undone in ninl, because other code
    generators assume that the parameter to trunc has been converted to
    valreal (e.g. PowerPC).
    (copy of the code in nx64inl, should be refactored) }
  procedure tllvminlinenode.maybe_remove_round_trunc_typeconv;
    var
      innerarg: tnode;
    begin
      { only type conversion nodes are candidates }
      if left.nodetype<>typeconvn then
        exit;
      { conversions written explicitly by the programmer are kept }
      if nf_explicit in left.flags then
        exit;
      { the converted operand must be a float itself }
      if ttypeconvnode(left).left.resultdef.typ<>floatdef then
        exit;

      { get rid of the type conversion, so the use_vectorfpu check will be
        applied to the original type; detach the operand first so that
        freeing the conversion node does not free the operand as well }
      innerarg:=ttypeconvnode(left).left;
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=innerarg;
    end;
  { Replaces get_frame by a call to the internal helper
    'llvm_frameaddress', passing the integer constant 0 as its only
    parameter. }
  function tllvminlinenode.first_get_frame: tnode;
    var
      frameargs: tnode;
    begin
      frameargs:=ccallparanode.create(genintconstnode(0),nil);
      result:=ccallnode.createintern('llvm_frameaddress',frameargs);
    end;
  { In general, generate regular code (plain expressions and control flow)
    rather than intrinsics: according to LLVM's "Performance Tips for
    Frontend Authors", "The optimizer is quite good at reasoning about
    general control flow and arithmetic, it is not anywhere near as strong
    at reasoning about the various intrinsics. If profitable for code
    generation purposes, the optimizer will likely form the intrinsics
    itself late in the optimization pipeline." }
  { Rewrites abs(<real>) into a statement block equivalent to

      argtemp:=left;
      if argtemp>=0 then
        abstemp:=argtemp
      else
        abstemp:=-argtemp;
      <value of abstemp>

    and returns that block. Ownership of "left" moves into the new tree. }
  function tllvminlinenode.first_abs_real: tnode;
    var
      argtemp,
      abstemp: ttempcreatenode;
      stmts: tstatementnode;
      nonnegative,
      keepvalue,
      negatevalue: tnode;
    begin
      result:=internalstatements(stmts);

      argtemp:=ctempcreatenode.create(left.resultdef,left.resultdef.size,tt_persistent,true);
      { assigned twice -> will be spilled if put in register }
      abstemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stmts,argtemp);
      addstatement(stmts,abstemp);

      { argtemp:=left }
      addstatement(stmts,
        cassignmentnode.create(ctemprefnode.create(argtemp),left));

      { argtemp>=0.0 }
      nonnegative:=caddnode.create(gten,
        ctemprefnode.create(argtemp),
        crealconstnode.create(0.0,left.resultdef));
      { abstemp:=argtemp }
      keepvalue:=cassignmentnode.create(
        ctemprefnode.create(abstemp),
        ctemprefnode.create(argtemp));
      { abstemp:=-argtemp }
      negatevalue:=cassignmentnode.create(
        ctemprefnode.create(abstemp),
        cunaryminusnode.create(ctemprefnode.create(argtemp)));
      addstatement(stmts,cifnode.create(nonnegative,keepvalue,negatevalue));

      addstatement(stmts,ctempdeletenode.create(argtemp));
      addstatement(stmts,ctempdeletenode.create_normal_temp(abstemp));
      { the value of the block is abstemp }
      addstatement(stmts,ctemprefnode.create(abstemp));

      { the original argument node is now part of the new tree }
      left:=nil;
    end;
  { First pass of sqr(<real>): the node is kept as is (nil is returned)
    and only the expected result location is chosen, based on
    use_vectorfpu for the argument type. }
  function tllvminlinenode.first_sqr_real: tnode;
    begin
      expectloc:=LOC_FPUREGISTER;
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_MMREGISTER;
      result:=nil;
    end;
  { Replaces sqrt(<real>) by a call to the routine llvm_sqrt_f32/f64/f80/
    f128 from the SYSTEM unit, selected by the float type of the argument.
    Raises an internal error if the argument is not a floatdef or has a
    float type without a matching routine. Ownership of "left" moves into
    the call node. }
  function tllvminlinenode.first_sqrt_real: tnode;
    var
      helpername: string[20];
      argdef: tdef;
    begin
      argdef:=left.resultdef;
      if argdef.typ<>floatdef then
        internalerror(2018121601);

      case tfloatdef(argdef).floattype of
        s32real:
          helpername:='llvm_sqrt_f32';
        s64real:
          helpername:='llvm_sqrt_f64';
        s80real,
        sc80real:
          helpername:='llvm_sqrt_f80';
        s128real:
          helpername:='llvm_sqrt_f128';
        else
          internalerror(2018121602);
      end;

      result:=ccallnode.createinternfromunit('SYSTEM',helpername,
        ccallparanode.create(left,nil));
      { the argument now belongs to the call node }
      left:=nil;
    end;
  { First pass of trunc(<real>).

    fptosi is undefined if the value is out of range, so the LLVM
    specific handling is only selected in case of fastmath; otherwise the
    inherited implementation decides. }
  function tllvminlinenode.first_trunc_real: tnode;
    begin
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          result:=inherited first_trunc_real;
          exit;
        end;

      { fastmath: keep the node and convert in second_trunc_real }
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_REGISTER;
      result:=nil;
    end;
  { Generates code for length().

    Shortstrings: the result is a reference to the start of the string,
    retyped to the result type (the length byte).

    Other types: the value is loaded from just in front of the data the
    pointer refers to, guarded by a nil check that yields 0, and returned
    in an integer register. }
  procedure tllvminlinenode.second_length;
    var
      asmlist: TAsmList;
      donelab, emptylab: tasmlabel;
      lenreg, resultreg: tregister;
      lenref, resultslot: treference;
      lendef, lenptrdef, resptrdef: tdef;
    begin
      secondpass(left);
      asmlist:=current_asmdata.CurrAsmList;

      if is_shortstring(left.resultdef) then
        begin
          if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
            internalerror(2014080806);
          { typecast the shortstring reference into a length byte reference }
          location_reset_ref(location,left.location.loc,def_cgsize(resultdef),
            left.location.reference.alignment,left.location.reference.volatility);
          resptrdef:=cpointerdef.getreusable(resultdef);
          lenreg:=hlcg.getaddressregister(asmlist,resptrdef);
          hlcg.a_loadaddr_ref_reg(asmlist,left.resultdef,resptrdef,
            left.location.reference,lenreg);
          hlcg.reference_reset_base(location.reference,resptrdef,lenreg,0,
            left.location.reference.temppos,
            left.location.reference.alignment,
            left.location.reference.volatility);
          exit;
        end;

      { memory slot in which both paths (nil / non-nil) store the result }
      tg.gethltemp(asmlist,resultdef,resultdef.size,tt_normal,resultslot);

      { length in ansi/wide strings and high in dynamic arrays is at offset
        -sizeof(sizeint), for widestrings it's at -4 }
      if is_widestring(left.resultdef) then
        lendef:=u32inttype
      else
        lendef:=ossinttype;
      lenptrdef:=cpointerdef.getreusable(lendef);

      hlcg.location_force_reg(asmlist,left.location,left.resultdef,lenptrdef,true);
      current_asmdata.getjumplabel(emptylab);
      current_asmdata.getjumplabel(donelab);
      { nil pointer -> result is 0 }
      hlcg.a_cmp_const_reg_label(asmlist,lenptrdef,OC_EQ,0,left.location.register,emptylab);

      { volatility of the ansistring/widestring refers to the volatility of
        the string pointer, not of the string data }
      hlcg.reference_reset_base(lenref,lenptrdef,left.location.register,
        -lendef.size,ctempposinvalid,lendef.alignment,[]);
      lenreg:=hlcg.getintregister(asmlist,resultdef);
      hlcg.a_load_ref_reg(asmlist,lendef,resultdef,lenref,lenreg);
      { widestrings: halve the loaded value (presumably it is stored as a
        byte count - confirm) }
      if is_widestring(left.resultdef) then
        hlcg.a_op_const_reg(asmlist,OP_SHR,resultdef,1,lenreg);
      { Dynamic arrays do not have their length attached but their maximum
        index }
      if is_dynamic_array(left.resultdef) then
        hlcg.a_op_const_reg(asmlist,OP_ADD,resultdef,1,lenreg);
      hlcg.a_load_reg_ref(asmlist,resultdef,resultdef,lenreg,resultslot);
      hlcg.a_jmp_always(asmlist,donelab);

      { nil case }
      hlcg.a_label(asmlist,emptylab);
      hlcg.a_load_const_ref(asmlist,resultdef,0,resultslot);

      { both paths join here: reload the result into a fresh register }
      hlcg.a_label(asmlist,donelab);
      resultreg:=hlcg.getintregister(asmlist,resultdef);
      hlcg.a_load_ref_reg(asmlist,resultdef,resultdef,resultslot,resultreg);
      tg.ungettemp(asmlist,resultslot);

      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=resultreg;
    end;
  { Generates code for sqr(<real>): forces the argument into the register
    kind selected by first_sqr_real (expectloc) and emits an LLVM fmul of
    that register with itself into a newly allocated result register.

    The result location is initialised with location_reset, the same way
    second_trunc_real does, so that location.size is set according to the
    result type instead of keeping whatever value it had before (the
    previous code only assigned location.loc). }
  procedure tllvminlinenode.second_sqr_real;
    begin
      secondpass(left);
      { expectloc is LOC_MMREGISTER or LOC_FPUREGISTER, see first_sqr_real }
      location_reset(location,expectloc,def_cgsize(resultdef));
      if expectloc=LOC_MMREGISTER then
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getmmregister(current_asmdata.CurrAsmList,resultdef);
        end
      else
        begin
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
        end;
      { result := left * left }
      current_asmdata.CurrAsmList.concat(
        taillvm.op_reg_size_reg_reg(la_fmul,
          location.register,resultdef,
          left.location.register,left.location.register
        )
      );
    end;
  { Generates code for trunc(<real>): forces the argument into an MM or
    FPU register (depending on use_vectorfpu for its type) and emits an
    LLVM fptosi from the argument type to the result type into a newly
    allocated register. }
  procedure tllvminlinenode.second_trunc_real;
    var
      asmlist: TAsmList;
    begin
      secondpass(left);
      asmlist:=current_asmdata.CurrAsmList;

      if not use_vectorfpu(left.resultdef) then
        hlcg.location_force_fpureg(asmlist,left.location,left.resultdef,true)
      else
        hlcg.location_force_mmregscalar(asmlist,left.location,left.resultdef,true);

      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getregisterfordef(asmlist,resultdef);

      { result := fptosi <argument type> left to <result type> }
      asmlist.concat(
        taillvm.op_reg_size_reg_size(la_fptosi,
          location.register,left.resultdef,
          left.location.register,resultdef));
    end;
  begin
    { unit initialisation: point the inline node class reference at the
      LLVM-specific class (presumably the class that is instantiated
      whenever an inline node is created - confirm in ninl) }
    cinlinenode:=tllvminlinenode;
  end.