ncpumat.pas 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. {
  2. Copyright (c) 1998-2002, 2014 by Florian Klaempfl and Jonas Maebe
  3. Generate AArch64 assembler for math nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit ncpumat;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nmat,ncgmat;
  22. type
  { AArch64 specific "div"/"mod" node: overrides the first pass and the
    code generation pass of tmoddivnode }
  taarch64moddivnode = class(tmoddivnode)
    { first pass; additionally flags the current procedure as containing a
      call when the inherited pass returns no replacement node }
    function pass_1: tnode; override;
    { emits the AArch64 instructions for the division/modulo }
    procedure pass_generate_code; override;
  end;
  { AArch64 specific "not" node: only the boolean variant is overridden }
  taarch64notnode = class(tcgnotnode)
    { emits the code for the boolean "not" }
    procedure second_boolean; override;
  end;
  { AArch64 specific unary minus node: only the floating point variant is
    overridden }
  taarch64unaryminusnode = class(tcgunaryminusnode)
    { emits the code for negating a floating point value }
    procedure second_float; override;
  end;
  33. implementation
  34. uses
  35. globtype,systems,constexp,
  36. cutils,verbose,globals,
  37. symconst,symdef,
  38. aasmbase,aasmcpu,aasmtai,aasmdata,
  39. defutil,
  40. cgbase,cgobj,hlcgobj,pass_2,procinfo,
  41. ncon,
  42. cpubase,
  43. ncgutil,cgcpu,cgutils;
  44. {*****************************************************************************
  45. taarch64moddivnode
  46. *****************************************************************************}
  { First pass of the div/mod node.

    Returns whatever the inherited pass_1 returns. When that is nil (no
    replacement node), the current procedure is flagged with pi_do_call,
    because pass_generate_code may emit a call to FPC_DIVBYZERO. }
  function taarch64moddivnode.pass_1: tnode;
    begin
      result:=inherited pass_1;
      { a replacement node was produced: nothing more to record here }
      if assigned(result) then
        exit;
      include(current_procinfo.flags,pi_do_call);
    end;
  { Generates the AArch64 code for an integer "div" or "mod" node.

    Outline:
      1. evaluate both operands and force the numerator into a register
      2. compute the quotient, either with a special sequence for selected
         constant dividers (see genOrdConstNodeDiv) or with SDIV/UDIV
      3. for a non-constant divider, emit a run time test against zero that
         calls FPC_DIVBYZERO
      4. with overflow checking enabled and signed operands, test for
         "most negative value div (-1)"
      5. for "mod", turn the quotient into the remainder with MSUB

    The result is returned in a freshly allocated integer register
    (location.register). }
  procedure taarch64moddivnode.pass_generate_code;
    var
      op         : tasmop;
      tmpreg,
      numerator,
      divider,
      resultreg  : tregister;
      hl         : tasmlabel;
      overflowloc: tlocation;
      power      : longint;
      minvalue   : int64;

    { Emits the quotient "numerator div <constant right operand>" into
      resultreg. Uses the numerator/resultreg/power variables of the
      enclosing procedure. }
    procedure genOrdConstNodeDiv;
      var
        helper1, helper2: TRegister;
        so: tshifterop;
        opsize: TCgSize;
      begin
        opsize:=def_cgsize(resultdef);
        if tordconstnode(right).value=0 then
          { a constant zero divider is not expected to reach this point }
          internalerror(2020021601)
        else if tordconstnode(right).value=1 then
          { x div 1 = x }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList, opsize, opsize, numerator, resultreg)
        else if (tordconstnode(right).value = int64(-1)) then
          begin
            // note: only in the signed case possible..., may overflow
            if cs_check_overflow in current_settings.localswitches then
              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            // x div -1 = -x; the postfix expression selects PF_S when overflow
            // checking is enabled and the postfix with ordinal 0 otherwise
            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,
              resultreg,numerator),toppostfix(ord(cs_check_overflow in current_settings.localswitches)*ord(PF_S))));
          end
        else if ispowerof2(tordconstnode(right).value,power) then
          begin
            if (is_signed(right.resultdef)) then
              begin
                { signed division by 2^power must round towards zero, so
                  (2^power)-1 is added to negative numerators before the
                  arithmetic shift right }
                helper2:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                if power = 1 then
                  { the sign bit itself, shifted down below, already is the
                    required correction of 1 resp. 0 }
                  helper1:=numerator
                else
                  begin
                    { helper1 := all ones for a negative numerator, else 0 }
                    helper1:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,resultdef.size*8-1,numerator,helper1);
                  end;
                { helper2 := numerator + (helper1 lsr (bitsize-power)) }
                shifterop_reset(so);
                so.shiftmode:=SM_LSR;
                so.shiftimm:=resultdef.size*8-power;
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,helper2,numerator,helper1,so));
                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,def_cgsize(resultdef),power,helper2,resultreg);
              end
            else
              { unsigned: a plain logical shift right }
              cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,opsize,power,numerator,resultreg)
          end
        else
          { Everything else is handled in the generic code }
          cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,opsize,
            tordconstnode(right).value.svalue,numerator,resultreg);
      end;

    begin
      secondpass(left);
      secondpass(right);
      { avoid warning }
      divider:=NR_NO;

      { set result location }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
      resultreg:=location.register;

      { put numerator in register }
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
      numerator:=left.location.register;

      if (right.nodetype=ordconstn) and
         ((tordconstnode(right).value=1) or
          (tordconstnode(right).value=int64(-1)) or
          (tordconstnode(right).value=0) or
          ispowerof2(tordconstnode(right).value,power)) then
        begin
          genOrdConstNodeDiv;
          { the MSUB at the end needs the divider in a register }
          if nodetype=modn then
            begin
              divider:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
              cg.a_load_const_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),int64(tordconstnode(right).value),divider);
            end;
        end
      else
        begin
          { load divider in a register }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
          divider:=right.location.register;

          { start division }
          if is_signed(left.resultdef) then
            op:=A_SDIV
          else
            op:=A_UDIV;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
        end;

      { no divide-by-zero detection available in hardware, emulate (if it's a
        constant, this will have been detected earlier already) }
      if (right.nodetype<>ordconstn) then
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
            right.location.register,0));
          current_asmdata.getjumplabel(hl);
          { skip the error call when the divider is not zero }
          current_asmdata.CurrAsmList.concat(taicpu.op_cond_sym(A_B,C_NE,hl));
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DIVBYZERO',false);
          cg.a_label(current_asmdata.CurrAsmList,hl);
        end;

      { in case of overflow checking, also check for "most negative value"
        div (-1) (no hardware support for this either) }
      if (cs_check_overflow in current_settings.localswitches) and
         is_signed(left.resultdef) and
         ((right.nodetype<>ordconstn) or
          (tordconstnode(right).value=-1)) then
        begin
          { overflow <=> num=8000... and div=ffff...
                     <=> ((num xor 8000...) or not(div)) = 0
            (and we have the "orn" operation, which performs "or not(...)").
            The previous sequence combined the numerator with itself, which
            always produced 7fff... and hence never signalled the overflow. }

          { sign bit pattern of the operand type, i.e. its most negative
            value; equals low(int64) for 8 byte operands }
          minvalue:=int64(-1) shl (left.resultdef.size*8-1);
          tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,left.resultdef);
          hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.resultdef,minvalue,left.location.register,tmpreg);
          { a constant divider can only be -1 at this point, so testing the
            numerator is sufficient; right.location.register is only used
            when the divider has been forced into a register above }
          if right.nodetype<>ordconstn then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ORN,
              tmpreg,tmpreg,right.location.register));
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,0));
          { now the zero/equal flag is set in case we divided the most
            negative value by (-1) }
          location_reset(overflowloc,LOC_FLAGS,OS_NO);
          overflowloc.resflags:=F_EQ;
          cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,location,resultdef,overflowloc);
        end;

      { in case of modulo, multiply result again by the divider and subtract
        from the numerator: result := numerator - result*divider }
      if nodetype=modn then
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(A_MSUB,resultreg,
          resultreg,divider,numerator));
    end;
  183. {*****************************************************************************
  184. taarch64notnode
  185. *****************************************************************************}
  { Generates the code for a boolean "not".

    After evaluating the operand, handle_locjump gets the first chance; if
    it reports success nothing else is emitted here. Otherwise:
      - an operand held in the flags is negated by inverting the flags
      - an operand in a register, reference or subset location is forced
        into a register and compared with 0; the result is the "equal"
        flag, i.e. true exactly when the operand was zero
      - any other location is an internal error }
  procedure taarch64notnode.second_boolean;
    begin
      secondpass(left);
      if handle_locjump then
        exit;

      if left.location.loc=LOC_FLAGS then
        begin
          { reuse the operand's location with the opposite condition }
          location_copy(location,left.location);
          inverse_flags(location.resflags);
        end
      else if left.location.loc in
           [LOC_REGISTER,LOC_CREGISTER,
            LOC_REFERENCE,LOC_CREFERENCE,
            LOC_SUBSETREG,LOC_CSUBSETREG,
            LOC_SUBSETREF,LOC_CSUBSETREF] then
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_const(A_CMP,left.location.register,0));
          location_reset(location,LOC_FLAGS,OS_NO);
          location.resflags:=F_EQ;
        end
      else
        internalerror(2003042401);
    end;
  213. {*****************************************************************************
  214. taarch64unaryminusnode
  215. *****************************************************************************}
  { Generates the code for the unary minus of a floating point value: the
    operand is forced into a scalar multimedia register, FNEG writes the
    negated value into a newly allocated multimedia register which becomes
    the node's location, and finally the code generator gets the chance to
    emit a floating point exception check. }
  procedure taarch64unaryminusnode.second_float;
    var
      list: TAsmList;
    begin
      secondpass(left);
      { all instructions below go to the current assembler list }
      list:=current_asmdata.CurrAsmList;
      hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(list,location.size);
      list.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
      cg.maybe_check_for_fpu_exception(list);
    end;
  { unit initialization: install the AArch64 node classes declared in this
    unit as the classes used for unary minus, "not" and div/mod nodes }
  begin
    cunaryminusnode:=taarch64unaryminusnode;
    cnotnode:=taarch64notnode;
    cmoddivnode:=taarch64moddivnode;
  end.