ncpumat.pas 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. {
  2. Copyright (c) 1998-2002, 2014 by Florian Klaempfl and Jonas Maebe
  3. Generate AArch64 assembler for math nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit ncpumat;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nmat,ncgmat;
  22. type
  { AArch64 specialisation of the integer div/mod node.
    pass_1 additionally marks the current procedure as performing a call,
    and pass_generate_code emits the division/modulo instruction sequence
    together with the emulated divide-by-zero and overflow checks. }
  taarch64moddivnode = class(tmoddivnode)
    { first pass; sets pi_do_call when the inherited pass_1 returns nil }
    function pass_1: tnode; override;
    { emits the machine code for the div/mod operation }
    procedure pass_generate_code;override;
  end;
  { AArch64 specialisation of the "not" node; only the boolean variant is
    overridden in this unit. }
  taarch64notnode = class(tcgnotnode)
    { boolean "not": yields its result as a flags location }
    procedure second_boolean;override;
  end;
  { AArch64 specialisation of the unary minus node; only the floating point
    variant is overridden in this unit. }
  taarch64unaryminusnode = class(tcgunaryminusnode)
    { floating point negation, implemented with the FNEG instruction }
    procedure second_float; override;
  end;
  33. implementation
  34. uses
  35. globtype,systems,constexp,
  36. cutils,verbose,globals,
  37. symconst,symdef,
  38. aasmbase,aasmcpu,aasmtai,aasmdata,
  39. defutil,
  40. cgbase,cgobj,hlcgobj,pass_2,procinfo,
  41. ncon,
  42. cpubase,
  43. ncgutil,cgcpu,cgutils;
  44. {*****************************************************************************
  45. taarch64moddivnode
  46. *****************************************************************************}
  { First pass for div/mod nodes.
    Returns whatever the inherited pass_1 returns. When that is nil (the node
    was not replaced by another one), the current procedure is flagged with
    pi_do_call: pass_generate_code of this class can emit a call to
    FPC_DIVBYZERO. }
  function taarch64moddivnode.pass_1: tnode;
    begin
      result:=inherited pass_1;
      { a replacement node was produced: nothing to flag for this node }
      if assigned(result) then
        exit;
      include(current_procinfo.flags,pi_do_call);
    end;
  { Generates the AArch64 code for an integer "div" or "mod" node.

    Steps, in emission order:
      1. evaluate both operands and force the numerator into a register;
      2. compute the quotient into the result register, either through
         genOrdConstNodeDiv (divisor is the constant 0, 1, -1 or a power of
         two) or with SDIV/UDIV;
      3. for a non-constant divisor, emit an explicit compare against zero
         and a call to FPC_DIVBYZERO;
      4. with overflow checking enabled and a signed numerator, emit a check
         for low(int64) div (-1);
      5. for "mod", turn the quotient into the remainder with MSUB.

    The result is left in location (LOC_REGISTER). }
  procedure taarch64moddivnode.pass_generate_code;
    var
      op         : tasmop;
      tmpreg,
      invdivreg,
      numerator,
      divider,
      resultreg  : tregister;
      hl : tasmlabel;
      overflowloc: tlocation;
      power: longint;

    { Emits the quotient numerator div <constant right operand> into
      resultreg without using a division instruction.
      Reads numerator/resultreg of the enclosing procedure and (re)computes
      power through ispowerof2. A constant divisor of 0 is an internal
      error. }
    procedure genOrdConstNodeDiv;
      var
        helper1, helper2: TRegister;
        so: tshifterop;
      begin
        if tordconstnode(right).value=0 then
          internalerror(2020021601)
        else if tordconstnode(right).value=1 then
          { x div 1 = x }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, numerator, resultreg)
        else if (tordconstnode(right).value = int64(-1)) then
          begin
            // note: only in the signed case possible..., may overflow
            if cs_check_overflow in current_settings.localswitches then
              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            { x div -1 = -x; the postfix expression evaluates to PF_S when
              overflow checking is on and to ordinal 0 otherwise }
            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,
              resultreg,numerator),toppostfix(ord(cs_check_overflow in current_settings.localswitches)*ord(PF_S))));
          end
        else if ispowerof2(tordconstnode(right).value,power) then
          begin
            if (is_signed(right.resultdef)) then
              begin
                 { signed division by 2^power must round towards zero:
                   add (2^power)-1 to negative numerators before the
                   arithmetic shift right }
                 helper2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
                 if power = 1 then
                   { the bias is just the sign bit: numerator lsr 63 }
                   helper1:=numerator
                 else
                   begin
                     { helper1 := all ones for a negative numerator, else 0 }
                     helper1:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
                     cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,63,numerator,helper1);
                   end;
                 { helper2 := numerator + (helper1 lsr (64-power)) }
                 shifterop_reset(so);
                 so.shiftmode:=SM_LSR;
                 so.shiftimm:=64-power;
                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,helper2,numerator,helper1,so));
                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,power,helper2,resultreg);
              end
            else
              { unsigned: a plain logical shift right }
              cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
          end
        else
          { Everything else is handled in the generic code.
            NOTE(review): the only caller in this procedure filters on
            0/1/-1/power-of-two, so this branch looks unreachable from
            there; kept as a safety net. }
          cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
            tordconstnode(right).value.svalue,numerator,resultreg);
      end;

    begin
      secondpass(left);
      secondpass(right);
      { avoid warning }
      divider:=NR_NO;

      { set result location }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
      resultreg:=location.register;

      { put numerator in register }
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
      numerator:=left.location.register;

      if (right.nodetype=ordconstn) and
         ((tordconstnode(right).value=1) or
          (tordconstnode(right).value=int64(-1)) or
          (tordconstnode(right).value=0) or
          ispowerof2(tordconstnode(right).value,power)) then
        begin
          genOrdConstNodeDiv;
          if nodetype=modn then
            begin
              { the MSUB at the end needs the divisor in a register }
              divider:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
              cg.a_load_const_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),int64(tordconstnode(right).value),divider);
            end;
        end
      else
        begin
          { load divider in a register }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
          divider:=right.location.register;

          { start division }
          if is_signed(left.resultdef) then
            op:=A_SDIV
          else
            op:=A_UDIV;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
        end;

      { no divide-by-zero detection available in hardware, emulate (if it's a
        constant, this will have been detected earlier already) }
      if (right.nodetype<>ordconstn) then
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
            right.location.register,0));
          current_asmdata.getjumplabel(hl);
          { skip the runtime error call when the divisor is non-zero }
          current_asmdata.CurrAsmList.concat(taicpu.op_cond_sym(A_B,C_NE,hl));
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DIVBYZERO',false);
          cg.a_label(current_asmdata.CurrAsmList,hl);
        end;

      { in case of overflow checking, also check for low(int64) div (-1)
        (no hardware support for this either) }
      if (cs_check_overflow in current_settings.localswitches) and
         is_signed(left.resultdef) and
         ((right.nodetype<>ordconstn) or
          (tordconstnode(right).value=-1)) then
        begin
          { overflow <=> num=8000... and div=ffff...
                     <=> (num xor 8000...) or not(div) = 0

            The previous sequence computed "num xor not(num xor 8000...)",
            which is the constant 7fff... for every input, so the check
            could never trigger.

            NOTE(review): the low(int64) constant assumes 64 bit operands;
            confirm that narrower operands cannot reach this point. }
          tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,left.resultdef);
          { tmpreg = 0 <=> num = low(int64) }
          hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.resultdef,low(int64),numerator,tmpreg);
          if right.nodetype<>ordconstn then
            begin
              { variable divisor: additionally require div = -1, i.e.
                not(div) = 0. For the constant case the enclosing condition
                already guarantees div = -1 and no divisor register may
                exist. }
              invdivreg:=hlcg.getintregister(current_asmdata.CurrAsmList,left.resultdef);
              hlcg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,left.resultdef,divider,invdivreg);
              hlcg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_OR,left.resultdef,invdivreg,tmpreg,tmpreg);
            end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,0));
          { now the zero/equal flag is set in case we divided low(int64) by
            (-1) }
          location_reset(overflowloc,LOC_FLAGS,OS_NO);
          overflowloc.resflags:=F_EQ;
          cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,location,resultdef,overflowloc);
        end;

      { in case of modulo, multiply result again by the divider and subtract
        from the numerator }
      if nodetype=modn then
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(A_MSUB,resultreg,
          resultreg,divider,numerator));
    end;
  181. {*****************************************************************************
  182. taarch64notnode
  183. *****************************************************************************}
  { Generates code for a boolean "not".
    After evaluating the operand, handle_locjump gets the first chance to
    deal with the node; when it returns false the result is produced as a
    flags location:
      - operand already in flags: copy the location and invert the flags;
      - operand in a register, reference or subset location: force it into
        a register, compare it with 0 and report F_EQ (operand = 0 means
        "not operand" is true);
      - any other operand location is an internal error. }
  procedure taarch64notnode.second_boolean;
    begin
      secondpass(left);
      if handle_locjump then
        exit;

      if left.location.loc=LOC_FLAGS then
        begin
          location_copy(location,left.location);
          inverse_flags(location.resflags);
        end
      else if left.location.loc in
           [LOC_REGISTER,LOC_CREGISTER,
            LOC_REFERENCE,LOC_CREFERENCE,
            LOC_SUBSETREG,LOC_CSUBSETREG,
            LOC_SUBSETREF,LOC_CSUBSETREF] then
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
            left.resultdef,left.resultdef,true);
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_const(A_CMP,left.location.register,0));
          location_reset(location,LOC_FLAGS,OS_NO);
          location.resflags:=F_EQ;
        end
      else
        internalerror(2003042401);
    end;
  211. {*****************************************************************************
  212. taarch64unaryminusnode
  213. *****************************************************************************}
  { Generates code for a floating point unary minus.
    The operand is forced into a scalar MM register, a fresh MM register of
    the result size is allocated for the node's location, FNEG writes the
    negated value into it, and finally the code generator's
    maybe_check_for_fpu_exception hook is invoked. }
  procedure taarch64unaryminusnode.second_float;
    var
      list: TAsmList;
    begin
      secondpass(left);
      list:=current_asmdata.CurrAsmList;

      { operand -> scalar MM register }
      hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);

      { result lives in a newly allocated MM register }
      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
      location.register:=cg.getmmregister(list,location.size);

      list.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
      cg.maybe_check_for_fpu_exception(list);
    end;
  { Unit initialization: register the AArch64 node classes of this unit in
    the corresponding node class variables. }
  begin
    cunaryminusnode:=taarch64unaryminusnode;
    cnotnode:=taarch64notnode;
    cmoddivnode:=taarch64moddivnode;
  end.