nppcmat.pas 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate PowerPC assembler for math nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nppcmat;
  18. {$I fpcdefs.inc}
  19. interface
  20. uses
  21. node, nmat, ncgmat;
  22. type
  { PowerPC implementation of the integer div/mod node.
    Overrides the first pass (pass_1) and the code generation pass
    (pass_generate_code) of tmoddivnode. }
  tppcmoddivnode = class(tmoddivnode)
    function pass_1: tnode; override;
    procedure pass_generate_code; override;
  end;
  { PowerPC implementation of the shl/shr node.
    Overrides the code generation pass of tshlshrnode. }
  tppcshlshrnode = class(tshlshrnode)
    procedure pass_generate_code; override;
  end;
  { PowerPC implementation of the unary minus node.
    Overrides the code generation pass of tunaryminusnode. }
  tppcunaryminusnode = class(tunaryminusnode)
    procedure pass_generate_code; override;
  end;
  { PowerPC implementation of the "not" node.
    Overrides the code generation pass of tcgnotnode. }
  tppcnotnode = class(tcgnotnode)
    procedure pass_generate_code; override;
  end;
  36. implementation
  37. uses
  38. sysutils,
  39. globtype, systems,constexp,
  40. cutils, verbose, globals,
  41. symconst, symdef,
  42. aasmbase, aasmcpu, aasmtai,aasmdata,
  43. defutil,
  44. cgbase, cgutils, cgobj, hlcgobj, pass_1, pass_2,
  45. ncon, procinfo, nbas, nld, nadd,
  46. cpubase, cpuinfo,
  47. ncgutil, cgcpu, rgobj;
  48. {*****************************************************************************
  49. TPPCMODDIVNODE
  50. *****************************************************************************}
  { First pass of the PowerPC div/mod node.

    Runs the inherited pass_1. If that does not return a replacement node,
    the current procedure is flagged with pi_do_call (presumably because
    pass_generate_code may emit a call to FPC_DIVBYZERO).

    Returns: the node returned by the inherited pass_1 (nil when this node
    is kept as is). }
  function tppcmoddivnode.pass_1: tnode;
  begin
    result := nil;
    (*
    // DISABLED CODE, kept for reference.
    //
    // this code replaces all mod nodes by the equivalent div/mul/sub sequence
    // on node level, which might be advantageous when doing CSE on that level
    // However, optimal modulo code for some cases (in particular a 'x mod 2^n-1'
    // operation) can not be expressed using nodes, so this is commented out for now
    //
    // Re-enabling it requires these local declarations:
    //   var
    //     statementnode : tstatementnode;
    //     temp_left, temp_right : ttempcreatenode;
    //     block : tblocknode;
    if (nodetype = modn) then begin
      block := internalstatements(statementnode);

      temp_left := ctempcreatenode.create(left.resultdef, left.resultdef.size, tt_persistent, true);
      addstatement(statementnode, temp_left);
      addstatement(statementnode, cassignmentnode.create(ctemprefnode.create(temp_left), left.getcopy));

      if (right.nodetype <> ordconstn) then begin
        // implemented optimization: use temps to store the right value, otherwise
        // it is calculated twice when simply copying it which might result in side
        // effects
        temp_right := ctempcreatenode.create(right.resultdef, right.resultdef.size, tt_persistent, true);
        addstatement(statementnode, temp_right);
        addstatement(statementnode, cassignmentnode.create(ctemprefnode.create(temp_right), right.getcopy));

        addstatement(statementnode, cassignmentnode.create(ctemprefnode.create(temp_left),
          caddnode.create(subn, ctemprefnode.create(temp_left),
          caddnode.create(muln, cmoddivnode.create(divn, ctemprefnode.create(temp_left), ctemprefnode.create(temp_right)),
          ctemprefnode.create(temp_right)))));

        addstatement(statementnode, ctempdeletenode.create(temp_right));
      end else begin
        // in case this is a modulo by a constant operation, do not use a temp for the
        // right hand side, because otherwise the div optimization will not recognize this
        // fact (and there is no constant propagator/recognizer in the compiler),
        // resulting in suboptimal code.
        addstatement(statementnode, cassignmentnode.create(ctemprefnode.create(temp_left),
          caddnode.create(subn, ctemprefnode.create(temp_left),
          caddnode.create(muln, cmoddivnode.create(divn, ctemprefnode.create(temp_left), right.getcopy),
          right.getcopy))));
      end;
      addstatement(statementnode, ctempdeletenode.create_normal_temp(temp_left));
      addstatement(statementnode, ctemprefnode.create(temp_left));
      result := block;
    end;
    *)
    { the nil checks are kept so that the disabled block above can be
      re-enabled without touching the code below }
    if (not assigned(result)) then
      result := inherited pass_1;
    if not assigned(result) then
      include(current_procinfo.flags, pi_do_call);
  end;
  { Generates PowerPC code for an integer "div" or "mod" node.

    Both operands are evaluated with secondpass and the numerator is forced
    into a register. Then either
      - a constant right operand is handled through the generic code
        generator (div) or genOrdConstNodeMod (mod), when level 1
        optimizations are enabled, or
      - an explicit divide instruction is emitted, followed by a
        multiply/subtract sequence for "mod".
    For a non-constant divider a compare against zero is emitted and the
    code calls FPC_DIVBYZERO when the divider is zero. For signed operands
    cg.g_overflowcheck is invoked at the end.

    On exit, location is LOC_REGISTER and holds the result register. }
  procedure tppcmoddivnode.pass_generate_code;
  const
    { divide opcode, indexed by [signed, overflow checking] }
    divops: array[boolean, boolean] of tasmop =
      ((A_DIVDU, A_DIVDU_),(A_DIVD, A_DIVDO_));
    { generic division operation, indexed by [signed] }
    divcgops : array[boolean] of TOpCG = (OP_DIV, OP_IDIV);
    { "not equal" on CR7: used to skip the FPC_DIVBYZERO call }
    zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond:C_NE; cr: RS_CR7);
  var
    power : longint;
    op : tasmop;
    numerator, divider,
    resultreg : tregister;
    size : TCgSize;
    hl : tasmlabel;

    { Emits "numerator mod <constant>" into resultreg.
      Requires right to be an ordinal constant node; raises an internal
      error when that constant is zero. Reads numerator and resultreg of
      the enclosing procedure and uses its "power" variable as scratch. }
    procedure genOrdConstNodeMod;
    var
      modreg, maskreg, tempreg : tregister;
    begin
      if (tordconstnode(right).value = 0) then begin
        internalerror(2005061702);
      end else if (abs(tordconstnode(right).value.svalue) = 1) then begin
        { x mod +/-1 is always zero }
        cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, 0, resultreg);
      end else if (ispowerof2(tordconstnode(right).value, power)) then begin
        if (is_signed(right.resultdef)) then begin
          tempreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
          maskreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
          modreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);

          { modreg := abs(divisor) - 1 }
          cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, abs(tordconstnode(right).value.svalue)-1, modreg);
          { maskreg := numerator sar 63 }
          cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 63, numerator, maskreg);
          { tempreg := numerator and modreg }
          cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, numerator, modreg, tempreg);

          { NOTE(review): the andc/subfic/subfe sequence below presumably
            clears the low bits of the sign mask and turns "tempreg <> 0"
            into an all-ones/zero value via the carry bit - confirm against
            the Power ISA before changing it }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ANDC, maskreg, maskreg, modreg));
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_SUBFIC, modreg, tempreg, 0));
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUBFE, modreg, modreg, modreg));
          cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, modreg, maskreg, maskreg);
          cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_OR, OS_INT, maskreg, tempreg, resultreg);
        end else begin
          { unsigned: x mod 2^n = x and (2^n - 1) }
          cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).value-1, numerator,
            resultreg);
        end;
      end else begin
        { generic constant: result := numerator - (numerator div c) * c }
        cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, divCgOps[is_signed(right.resultdef)], OS_INT,
          tordconstnode(right).value.svalue, numerator, resultreg);
        cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_MUL, OS_INT, tordconstnode(right).value.svalue, resultreg,
          resultreg);
        cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_SUB, OS_INT, resultreg, numerator, resultreg);
      end;
    end;

  begin
    secondpass(left);
    secondpass(right);

    { put numerator in register }
    size:=def_cgsize(left.resultdef);
    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
      left.resultdef,left.resultdef,true);
    location_copy(location,left.location);
    numerator := location.register;
    resultreg := location.register;
    if (location.loc = LOC_CREGISTER) then begin
      { the numerator lives in a LOC_CREGISTER: write the result to a
        freshly allocated register instead }
      location.loc := LOC_REGISTER;
      location.register := cg.getintregister(current_asmdata.CurrAsmList,size);
      resultreg := location.register;
    end else if (nodetype = modn) or (right.nodetype = ordconstn) then begin
      { for a modulus op, and for const nodes we need the result register
        to be an extra register }
      resultreg := cg.getintregister(current_asmdata.CurrAsmList,size);
    end;

    if (cs_opt_level1 in current_settings.optimizerswitches) and (right.nodetype = ordconstn) then begin
      { constant divider with optimizations enabled }
      if (nodetype = divn) then
        cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, divCgOps[is_signed(right.resultdef)],
          size, tordconstnode(right).value.svalue, numerator, resultreg)
      else
        genOrdConstNodeMod;
    end else begin
      { load divider in a register if necessary }
      hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
      if (right.nodetype <> ordconstn) then
        { compare the divider with zero into CR7; the result is consumed by
          the conditional branch around FPC_DIVBYZERO further below }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR7,
          right.location.register, 0))
      else begin
        if (tordconstnode(right).value = 0) then
          internalerror(2005100301);
      end;
      divider := right.location.register;

      { select the correct opcode according to the signedness of the
        operands and whether we need overflow checking }
      op := divops[is_signed(right.resultdef), cs_check_overflow in current_settings.localswitches];
      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
        divider));

      if (nodetype = modn) then begin
        { multiply with the divisor again, taking care of the correct size }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULLD,resultreg,
          divider,resultreg));
        { remainder := numerator - quotient * divider }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUB,location.register,
          numerator,resultreg));
        resultreg := location.register;
      end;
    end;

    { set result location }
    location.loc:=LOC_REGISTER;
    location.register:=resultreg;
    if right.nodetype <> ordconstn then begin
      { skip the FPC_DIVBYZERO call when the divider was not zero }
      current_asmdata.getjumplabel(hl);
      current_asmdata.CurrAsmList.concat(taicpu.op_cond_sym(A_BC,zerocond,hl));
      cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DIVBYZERO',false);
      cg.a_label(current_asmdata.CurrAsmList,hl);
    end;
    { unsigned division/module can only overflow in case of division by zero
      (but checking this overflow flag is more convoluted than performing a
      simple comparison with 0) }
    if is_signed(right.resultdef) then
      cg.g_overflowcheck(current_asmdata.CurrAsmList,location,resultdef);
  end;
  223. {*****************************************************************************
  224. TPPCSHLSHRNODE
  225. *****************************************************************************}
  { Generates PowerPC code for a shl/shr node.

    The left operand is forced into a register. A constant shift count is
    masked to the bit width of the result type and emitted as an immediate;
    any other shift count is forced into a register first.

    On exit, location describes the register holding the shifted value. }
  procedure tppcshlshrnode.pass_generate_code;
  var
    resultreg, hregister1, hregister2 : tregister;
    op: topcg;
    shiftval: aint;
  begin
    secondpass(left);
    secondpass(right);

    { load left operand in a register }
    hlcg.location_force_reg(current_asmdata.CurrAsmList, left.location,
      left.resultdef, left.resultdef, true);
    location_copy(location, left.location);
    resultreg := location.register;
    hregister1 := location.register;
    if (location.loc = LOC_CREGISTER) then begin
      { the operand lives in a LOC_CREGISTER: write the result to a freshly
        allocated register instead }
      location.loc := LOC_REGISTER;
      resultreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
      location.register := resultreg;
    end;

    { determine operator }
    if nodetype = shln then
      op := OP_SHL
    else
      op := OP_SHR;

    { shifting by a constant directly coded: }
    if (right.nodetype = ordconstn) then begin
      // result types with size < 32 bits have their shift values masked
      // differently... :/
      shiftval := tordconstnode(right).value and (tcgsize2size[def_cgsize(resultdef)] * 8 -1);
      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, op, def_cgsize(resultdef),
        shiftval, hregister1, resultreg)
    end else begin
      { load shift count in a register if necessary }
      hlcg.location_force_reg(current_asmdata.CurrAsmList, right.location,
        right.resultdef, right.resultdef, true);
      hregister2 := right.location.register;
      cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, op, def_cgsize(resultdef), hregister2,
        hregister1, resultreg);
    end;
  end;
  267. {*****************************************************************************
  268. TPPCUNARYMINUSNODE
  269. *****************************************************************************}
  { Generates PowerPC code for a unary minus node.

    The operand is brought into a source register (integer or FPU,
    depending on where secondpass left it and on the operand type), a
    destination register is chosen, and a single negate instruction is
    emitted: FNEG for floats, NEG for integers, or NEGO_ when overflow
    checking is enabled. cg.g_overflowcheck is invoked afterwards. }
  procedure tppcunaryminusnode.pass_generate_code;
  var
    srcreg: tregister;
    negop: tasmop;
  begin
    secondpass(left);

    { subset locations are turned into a plain register first }
    if left.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);

    location_copy(location, left.location);
    location.loc := LOC_REGISTER;

    { pick the source register and the destination register }
    case left.location.loc of
      LOC_REGISTER, LOC_FPUREGISTER:
        begin
          { negate in place }
          srcreg := left.location.register;
          location.register := srcreg;
        end;
      LOC_CREGISTER:
        begin
          srcreg := left.location.register;
          location.register := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
        end;
      LOC_CFPUREGISTER:
        begin
          srcreg := left.location.register;
          location.register := cg.getfpuregister(current_asmdata.CurrAsmList, location.size);
        end;
      LOC_REFERENCE, LOC_CREFERENCE:
        begin
          { operand is in memory: load it into a new register and negate
            that register in place }
          if (left.resultdef.typ = floatdef) then
            begin
              srcreg := cg.getfpuregister(current_asmdata.CurrAsmList,
                left.location.size);
              location.register := srcreg;
              cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
                left.location.size,left.location.size,
                left.location.reference, srcreg);
            end
          else
            begin
              srcreg := cg.getintregister(current_asmdata.CurrAsmList, OS_64);
              location.register := srcreg;
              cg.a_load_ref_reg(current_asmdata.CurrAsmList, OS_64, OS_64,
                left.location.reference, srcreg);
            end;
        end;
      else
        internalerror(2013120112);
    end;

    { select the negate instruction and the final location kind }
    if left.resultdef.typ = floatdef then
      begin
        negop := A_FNEG;
        location.loc := LOC_FPUREGISTER;
      end
    else
      begin
        if cs_check_overflow in current_settings.localswitches then
          negop := A_NEGO_
        else
          negop := A_NEG;
        location.loc := LOC_REGISTER;
      end;

    { emit operation }
    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(negop, location.register, srcreg));

    cg.g_overflowcheck(current_asmdata.CurrAsmList, location, resultdef);
  end;
  330. {*****************************************************************************
  331. TPPCNOTNODE
  332. *****************************************************************************}
  { Generates PowerPC code for a "not" node.

    Non-boolean result: the operand is forced into a register and a generic
    OP_NOT is emitted into a newly allocated register.

    Boolean result: nothing more is done when handle_locjump returns true.
    Otherwise an operand in LOC_FLAGS gets its flags inverted; an operand in
    a register, reference or subset location is compared with zero and the
    result is reported as flags (CR0, F_EQ). Any other operand location is
    an internal error. }
  procedure tppcnotnode.pass_generate_code;
  begin
    if not is_boolean(resultdef) then
      begin
        { bitwise not }
        secondpass(left);
        hlcg.location_force_reg(current_asmdata.CurrAsmList, left.location,
          left.resultdef, left.resultdef, true);
        location_copy(location, left.location);
        location.loc := LOC_REGISTER;
        location.register := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
        { perform the NOT operation }
        cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_NOT, def_cgsize(resultdef),
          left.location.register,
          location.register);
        exit;
      end;

    { boolean not }
    if handle_locjump then
      exit;

    secondpass(left);
    if left.location.loc = LOC_FLAGS then
      begin
        { the operand is already a flags result: just invert it }
        location_copy(location, left.location);
        inverse_flags(location.resflags);
      end
    else if left.location.loc in [LOC_REGISTER, LOC_CREGISTER,
                                  LOC_REFERENCE, LOC_CREFERENCE,
                                  LOC_SUBSETREG, LOC_CSUBSETREG,
                                  LOC_SUBSETREF, LOC_CSUBSETREF] then
      begin
        { "not x" is true exactly when x compares equal to zero }
        hlcg.location_force_reg(current_asmdata.CurrAsmList, left.location,
          left.resultdef, left.resultdef, true);
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMPDI,
          left.location.register, 0));
        location_reset(location, LOC_FLAGS, OS_NO);
        location.resflags.cr := RS_CR0;
        location.resflags.flag := F_EQ;
      end
    else
      internalerror(2003042401);
  end;
  { Unit initialization: point the node class variables for the math nodes
    at the PowerPC implementations declared in this unit. }
  begin
    cnotnode := tppcnotnode;
    cunaryminusnode := tppcunaryminusnode;
    cshlshrnode := tppcshlshrnode;
    cmoddivnode := tppcmoddivnode;
  end.