n386add.pas 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Code generation for add nodes on the i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit n386add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nadd,cpubase,nx86add;
  22. type
  23. ti386addnode = class(tx86addnode)
  24. function use_generic_mul32to64: boolean; override;
  25. procedure second_addordinal; override;
  26. procedure second_add64bit;override;
  27. procedure second_cmp64bit;override;
  28. procedure second_mul(unsigned: boolean);
  29. end;
  30. implementation
  31. uses
  32. globtype,systems,
  33. cutils,verbose,globals,
  34. symconst,symdef,paramgr,defutil,
  35. aasmbase,aasmtai,aasmdata,aasmcpu,
  36. cgbase,procinfo,
  37. ncon,nset,cgutils,tgobj,
  38. cga,ncgutil,cgobj,cg64f32,cgx86,
  39. hlcgobj;
  40. {*****************************************************************************
  41. use_generic_mul32to64
  42. *****************************************************************************}
  43. function ti386addnode.use_generic_mul32to64: boolean;
  44. begin
  45. result := False;
  46. end;
  47. { handles all unsigned multiplications, and 32->64 bit signed ones.
  48. 32bit-only signed mul is handled by generic codegen }
  49. procedure ti386addnode.second_addordinal;
  50. var
  51. unsigned: boolean;
  52. begin
  53. unsigned:=not(is_signed(left.resultdef)) or
  54. not(is_signed(right.resultdef));
  55. if (nodetype=muln) and (unsigned or is_64bit(resultdef)) then
  56. second_mul(unsigned)
  57. else
  58. inherited second_addordinal;
  59. end;
  60. {*****************************************************************************
  61. Add64bit
  62. *****************************************************************************}
  63. procedure ti386addnode.second_add64bit;
  64. var
  65. op : TOpCG;
  66. op1,op2 : TAsmOp;
  67. opsize : TOpSize;
  68. hregister,
  69. hregister2 : tregister;
  70. hl4 : tasmlabel;
  71. mboverflow,
  72. unsigned:boolean;
  73. r:Tregister;
  74. begin
  75. firstcomplex(self);
  76. pass_left_right;
  77. op1:=A_NONE;
  78. op2:=A_NONE;
  79. mboverflow:=false;
  80. opsize:=S_L;
  81. unsigned:=((left.resultdef.typ=orddef) and
  82. (torddef(left.resultdef).ordtype=u64bit)) or
  83. ((right.resultdef.typ=orddef) and
  84. (torddef(right.resultdef).ordtype=u64bit));
  85. case nodetype of
  86. addn :
  87. begin
  88. op:=OP_ADD;
  89. mboverflow:=true;
  90. end;
  91. subn :
  92. begin
  93. op:=OP_SUB;
  94. op1:=A_SUB;
  95. op2:=A_SBB;
  96. mboverflow:=true;
  97. end;
  98. xorn:
  99. op:=OP_XOR;
  100. orn:
  101. op:=OP_OR;
  102. andn:
  103. op:=OP_AND;
  104. else
  105. begin
  106. { everything should be handled in pass_1 (JM) }
  107. internalerror(200109051);
  108. end;
  109. end;
  110. { left and right no register? }
  111. { then one must be demanded }
  112. if (left.location.loc<>LOC_REGISTER) then
  113. begin
  114. if (right.location.loc<>LOC_REGISTER) then
  115. begin
  116. hregister:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  117. hregister2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  118. cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(hregister,hregister2));
  119. location_reset(left.location,LOC_REGISTER,left.location.size);
  120. left.location.register64.reglo:=hregister;
  121. left.location.register64.reghi:=hregister2;
  122. end
  123. else
  124. begin
  125. location_swap(left.location,right.location);
  126. toggleflag(nf_swapped);
  127. end;
  128. end;
  129. { at this point, left.location.loc should be LOC_REGISTER }
  130. if right.location.loc=LOC_REGISTER then
  131. begin
  132. { when swapped another result register }
  133. if (nodetype=subn) and (nf_swapped in flags) then
  134. begin
  135. cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  136. left.location.register64,
  137. right.location.register64);
  138. location_swap(left.location,right.location);
  139. toggleflag(nf_swapped);
  140. end
  141. else
  142. begin
  143. cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  144. right.location.register64,
  145. left.location.register64);
  146. end;
  147. end
  148. else
  149. begin
  150. { right.location<>LOC_REGISTER }
  151. if (nodetype=subn) and (nf_swapped in flags) then
  152. begin
  153. r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  154. cg64.a_load64low_loc_reg(current_asmdata.CurrAsmList,right.location,r);
  155. emit_reg_reg(op1,opsize,left.location.register64.reglo,r);
  156. emit_reg_reg(A_MOV,opsize,r,left.location.register64.reglo);
  157. cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,r);
  158. { the carry flag is still ok }
  159. emit_reg_reg(op2,opsize,left.location.register64.reghi,r);
  160. emit_reg_reg(A_MOV,opsize,r,left.location.register64.reghi);
  161. end
  162. else
  163. begin
  164. cg64.a_op64_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,
  165. left.location.register64);
  166. end;
  167. location_freetemp(current_asmdata.CurrAsmList,right.location);
  168. end;
  169. { only in case of overflow operations }
  170. { produce overflow code }
  171. { we must put it here directly, because sign of operation }
  172. { is in unsigned VAR!! }
  173. if mboverflow then
  174. begin
  175. if cs_check_overflow in current_settings.localswitches then
  176. begin
  177. current_asmdata.getjumplabel(hl4);
  178. if unsigned then
  179. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
  180. else
  181. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
  182. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
  183. cg.a_label(current_asmdata.CurrAsmList,hl4);
  184. end;
  185. end;
  186. location_copy(location,left.location);
  187. end;
  188. procedure ti386addnode.second_cmp64bit;
  189. var
  190. hregister,
  191. hregister2 : tregister;
  192. href : treference;
  193. unsigned : boolean;
  194. procedure firstjmp64bitcmp;
  195. var
  196. oldnodetype : tnodetype;
  197. begin
  198. {$ifdef OLDREGVARS}
  199. load_all_regvars(current_asmdata.CurrAsmList);
  200. {$endif OLDREGVARS}
  201. { the jump the sequence is a little bit hairy }
  202. case nodetype of
  203. ltn,gtn:
  204. begin
  205. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrTrueLabel);
  206. { cheat a little bit for the negative test }
  207. toggleflag(nf_swapped);
  208. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrFalseLabel);
  209. toggleflag(nf_swapped);
  210. end;
  211. lten,gten:
  212. begin
  213. oldnodetype:=nodetype;
  214. if nodetype=lten then
  215. nodetype:=ltn
  216. else
  217. nodetype:=gtn;
  218. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrTrueLabel);
  219. { cheat for the negative test }
  220. if nodetype=ltn then
  221. nodetype:=gtn
  222. else
  223. nodetype:=ltn;
  224. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrFalseLabel);
  225. nodetype:=oldnodetype;
  226. end;
  227. equaln:
  228. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrFalseLabel);
  229. unequaln:
  230. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrTrueLabel);
  231. end;
  232. end;
  233. procedure secondjmp64bitcmp;
  234. begin
  235. { the jump the sequence is a little bit hairy }
  236. case nodetype of
  237. ltn,gtn,lten,gten:
  238. begin
  239. { the comparisaion of the low dword have to be }
  240. { always unsigned! }
  241. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(true),current_procinfo.CurrTrueLabel);
  242. cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
  243. end;
  244. equaln:
  245. begin
  246. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrFalseLabel);
  247. cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrTrueLabel);
  248. end;
  249. unequaln:
  250. begin
  251. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrTrueLabel);
  252. cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
  253. end;
  254. end;
  255. end;
  256. begin
  257. firstcomplex(self);
  258. pass_left_right;
  259. unsigned:=((left.resultdef.typ=orddef) and
  260. (torddef(left.resultdef).ordtype=u64bit)) or
  261. ((right.resultdef.typ=orddef) and
  262. (torddef(right.resultdef).ordtype=u64bit));
  263. { left and right no register? }
  264. { then one must be demanded }
  265. if (left.location.loc<>LOC_REGISTER) then
  266. begin
  267. if (right.location.loc<>LOC_REGISTER) then
  268. begin
  269. { we can reuse a CREGISTER for comparison }
  270. if (left.location.loc<>LOC_CREGISTER) then
  271. begin
  272. hregister:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  273. hregister2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  274. cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(hregister,hregister2));
  275. location_freetemp(current_asmdata.CurrAsmList,left.location);
  276. location_reset(left.location,LOC_REGISTER,left.location.size);
  277. left.location.register64.reglo:=hregister;
  278. left.location.register64.reghi:=hregister2;
  279. end;
  280. end
  281. else
  282. begin
  283. location_swap(left.location,right.location);
  284. toggleflag(nf_swapped);
  285. end;
  286. end;
  287. { at this point, left.location.loc should be LOC_REGISTER }
  288. if right.location.loc=LOC_REGISTER then
  289. begin
  290. emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
  291. firstjmp64bitcmp;
  292. emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
  293. secondjmp64bitcmp;
  294. end
  295. else
  296. begin
  297. case right.location.loc of
  298. LOC_CREGISTER :
  299. begin
  300. emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
  301. firstjmp64bitcmp;
  302. emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
  303. secondjmp64bitcmp;
  304. end;
  305. LOC_CREFERENCE,
  306. LOC_REFERENCE :
  307. begin
  308. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  309. href:=right.location.reference;
  310. inc(href.offset,4);
  311. emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi);
  312. firstjmp64bitcmp;
  313. emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
  314. secondjmp64bitcmp;
  315. cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
  316. location_freetemp(current_asmdata.CurrAsmList,right.location);
  317. end;
  318. LOC_CONSTANT :
  319. begin
  320. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
  321. firstjmp64bitcmp;
  322. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
  323. secondjmp64bitcmp;
  324. end;
  325. else
  326. internalerror(200203282);
  327. end;
  328. end;
  329. { we have LOC_JUMP as result }
  330. location_reset(location,LOC_JUMP,OS_NO)
  331. end;
  332. {*****************************************************************************
  333. x86 MUL
  334. *****************************************************************************}
  335. procedure ti386addnode.second_mul(unsigned: boolean);
  336. var reg:Tregister;
  337. ref:Treference;
  338. use_ref:boolean;
  339. hl4 : tasmlabel;
  340. const
  341. asmops: array[boolean] of tasmop = (A_IMUL, A_MUL);
  342. begin
  343. pass_left_right;
  344. {The location.register will be filled in later (JM)}
  345. location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
  346. { Mul supports registers and references, so if not register/reference,
  347. load the location into a register.
  348. The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
  349. use_ref:=false;
  350. if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
  351. reg:=left.location.register
  352. else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
  353. begin
  354. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  355. ref:=left.location.reference;
  356. use_ref:=true;
  357. end
  358. else
  359. begin
  360. {LOC_CONSTANT for example.}
  361. reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  362. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
  363. end;
  364. {Allocate EAX.}
  365. cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
  366. {Load the right value.}
  367. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EAX);
  368. {Also allocate EDX, since it is also modified by a mul (JM).}
  369. cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
  370. if use_ref then
  371. emit_ref(asmops[unsigned],S_L,ref)
  372. else
  373. emit_reg(asmops[unsigned],S_L,reg);
  374. if (cs_check_overflow in current_settings.localswitches) and
  375. { 32->64 bit cannot overflow }
  376. (not is_64bit(resultdef)) then
  377. begin
  378. current_asmdata.getjumplabel(hl4);
  379. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
  380. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
  381. cg.a_label(current_asmdata.CurrAsmList,hl4);
  382. end;
  383. {Free EAX,EDX}
  384. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
  385. if is_64bit(resultdef) then
  386. begin
  387. {Allocate a couple of registers and store EDX:EAX into it}
  388. location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  389. cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
  390. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
  391. location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  392. cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
  393. end
  394. else
  395. begin
  396. {Allocate a new register and store the result in EAX in it.}
  397. location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  398. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
  399. cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
  400. end;
  401. location_freetemp(current_asmdata.CurrAsmList,left.location);
  402. location_freetemp(current_asmdata.CurrAsmList,right.location);
  403. end;
  404. begin
  405. caddnode:=ti386addnode;
  406. end.