n386add.pas 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Code generation for add nodes on the i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit n386add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nadd,cpubase,nx86add;
  22. type
  { Add node for the i386 target. Specialises tx86addnode for ordinal
    operations, 64-bit add/sub/logical operations, 64-bit comparisons and
    multiplications that go through the one-operand MUL/IMUL form. }
  ti386addnode = class(tx86addnode)
    { Always answers False on this target. }
    function use_generic_mul32to64: boolean; override;

    { Routes selected multiplications to second_mul, everything else to
      the inherited implementation. }
    procedure second_addordinal; override;

    { Code generation for 64-bit add, sub, xor, or and and. }
    procedure second_add64bit; override;

    { Code generation for 64-bit comparisons; the result is LOC_JUMP. }
    procedure second_cmp64bit; override;

    { Emits a one-operand MUL (unsigned=true) or IMUL (unsigned=false). }
    procedure second_mul(unsigned: boolean);
  end;
  30. implementation
  31. uses
  32. globtype,systems,
  33. cutils,verbose,globals,
  34. symconst,symdef,paramgr,defutil,
  35. aasmbase,aasmtai,aasmdata,aasmcpu,
  36. cgbase,procinfo,
  37. ncon,nset,cgutils,tgobj,
  38. cga,ncgutil,cgobj,cg64f32,cgx86;
  39. {*****************************************************************************
  40. use_generic_mul32to64
  41. *****************************************************************************}
  { Always returns False.
    NOTE(review): presumably this tells the caller not to fall back to a
    generic 32x32->64 multiplication, since second_addordinal routes such
    multiplications to second_mul - confirm against the base class. }
  function ti386addnode.use_generic_mul32to64: boolean;
    begin
      use_generic_mul32to64:=false;
    end;
  { Dispatcher for ordinal add nodes.

    A multiplication is generated by second_mul when at least one operand
    is not signed, or when the result type is 64 bit. Every other case
    (including signed multiplications with a non-64-bit result) is left to
    the inherited implementation. }
  procedure ti386addnode.second_addordinal;
    var
      any_operand_unsigned: boolean;
    begin
      { "left unsigned or right unsigned" == "not both signed" }
      any_operand_unsigned:=not(is_signed(left.resultdef) and
                                is_signed(right.resultdef));

      if (nodetype<>muln) or
         not(any_operand_unsigned or is_64bit(resultdef)) then
        inherited second_addordinal
      else
        second_mul(any_operand_unsigned);
    end;
  59. {*****************************************************************************
  60. Add64bit
  61. *****************************************************************************}
  { Generates code for a 64-bit addn, subn, xorn, orn or andn node.

    After evaluating both operands, one of them is forced into a register
    pair (swapping the operand locations when only the right one is already
    in registers), the operation is emitted into that pair, an optional
    overflow check is appended for add/sub, and the pair becomes the result
    location of the node.

    Any other node type raises internalerror(200109051). }
  procedure ti386addnode.second_add64bit;
    var
      cgop         : TOpCG;
      lo_op,
      hi_op        : TAsmOp;
      lo_reg,
      hi_reg,
      scratch      : tregister;
      no_ovf_label : tasmlabel;
      may_overflow,
      is_unsigned,
      swapped_sub  : boolean;
    begin
      firstcomplex(self);
      pass_left_right;

      { the operation counts as unsigned as soon as one operand is u64bit }
      is_unsigned:=
        ((left.resultdef.typ=orddef) and
         (torddef(left.resultdef).ordtype=u64bit)) or
        ((right.resultdef.typ=orddef) and
         (torddef(right.resultdef).ordtype=u64bit));

      lo_op:=A_NONE;
      hi_op:=A_NONE;
      may_overflow:=false;

      case nodetype of
        addn :
          begin
            cgop:=OP_ADD;
            may_overflow:=true;
          end;
        subn :
          begin
            cgop:=OP_SUB;
            { raw opcodes, only needed by the swapped subtraction below }
            lo_op:=A_SUB;
            hi_op:=A_SBB;
            may_overflow:=true;
          end;
        xorn :
          cgop:=OP_XOR;
        orn :
          cgop:=OP_OR;
        andn :
          cgop:=OP_AND;
        else
          begin
            { everything should be handled in pass_1 (JM) }
            internalerror(200109051);
          end;
      end;

      { make sure the left operand lives in a register pair }
      if left.location.loc<>LOC_REGISTER then
        begin
          if right.location.loc=LOC_REGISTER then
            begin
              { reuse the right operand's registers; remember the exchange }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { neither side is in a register: load left into a fresh pair }
              lo_reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
              hi_reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
              cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(lo_reg,hi_reg));
              location_reset(left.location,LOC_REGISTER,left.location.size);
              left.location.register64.reglo:=lo_reg;
              left.location.register64.reghi:=hi_reg;
            end;
        end;

      { from here on left.location.loc is LOC_REGISTER; nf_swapped is final
        for the purpose of choosing the code path }
      swapped_sub:=(nodetype=subn) and (nf_swapped in flags);

      if right.location.loc<>LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { Subtraction with exchanged operands and the other operand
                not in a register: compute each half in a scratch register
                and move it back into left's register pair.
                NOTE(review): assumes emit_reg_reg takes (source,destination),
                i.e. scratch := scratch - left half - confirm. }
              scratch:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
              cg64.a_load64low_loc_reg(current_asmdata.CurrAsmList,right.location,scratch);
              emit_reg_reg(lo_op,S_L,left.location.register64.reglo,scratch);
              emit_reg_reg(A_MOV,S_L,scratch,left.location.register64.reglo);
              cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,scratch);
              { the carry flag is still ok }
              emit_reg_reg(hi_op,S_L,left.location.register64.reghi,scratch);
              emit_reg_reg(A_MOV,S_L,scratch,left.location.register64.reghi);
            end
          else
            cg64.a_op64_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,
              left.location.register64);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end
      else if swapped_sub then
        begin
          { both in registers, operands exchanged: accumulate into right's
            registers and exchange back so that left holds the result }
          cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location.register64,
            right.location.register64);
          location_swap(left.location,right.location);
          toggleflag(nf_swapped);
        end
      else
        cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
          right.location.register64,
          left.location.register64);

      { The overflow test has to be emitted right here because the
        signedness of the operation is only known in the local variable. }
      if may_overflow and
         (cs_check_overflow in current_settings.localswitches) then
        begin
          current_asmdata.getjumplabel(no_ovf_label);
          { jump over the FPC_OVERFLOW call when the flags signal success }
          if is_unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,no_ovf_label)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,no_ovf_label);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,no_ovf_label);
        end;

      location_copy(location,left.location);
    end;
  { Generates code for a 64-bit comparison.

    The high dwords are compared first and the jumps that can already be
    decided are emitted (firstjmp64bitcmp); then the low dwords are compared
    and the final jumps are emitted (secondjmp64bitcmp). The node result is
    LOC_JUMP: control reaches current_procinfo.CurrTrueLabel or
    current_procinfo.CurrFalseLabel.

    A right operand location other than register, constant register,
    reference or constant raises internalerror(200203282). }
  procedure ti386addnode.second_cmp64bit;
    var
      lo_reg,
      hi_reg      : tregister;
      hi_ref      : treference;
      is_unsigned : boolean;

    { Jumps emitted after the comparison of the high dwords. }
    procedure firstjmp64bitcmp;
      var
        saved_nodetype : tnodetype;
      begin
{$ifdef OLDREGVARS}
        load_all_regvars(current_asmdata.CurrAsmList);
{$endif OLDREGVARS}
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn:
            begin
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(is_unsigned),current_procinfo.CurrTrueLabel);
              { cheat a little bit for the negative test: temporarily flip
                nf_swapped so that getresflags yields the opposite relation }
              toggleflag(nf_swapped);
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(is_unsigned),current_procinfo.CurrFalseLabel);
              toggleflag(nf_swapped);
            end;
          lten,gten:
            begin
              saved_nodetype:=nodetype;
              { on the high dword only the strict relation decides "true" }
              if saved_nodetype=lten then
                nodetype:=ltn
              else
                nodetype:=gtn;
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(is_unsigned),current_procinfo.CurrTrueLabel);
              { cheat for the negative test: use the opposite strict relation }
              if saved_nodetype=lten then
                nodetype:=gtn
              else
                nodetype:=ltn;
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(is_unsigned),current_procinfo.CurrFalseLabel);
              nodetype:=saved_nodetype;
            end;
          equaln:
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrFalseLabel);
          unequaln:
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrTrueLabel);
        end;
      end;

    { Jumps emitted after the comparison of the low dwords; every handled
      node type ends in an unconditional jump. }
    procedure secondjmp64bitcmp;
      begin
        case nodetype of
          ltn,gtn,lten,gten:
            begin
              { the comparison of the low dword always has to be unsigned! }
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(true),current_procinfo.CurrTrueLabel);
              cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
            end;
          equaln:
            begin
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrFalseLabel);
              cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrTrueLabel);
            end;
          unequaln:
            begin
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrTrueLabel);
              cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
            end;
        end;
      end;

    begin
      firstcomplex(self);
      pass_left_right;

      { the comparison is unsigned as soon as one operand is u64bit }
      is_unsigned:=
        ((left.resultdef.typ=orddef) and
         (torddef(left.resultdef).ordtype=u64bit)) or
        ((right.resultdef.typ=orddef) and
         (torddef(right.resultdef).ordtype=u64bit));

      { get the left operand into registers }
      if left.location.loc<>LOC_REGISTER then
        begin
          if right.location.loc=LOC_REGISTER then
            begin
              { use the right operand's registers and remember the exchange }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else if left.location.loc<>LOC_CREGISTER then
            begin
              { a LOC_CREGISTER can be compared in place; anything else is
                loaded into a fresh register pair }
              lo_reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
              hi_reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
              cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(lo_reg,hi_reg));
              location_freetemp(current_asmdata.CurrAsmList,left.location);
              location_reset(left.location,LOC_REGISTER,left.location.size);
              left.location.register64.reglo:=lo_reg;
              left.location.register64.reghi:=hi_reg;
            end;
        end;

      { at this point left is in LOC_REGISTER or LOC_CREGISTER }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          begin
            emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
            firstjmp64bitcmp;
            emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
            secondjmp64bitcmp;
          end;
        LOC_CREFERENCE,
        LOC_REFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            { the high dword is located 4 bytes after the low dword }
            hi_ref:=right.location.reference;
            inc(hi_ref.offset,4);
            emit_ref_reg(A_CMP,S_L,hi_ref,left.location.register64.reghi);
            firstjmp64bitcmp;
            emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
            secondjmp64bitcmp;
            { NOTE(review): secondjmp64bitcmp already ends in an
              unconditional jump for every comparison it handles, so this
              jump looks redundant; kept to leave the emitted code unchanged }
            cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
            location_freetemp(current_asmdata.CurrAsmList,right.location);
          end;
        LOC_CONSTANT :
          begin
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
            firstjmp64bitcmp;
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
            secondjmp64bitcmp;
          end;
        else
          internalerror(200203282);
      end;

      { we have LOC_JUMP as result }
      location_reset(location,LOC_JUMP,OS_NO);
    end;
  331. {*****************************************************************************
  332. x86 MUL
  333. *****************************************************************************}
  { Emits a one-operand MUL/IMUL.

    unsigned: True selects A_MUL, False selects A_IMUL.

    The right operand is loaded into EAX and the left operand is used as
    the register or memory operand of the instruction. Afterwards the
    result is copied out of EAX (or EDX:EAX when the result type is 64 bit)
    into newly allocated registers, which become the node's location. With
    overflow checking enabled and a non-64-bit result, a call to
    FPC_OVERFLOW is emitted that is skipped on F_AE. }
  procedure ti386addnode.second_mul(unsigned: boolean);
    const
      { indexed by "unsigned" }
      mul_opcode : array[boolean] of tasmop = (A_IMUL, A_MUL);
    var
      left_reg     : Tregister;
      left_ref     : Treference;
      left_in_mem  : boolean;
      no_ovf_label : tasmlabel;
    begin
      pass_left_right;

      { The location.register will be filled in later (JM) }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));

      { Mul supports registers and references, so if not register/reference,
        load the location into a register.
        The variant of IMUL which is capable of doing 32->64 bits has the
        same restrictions. }
      left_in_mem:=false;
      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          left_reg:=left.location.register;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
            left_ref:=left.location.reference;
            left_in_mem:=true;
          end;
        else
          begin
            { LOC_CONSTANT for example. }
            left_reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            cg.a_load_loc_reg(current_asmdata.CurrAsmList,OS_INT,left.location,left_reg);
          end;
      end;

      { Allocate EAX and load the right value into it. }
      cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
      cg.a_load_loc_reg(current_asmdata.CurrAsmList,OS_INT,right.location,NR_EAX);

      { Also allocate EDX, since it is also modified by a mul (JM). }
      cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);

      if left_in_mem then
        emit_ref(mul_opcode[unsigned],S_L,left_ref)
      else
        emit_reg(mul_opcode[unsigned],S_L,left_reg);

      { 32->64 bit cannot overflow, so only check non-64-bit results }
      if (cs_check_overflow in current_settings.localswitches) and
         (not is_64bit(resultdef)) then
        begin
          current_asmdata.getjumplabel(no_ovf_label);
          cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,no_ovf_label);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,no_ovf_label);
        end;

      { Release EDX first; EAX is released in the branches below. }
      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);

      if is_64bit(resultdef) then
        begin
          { Allocate a couple of registers and store EDX:EAX into them. }
          location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EDX,location.register64.reghi);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
          location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register64.reglo);
        end
      else
        begin
          { Allocate a new register and store the result held in EAX in it. }
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
        end;

      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Unit start-up code: install ti386addnode as the value of caddnode.
    NOTE(review): presumably caddnode is the class reference used to create
    add nodes - confirm in the unit that declares it. }
  initialization
    caddnode:=ti386addnode;
  end.