{ n386add.pas }
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Code generation for add nodes on the i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit n386add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nadd,cpubase,nx86add;
  22. type
  { i386 specialisation of the x86 add node (tx86addnode). }
  23. ti386addnode = class(tx86addnode)
  { always returns False in this unit }
  24. function use_generic_mul32to64: boolean; override;
  { sends unsigned and 64-bit-result multiplications to second_mul,
    everything else to the inherited method }
  25. procedure second_addordinal; override;
  { 64-bit add/sub/xor/or/and carried out on a lo/hi register pair }
  26. procedure second_add64bit;override;
  { 64-bit comparison; leaves a LOC_JUMP result }
  27. procedure second_cmp64bit;override;
  { single-operand MUL (unsigned=true) or IMUL (unsigned=false) through EAX/EDX }
  28. procedure second_mul(unsigned: boolean);
  29. end;
  30. implementation
  31. uses
  32. globtype,systems,
  33. cutils,verbose,globals,
  34. symconst,symdef,paramgr,defutil,
  35. aasmbase,aasmtai,aasmdata,aasmcpu,
  36. cgbase,procinfo,
  37. ncon,nset,cgutils,tgobj,
  38. cga,ncgutil,cgobj,cg64f32,cgx86,
  39. hlcgobj;
  40. {*****************************************************************************
  41. use_generic_mul32to64
  42. *****************************************************************************}
  { Always answers false.
    NOTE(review): presumably this tells the shared add-node code not to use
    its generic 32x32->64 bit multiplication, because this unit emits its own
    (second_addordinal hands 64-bit-result multiplications to second_mul) -
    confirm against the declaration in the parent class. }
  function ti386addnode.use_generic_mul32to64: boolean;
    begin
      use_generic_mul32to64:=false;
    end;
  { Ordinal add-node code generation for the i386.
    A multiplication is emitted here, through second_mul, when at least one
    operand is unsigned or when the result is 64 bits wide (the 32->64 bit
    case). Everything else, including a plain signed 32 bit multiplication,
    is left to the inherited code generator. }
  procedure ti386addnode.second_addordinal;
    var
      has_unsigned_operand,
      needs_own_mul : boolean;
    begin
      { unsigned as soon as either side is not a signed type }
      has_unsigned_operand:=not(is_signed(left.resultdef) and
                                is_signed(right.resultdef));
      needs_own_mul:=(nodetype=muln) and
                     (has_unsigned_operand or is_64bit(resultdef));
      if not needs_own_mul then
        inherited second_addordinal
      else
        second_mul(has_unsigned_operand);
    end;
  60. {*****************************************************************************
  61. Add64bit
  62. *****************************************************************************}
  { Generates code for a 64-bit add, sub, xor, or, and on the i386, where a
    64-bit value in a register is a pair register64.reglo / register64.reghi.
    Emission order:
      1. pass_left_right is called for the two operands;
      2. left.location is turned into a register pair, either by loading it
         or by swapping it with right.location (nf_swapped is toggled);
      3. the operation is emitted;
      4. for addn/subn, when cs_check_overflow is active, a call to
         FPC_OVERFLOW is emitted together with a conditional jump over it;
      5. the node's location is copied from left.location.
    Any other node type raises internal error 200109051. }
  63. procedure ti386addnode.second_add64bit;
  64. var
  65. op : TOpCG;
  66. op1,op2 : TAsmOp;
  67. opsize : TOpSize;
  68. hregister,
  69. hregister2 : tregister;
  70. hl4 : tasmlabel;
  71. mboverflow,
  72. unsigned:boolean;
  73. r:Tregister;
  74. begin
  75. pass_left_right;
  76. op1:=A_NONE;
  77. op2:=A_NONE;
  78. mboverflow:=false;
  79. opsize:=S_L;
  { unsigned as soon as either operand is an ordinal of type u64bit }
  80. unsigned:=((left.resultdef.typ=orddef) and
  81. (torddef(left.resultdef).ordtype=u64bit)) or
  82. ((right.resultdef.typ=orddef) and
  83. (torddef(right.resultdef).ordtype=u64bit));
  { op is the generic operation handed to cg64; op1/op2 are only set for subn
    and are the low/high half instructions used by the swapped subtraction
    further down; mboverflow marks the operations that get an overflow check }
  84. case nodetype of
  85. addn :
  86. begin
  87. op:=OP_ADD;
  88. mboverflow:=true;
  89. end;
  90. subn :
  91. begin
  92. op:=OP_SUB;
  93. op1:=A_SUB;
  94. op2:=A_SBB;
  95. mboverflow:=true;
  96. end;
  97. xorn:
  98. op:=OP_XOR;
  99. orn:
  100. op:=OP_OR;
  101. andn:
  102. op:=OP_AND;
  103. else
  104. begin
  105. { everything should be handled in pass_1 (JM) }
  106. internalerror(200109051);
  107. end;
  108. end;
  109. { left and right no register? }
  110. { then one must be demanded }
  111. if (left.location.loc<>LOC_REGISTER) then
  112. begin
  113. if (right.location.loc<>LOC_REGISTER) then
  114. begin
  { neither operand is in a register: load left into a fresh lo/hi pair }
  115. hregister:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  116. hregister2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  117. cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(hregister,hregister2));
  118. location_reset(left.location,LOC_REGISTER,left.location.size);
  119. left.location.register64.reglo:=hregister;
  120. left.location.register64.reghi:=hregister2;
  121. end
  122. else
  123. begin
  { only right is in a register: exchange the two locations and record
    that in nf_swapped, which matters for the non-commutative subn }
  124. location_swap(left.location,right.location);
  125. toggleflag(nf_swapped);
  126. end;
  127. end;
  128. { at this point, left.location.loc should be LOC_REGISTER }
  129. if right.location.loc=LOC_REGISTER then
  130. begin
  131. { when swapped another result register }
  132. if (nodetype=subn) and (nf_swapped in flags) then
  133. begin
  { the register operands are passed in the opposite order compared to
    the else branch, then the locations are swapped back so that
    left.location is again the one copied into the result at the end }
  134. cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  135. left.location.register64,
  136. right.location.register64);
  137. location_swap(left.location,right.location);
  138. toggleflag(nf_swapped);
  139. end
  140. else
  141. begin
  142. cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  143. right.location.register64,
  144. left.location.register64);
  145. end;
  146. end
  147. else
  148. begin
  149. { right.location<>LOC_REGISTER }
  150. if (nodetype=subn) and (nf_swapped in flags) then
  151. begin
  { swapped subtraction with right not in a register: each half of right
    is loaded into the scratch register r, op1 (SUB) resp. op2 (SBB) is
    emitted with the matching half of left and r as operands, and r is
    then moved into that half of left. A MOV and the load of the high
    half are emitted between the SUB and the SBB. }
  152. r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  153. cg64.a_load64low_loc_reg(current_asmdata.CurrAsmList,right.location,r);
  154. emit_reg_reg(op1,opsize,left.location.register64.reglo,r);
  155. emit_reg_reg(A_MOV,opsize,r,left.location.register64.reglo);
  156. cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,r);
  157. { the carry flag is still ok }
  158. emit_reg_reg(op2,opsize,left.location.register64.reghi,r);
  159. emit_reg_reg(A_MOV,opsize,r,left.location.register64.reghi);
  160. end
  161. else
  162. begin
  163. cg64.a_op64_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,
  164. left.location.register64);
  165. end;
  166. location_freetemp(current_asmdata.CurrAsmList,right.location);
  167. end;
  168. { only in case of overflow operations }
  169. { produce overflow code }
  170. { we must put it here directly, because sign of operation }
  171. { is in unsigned VAR!! }
  172. if mboverflow then
  173. begin
  174. if cs_check_overflow in current_settings.localswitches then
  175. begin
  { jump over the FPC_OVERFLOW call on F_AE for an unsigned operation
    and on F_NO for a signed one }
  176. current_asmdata.getjumplabel(hl4);
  177. if unsigned then
  178. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
  179. else
  180. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
  181. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
  182. cg.a_label(current_asmdata.CurrAsmList,hl4);
  183. end;
  184. end;
  { the node's result location is whatever left.location holds now }
  185. location_copy(location,left.location);
  186. end;
  { Generates code for a 64-bit comparison on the i386.
    The high dwords are compared first and firstjmp64bitcmp emits the jumps
    for that compare; then the low dwords are compared and secondjmp64bitcmp
    emits the final jumps. All jumps go to current_procinfo.CurrTrueLabel or
    current_procinfo.CurrFalseLabel and the node's location becomes LOC_JUMP.
    The right operand may be in LOC_REGISTER, LOC_CREGISTER, LOC_REFERENCE,
    LOC_CREFERENCE or LOC_CONSTANT; any other location raises internal
    error 200203282. }
  187. procedure ti386addnode.second_cmp64bit;
  188. var
  189. hregister,
  190. hregister2 : tregister;
  191. href : treference;
  192. unsigned : boolean;
  { Emits the jumps that follow the compare of the high dwords.
    ltn/gtn/lten/gten: one conditional jump to the true label and one, using
    the flags of the opposite strict relation, to the false label.
    equaln: jump to the false label on F_NE.
    unequaln: jump to the true label on F_NE.
    The code emitted next by the caller is the compare of the low dwords. }
  193. procedure firstjmp64bitcmp;
  194. var
  195. oldnodetype : tnodetype;
  196. begin
  197. {$ifdef OLDREGVARS}
  198. load_all_regvars(current_asmdata.CurrAsmList);
  199. {$endif OLDREGVARS}
  200. { the jump sequence is a little bit hairy }
  201. case nodetype of
  202. ltn,gtn:
  203. begin
  204. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrTrueLabel);
  205. { cheat a little bit for the negative test }
  { nf_swapped is toggled only around the getresflags call and restored
    right after it }
  206. toggleflag(nf_swapped);
  207. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrFalseLabel);
  208. toggleflag(nf_swapped);
  209. end;
  210. lten,gten:
  211. begin
  { nodetype is temporarily replaced by the strict relation, and then by
    the opposite strict relation, so that getresflags returns the flags
    for those; the original value is restored at the end }
  212. oldnodetype:=nodetype;
  213. if nodetype=lten then
  214. nodetype:=ltn
  215. else
  216. nodetype:=gtn;
  217. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrTrueLabel);
  218. { cheat for the negative test }
  219. if nodetype=ltn then
  220. nodetype:=gtn
  221. else
  222. nodetype:=ltn;
  223. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrFalseLabel);
  224. nodetype:=oldnodetype;
  225. end;
  226. equaln:
  227. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrFalseLabel);
  228. unequaln:
  229. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrTrueLabel);
  230. end;
  231. end;
  { Emits the jumps that follow the compare of the low dwords: a conditional
    jump to one label followed by an unconditional jump to the other one. }
  232. procedure secondjmp64bitcmp;
  233. begin
  234. { the jump sequence is a little bit hairy }
  235. case nodetype of
  236. ltn,gtn,lten,gten:
  237. begin
  238. { the comparison of the low dword has to be }
  239. { always unsigned! }
  240. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(true),current_procinfo.CurrTrueLabel);
  241. cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
  242. end;
  243. equaln:
  244. begin
  245. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrFalseLabel);
  246. cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrTrueLabel);
  247. end;
  248. unequaln:
  249. begin
  250. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,current_procinfo.CurrTrueLabel);
  251. cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
  252. end;
  253. end;
  254. end;
  255. begin
  256. pass_left_right;
  { unsigned as soon as either operand is an ordinal of type u64bit }
  257. unsigned:=((left.resultdef.typ=orddef) and
  258. (torddef(left.resultdef).ordtype=u64bit)) or
  259. ((right.resultdef.typ=orddef) and
  260. (torddef(right.resultdef).ordtype=u64bit));
  261. { left and right no register? }
  262. { then one must be demanded }
  263. if (left.location.loc<>LOC_REGISTER) then
  264. begin
  265. if (right.location.loc<>LOC_REGISTER) then
  266. begin
  267. { we can reuse a CREGISTER for comparison }
  268. if (left.location.loc<>LOC_CREGISTER) then
  269. begin
  270. hregister:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  271. hregister2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  272. cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(hregister,hregister2));
  273. location_freetemp(current_asmdata.CurrAsmList,left.location);
  274. location_reset(left.location,LOC_REGISTER,left.location.size);
  275. left.location.register64.reglo:=hregister;
  276. left.location.register64.reghi:=hregister2;
  277. end;
  278. end
  279. else
  280. begin
  { only right is in a register: exchange the locations and record it in
    nf_swapped }
  281. location_swap(left.location,right.location);
  282. toggleflag(nf_swapped);
  283. end;
  284. end;
  285. { at this point, left.location.loc should be LOC_REGISTER, or LOC_CREGISTER when it was reused above }
  286. if right.location.loc=LOC_REGISTER then
  287. begin
  288. emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
  289. firstjmp64bitcmp;
  290. emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
  291. secondjmp64bitcmp;
  292. end
  293. else
  294. begin
  295. case right.location.loc of
  296. LOC_CREGISTER :
  297. begin
  298. emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
  299. firstjmp64bitcmp;
  300. emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
  301. secondjmp64bitcmp;
  302. end;
  303. LOC_CREFERENCE,
  304. LOC_REFERENCE :
  305. begin
  306. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  { href addresses the dword at offset +4, which is compared against reghi;
    the unmodified reference is compared against reglo }
  307. href:=right.location.reference;
  308. inc(href.offset,4);
  309. emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi);
  310. firstjmp64bitcmp;
  311. emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
  312. secondjmp64bitcmp;
  { NOTE(review): secondjmp64bitcmp already ends with an unconditional jump
    for every node type it handles, so this extra jump looks redundant;
    the other branches do not emit it - confirm before removing }
  313. cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
  314. location_freetemp(current_asmdata.CurrAsmList,right.location);
  315. end;
  316. LOC_CONSTANT :
  317. begin
  { the constant is split with hi() and lo() into two immediates }
  318. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
  319. firstjmp64bitcmp;
  320. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
  321. secondjmp64bitcmp;
  322. end;
  323. else
  324. internalerror(200203282);
  325. end;
  326. end;
  327. { we have LOC_JUMP as result }
  328. location_reset(location,LOC_JUMP,OS_NO)
  329. end;
  330. {*****************************************************************************
  331. x86 MUL
  332. *****************************************************************************}
  { Generates a single-operand multiplication through EAX/EDX.
    unsigned selects the instruction: A_MUL when true, A_IMUL when false.
    The right operand is loaded into EAX, the left operand is used as the
    register or memory operand of the instruction, and the result is moved
    from EAX (or from EDX and EAX when the result type is 64 bits wide) into
    newly allocated registers that form the node's location.
    When cs_check_overflow is active and the result is not 64 bits wide, a
    call to FPC_OVERFLOW is emitted together with a jump over it. }
  333. procedure ti386addnode.second_mul(unsigned: boolean);
  334. var reg:Tregister;
  335. ref:Treference;
  336. use_ref:boolean;
  337. hl4 : tasmlabel;
  338. const
  { indexed by the unsigned parameter: false gives A_IMUL, true gives A_MUL }
  339. asmops: array[boolean] of tasmop = (A_IMUL, A_MUL);
  340. begin
  341. pass_left_right;
  342. {The location.register will be filled in later (JM)}
  343. location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
  344. { Mul supports registers and references, so if not register/reference,
  345. load the location into a register.
  346. The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
  { use_ref tells below whether ref or reg is the instruction operand }
  347. use_ref:=false;
  348. if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
  349. reg:=left.location.register
  350. else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
  351. begin
  352. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  353. ref:=left.location.reference;
  354. use_ref:=true;
  355. end
  356. else
  357. begin
  358. {LOC_CONSTANT for example.}
  359. reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  360. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
  361. end;
  362. {Allocate EAX.}
  363. cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
  364. {Load the right value.}
  365. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EAX);
  366. {Also allocate EDX, since it is also modified by a mul (JM).}
  367. cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
  368. if use_ref then
  369. emit_ref(asmops[unsigned],S_L,ref)
  370. else
  371. emit_reg(asmops[unsigned],S_L,reg);
  372. if (cs_check_overflow in current_settings.localswitches) and
  373. { 32->64 bit cannot overflow }
  374. (not is_64bit(resultdef)) then
  375. begin
  { the FPC_OVERFLOW call is jumped over on F_AE, for MUL and IMUL alike.
    NOTE(review): presumably this relies on both instructions setting the
    carry flag when the product does not fit in 32 bits - confirm against
    the instruction reference }
  376. current_asmdata.getjumplabel(hl4);
  377. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
  378. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
  379. cg.a_label(current_asmdata.CurrAsmList,hl4);
  380. end;
  381. {Free EDX here; EAX is released further down, in both branches}
  382. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
  383. if is_64bit(resultdef) then
  384. begin
  385. {Allocate a couple of registers and store EDX:EAX into it}
  { NOTE(review): EDX is read here after it was released above, and EAX is
    released before reglo is allocated; presumably this ordering lets the
    register allocator hand the freed register to the new one - confirm }
  386. location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  387. cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
  388. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
  389. location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  390. cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
  391. end
  392. else
  393. begin
  394. {Allocate a new register and store the result in EAX in it.}
  395. location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  396. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
  397. cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
  398. end;
  399. location_freetemp(current_asmdata.CurrAsmList,left.location);
  400. location_freetemp(current_asmdata.CurrAsmList,right.location);
  401. end;
  { Unit initialization: stores the ti386addnode class in caddnode.
    NOTE(review): presumably caddnode is the class reference used to create
    add nodes, so that this unit's overrides are picked up - confirm where
    caddnode is declared. }
  402. begin
  403. caddnode:=ti386addnode;
  404. end.