n8086add.pas 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Code generation for add nodes on the i8086
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit n8086add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nadd,cpubase,nx86add;
  type
    { Add-node code generator of this unit; it specialises the shared x86
      implementation (tx86addnode). }
    ti8086addnode = class(tx86addnode)
      { Always answers True in this unit. }
      function use_generic_mul32to64 : boolean; override;
      { Sends muln nodes to second_mul and everything else to the inherited
        implementation. }
      procedure second_addordinal; override;
      { 64-bit add, sub, xor, or and and. }
      procedure second_add64bit; override;
      { 64-bit comparisons; the result location is LOC_JUMP. }
      procedure second_cmp64bit; override;
      { Emits MUL (unsigned=true) or IMUL (unsigned=false). }
      procedure second_mul(unsigned: boolean);
    end;
  30. implementation
  31. uses
  32. globtype,systems,
  33. cutils,verbose,globals,
  34. symconst,symdef,paramgr,defutil,
  35. aasmbase,aasmtai,aasmdata,aasmcpu,
  36. cgbase,procinfo,
  37. ncon,nset,cgutils,tgobj,
  38. cga,ncgutil,cgobj,cg64f32,cgx86,
  39. hlcgobj;
  40. {*****************************************************************************
  41. use_generic_mul32to64
  42. *****************************************************************************}
  { Always returns True.
    NOTE(review): presumably this tells the shared add-node code to use its
    generic 32x32->64 multiplication path on this target - confirm against
    the ancestor class. }
  function ti8086addnode.use_generic_mul32to64: boolean;
    begin
      use_generic_mul32to64:=true;
    end;
  { Ordinal add nodes: multiplications are generated by second_mul, every
    other node type is left to the inherited implementation. }
  procedure ti8086addnode.second_addordinal;
    begin
      if nodetype<>muln then
        inherited second_addordinal
      else
        { the multiplication is unsigned unless both operands are signed }
        second_mul(not(is_signed(left.resultdef) and
                       is_signed(right.resultdef)));
    end;
  59. {*****************************************************************************
  60. Add64bit
  61. *****************************************************************************}
  { Generates code for a 64-bit addn, subn, xorn, orn or andn node.
    The result is computed into the register pair of left.location, which is
    copied to location at the end.  For addn and subn an overflow check is
    appended when cs_check_overflow is active.  Any other node type raises
    internalerror(200109051). }
  procedure ti8086addnode.second_add64bit;
    var
      op           : TOpCG;
      lowreg,
      highreg,
      scratch      : tregister;
      no_overflow  : tasmlabel;
      can_overflow,
      is_unsigned,
      reversed_sub : boolean;
    begin
      pass_left_right;

      { the operation counts as unsigned when either operand is a u64bit ordinal }
      is_unsigned:=((left.resultdef.typ=orddef) and
                    (torddef(left.resultdef).ordtype=u64bit)) or
                   ((right.resultdef.typ=orddef) and
                    (torddef(right.resultdef).ordtype=u64bit));

      { only addition and subtraction get an overflow check }
      can_overflow:=nodetype in [addn,subn];

      case nodetype of
        addn:
          op:=OP_ADD;
        subn:
          op:=OP_SUB;
        xorn:
          op:=OP_XOR;
        orn:
          op:=OP_OR;
        andn:
          op:=OP_AND;
        else
          { everything should be handled in pass_1 (JM) }
          internalerror(200109051);
      end;

      { if left is not in a register, one of the operands has to be moved }
      if left.location.loc<>LOC_REGISTER then
        begin
          if right.location.loc=LOC_REGISTER then
            begin
              { right already is in registers: exchange the operands and
                record that in nf_swapped }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { neither operand is in a register: load left into a new pair }
              lowreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
              highreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
              cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,
                joinreg64(lowreg,highreg));
              location_reset(left.location,LOC_REGISTER,left.location.size);
              left.location.register64.reglo:=lowreg;
              left.location.register64.reghi:=highreg;
            end;
        end;

      { at this point, left.location.loc should be LOC_REGISTER }

      { a subtraction whose operands were exchanged needs special treatment,
        because it is not commutative }
      reversed_sub:=(nodetype=subn) and (nf_swapped in flags);

      if right.location.loc=LOC_REGISTER then
        begin
          if reversed_sub then
            begin
              { when swapped another result register: the register pairs are
                passed in the opposite order and the exchange is undone
                afterwards }
              cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
                left.location.register64,
                right.location.register64);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
              right.location.register64,
              left.location.register64);
        end
      else
        begin
          { right.location<>LOC_REGISTER }
          if reversed_sub then
            begin
              { load each half of right into a scratch register, subtract the
                matching half of left from it and move the difference back
                into left's register }
              scratch:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
              cg64.a_load64low_loc_reg(current_asmdata.CurrAsmList,right.location,scratch);
              emit_reg_reg(A_SUB,S_L,left.location.register64.reglo,scratch);
              emit_reg_reg(A_MOV,S_L,scratch,left.location.register64.reglo);
              cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,scratch);
              { the carry flag is still ok }
              emit_reg_reg(A_SBB,S_L,left.location.register64.reghi,scratch);
              emit_reg_reg(A_MOV,S_L,scratch,left.location.register64.reghi);
            end
          else
            cg64.a_op64_loc_reg(current_asmdata.CurrAsmList,op,location.size,
              right.location,
              left.location.register64);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end;

      { The overflow code has to be produced right here, because the
        signedness of the operation is only known through the local
        is_unsigned variable. }
      if can_overflow and
         (cs_check_overflow in current_settings.localswitches) then
        begin
          current_asmdata.getjumplabel(no_overflow);
          { skip the FPC_OVERFLOW call when the flags report no overflow }
          if is_unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,no_overflow)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,no_overflow);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,no_overflow);
        end;

      location_copy(location,left.location);
    end;
  { Generates code for a 64-bit comparison.
    The high halves are compared first and followed by the jumps of
    firstjmp64bitcmp, then the low halves are compared and followed by the
    jumps of secondjmp64bitcmp.  The result location is LOC_JUMP: control
    reaches current_procinfo.CurrTrueLabel or CurrFalseLabel. }
  procedure ti8086addnode.second_cmp64bit;
    var
      lowreg,
      highreg     : tregister;
      highref     : treference;
      is_unsigned : boolean;

    { Jumps emitted after the comparison of the high halves.  Nothing is
      emitted where the low halves still have to decide. }
    procedure firstjmp64bitcmp;
      var
        saved_nodetype,
        strict_cmp,
        reverse_cmp : tnodetype;
      begin
        {$ifdef OLDREGVARS}
        load_all_regvars(current_asmdata.CurrAsmList);
        {$endif OLDREGVARS}
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn:
            begin
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(is_unsigned),
                current_procinfo.CurrTrueLabel);
              { cheat a little bit for the negative test: nf_swapped is
                toggled only around the second getresflags call }
              toggleflag(nf_swapped);
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(is_unsigned),
                current_procinfo.CurrFalseLabel);
              toggleflag(nf_swapped);
            end;
          lten,gten:
            begin
              saved_nodetype:=nodetype;
              if saved_nodetype=lten then
                begin
                  strict_cmp:=ltn;
                  reverse_cmp:=gtn;
                end
              else
                begin
                  strict_cmp:=gtn;
                  reverse_cmp:=ltn;
                end;
              { getresflags is called while nodetype is temporarily replaced:
                the strict comparison selects the jump to the true label ... }
              nodetype:=strict_cmp;
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(is_unsigned),
                current_procinfo.CurrTrueLabel);
              { ... and the opposite strict comparison the jump to the false
                label (cheat for the negative test) }
              nodetype:=reverse_cmp;
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(is_unsigned),
                current_procinfo.CurrFalseLabel);
              nodetype:=saved_nodetype;
            end;
          equaln:
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,
              current_procinfo.CurrFalseLabel);
          unequaln:
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,
              current_procinfo.CurrTrueLabel);
        end;
      end;

    { Jumps emitted after the comparison of the low halves: one conditional
      jump followed by an unconditional jump to the other label. }
    procedure secondjmp64bitcmp;
      begin
        case nodetype of
          ltn,gtn,lten,gten:
            begin
              { the comparison of the low dword always has to be unsigned! }
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(true),
                current_procinfo.CurrTrueLabel);
              cg.a_jmp_always(current_asmdata.CurrAsmList,
                current_procinfo.CurrFalseLabel);
            end;
          equaln:
            begin
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,
                current_procinfo.CurrFalseLabel);
              cg.a_jmp_always(current_asmdata.CurrAsmList,
                current_procinfo.CurrTrueLabel);
            end;
          unequaln:
            begin
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,
                current_procinfo.CurrTrueLabel);
              cg.a_jmp_always(current_asmdata.CurrAsmList,
                current_procinfo.CurrFalseLabel);
            end;
        end;
      end;

    begin
      pass_left_right;

      { the comparison is unsigned when either operand is a u64bit ordinal }
      is_unsigned:=((left.resultdef.typ=orddef) and
                    (torddef(left.resultdef).ordtype=u64bit)) or
                   ((right.resultdef.typ=orddef) and
                    (torddef(right.resultdef).ordtype=u64bit));

      { if left is not in a register, one of the operands has to be moved }
      if left.location.loc<>LOC_REGISTER then
        begin
          if right.location.loc=LOC_REGISTER then
            begin
              { right already is in registers: exchange the operands and
                record that in nf_swapped }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else if left.location.loc<>LOC_CREGISTER then
            begin
              { a CREGISTER can be reused for the comparison as it is;
                anything else is loaded into a new register pair }
              lowreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
              highreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
              cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,
                joinreg64(lowreg,highreg));
              location_freetemp(current_asmdata.CurrAsmList,left.location);
              location_reset(left.location,LOC_REGISTER,left.location.size);
              left.location.register64.reglo:=lowreg;
              left.location.register64.reghi:=highreg;
            end;
        end;

      { at this point, left.location.loc should be LOC_REGISTER
        (or LOC_CREGISTER, see above) }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          begin
            emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,
              left.location.register64.reghi);
            firstjmp64bitcmp;
            emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,
              left.location.register64.reglo);
            secondjmp64bitcmp;
          end;
        LOC_CREFERENCE,
        LOC_REFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            { the high half is addressed at offset +4 of the reference }
            highref:=right.location.reference;
            highref.offset:=highref.offset+4;
            emit_ref_reg(A_CMP,S_L,highref,left.location.register64.reghi);
            firstjmp64bitcmp;
            emit_ref_reg(A_CMP,S_L,right.location.reference,
              left.location.register64.reglo);
            secondjmp64bitcmp;
            { NOTE(review): every branch of secondjmp64bitcmp already ends in
              an unconditional jump, so this one looks redundant; it is kept
              so that the emitted code does not change }
            cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
            location_freetemp(current_asmdata.CurrAsmList,right.location);
          end;
        LOC_CONSTANT :
          begin
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,
              aint(hi(right.location.value64)),left.location.register64.reghi));
            firstjmp64bitcmp;
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,
              aint(lo(right.location.value64)),left.location.register64.reglo));
            secondjmp64bitcmp;
          end;
        else
          internalerror(200203282);
      end;

      { we have LOC_JUMP as result }
      location_reset(location,LOC_JUMP,OS_NO)
    end;
  329. {*****************************************************************************
  330. x86 MUL
  331. *****************************************************************************}
  { Generates code for a multiplication node.

    unsigned selects the instruction: MUL when true, IMUL when false.
    The right operand is loaded into AX and multiplied by the left operand
    (a register or a memory reference).  When resultdef is a 32-bit integer
    the result is taken from DX:AX and stored in a 32-bit register pair;
    otherwise the result is taken from AX alone.  With overflow checking
    enabled and a result that is not 32 bits wide, a call to FPC_OVERFLOW is
    emitted which is skipped when the F_AE condition holds.

    Both AX and DX are released again on every path. }
  procedure ti8086addnode.second_mul(unsigned: boolean);

    { Appends a move instruction to the current assembler list and, unless its
      source is the frame pointer or the stack pointer, reports it to the
      register allocator first so that it can try to eliminate it. }
    procedure add_mov(instr: Taicpu);
      begin
        if (instr.oper[0]^.reg<>current_procinfo.framepointer) and
           (instr.oper[0]^.reg<>NR_STACK_POINTER_REG) then
          tcgx86(cg).add_move_instruction(instr);
        current_asmdata.CurrAsmList.concat(instr);
      end;

    var
      reg     : Tregister;
      ref     : Treference;
      use_ref : boolean;
      hl4     : tasmlabel;
    const
      { indexed by the unsigned parameter }
      asmops: array[boolean] of tasmop = (A_IMUL, A_MUL);
    begin
      pass_left_right;

      { The location.register will be filled in later (JM) }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));

      { Mul supports registers and references, so if not register/reference,
        load the location into a register. }
      use_ref:=false;
      if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
        reg:=left.location.register
      else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        begin
          tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
          ref:=left.location.reference;
          use_ref:=true;
        end
      else
        begin
          { LOC_CONSTANT for example. }
          reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
        end;

      { Allocate AX. }
      cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
      { Load the right value. }
      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_AX);
      { Also allocate DX, since it is also modified by a mul (JM). }
      cg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);

      if use_ref then
        emit_ref(asmops[unsigned],S_W,ref)
      else
        emit_reg(asmops[unsigned],S_W,reg);

      if (cs_check_overflow in current_settings.localswitches) and
         { 16->32 bit cannot overflow }
         (not is_32bitint(resultdef)) then
        begin
          current_asmdata.getjumplabel(hl4);
          cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,hl4);
        end;

      { Free DX; AX is freed below, directly before the move that reads it. }
      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
      if is_32bitint(resultdef) then
        begin
          { Allocate an imaginary 32-bit register, which consists of a pair of
            16-bit registers and store DX:AX into it. }
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
          { DX has just been released, so it is copied out first, before any
            other register of the result is written. }
          add_mov(Taicpu.Op_reg_reg(A_MOV,S_W,NR_DX,GetNextReg(location.register)));
          { Release AX on this path as well (it used to stay allocated here),
            following the same unget-then-move pattern as the 16-bit path. }
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
          add_mov(Taicpu.Op_reg_reg(A_MOV,S_W,NR_AX,location.register));
        end
      else
        begin
          { Allocate a new register and store the result in AX in it. }
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_AX,location.register);
        end;

      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Unit start-up code: assigns this unit's add-node class to caddnode. }
  initialization
    caddnode:=ti8086addnode;
  end.