{ n386add.pas — i386 add-node code generation (Free Pascal compiler) }
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Code generation for add nodes on the i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit n386add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nadd,cpubase,nx86add;
type
  { i386-specific add-node code generator. Extends the common x86 one
    with a dedicated 32->64 bit multiplication (second_mul), inlined
    64-bit arithmetic/comparison on 32-bit register pairs, and an
    inlined lower-half 64x64 multiplication (second_mul64bit). }
  ti386addnode = class(tx86addnode)
    function use_generic_mul32to64: boolean; override;
    function use_generic_mul64bit: boolean; override;
    { dispatches muln nodes to second_mul where this target can do better
      than the generic code generator }
    procedure second_addordinal; override;
    procedure second_add64bit;override;
    procedure second_cmp64bit;override;
    procedure second_mul(unsigned: boolean);
    procedure second_mul64bit;
  protected
    { moves the EDX:EAX / EAX multiplication result into virtual registers }
    procedure set_mul_result_location;
  end;
  34. implementation
  35. uses
  36. globtype,systems,
  37. cutils,verbose,globals,
  38. symconst,symdef,paramgr,defutil,
  39. aasmbase,aasmtai,aasmdata,aasmcpu,
  40. cgbase,procinfo,
  41. ncon,nset,cgutils,tgobj,
  42. cpuinfo,
  43. cga,ncgutil,cgobj,cg64f32,cgx86,
  44. hlcgobj;
  45. {*****************************************************************************
  46. use_generic_mul32to64
  47. *****************************************************************************}
  48. function ti386addnode.use_generic_mul32to64: boolean;
  49. begin
  50. result := False;
  51. end;
  52. function ti386addnode.use_generic_mul64bit: boolean;
  53. begin
  54. result:=needoverflowcheck or
  55. (cs_opt_size in current_settings.optimizerswitches);
  56. end;
  57. { handles all unsigned multiplications, and 32->64 bit signed ones.
  58. 32bit-only signed mul is handled by generic codegen }
  59. procedure ti386addnode.second_addordinal;
  60. var
  61. unsigned: boolean;
  62. begin
  63. unsigned:=not(is_signed(left.resultdef)) or
  64. not(is_signed(right.resultdef));
  65. { use IMUL instead of MUL in case overflow checking is off and we're
  66. doing a 32->32-bit multiplication }
  67. if not needoverflowcheck and
  68. not is_64bit(resultdef) then
  69. unsigned:=false;
  70. if (nodetype=muln) and (unsigned or is_64bit(resultdef)) then
  71. second_mul(unsigned)
  72. else
  73. inherited second_addordinal;
  74. end;
  75. {*****************************************************************************
  76. Add64bit
  77. *****************************************************************************}
{ Generates code for 64-bit add/sub/and/or/xor nodes, operating on
  32-bit register pairs (reglo/reghi). muln is delegated to
  second_mul64bit; any other nodetype should have been rewritten in
  pass_1 and triggers an internalerror. }
procedure ti386addnode.second_add64bit;
  var
    op : TOpCG;
    op1,op2 : TAsmOp;
    opsize : TOpSize;
    hregister,
    hregister2 : tregister;
    hl4 : tasmlabel;
    mboverflow,
    unsigned:boolean;
    r:Tregister;
  begin
    pass_left_right;
    op1:=A_NONE;
    op2:=A_NONE;
    mboverflow:=false;
    opsize:=S_L;
    { the operation is unsigned if either operand is an unsigned 64-bit
      ordinal; this only matters for the overflow check below }
    unsigned:=((left.resultdef.typ=orddef) and
               (torddef(left.resultdef).ordtype=u64bit)) or
              ((right.resultdef.typ=orddef) and
               (torddef(right.resultdef).ordtype=u64bit));
    case nodetype of
      addn :
        begin
          op:=OP_ADD;
          mboverflow:=true;
        end;
      subn :
        begin
          op:=OP_SUB;
          { raw opcodes needed for the swapped-subtract path below, where
            the low/high dwords are subtracted with SUB/SBB explicitly }
          op1:=A_SUB;
          op2:=A_SBB;
          mboverflow:=true;
        end;
      xorn:
        op:=OP_XOR;
      orn:
        op:=OP_OR;
      andn:
        op:=OP_AND;
      muln:
        begin
          second_mul64bit;
          exit;
        end
      else
        begin
          { everything should be handled in pass_1 (JM) }
          internalerror(2001090505);
        end;
    end;

    { left and right no register? }
    { then one must be demanded }
    if (left.location.loc<>LOC_REGISTER) then
      begin
        if (right.location.loc<>LOC_REGISTER) then
          begin
            { neither operand is in registers: force left into a freshly
              allocated register pair }
            hregister:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            hregister2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(hregister,hregister2));
            location_reset(left.location,LOC_REGISTER,left.location.size);
            left.location.register64.reglo:=hregister;
            left.location.register64.reghi:=hregister2;
          end
        else
          begin
            { right is already in a register: swap the operands and
              remember the swap (matters for subn) }
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end;
      end;

    { at this point, left.location.loc should be LOC_REGISTER }
    if right.location.loc=LOC_REGISTER then
      begin
        { keep the flags register live through the operation when the
          overflow check afterwards needs to inspect it }
        if mboverflow and needoverflowcheck then
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        { when swapped another result register }
        if (nodetype=subn) and (nf_swapped in flags) then
          begin
            cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
              left.location.register64,
              right.location.register64);
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end
        else
          begin
            cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
              right.location.register64,
              left.location.register64);
          end;
      end
    else
      begin
        { right.location<>LOC_REGISTER }
        if (nodetype=subn) and (nf_swapped in flags) then
          begin
            { swapped subtraction: load each dword of right into scratch
              register r, subtract the corresponding dword of left from it
              (SUB then SBB, the latter consuming the borrow), and move
              the result back into left's register pair }
            r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            cg64.a_load64low_loc_reg(current_asmdata.CurrAsmList,right.location,r);
            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            emit_reg_reg(op1,opsize,left.location.register64.reglo,r);
            emit_reg_reg(A_MOV,opsize,r,left.location.register64.reglo);
            cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,r);
            { the carry flag is still ok }
            emit_reg_reg(op2,opsize,left.location.register64.reghi,r);
            { We need to keep the FLAGS register allocated for overflow checks }
            if not mboverflow or not needoverflowcheck then
              cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            emit_reg_reg(A_MOV,opsize,r,left.location.register64.reghi);
          end
        else
          begin
            if mboverflow and needoverflowcheck then
              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            cg64.a_op64_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,
              left.location.register64);
          end;
        location_freetemp(current_asmdata.CurrAsmList,right.location);
      end;

    { only in case of overflow operations }
    { produce overflow code }
    { we must put it here directly, because sign of operation }
    { is in unsigned VAR!! }
    if mboverflow then
      begin
        if needoverflowcheck then
          begin
            current_asmdata.getjumplabel(hl4);
            { unsigned overflow is signalled by the carry flag (F_AE jumps
              when it is clear), signed overflow by the overflow flag }
            if unsigned then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
            else
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
            cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
            cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
            cg.a_label(current_asmdata.CurrAsmList,hl4);
          end;
      end;
    location_copy(location,left.location);
  end;
{ Generates code for 64-bit comparisons: compare the high dwords first,
  then — if they are equal and the answer is still open — the low dwords.
  The result is a LOC_JUMP location. }
procedure ti386addnode.second_cmp64bit;
  var
    truelabel,
    falselabel,
    hlab : tasmlabel;
    href : treference;
    unsigned : boolean;

  { emits the conditional jumps that can already be decided from the
    high-dword compare; equality of the high dwords falls through to the
    low-dword compare }
  procedure firstjmp64bitcmp;
    var
      oldnodetype : tnodetype;
    begin
      { the jump the sequence is a little bit hairy }
      case nodetype of
        ltn,gtn:
          begin
            if (hlab<>location.truelabel) then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.truelabel);
            { cheat a little bit for the negative test }
            toggleflag(nf_swapped);
            if (hlab<>location.falselabel) then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.falselabel);
            toggleflag(nf_swapped);
          end;
        lten,gten:
          begin
            { on the high dwords <= acts like < and >= like >; temporarily
              rewrite nodetype so getresflags yields the right condition }
            oldnodetype:=nodetype;
            if nodetype=lten then
              nodetype:=ltn
            else
              nodetype:=gtn;
            if (hlab<>location.truelabel) then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.truelabel);
            { cheat for the negative test }
            if nodetype=ltn then
              nodetype:=gtn
            else
              nodetype:=ltn;
            if (hlab<>location.falselabel) then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.falselabel);
            nodetype:=oldnodetype;
          end;
        equaln:
          cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.falselabel);
        unequaln:
          cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.truelabel);
        else
          internalerror(2019050905);
      end;
    end;

  { emits the jumps after the low-dword compare; at this point the high
    dwords are known to be equal, so the low dwords fully decide }
  procedure secondjmp64bitcmp;
    begin
      { the jump the sequence is a little bit hairy }
      case nodetype of
        ltn,gtn,lten,gten:
          begin
            { the comparisaion of the low dword have to be }
            { always unsigned! }
            cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(true),location.truelabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
          end;
        equaln:
          begin
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.falselabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.truelabel);
          end;
        unequaln:
          begin
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.truelabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
          end;
        else
          internalerror(2019050904);
      end;
    end;

  begin
    truelabel:=nil;
    falselabel:=nil;
    pass_left_right;
    { the comparison is unsigned if either operand is an unsigned 64-bit
      ordinal }
    unsigned:=((left.resultdef.typ=orddef) and
               (torddef(left.resultdef).ordtype=u64bit)) or
              ((right.resultdef.typ=orddef) and
               (torddef(right.resultdef).ordtype=u64bit));
    { we have LOC_JUMP as result }
    current_asmdata.getjumplabel(truelabel);
    current_asmdata.getjumplabel(falselabel);
    location_reset_jump(location,truelabel,falselabel);
    { Relational compares against constants having low dword=0 can omit the
      second compare based on the fact that any unsigned value is >=0 }
    hlab:=nil;
    if (right.location.loc=LOC_CONSTANT) and
       (lo(right.location.value64)=0) then
      begin
        case getresflags(true) of
          F_AE: hlab:=location.truelabel ;
          F_B: hlab:=location.falselabel;
          else
            ;
        end;
      end;
    if (right.location.loc=LOC_CONSTANT) and
       (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
      begin
        { memory vs. constant: compare directly against the reference,
          high dword (offset+4) first }
        tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
        href:=left.location.reference;
        inc(href.offset,4);
        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        emit_const_ref(A_CMP,S_L,aint(hi(right.location.value64)),href);
        firstjmp64bitcmp;
        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        if assigned(hlab) then
          cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
        else
          begin
            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            emit_const_ref(A_CMP,S_L,aint(lo(right.location.value64)),left.location.reference);
            secondjmp64bitcmp;
            cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          end;
        location_freetemp(current_asmdata.CurrAsmList,left.location);
        exit;
      end;
    { left and right no register? }
    { then one must be demanded }
    if not (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
      begin
        if not (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true)
        else
          begin
            { swap so that the register operand ends up on the left;
              nf_swapped makes getresflags invert the condition }
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end;
      end;
    { at this point, left.location.loc should be LOC_[C]REGISTER }
    case right.location.loc of
      LOC_REGISTER,
      LOC_CREGISTER :
        begin
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
          firstjmp64bitcmp;
          emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
          secondjmp64bitcmp;
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        end;
      LOC_CREFERENCE,
      LOC_REFERENCE :
        begin
          { register vs. memory: high dword lives at offset+4 }
          tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
          href:=right.location.reference;
          inc(href.offset,4);
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi);
          firstjmp64bitcmp;
          emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
          secondjmp64bitcmp;
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end;
      LOC_CONSTANT :
        begin
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
          firstjmp64bitcmp;
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          { hlab<>nil: the low dword of the constant is 0, so the second
            compare is statically decided }
          if assigned(hlab) then
            cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
          else
            begin
              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
              secondjmp64bitcmp;
              cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            end;
        end;
      else
        internalerror(2002032803);
    end;
  end;
  395. {*****************************************************************************
  396. x86 MUL
  397. *****************************************************************************}
{ Copies the multiplication result out of the fixed cpu registers into
  freshly allocated virtual registers and sets up `location`:
  EDX:EAX for a 64-bit result, EAX alone for a 32-bit one. Also frees
  any temps held by the operand locations. }
procedure ti386addnode.set_mul_result_location;
  begin
    location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
    {Free EAX,EDX}
    cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
    if is_64bit(resultdef) then
      begin
        {Allocate a couple of registers and store EDX:EAX into it}
        location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
        { EAX is released only after EDX has been saved }
        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
        location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
      end
    else
      begin
        {Allocate a new register and store the result in EAX in it.}
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
      end;
    location_freetemp(current_asmdata.CurrAsmList,left.location);
    location_freetemp(current_asmdata.CurrAsmList,right.location);
  end;
{ Generates an unsigned multiplication (MUL, or MULX on BMI2-capable
  targets), or a signed 32->64 bit multiplication (single-operand IMUL).
  Called from second_addordinal for muln nodes. }
procedure ti386addnode.second_mul(unsigned: boolean);
  var reg,reghi,reglo:Tregister;
      ref:Treference;
      use_ref:boolean;
      hl4 : tasmlabel;
  const
    { indexed by `unsigned`: false -> IMUL, true -> MUL }
    asmops: array[boolean] of tasmop = (A_IMUL, A_MUL);
  begin
    pass_left_right;
    reg:=NR_NO;
    reference_reset(ref,sizeof(pint),[]);
    { Mul supports registers and references, so if not register/reference,
      load the location into a register.
      The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
    use_ref:=false;
    if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
      reg:=left.location.register
    else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
      begin
        tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
        ref:=left.location.reference;
        use_ref:=true;
      end
    else
      begin
        { LOC_CONSTANT for example.}
        reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
      end;
    { MULX writes the full product to two arbitrary destination registers
      but provides no overflow information, so it is only usable when no
      overflow check is needed (or none can occur) }
    if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and
       (not(needoverflowcheck) or
        { 32->64 bit cannot overflow }
        is_64bit(resultdef)) then
      begin
        { MULX takes its second source operand implicitly in EDX }
        cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EDX);
        reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        if use_ref then
          current_asmdata.CurrAsmList.concat(Taicpu.Op_ref_reg_reg(A_MULX,S_L,ref,reglo,reghi))
        else
          emit_reg_reg_reg(A_MULX,S_L,reg,reglo,reghi);
        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register64.reglo:=reglo;
        { for a 32-bit result the high half is simply discarded }
        if is_64bit(resultdef) then
          location.register64.reghi:=reghi;
        location_freetemp(current_asmdata.CurrAsmList,left.location);
        location_freetemp(current_asmdata.CurrAsmList,right.location);
      end
    else
      begin
        { Allocate EAX. }
        cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
        { Load the right value. }
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EAX);
        { Also allocate EDX, since it is also modified by a mul (JM). }
        cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
        if needoverflowcheck then
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
        if use_ref then
          emit_ref(asmops[unsigned],S_L,ref)
        else
          emit_reg(asmops[unsigned],S_L,reg);
        if needoverflowcheck and
           { 32->64 bit cannot overflow }
           (not is_64bit(resultdef)) then
          begin
            { MUL/IMUL set the carry flag when the product does not fit
              in 32 bits; F_AE jumps over the overflow call when clear }
            current_asmdata.getjumplabel(hl4);
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
            cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
            cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
            cg.a_label(current_asmdata.CurrAsmList,hl4);
          end;
        set_mul_result_location;
      end;
  end;
{ Generates the lower 64 bits of a 64x64 bit multiplication as
  lo(l)*lo(r) + ((lo(l)*hi(r) + hi(l)*lo(r)) shl 32), using one
  unsigned MUL for the low product and IMULs for the cross terms. }
procedure ti386addnode.second_mul64bit;
var
  list: TAsmList;
  hreg1,hreg2: tregister;
begin
  { 64x64 multiplication yields 128-bit result, but we're only
    interested in its lower 64 bits. This lower part is independent
    of operand signs, and so is the generated code. }
  { pass_left_right already called from second_add64bit }
  list:=current_asmdata.CurrAsmList;
  if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
    tcgx86(cg).make_simple_ref(list,left.location.reference);
  if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
    tcgx86(cg).make_simple_ref(list,right.location.reference);
  { calculate 32-bit terms lo(right)*hi(left) and hi(left)*lo(right) }
  if (right.location.loc=LOC_CONSTANT) then
    begin
      { if left has side effects, it could be that this code is called with right.location.value64=0,
        see also #40182 }
      if right.location.value64=0 then
        begin
          { result is constant zero; left was still evaluated above }
          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
          location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
          emit_const_reg(A_MOV,S_L,0,location.register64.reglo);
          location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
          emit_const_reg(A_MOV,S_L,0,location.register64.reghi);
          exit;
        end;
      { Omit zero terms, if any }
      hreg1:=NR_NO;
      hreg2:=NR_NO;
      if lo(right.location.value64)<>0 then
        hreg1:=cg.getintregister(list,OS_INT);
      if hi(right.location.value64)<>0 then
        hreg2:=cg.getintregister(list,OS_INT);
      { Take advantage of 3-operand form of IMUL }
      case left.location.loc of
        LOC_REGISTER,LOC_CREGISTER:
          begin
            if hreg1<>NR_NO then
              emit_const_reg_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.register64.reghi,hreg1);
            if hreg2<>NR_NO then
              emit_const_reg_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.register64.reglo,hreg2);
          end;
        LOC_REFERENCE,LOC_CREFERENCE:
          begin
            { the high dword of left lives at reference offset+4 }
            if hreg2<>NR_NO then
              list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.reference,hreg2));
            inc(left.location.reference.offset,4);
            if hreg1<>NR_NO then
              list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.reference,hreg1));
            dec(left.location.reference.offset,4);
          end;
        else
          InternalError(2014011602);
      end;
    end
  else
    begin
      { non-constant right: compute both cross terms unconditionally }
      hreg1:=cg.getintregister(list,OS_INT);
      hreg2:=cg.getintregister(list,OS_INT);
      cg64.a_load64low_loc_reg(list,left.location,hreg1);
      cg64.a_load64high_loc_reg(list,left.location,hreg2);
      case right.location.loc of
        LOC_REGISTER,LOC_CREGISTER:
          begin
            emit_reg_reg(A_IMUL,S_L,right.location.register64.reghi,hreg1);
            emit_reg_reg(A_IMUL,S_L,right.location.register64.reglo,hreg2);
          end;
        LOC_REFERENCE,LOC_CREFERENCE:
          begin
            emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg2);
            inc(right.location.reference.offset,4);
            emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg1);
            dec(right.location.reference.offset,4);
          end;
        else
          InternalError(2014011603);
      end;
    end;
  { add hi*lo and lo*hi terms together }
  if (hreg1<>NR_NO) and (hreg2<>NR_NO) then
    emit_reg_reg(A_ADD,S_L,hreg2,hreg1);
  { load lo(right) into EAX }
  cg.getcpuregister(list,NR_EAX);
  cg64.a_load64low_loc_reg(list,right.location,NR_EAX);
  { multiply EAX by lo(left), producing 64-bit value in EDX:EAX }
  cg.getcpuregister(list,NR_EDX);
  if (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
    emit_reg(A_MUL,S_L,left.location.register64.reglo)
  else if (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
    emit_ref(A_MUL,S_L,left.location.reference)
  else
    InternalError(2014011604);
  { add previously calculated terms to the high half }
  if (hreg1<>NR_NO) then
    emit_reg_reg(A_ADD,S_L,hreg1,NR_EDX)
  else if (hreg2<>NR_NO) then
    emit_reg_reg(A_ADD,S_L,hreg2,NR_EDX)
  else
    InternalError(2014011601);
  { Result is now in EDX:EAX. Copy it to virtual registers. }
  set_mul_result_location;
end;
begin
  { register this class as the add-node implementation for this target }
  caddnode:=ti386addnode;
end.