{
    Copyright (c) 2000-2002 by Florian Klaempfl

    Code generation for add nodes on the i386

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  17. unit n386add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nadd,cpubase,nx86add;
  22. type
  23. ti386addnode = class(tx86addnode)
  24. function use_generic_mul32to64: boolean; override;
  25. function use_generic_mul64bit: boolean; override;
  26. procedure second_addordinal; override;
  27. procedure second_add64bit;override;
  28. procedure second_cmp64bit;override;
  29. procedure second_mul(unsigned: boolean);
  30. procedure second_mul64bit;
  31. protected
  32. procedure set_mul_result_location;
  33. end;
  34. implementation
  35. uses
  36. globtype,systems,
  37. cutils,verbose,globals,
  38. symconst,symdef,paramgr,defutil,
  39. aasmbase,aasmtai,aasmdata,aasmcpu,
  40. cgbase,procinfo,
  41. ncon,nset,cgutils,tgobj,
  42. cpuinfo,
  43. cga,ncgutil,cgobj,cg64f32,cgx86,
  44. hlcgobj;
  45. {*****************************************************************************
  46. use_generic_mul32to64
  47. *****************************************************************************}
  48. function ti386addnode.use_generic_mul32to64: boolean;
  49. begin
  50. result := False;
  51. end;
  52. function ti386addnode.use_generic_mul64bit: boolean;
  53. begin
  54. result:=needoverflowcheck or
  55. (cs_opt_size in current_settings.optimizerswitches);
  56. end;
  57. { handles all unsigned multiplications, and 32->64 bit signed ones.
  58. 32bit-only signed mul is handled by generic codegen }
  59. procedure ti386addnode.second_addordinal;
  60. var
  61. unsigned: boolean;
  62. begin
  63. unsigned:=not(is_signed(left.resultdef)) or
  64. not(is_signed(right.resultdef));
  65. { use IMUL instead of MUL in case overflow checking is off and we're
  66. doing a 32->32-bit multiplication }
  67. if not needoverflowcheck and
  68. not is_64bit(resultdef) then
  69. unsigned:=false;
  70. if (nodetype=muln) and (unsigned or is_64bit(resultdef)) then
  71. second_mul(unsigned)
  72. else
  73. inherited second_addordinal;
  74. end;
  75. {*****************************************************************************
  76. Add64bit
  77. *****************************************************************************}
  78. procedure ti386addnode.second_add64bit;
  79. var
  80. op : TOpCG;
  81. op1,op2 : TAsmOp;
  82. opsize : TOpSize;
  83. hregister,
  84. hregister2 : tregister;
  85. hl4 : tasmlabel;
  86. mboverflow,
  87. unsigned:boolean;
  88. r:Tregister;
  89. begin
  90. pass_left_right;
  91. op1:=A_NONE;
  92. op2:=A_NONE;
  93. mboverflow:=false;
  94. opsize:=S_L;
  95. unsigned:=((left.resultdef.typ=orddef) and
  96. (torddef(left.resultdef).ordtype=u64bit)) or
  97. ((right.resultdef.typ=orddef) and
  98. (torddef(right.resultdef).ordtype=u64bit));
  99. case nodetype of
  100. addn :
  101. begin
  102. op:=OP_ADD;
  103. mboverflow:=true;
  104. end;
  105. subn :
  106. begin
  107. op:=OP_SUB;
  108. op1:=A_SUB;
  109. op2:=A_SBB;
  110. mboverflow:=true;
  111. end;
  112. xorn:
  113. op:=OP_XOR;
  114. orn:
  115. op:=OP_OR;
  116. andn:
  117. op:=OP_AND;
  118. muln:
  119. begin
  120. second_mul64bit;
  121. exit;
  122. end
  123. else
  124. begin
  125. { everything should be handled in pass_1 (JM) }
  126. internalerror(2001090505);
  127. end;
  128. end;
  129. { left and right no register? }
  130. { then one must be demanded }
  131. if (left.location.loc<>LOC_REGISTER) then
  132. begin
  133. if (right.location.loc<>LOC_REGISTER) then
  134. begin
  135. hregister:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  136. hregister2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  137. cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(hregister,hregister2));
  138. location_reset(left.location,LOC_REGISTER,left.location.size);
  139. left.location.register64.reglo:=hregister;
  140. left.location.register64.reghi:=hregister2;
  141. end
  142. else
  143. begin
  144. location_swap(left.location,right.location);
  145. toggleflag(nf_swapped);
  146. end;
  147. end;
  148. { at this point, left.location.loc should be LOC_REGISTER }
  149. if right.location.loc=LOC_REGISTER then
  150. begin
  151. { when swapped another result register }
  152. if (nodetype=subn) and (nf_swapped in flags) then
  153. begin
  154. cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  155. left.location.register64,
  156. right.location.register64);
  157. location_swap(left.location,right.location);
  158. toggleflag(nf_swapped);
  159. end
  160. else
  161. begin
  162. cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  163. right.location.register64,
  164. left.location.register64);
  165. end;
  166. end
  167. else
  168. begin
  169. { right.location<>LOC_REGISTER }
  170. if (nodetype=subn) and (nf_swapped in flags) then
  171. begin
  172. r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  173. cg64.a_load64low_loc_reg(current_asmdata.CurrAsmList,right.location,r);
  174. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  175. emit_reg_reg(op1,opsize,left.location.register64.reglo,r);
  176. emit_reg_reg(A_MOV,opsize,r,left.location.register64.reglo);
  177. cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,r);
  178. { the carry flag is still ok }
  179. emit_reg_reg(op2,opsize,left.location.register64.reghi,r);
  180. cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  181. emit_reg_reg(A_MOV,opsize,r,left.location.register64.reghi);
  182. end
  183. else
  184. begin
  185. cg64.a_op64_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,
  186. left.location.register64);
  187. end;
  188. location_freetemp(current_asmdata.CurrAsmList,right.location);
  189. end;
  190. { only in case of overflow operations }
  191. { produce overflow code }
  192. { we must put it here directly, because sign of operation }
  193. { is in unsigned VAR!! }
  194. if mboverflow then
  195. begin
  196. if needoverflowcheck then
  197. begin
  198. current_asmdata.getjumplabel(hl4);
  199. if unsigned then
  200. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
  201. else
  202. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
  203. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
  204. cg.a_label(current_asmdata.CurrAsmList,hl4);
  205. end;
  206. end;
  207. location_copy(location,left.location);
  208. end;
  209. procedure ti386addnode.second_cmp64bit;
  210. var
  211. truelabel,
  212. falselabel,
  213. hlab : tasmlabel;
  214. href : treference;
  215. unsigned : boolean;
  216. procedure firstjmp64bitcmp;
  217. var
  218. oldnodetype : tnodetype;
  219. begin
  220. {$ifdef OLDREGVARS}
  221. load_all_regvars(current_asmdata.CurrAsmList);
  222. {$endif OLDREGVARS}
  223. { the jump the sequence is a little bit hairy }
  224. case nodetype of
  225. ltn,gtn:
  226. begin
  227. if (hlab<>location.truelabel) then
  228. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.truelabel);
  229. { cheat a little bit for the negative test }
  230. toggleflag(nf_swapped);
  231. if (hlab<>location.falselabel) then
  232. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.falselabel);
  233. toggleflag(nf_swapped);
  234. end;
  235. lten,gten:
  236. begin
  237. oldnodetype:=nodetype;
  238. if nodetype=lten then
  239. nodetype:=ltn
  240. else
  241. nodetype:=gtn;
  242. if (hlab<>location.truelabel) then
  243. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.truelabel);
  244. { cheat for the negative test }
  245. if nodetype=ltn then
  246. nodetype:=gtn
  247. else
  248. nodetype:=ltn;
  249. if (hlab<>location.falselabel) then
  250. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.falselabel);
  251. nodetype:=oldnodetype;
  252. end;
  253. equaln:
  254. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.falselabel);
  255. unequaln:
  256. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.truelabel);
  257. else
  258. internalerror(2019050905);
  259. end;
  260. end;
  261. procedure secondjmp64bitcmp;
  262. begin
  263. { the jump the sequence is a little bit hairy }
  264. case nodetype of
  265. ltn,gtn,lten,gten:
  266. begin
  267. { the comparisaion of the low dword have to be }
  268. { always unsigned! }
  269. cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(true),location.truelabel);
  270. cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
  271. end;
  272. equaln:
  273. begin
  274. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.falselabel);
  275. cg.a_jmp_always(current_asmdata.CurrAsmList,location.truelabel);
  276. end;
  277. unequaln:
  278. begin
  279. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.truelabel);
  280. cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
  281. end;
  282. else
  283. internalerror(2019050904);
  284. end;
  285. end;
  286. begin
  287. truelabel:=nil;
  288. falselabel:=nil;
  289. pass_left_right;
  290. unsigned:=((left.resultdef.typ=orddef) and
  291. (torddef(left.resultdef).ordtype=u64bit)) or
  292. ((right.resultdef.typ=orddef) and
  293. (torddef(right.resultdef).ordtype=u64bit));
  294. { we have LOC_JUMP as result }
  295. current_asmdata.getjumplabel(truelabel);
  296. current_asmdata.getjumplabel(falselabel);
  297. location_reset_jump(location,truelabel,falselabel);
  298. { Relational compares against constants having low dword=0 can omit the
  299. second compare based on the fact that any unsigned value is >=0 }
  300. hlab:=nil;
  301. if (right.location.loc=LOC_CONSTANT) and
  302. (lo(right.location.value64)=0) then
  303. begin
  304. case getresflags(true) of
  305. F_AE: hlab:=location.truelabel ;
  306. F_B: hlab:=location.falselabel;
  307. else
  308. ;
  309. end;
  310. end;
  311. if (right.location.loc=LOC_CONSTANT) and
  312. (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  313. begin
  314. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  315. href:=left.location.reference;
  316. inc(href.offset,4);
  317. emit_const_ref(A_CMP,S_L,aint(hi(right.location.value64)),href);
  318. firstjmp64bitcmp;
  319. if assigned(hlab) then
  320. cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
  321. else
  322. begin
  323. emit_const_ref(A_CMP,S_L,aint(lo(right.location.value64)),left.location.reference);
  324. secondjmp64bitcmp;
  325. end;
  326. location_freetemp(current_asmdata.CurrAsmList,left.location);
  327. exit;
  328. end;
  329. { left and right no register? }
  330. { then one must be demanded }
  331. if not (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
  332. begin
  333. if not (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
  334. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true)
  335. else
  336. begin
  337. location_swap(left.location,right.location);
  338. toggleflag(nf_swapped);
  339. end;
  340. end;
  341. { at this point, left.location.loc should be LOC_[C]REGISTER }
  342. case right.location.loc of
  343. LOC_REGISTER,
  344. LOC_CREGISTER :
  345. begin
  346. emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
  347. firstjmp64bitcmp;
  348. emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
  349. secondjmp64bitcmp;
  350. end;
  351. LOC_CREFERENCE,
  352. LOC_REFERENCE :
  353. begin
  354. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  355. href:=right.location.reference;
  356. inc(href.offset,4);
  357. emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi);
  358. firstjmp64bitcmp;
  359. emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
  360. secondjmp64bitcmp;
  361. location_freetemp(current_asmdata.CurrAsmList,right.location);
  362. end;
  363. LOC_CONSTANT :
  364. begin
  365. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
  366. firstjmp64bitcmp;
  367. if assigned(hlab) then
  368. cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
  369. else
  370. begin
  371. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
  372. secondjmp64bitcmp;
  373. end;
  374. end;
  375. else
  376. internalerror(2002032803);
  377. end;
  378. end;
  379. {*****************************************************************************
  380. x86 MUL
  381. *****************************************************************************}
  382. procedure ti386addnode.set_mul_result_location;
  383. begin
  384. location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
  385. {Free EAX,EDX}
  386. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
  387. if is_64bit(resultdef) then
  388. begin
  389. {Allocate a couple of registers and store EDX:EAX into it}
  390. location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  391. cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
  392. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
  393. location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  394. cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
  395. end
  396. else
  397. begin
  398. {Allocate a new register and store the result in EAX in it.}
  399. location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  400. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
  401. cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
  402. end;
  403. location_freetemp(current_asmdata.CurrAsmList,left.location);
  404. location_freetemp(current_asmdata.CurrAsmList,right.location);
  405. end;
  406. procedure ti386addnode.second_mul(unsigned: boolean);
  407. var reg,reghi,reglo:Tregister;
  408. ref:Treference;
  409. use_ref:boolean;
  410. hl4 : tasmlabel;
  411. const
  412. asmops: array[boolean] of tasmop = (A_IMUL, A_MUL);
  413. begin
  414. pass_left_right;
  415. reg:=NR_NO;
  416. reference_reset(ref,sizeof(pint),[]);
  417. { Mul supports registers and references, so if not register/reference,
  418. load the location into a register.
  419. The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
  420. use_ref:=false;
  421. if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
  422. reg:=left.location.register
  423. else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
  424. begin
  425. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  426. ref:=left.location.reference;
  427. use_ref:=true;
  428. end
  429. else
  430. begin
  431. { LOC_CONSTANT for example.}
  432. reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  433. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
  434. end;
  435. if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and
  436. (not(needoverflowcheck) or
  437. { 32->64 bit cannot overflow }
  438. is_64bit(resultdef)) then
  439. begin
  440. cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
  441. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EDX);
  442. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
  443. reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  444. reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  445. if use_ref then
  446. current_asmdata.CurrAsmList.concat(Taicpu.Op_ref_reg_reg(A_MULX,S_L,ref,reglo,reghi))
  447. else
  448. emit_reg_reg_reg(A_MULX,S_L,reg,reglo,reghi);
  449. location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
  450. location.register64.reglo:=reglo;
  451. if is_64bit(resultdef) then
  452. location.register64.reghi:=reghi;
  453. location_freetemp(current_asmdata.CurrAsmList,left.location);
  454. location_freetemp(current_asmdata.CurrAsmList,right.location);
  455. end
  456. else
  457. begin
  458. { Allocate EAX. }
  459. cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
  460. { Load the right value. }
  461. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EAX);
  462. { Also allocate EDX, since it is also modified by a mul (JM). }
  463. cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
  464. if use_ref then
  465. emit_ref(asmops[unsigned],S_L,ref)
  466. else
  467. emit_reg(asmops[unsigned],S_L,reg);
  468. if needoverflowcheck and
  469. { 32->64 bit cannot overflow }
  470. (not is_64bit(resultdef)) then
  471. begin
  472. current_asmdata.getjumplabel(hl4);
  473. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
  474. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
  475. cg.a_label(current_asmdata.CurrAsmList,hl4);
  476. end;
  477. set_mul_result_location;
  478. end;
  479. end;
  480. procedure ti386addnode.second_mul64bit;
  481. var
  482. list: TAsmList;
  483. hreg1,hreg2: tregister;
  484. begin
  485. { 64x64 multiplication yields 128-bit result, but we're only
  486. interested in its lower 64 bits. This lower part is independent
  487. of operand signs, and so is the generated code. }
  488. { pass_left_right already called from second_add64bit }
  489. list:=current_asmdata.CurrAsmList;
  490. if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
  491. tcgx86(cg).make_simple_ref(list,left.location.reference);
  492. if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
  493. tcgx86(cg).make_simple_ref(list,right.location.reference);
  494. { calculate 32-bit terms lo(right)*hi(left) and hi(left)*lo(right) }
  495. if (right.location.loc=LOC_CONSTANT) then
  496. begin
  497. { Omit zero terms, if any }
  498. hreg1:=NR_NO;
  499. hreg2:=NR_NO;
  500. if lo(right.location.value64)<>0 then
  501. hreg1:=cg.getintregister(list,OS_INT);
  502. if hi(right.location.value64)<>0 then
  503. hreg2:=cg.getintregister(list,OS_INT);
  504. { Take advantage of 3-operand form of IMUL }
  505. case left.location.loc of
  506. LOC_REGISTER,LOC_CREGISTER:
  507. begin
  508. if hreg1<>NR_NO then
  509. emit_const_reg_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.register64.reghi,hreg1);
  510. if hreg2<>NR_NO then
  511. emit_const_reg_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.register64.reglo,hreg2);
  512. end;
  513. LOC_REFERENCE,LOC_CREFERENCE:
  514. begin
  515. if hreg2<>NR_NO then
  516. list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.reference,hreg2));
  517. inc(left.location.reference.offset,4);
  518. if hreg1<>NR_NO then
  519. list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.reference,hreg1));
  520. dec(left.location.reference.offset,4);
  521. end;
  522. else
  523. InternalError(2014011602);
  524. end;
  525. end
  526. else
  527. begin
  528. hreg1:=cg.getintregister(list,OS_INT);
  529. hreg2:=cg.getintregister(list,OS_INT);
  530. cg64.a_load64low_loc_reg(list,left.location,hreg1);
  531. cg64.a_load64high_loc_reg(list,left.location,hreg2);
  532. case right.location.loc of
  533. LOC_REGISTER,LOC_CREGISTER:
  534. begin
  535. emit_reg_reg(A_IMUL,S_L,right.location.register64.reghi,hreg1);
  536. emit_reg_reg(A_IMUL,S_L,right.location.register64.reglo,hreg2);
  537. end;
  538. LOC_REFERENCE,LOC_CREFERENCE:
  539. begin
  540. emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg2);
  541. inc(right.location.reference.offset,4);
  542. emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg1);
  543. dec(right.location.reference.offset,4);
  544. end;
  545. else
  546. InternalError(2014011603);
  547. end;
  548. end;
  549. { add hi*lo and lo*hi terms together }
  550. if (hreg1<>NR_NO) and (hreg2<>NR_NO) then
  551. emit_reg_reg(A_ADD,S_L,hreg2,hreg1);
  552. { load lo(right) into EAX }
  553. cg.getcpuregister(list,NR_EAX);
  554. cg64.a_load64low_loc_reg(list,right.location,NR_EAX);
  555. { multiply EAX by lo(left), producing 64-bit value in EDX:EAX }
  556. cg.getcpuregister(list,NR_EDX);
  557. if (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
  558. emit_reg(A_MUL,S_L,left.location.register64.reglo)
  559. else if (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  560. emit_ref(A_MUL,S_L,left.location.reference)
  561. else
  562. InternalError(2014011604);
  563. { add previously calculated terms to the high half }
  564. if (hreg1<>NR_NO) then
  565. emit_reg_reg(A_ADD,S_L,hreg1,NR_EDX)
  566. else if (hreg2<>NR_NO) then
  567. emit_reg_reg(A_ADD,S_L,hreg2,NR_EDX)
  568. else
  569. InternalError(2014011601);
  570. { Result is now in EDX:EAX. Copy it to virtual registers. }
  571. set_mul_result_location;
  572. end;
  573. begin
  574. caddnode:=ti386addnode;
  575. end.