n386mat.pas 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate i386 assembler for math nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit n386mat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat;
  23. type
  { i386 specific code generation for the integer "div" and "mod" nodes }
  ti386moddivnode = class(tmoddivnode)
    { second pass: emits the assembler for the division/modulo }
    procedure pass_2; override;
  end;
  { i386 specific code generation for the "shl" and "shr" nodes }
  ti386shlshrnode = class(tshlshrnode)
    { second pass: emits the assembler for 32 and 64 bit shifts }
    procedure pass_2; override;
    { returns nil, because everything (including 64 bit shifts) }
    { is handled in pass_2                                       }
    function first_shlshr64bitint : tnode; override;
  end;
  { i386 specific code generation for the unary minus node }
  ti386unaryminusnode = class(tunaryminusnode)
    { first pass: register requirements and expected result location }
    function pass_1 : tnode; override;
    { second pass: emits the assembler for the negation }
    procedure pass_2; override;
  end;
  { i386 specific code generation for the "not" node }
  ti386notnode = class(tnotnode)
    { second pass: emits the assembler for the boolean/bitwise not }
    procedure pass_2; override;
  end;
  39. implementation
  40. uses
  41. globtype,systems,
  42. cutils,verbose,globals,
  43. symconst,symdef,aasmbase,aasmtai,aasmcpu,defbase,
  44. cginfo,cgbase,pass_1,pass_2,
  45. ncon,
  46. cpubase,cpuinfo,
  47. cga,tgobj,ncgutil,cgobj,rgobj,rgcpu;
  48. {*****************************************************************************
  49. TI386MODDIVNODE
  50. *****************************************************************************}
  {
    Second pass of an integer "div"/"mod" node: emits the i386 code.

    Both operands are evaluated first (left, then right). A "div" by a
    constant power of two is turned into a shift, with a rounding
    correction for signed numerators. Everything else is done with
    DIV/IDIV: numerator in EDX:EAX, denominator in EDI. The result is
    returned in a register (LOC_REGISTER, OS_INT).
    64 bit operands are not expected here and trigger an internal error.
  }
  procedure ti386moddivnode.pass_2;
    var
      numreg,
      maskreg : tregister;
      usedshift,
      restore_eax,
      restore_edx : boolean;
      shiftcount : longint;
      skiplabel : tasmlabel;
      savedregs : tmaybesave;
    begin
      usedshift:=false;

      { evaluate both operands, keeping the left result alive meanwhile }
      secondpass(left);
      if codegenerror then
        exit;
      maybe_save(exprasmlist,right.registers32,left.location,savedregs);
      secondpass(right);
      maybe_restore(exprasmlist,left.location,savedregs);
      if codegenerror then
        exit;
      location_copy(location,left.location);

      if is_64bitint(resulttype.def) then
        begin
          { should be handled in pass_1 (JM) }
          internalerror(200109052);
          exit;
        end;

      { put numerator in register }
      location_force_reg(exprasmlist,left.location,OS_INT,false);
      numreg:=left.location.register;

      if (nodetype=divn) and
         (right.nodetype=ordconstn) and
         ispowerof2(tordconstnode(right).value,shiftcount) then
        begin
          usedshift:=true;
          { for signed numbers, the numerator must be adjusted before the  }
          { shift instruction, but not with unsigned numbers! Otherwise,   }
          { "Cardinal($ffffffff) div 16" overflows! (JM)                   }
          if not is_signed(left.resulttype.def) then
            emit_const_reg(A_SHR,S_L,shiftcount,numreg)
          else if (aktOptProcessor<>class386) and
                  not(CS_LittleSize in aktglobalswitches) then
            begin
              { sequence without jumps, but with more operations; }
              { saw this in comp.compilers (JM)                   }
              if (numreg=R_EAX) and
                 (R_EDX in rg.unusedregsint) then
                begin
                  { CDQ fills EDX with the sign of EAX }
                  maskreg:=rg.getexplicitregisterint(exprasmlist,R_EDX);
                  emit_none(A_CDQ,S_NO);
                end
              else
                begin
                  rg.getexplicitregisterint(exprasmlist,R_EDI);
                  maskreg:=R_EDI;
                  emit_reg_reg(A_MOV,S_L,numreg,R_EDI);
                  { if the left value is negative, EDI := $ffffffff, }
                  { otherwise 0                                      }
                  emit_const_reg(A_SAR,S_L,31,R_EDI);
                end;
              { if the numerator is negative, mask := right value-1, }
              { otherwise 0                                          }
              emit_const_reg(A_AND,S_L,tordconstnode(right).value-1,maskreg);
              { add the correction to the left value }
              emit_reg_reg(A_ADD,S_L,maskreg,numreg);
              { release the mask register again (EDX or EDI) }
              rg.ungetregisterint(exprasmlist,maskreg);
              { do the shift }
              emit_const_reg(A_SAR,S_L,shiftcount,numreg);
            end
          else
            begin
              { a jump, but less operations }
              emit_reg_reg(A_TEST,S_L,numreg,numreg);
              objectlibrary.getlabel(skiplabel);
              { non-negative numerators need no correction }
              emitjmp(C_NS,skiplabel);
              if shiftcount=1 then
                emit_reg(A_INC,S_L,numreg)
              else
                emit_const_reg(A_ADD,S_L,tordconstnode(right).value-1,numreg);
              cg.a_label(exprasmlist,skiplabel);
              emit_const_reg(A_SAR,S_L,shiftcount,numreg);
            end;
        end
      else
        begin
          { bring denominator to EDI; EDI is always free, }
          { it's only used for temporary purposes         }
          rg.getexplicitregisterint(exprasmlist,R_EDI);
          if right.location.loc<>LOC_CREGISTER then
            location_release(exprasmlist,right.location);
          cg.a_load_loc_reg(exprasmlist,right.location,R_EDI);

          restore_edx:=false;
          restore_eax:=false;
          if numreg=R_EDX then
            begin
              { numerator lives in EDX: only EAX has to be made available }
              if R_EAX in rg.unusedregsint then
                rg.getexplicitregisterint(exprasmlist,R_EAX)
              else
                begin
                  emit_reg(A_PUSH,S_L,R_EAX);
                  restore_eax:=true;
                end;
              emit_reg_reg(A_MOV,S_L,R_EDX,R_EAX);
            end
          else
            begin
              { EDX is needed for the upper half of the numerator }
              if R_EDX in rg.unusedregsint then
                rg.getexplicitregisterint(exprasmlist,R_EDX)
              else
                begin
                  emit_reg(A_PUSH,S_L,R_EDX);
                  restore_edx:=true;
                end;
              if numreg<>R_EAX then
                begin
                  if R_EAX in rg.unusedregsint then
                    rg.getexplicitregisterint(exprasmlist,R_EAX)
                  else
                    begin
                      emit_reg(A_PUSH,S_L,R_EAX);
                      restore_eax:=true;
                    end;
                  emit_reg_reg(A_MOV,S_L,numreg,R_EAX);
                end;
            end;

          { sign extension depends on the left type }
          if torddef(left.resulttype.def).typ=u32bit then
            emit_reg_reg(A_XOR,S_L,R_EDX,R_EDX)
          else
            emit_none(A_CDQ,S_NO);

          { division depends on the right type }
          if torddef(right.resulttype.def).typ=u32bit then
            emit_reg(A_DIV,S_L,R_EDI)
          else
            emit_reg(A_IDIV,S_L,R_EDI);
          rg.ungetregisterint(exprasmlist,R_EDI);

          if nodetype=divn then
            begin
              { quotient is in EAX, EDX is no longer needed }
              if (not restore_edx) and (numreg<>R_EDX) then
                rg.ungetregister(exprasmlist,R_EDX);
              { if result register is busy then copy }
              if restore_eax then
                begin
                  if numreg=R_EAX then
                    internalerror(112);
                  emit_reg_reg(A_MOV,S_L,R_EAX,numreg)
                end
              else
                if numreg<>R_EAX then
                  begin
                    rg.ungetregisterint(exprasmlist,numreg);
                    { no need to allocate eax, that's already done }
                    { before the div (JM)                          }
                    numreg:=R_EAX;
                  end;
            end
          else
            begin
              { remainder is in EDX, EAX is no longer needed }
              if (not restore_eax) and (numreg<>R_EAX) then
                rg.ungetregister(exprasmlist,R_EAX);
              if restore_edx then
                { the mod was done by an (i)div (so the result is now in  }
                { edx), but edx was occupied prior to the division, so    }
                { move the result into a safe place (JM)                  }
                emit_reg_reg(A_MOV,S_L,R_EDX,numreg)
              else
                begin
                  if numreg<>R_EDX then
                    rg.ungetregisterint(exprasmlist,numreg);
                  numreg:=R_EDX
                end;
            end;

          { restore in reverse order of the pushes above }
          if restore_eax then
            emit_reg(A_POP,S_L,R_EAX);
          if restore_edx then
            emit_reg(A_POP,S_L,R_EDX);
        end;

      if not usedshift then
        { the shift variant only uses the numerator register (which is  }
        { already in usedinproc, since it was acquired with             }
        { getregister), the others also use both EAX and EDX (JM)       }
        begin
          include(rg.usedinproc,R_EAX);
          include(rg.usedinproc,R_EDX);
        end;

      location_reset(location,LOC_REGISTER,OS_INT);
      location.register:=numreg;
    end;
  246. {*****************************************************************************
  247. TI386SHLSHRNODE
  248. *****************************************************************************}
  { First-pass hook for 64 bit shifts. Always returns nil: on i386 the }
  { 64 bit case is generated completely by pass_2 of this node.        }
  function ti386shlshrnode.first_shlshr64bitint: tnode;
    begin
      first_shlshr64bitint:=nil;
    end;
  {
    Second pass of a "shl"/"shr" node: emits the i386 code.

    Both operands are evaluated first (left, then right).

    64 bit left operand:
      - constant count > 31: one half is cleared, the other one is shifted
        by (count and 31) and the halves are exchanged in the result
        location;
      - constant count <= 31: SHLD/SHL resp. SHRD/SHR;
      - variable count: the count is brought into ECX and a run-time
        sequence distinguishes count >= 64 (result 0), count >= 32 and
        count < 32.
    32 bit left operand:
      - constant count: a single shift by (count and 31);
      - variable count: the count is brought into ECX, shift by CL.

    The result is always left in register(s).
  }
  procedure ti386shlshrnode.pass_2;
    var
      hregister2,hregister3,
      hregisterhigh,hregisterlow : tregister;
      popecx : boolean;
      op : tasmop;
      l1,l2,l3 : tasmlabel;
      pushedregs : tmaybesave;
    begin
      popecx:=false;

      secondpass(left);
      maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
      secondpass(right);
      maybe_restore(exprasmlist,left.location,pushedregs);

      { determine operator }
      case nodetype of
        shln: op:=A_SHL;
        shrn: op:=A_SHR;
      end;

      if is_64bitint(left.resulttype.def) then
        begin
          location_reset(location,LOC_REGISTER,OS_64);

          { load left operator in a register }
          location_force_reg(exprasmlist,left.location,OS_64,false);
          hregisterhigh:=left.location.registerhigh;
          hregisterlow:=left.location.registerlow;

          { shifting by a constant directly coded: }
          if (right.nodetype=ordconstn) then
            begin
              { shrd/shl works only for values <=31 !! }
              { NOTE(review): counts >= 64 are reduced with "and 31" here, }
              { while the variable count path below yields 0 for them -    }
              { confirm which behaviour is intended.                       }
              if tordconstnode(right).value>31 then
                begin
                  if nodetype=shln then
                    begin
                      emit_reg_reg(A_XOR,S_L,hregisterhigh,
                        hregisterhigh);
                      if ((tordconstnode(right).value and 31) <> 0) then
                        emit_const_reg(A_SHL,S_L,tordconstnode(right).value and 31,
                          hregisterlow);
                    end
                  else
                    begin
                      emit_reg_reg(A_XOR,S_L,hregisterlow,
                        hregisterlow);
                      if ((tordconstnode(right).value and 31) <> 0) then
                        emit_const_reg(A_SHR,S_L,tordconstnode(right).value and 31,
                          hregisterhigh);
                    end;
                  { the halves trade places: this is the "shift by 32" part }
                  location.registerhigh:=hregisterlow;
                  location.registerlow:=hregisterhigh;
                end
              else
                begin
                  if nodetype=shln then
                    begin
                      emit_const_reg_reg(A_SHLD,S_L,tordconstnode(right).value and 31,
                        hregisterlow,hregisterhigh);
                      emit_const_reg(A_SHL,S_L,tordconstnode(right).value and 31,
                        hregisterlow);
                    end
                  else
                    begin
                      emit_const_reg_reg(A_SHRD,S_L,tordconstnode(right).value and 31,
                        hregisterhigh,hregisterlow);
                      emit_const_reg(A_SHR,S_L,tordconstnode(right).value and 31,
                        hregisterhigh);
                    end;
                  location.registerlow:=hregisterlow;
                  location.registerhigh:=hregisterhigh;
                end;
            end
          else
            begin
              { load right operators in a register }
              if right.location.loc<>LOC_REGISTER then
                begin
                  if right.location.loc<>LOC_CREGISTER then
                    location_release(exprasmlist,right.location);
                  hregister2:=rg.getexplicitregisterint(exprasmlist,R_ECX);
                  cg.a_load_loc_reg(exprasmlist,right.location,hregister2);
                end
              else
                hregister2:=right.location.register;

              { left operator is already in a register }
              { hence are both in a register           }
              { is it in the case ECX ?                }
              if (hregisterlow=R_ECX) then
                begin
                  { then only swap }
                  emit_reg_reg(A_XCHG,S_L,hregisterlow,hregister2);
                  hregister3:=hregisterlow;
                  hregisterlow:=hregister2;
                  hregister2:=hregister3;
                end
              else if (hregisterhigh=R_ECX) then
                begin
                  { then only swap }
                  emit_reg_reg(A_XCHG,S_L,hregisterhigh,hregister2);
                  hregister3:=hregisterhigh;
                  hregisterhigh:=hregister2;
                  hregister2:=hregister3;
                end
              { if second operator not in ECX ? }
              else if (hregister2<>R_ECX) then
                begin
                  { ECX occupied then push it }
                  if not (R_ECX in rg.unusedregsint) then
                    begin
                      popecx:=true;
                      emit_reg(A_PUSH,S_L,R_ECX);
                    end
                  else
                    rg.getexplicitregisterint(exprasmlist,R_ECX);
                  emit_reg_reg(A_MOV,S_L,hregister2,R_ECX);
                end;
              { ECX itself is released (or popped) after the shift }
              if hregister2 <> R_ECX then
                rg.ungetregisterint(exprasmlist,hregister2);

              { the damned shift instructions work only til a count of 32 }
              { so we've to do some tricks here                           }
              if nodetype=shln then
                begin
                  objectlibrary.getlabel(l1);
                  objectlibrary.getlabel(l2);
                  objectlibrary.getlabel(l3);
                  { count >= 64: result is 0 }
                  emit_const_reg(A_CMP,S_L,64,R_ECX);
                  emitjmp(C_L,l1);
                  emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow);
                  emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh);
                  cg.a_jmp_always(exprasmlist,l3);
                  cg.a_label(exprasmlist,l1);
                  { 32 <= count < 64: high := low shl (count-32), low := 0 }
                  emit_const_reg(A_CMP,S_L,32,R_ECX);
                  emitjmp(C_L,l2);
                  emit_const_reg(A_SUB,S_L,32,R_ECX);
                  emit_reg_reg(A_SHL,S_L,R_CL,
                    hregisterlow);
                  emit_reg_reg(A_MOV,S_L,hregisterlow,hregisterhigh);
                  emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow);
                  cg.a_jmp_always(exprasmlist,l3);
                  cg.a_label(exprasmlist,l2);
                  { count < 32: double precision shift }
                  emit_reg_reg_reg(A_SHLD,S_L,R_CL,
                    hregisterlow,hregisterhigh);
                  emit_reg_reg(A_SHL,S_L,R_CL,
                    hregisterlow);
                  cg.a_label(exprasmlist,l3);
                end
              else
                begin
                  objectlibrary.getlabel(l1);
                  objectlibrary.getlabel(l2);
                  objectlibrary.getlabel(l3);
                  { count >= 64: result is 0 }
                  emit_const_reg(A_CMP,S_L,64,R_ECX);
                  emitjmp(C_L,l1);
                  emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow);
                  emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh);
                  cg.a_jmp_always(exprasmlist,l3);
                  cg.a_label(exprasmlist,l1);
                  { 32 <= count < 64: low := high shr (count-32), high := 0 }
                  emit_const_reg(A_CMP,S_L,32,R_ECX);
                  emitjmp(C_L,l2);
                  emit_const_reg(A_SUB,S_L,32,R_ECX);
                  emit_reg_reg(A_SHR,S_L,R_CL,
                    hregisterhigh);
                  emit_reg_reg(A_MOV,S_L,hregisterhigh,hregisterlow);
                  emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh);
                  cg.a_jmp_always(exprasmlist,l3);
                  cg.a_label(exprasmlist,l2);
                  { count < 32: double precision shift }
                  emit_reg_reg_reg(A_SHRD,S_L,R_CL,
                    hregisterhigh,hregisterlow);
                  emit_reg_reg(A_SHR,S_L,R_CL,
                    hregisterhigh);
                  cg.a_label(exprasmlist,l3);
                end;

              { maybe put ECX back }
              if popecx then
                emit_reg(A_POP,S_L,R_ECX)
              else
                rg.ungetregisterint(exprasmlist,R_ECX);
              location.registerlow:=hregisterlow;
              location.registerhigh:=hregisterhigh;
            end;
        end
      else
        begin
          { load left operators in a register }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_INT,false);

          { shifting by a constant directly coded: }
          if (right.nodetype=ordconstn) then
            begin
              { l shl 32 should 0 imho, but neither TP nor Delphi do it }
              { in this way (FK), so the count is simply masked with 31 }
              emit_const_reg(op,S_L,tordconstnode(right).value and 31,
                location.register);
            end
          else
            begin
              { load right operators in a register }
              if right.location.loc<>LOC_REGISTER then
                begin
                  if right.location.loc<>LOC_CREGISTER then
                    location_release(exprasmlist,right.location);
                  hregister2:=rg.getexplicitregisterint(exprasmlist,R_ECX);
                  cg.a_load_loc_reg(exprasmlist,right.location,hregister2);
                end
              else
                hregister2:=right.location.register;

              { left operator is already in a register }
              { hence are both in a register           }
              { is it in the case ECX ?                }
              if (location.register=R_ECX) then
                begin
                  { then only swap }
                  emit_reg_reg(A_XCHG,S_L,location.register,hregister2);
                  hregister3:=location.register;
                  location.register:=hregister2;
                  hregister2:=hregister3;
                end
              { if second operator not in ECX ? }
              else if (hregister2<>R_ECX) then
                begin
                  { ECX occupied then push it }
                  if not (R_ECX in rg.unusedregsint) then
                    begin
                      popecx:=true;
                      emit_reg(A_PUSH,S_L,R_ECX);
                    end
                  else
                    rg.getexplicitregisterint(exprasmlist,R_ECX);
                  emit_reg_reg(A_MOV,S_L,hregister2,R_ECX);
                end;
              { Only release the register the count came from if it is not }
              { ECX: ECX still holds the count for the shift below and is  }
              { released (or popped) afterwards. Releasing it here as well }
              { would free it before its last use and then a second time;  }
              { this mirrors the guard used in the 64 bit path above.      }
              if hregister2<>R_ECX then
                rg.ungetregisterint(exprasmlist,hregister2);

              { right operand is in ECX }
              emit_reg_reg(op,S_L,R_CL,location.register);

              { maybe ECX back }
              if popecx then
                emit_reg(A_POP,S_L,R_ECX)
              else
                rg.ungetregisterint(exprasmlist,R_ECX);
            end;
        end;
    end;
  499. {*****************************************************************************
  500. TI386UNARYMINUSNODE
  501. *****************************************************************************}
  {
    First pass of the unary minus node.

    Runs the first pass on the operand, takes over its register
    requirements and then raises them / sets the expected result location
    depending on the operand type (float, MMX array, 64 bit integer,
    other ordinal). Always returns nil.
  }
  function ti386unaryminusnode.pass_1 : tnode;
    begin
      result:=nil;
      firstpass(left);
      if codegenerror then
        exit;

      { start out with what the operand itself needs }
      registers32:=left.registers32;
      registersfpu:=left.registersfpu;
{$ifdef SUPPORT_MMX}
      registersmmx:=left.registersmmx;
{$endif SUPPORT_MMX}

      if (left.resulttype.def.deftype=floatdef) then
        begin
          { at least one fpu register for the result }
          if not(registersfpu >= 1) then
            registersfpu:=1;
          location.loc:=LOC_FPUREGISTER;
        end
{$ifdef SUPPORT_MMX}
      else if (cs_mmx in aktlocalswitches) and
              is_mmx_able_array(left.resulttype.def) then
        begin
          if (registersmmx<1) and
             (left.location.loc<>LOC_MMXREGISTER) then
            registersmmx:=1;
        end
{$endif SUPPORT_MMX}
      else if is_64bitint(left.resulttype.def) then
        begin
          { a 64 bit value needs a register pair }
          if (registers32<2) and
             (left.location.loc<>LOC_REGISTER) then
            registers32:=2;
          location.loc:=LOC_REGISTER;
        end
      else if (left.resulttype.def.deftype=orddef) then
        begin
          if (registers32<1) and
             (left.location.loc<>LOC_REGISTER) then
            registers32:=1;
          location.loc:=LOC_REGISTER;
        end;
    end;
  {
    Second pass of the unary minus node: emits the i386 code.

    - 64 bit integers: NOT high / NEG low / SBB -1,high on a register pair.
    - 32 bit integers: NEG on a register (a copy is made for register
      variables and memory operands).
    - floats: the value is loaded to ST and negated with FCHS.
    - MMX arrays (SUPPORT_MMX only): computed as 0 - operand using MM7.
    Any other operand location raises an internal error.
  }
  procedure ti386unaryminusnode.pass_2;

{$ifdef SUPPORT_MMX}
    { Negates the MMX operand held in location.register.                 }
    { Expects the caller to have stored the operand in location.register }
    { and to have cleared MM7; the difference 0 - operand is built in    }
    { MM7 and copied back to location.register.                          }
    procedure do_mmx_neg;
      var
        op : tasmop;
        mmreg : tregister;
      begin
        { location_reset reinitialises the location (every other call   }
        { site in this unit assigns the register only after the reset), }
        { so the register chosen by the caller is carried over here     }
        mmreg:=location.register;
        location_reset(location,LOC_MMXREGISTER,OS_NO);
        location.register:=mmreg;
        if cs_mmx_saturation in aktlocalswitches then
          case mmx_type(resulttype.def) of
            mmxs8bit:
              op:=A_PSUBSB;
            mmxu8bit:
              op:=A_PSUBUSB;
            mmxs16bit,mmxfixed16:
              op:=A_PSUBSW;
            mmxu16bit:
              op:=A_PSUBUSW;
          end
        else
          case mmx_type(resulttype.def) of
            mmxs8bit,mmxu8bit:
              op:=A_PSUBB;
            mmxs16bit,mmxu16bit,mmxfixed16:
              op:=A_PSUBW;
            mmxs32bit,mmxu32bit:
              op:=A_PSUBD;
          end;
        emit_reg_reg(op,S_NO,location.register,R_MM7);
        emit_reg_reg(A_MOVQ,S_NO,R_MM7,location.register);
      end;
{$endif}

    begin
      if is_64bitint(left.resulttype.def) then
        begin
          secondpass(left);

          { load left operator in a register }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_64,false);

          { two's complement of the register pair }
          emit_reg(A_NOT,S_L,location.registerhigh);
          emit_reg(A_NEG,S_L,location.registerlow);
          emit_const_reg(A_SBB,S_L,-1,location.registerhigh);
        end
      else
        begin
          secondpass(left);
          location_reset(location,LOC_REGISTER,OS_INT);
          case left.location.loc of
            LOC_REGISTER:
              begin
                { negate in place }
                location.register:=left.location.register;
                emit_reg(A_NEG,S_L,location.register);
              end;
            LOC_CREGISTER:
              begin
                { register variable: negate a copy }
                location.register:=rg.getregisterint(exprasmlist);
                emit_reg_reg(A_MOV,S_L,left.location.register,
                  location.register);
                emit_reg(A_NEG,S_L,location.register);
              end;
{$ifdef SUPPORT_MMX}
            LOC_MMXREGISTER:
              begin
                location_copy(location,left.location);
                emit_reg_reg(A_PXOR,S_NO,R_MM7,R_MM7);
                do_mmx_neg;
              end;
            LOC_CMMXREGISTER:
              begin
                location.register:=rg.getregistermm(exprasmlist);
                emit_reg_reg(A_PXOR,S_NO,R_MM7,R_MM7);
                emit_reg_reg(A_MOVQ,S_NO,left.location.register,
                  location.register);
                do_mmx_neg;
              end;
{$endif SUPPORT_MMX}
            LOC_REFERENCE,
            LOC_CREFERENCE:
              begin
                reference_release(exprasmlist,left.location.reference);
                if (left.resulttype.def.deftype=floatdef) then
                  begin
                    location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
                    location.register:=R_ST;
                    cg.a_loadfpu_ref_reg(exprasmlist,
                      def_cgsize(left.resulttype.def),
                      left.location.reference,R_ST);
                    emit_none(A_FCHS,S_NO);
                  end
{$ifdef SUPPORT_MMX}
                else if (cs_mmx in aktlocalswitches) and is_mmx_able_array(left.resulttype.def) then
                  begin
                    location.register:=rg.getregistermm(exprasmlist);
                    emit_reg_reg(A_PXOR,S_NO,R_MM7,R_MM7);
                    emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
                    do_mmx_neg;
                  end
{$endif SUPPORT_MMX}
                else
                  begin
                    location.register:=rg.getregisterint(exprasmlist);
                    emit_ref_reg(A_MOV,S_L,left.location.reference,location.register);
                    emit_reg(A_NEG,S_L,location.register);
                  end;
              end;
            LOC_FPUREGISTER,LOC_CFPUREGISTER:
              begin
                { "load st,st" is ignored by the code generator }
                cg.a_loadfpu_reg_reg(exprasmlist,left.location.register,R_ST);
                location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
                location.register:=R_ST;
                emit_none(A_FCHS,S_NO);
              end;
            else
              internalerror(200203225);
          end;
        end;
      { Here was a problem...                                              }
      { Operand to be negated always seems to be converted to signed       }
      { 32-bit before doing neg!! So this is useless...                    }
      { that's not true: -2^31 gives an overflow error if it is negated (FK) }
      { emitoverflowcheck(p); }
    end;
  667. {*****************************************************************************
  668. TI386NOTNODE
  669. *****************************************************************************}
  {
    Second pass of the "not" node: emits the i386 code.

    - booleans: jump locations get their true/false labels exchanged,
      flag locations get the inverse condition, everything else is
      tested against zero and the result is returned in the flags (F_E);
    - MMX arrays (SUPPORT_MMX only): both 32 bit halves are xor'ed with
      an all-ones mask built in MM7;
    - 64 bit integers: NOT on both halves of the register pair;
    - other integers: NOT on a register of the result's size.
  }
  procedure ti386notnode.pass_2;
    const
      { maps every flag condition in F_E..F_BE to its negation }
      inverse_flags : array[F_E..F_BE] of tresflags =
        (F_NE,F_E,F_LE,F_GE,F_L,F_G,F_NC,F_C,
         F_BE,F_B,F_AE,F_A);
    var
      swaplabel : tasmlabel;
      size : topsize;
    begin
      if is_boolean(resulttype.def) then
        begin
          size:=def_opsize(resulttype.def);
          { the second pass could change the location of left }
          { if it is a register variable, so we've to do      }
          { this before the case statement                    }
          if left.location.loc<>LOC_JUMP then
            secondpass(left);

          case left.location.loc of
            LOC_JUMP :
              begin
                location_reset(location,LOC_JUMP,OS_NO);
                { generate the operand with exchanged jump targets ... }
                swaplabel:=truelabel;
                truelabel:=falselabel;
                falselabel:=swaplabel;
                secondpass(left);
                maketojumpbool(exprasmlist,left,lr_load_regvars);
                { ... and put the labels back afterwards }
                swaplabel:=truelabel;
                truelabel:=falselabel;
                falselabel:=swaplabel;
              end;
            LOC_FLAGS :
              begin
                location_release(exprasmlist,left.location);
                location_reset(location,LOC_FLAGS,OS_NO);
                location.resflags:=inverse_flags[left.location.resflags];
              end;
            LOC_CONSTANT,
            LOC_REGISTER,
            LOC_CREGISTER,
            LOC_REFERENCE,
            LOC_CREFERENCE :
              begin
                { "not x" is true exactly when x tests as zero }
                location_force_reg(exprasmlist,left.location,def_cgsize(resulttype.def),true);
                emit_reg_reg(A_TEST,size,left.location.register,left.location.register);
                location_release(exprasmlist,left.location);
                location_reset(location,LOC_FLAGS,OS_NO);
                location.resflags:=F_E;
              end;
            else
              internalerror(200203224);
          end;
        end
{$ifdef SUPPORT_MMX}
      else
        if (cs_mmx in aktlocalswitches) and is_mmx_able_array(left.resulttype.def) then
          begin
            secondpass(left);
            location_reset(location,LOC_MMXREGISTER,OS_NO);
            { prepare EDI }
            rg.getexplicitregisterint(exprasmlist,R_EDI);
            emit_const_reg(A_MOV,S_L,longint($ffffffff),R_EDI);
            { load operand }
            case left.location.loc of
              LOC_MMXREGISTER:
                location_copy(location,left.location);
              LOC_CMMXREGISTER:
                begin
                  location.register:=rg.getregistermm(exprasmlist);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
                end;
              LOC_REFERENCE,
              LOC_CREFERENCE:
                begin
                  location_release(exprasmlist,left.location);
                  location.register:=rg.getregistermm(exprasmlist);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
                end;
            end;
            { load mask }
            emit_reg_reg(A_MOVD,S_NO,R_EDI,R_MM7);
            rg.ungetregisterint(exprasmlist,R_EDI);
            { lower 32 bit }
            emit_reg_reg(A_PXOR,S_D,R_MM7,location.register);
            { shift mask }
            emit_const_reg(A_PSLLQ,S_NO,32,R_MM7);
            { higher 32 bit }
            emit_reg_reg(A_PXOR,S_D,R_MM7,location.register);
          end
{$endif SUPPORT_MMX}
      else if is_64bitint(left.resulttype.def) then
        begin
          secondpass(left);
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_64,false);
          { invert both halves of the register pair }
          emit_reg(A_NOT,S_L,location.registerlow);
          emit_reg(A_NOT,S_L,location.registerhigh);
        end
      else
        begin
          secondpass(left);
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,def_cgsize(resulttype.def),false);
          size:=def_opsize(resulttype.def);
          emit_reg(A_NOT,size,location.register);
        end;
    end;
  begin
    { unit initialization: install the i386 implementations in the   }
    { c*node class variables (presumably declared in nmat - confirm) }
    cnotnode:=ti386notnode;
    cunaryminusnode:=ti386unaryminusnode;
    cshlshrnode:=ti386shlshrnode;
    cmoddivnode:=ti386moddivnode;
  end.
  782. {
  783. $Log$
  784. Revision 1.40 2002-09-07 15:25:10 peter
  785. * old logs removed and tabs fixed
  786. Revision 1.39 2002/08/15 15:15:55 carl
  787. * jmpbuf size allocation for exceptions is now cpu specific (as it should)
  788. * more generic nodes for maths
  789. * several fixes for better m68k support
  790. Revision 1.38 2002/08/14 19:18:16 carl
  791. * bugfix of unaryminus node with left LOC_CREGISTER
  792. Revision 1.37 2002/08/12 15:08:42 carl
  793. + stab register indexes for powerpc (moved from gdb to cpubase)
  794. + tprocessor enumeration moved to cpuinfo
  795. + linker in target_info is now a class
  796. * many many updates for m68k (will soon start to compile)
  797. - removed some ifdef or correct them for correct cpu
  798. Revision 1.36 2002/08/11 14:32:30 peter
  799. * renamed current_library to objectlibrary
  800. Revision 1.35 2002/08/11 13:24:17 peter
  801. * saving of asmsymbols in ppu supported
  802. * asmsymbollist global is removed and moved into a new class
  803. tasmlibrarydata that will hold the info of a .a file which
  804. corresponds with a single module. Added librarydata to tmodule
  805. to keep the library info stored for the module. In the future the
  806. objectfiles will also be stored to the tasmlibrarydata class
  807. * all getlabel/newasmsymbol and friends are moved to the new class
  808. Revision 1.34 2002/08/02 07:44:31 jonas
  809. * made assigned() handling generic
  810. * add nodes now can also evaluate constant expressions at compile time
  811. that contain nil nodes
  812. Revision 1.33 2002/07/20 11:58:02 florian
  813. * types.pas renamed to defbase.pas because D6 contains a types
  814. unit so this would conflicts if D6 programms are compiled
  815. + Willamette/SSE2 instructions to assembler added
  816. Revision 1.32 2002/07/01 18:46:33 peter
  817. * internal linker
  818. * reorganized aasm layer
  819. Revision 1.31 2002/05/18 13:34:25 peter
  820. * readded missing revisions
  821. Revision 1.30 2002/05/16 19:46:51 carl
  822. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  823. + try to fix temp allocation (still in ifdef)
  824. + generic constructor calls
  825. + start of tassembler / tmodulebase class cleanup
  826. Revision 1.28 2002/05/13 19:54:38 peter
  827. * removed n386ld and n386util units
  828. * maybe_save/maybe_restore added instead of the old maybe_push
  829. Revision 1.27 2002/05/12 16:53:17 peter
  830. * moved entry and exitcode to ncgutil and cgobj
  831. * foreach gets extra argument for passing local data to the
  832. iterator function
  833. * -CR checks also class typecasts at runtime by changing them
  834. into as
  835. * fixed compiler to cycle with the -CR option
  836. * fixed stabs with elf writer, finally the global variables can
  837. be watched
  838. * removed a lot of routines from cga unit and replaced them by
  839. calls to cgobj
  840. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  841. u32bit then the other is typecasted also to u32bit without giving
  842. a rangecheck warning/error.
  843. * fixed pascal calling method with reversing also the high tree in
  844. the parast, detected by tcalcst3 test
  845. Revision 1.26 2002/04/04 19:06:12 peter
  846. * removed unused units
  847. * use tlocation.size in cg.a_*loc*() routines
  848. Revision 1.25 2002/04/02 17:11:36 peter
  849. * tlocation,treference update
  850. * LOC_CONSTANT added for better constant handling
  851. * secondadd splitted in multiple routines
  852. * location_force_reg added for loading a location to a register
  853. of a specified size
  854. * secondassignment parses now first the right and then the left node
  855. (this is compatible with Kylix). This saves a lot of push/pop especially
  856. with string operations
  857. * adapted some routines to use the new cg methods
  858. Revision 1.24 2002/03/31 20:26:39 jonas
  859. + a_loadfpu_* and a_loadmm_* methods in tcg
  860. * register allocation is now handled by a class and is mostly processor
  861. independent (+rgobj.pas and i386/rgcpu.pas)
  862. * temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas)
  863. * some small improvements and fixes to the optimizer
  864. * some register allocation fixes
  865. * some fpuvaroffset fixes in the unary minus node
  866. * push/popusedregisters is now called rg.save/restoreusedregisters and
  867. (for i386) uses temps instead of push/pop's when using -Op3 (that code is
  868. also better optimizable)
  869. * fixed and optimized register saving/restoring for new/dispose nodes
  870. * LOC_FPU locations now also require their "register" field to be set to
  871. R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only)
  872. - list field removed of the tnode class because it's not used currently
  873. and can cause hard-to-find bugs
  874. Revision 1.23 2002/03/04 19:10:14 peter
  875. * removed compiler warnings
  876. }