n386mat.pas 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate i386 assembler for math nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit n386mat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,ncgmat,nx86mat;
  23. type
  { i386 version of the div/mod node: only the code generation pass
    (pass_2) is replaced, everything else comes from tmoddivnode. }
  ti386moddivnode = class(tmoddivnode)
    procedure pass_2; override;
  end;
  { i386 version of the shl/shr node. }
  ti386shlshrnode = class(tshlshrnode)
    { everything will be handled in pass_2, see the implementation of
      first_shlshr64bitint }
    function first_shlshr64bitint: tnode; override;
    { code generation for 32-bit and 64-bit shifts }
    procedure pass_2; override;
  end;
  { i386 unary minus node: adds nothing to tx86unaryminusnode; the class
    only exists so that it can be registered as cunaryminusnode in the
    initialization part of this unit. }
  ti386unaryminusnode = class(tx86unaryminusnode)
  end;
  { i386 "not" node: adds nothing to tx86notnode; the class only exists
    so that it can be registered as cnotnode in the initialization part
    of this unit. }
  ti386notnode = class(tx86notnode)
  end;
  36. implementation
  37. uses
  38. globtype,systems,
  39. cutils,verbose,globals,
  40. symconst,symdef,aasmbase,aasmtai,defutil,
  41. cgbase,pass_1,pass_2,
  42. ncon,
  43. cpubase,cpuinfo,
  44. cga,ncgutil,cgobj;
  45. {*****************************************************************************
  46. TI386MODDIVNODE
  47. *****************************************************************************}
  procedure ti386moddivnode.pass_2;
    {
      Generates i386 code for a "div" or "mod" node.

      Both operands are evaluated with secondpass; generation stops as
      soon as codegenerror is set.  64-bit operations must not arrive
      here and raise internalerror 200109052.

      Two strategies are used:
        - "div" by a constant power of two is emitted as a shift, with a
          correction of the numerator when the left operand is signed;
        - everything else uses DIV/IDIV with the numerator in EDX:EAX.
          The quotient is copied from EAX (div), the remainder from
          EDX (mod).

      The result is left in location.register (LOC_REGISTER, OS_INT).
    }
    var
      hreg1,hreg2 : Tregister;
      power : longint;
      hl : Tasmlabel;
      op : Tasmop;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      secondpass(right);
      if codegenerror then
        exit;

      if is_64bitint(resulttype.def) then
        { should be handled in pass_1 (JM) }
        internalerror(200109052);

      { put numerator in register }
      location_reset(location,LOC_REGISTER,OS_INT);
      location_force_reg(exprasmlist,left.location,OS_INT,false);
      hreg1:=left.location.register;

      if (nodetype=divn) and (right.nodetype=ordconstn) and
         ispowerof2(tordconstnode(right).value,power) then
        begin
          { for signed numbers, the numerator must be adjusted before the
            shift instruction, but not with unsigned numbers! Otherwise,
            "Cardinal($ffffffff) div 16" overflows! (JM) }
          if is_signed(left.resulttype.def) then
            begin
              if (aktOptProcessor <> class386) and
                 not(cs_littlesize in aktglobalswitches) then
                { use a sequence without jumps, saw this in
                  comp.compilers (JM) }
                begin
                  { no jumps, but more operations }
                  hreg2:=cg.getintregister(exprasmlist,OS_INT);
                  emit_reg_reg(A_MOV,S_L,hreg1,hreg2);
                  { if the left value is negative, hreg2=$ffffffff,
                    otherwise 0 }
                  emit_const_reg(A_SAR,S_L,31,hreg2);
                  { if negative, hreg2=right value-1, otherwise 0 }
                  emit_const_reg(A_AND,S_L,tordconstnode(right).value-1,hreg2);
                  { add to the left value }
                  emit_reg_reg(A_ADD,S_L,hreg2,hreg1);
                  { the temporary register is no longer needed }
                  cg.ungetregister(exprasmlist,hreg2);
                  { do the shift }
                  emit_const_reg(A_SAR,S_L,power,hreg1);
                end
              else
                begin
                  { a jump, but less operations: the correction is skipped
                    when the sign flag is clear after the TEST }
                  emit_reg_reg(A_TEST,S_L,hreg1,hreg1);
                  objectlibrary.getlabel(hl);
                  cg.a_jmp_flags(exprasmlist,F_NS,hl);
                  { right value-1 equals 1 when power=1, so INC suffices }
                  if power=1 then
                    emit_reg(A_INC,S_L,hreg1)
                  else
                    emit_const_reg(A_ADD,S_L,tordconstnode(right).value-1,hreg1);
                  cg.a_label(exprasmlist,hl);
                  emit_const_reg(A_SAR,S_L,power,hreg1);
                end
            end
          else
            emit_const_reg(A_SHR,S_L,power,hreg1);
          location.register:=hreg1;
        end
      else
        begin
          { move the numerator to EAX }
          cg.ungetregister(exprasmlist,hreg1);
          cg.getexplicitregister(exprasmlist,NR_EAX);
          emit_reg_reg(A_MOV,S_L,hreg1,NR_EAX);
          cg.getexplicitregister(exprasmlist,NR_EDX);
          { the extension into EDX depends on the left type: EDX is
            cleared for u32bit, CDQ is used otherwise }
          if torddef(left.resulttype.def).typ=u32bit then
            emit_reg_reg(A_XOR,S_L,NR_EDX,NR_EDX)
          else
            emit_none(A_CDQ,S_NO);
          { division depends on the right type }
          if Torddef(right.resulttype.def).typ=u32bit then
            op:=A_DIV
          else
            op:=A_IDIV;
          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            emit_ref(op,S_L,right.location.reference)
          else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
            emit_reg(op,S_L,right.location.register)
          else
            begin
              { the divisor is loaded as OS_32 and used by a 32-bit (S_L)
                instruction, so the scratch register has to be a 32-bit
                one whatever the size of right.location is }
              hreg1:=cg.getintregister(exprasmlist,OS_32);
              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,hreg1);
              cg.ungetregister(exprasmlist,hreg1);
              emit_reg(op,S_L,hreg1);
            end;
          location_release(exprasmlist,right.location);

          { copy the result into a new register, release EAX & EDX }
          if nodetype=divn then
            begin
              { quotient is in EAX }
              cg.ungetregister(exprasmlist,NR_EDX);
              cg.ungetregister(exprasmlist,NR_EAX);
              location.register:=cg.getintregister(exprasmlist,OS_INT);
              emit_reg_reg(A_MOV,S_L,NR_EAX,location.register);
            end
          else
            begin
              { remainder is in EDX }
              cg.ungetregister(exprasmlist,NR_EAX);
              cg.ungetregister(exprasmlist,NR_EDX);
              location.register:=cg.getintregister(exprasmlist,OS_INT);
              emit_reg_reg(A_MOV,S_L,NR_EDX,location.register);
            end;
        end;
    end;
  158. {*****************************************************************************
  159. TI386SHLSHRNODE
  160. *****************************************************************************}
  function ti386shlshrnode.first_shlshr64bitint: tnode;
    { Always returns nil: on i386 the 64-bit shifts are generated
      by pass_2 of this node.
      NOTE(review): nil presumably tells the caller in tshlshrnode that
      no replacement node is needed - confirm against nmat. }
    begin
      first_shlshr64bitint:=nil;
    end;
  procedure ti386shlshrnode.pass_2;
    {
      Generates i386 code for "shl" and "shr".

      Both operands are evaluated with secondpass; generation stops as
      soon as codegenerror is set (same convention as
      ti386moddivnode.pass_2).

      64-bit left operand: the value is kept in a register pair
      (registerlow/registerhigh).
        - constant count: the count is taken "and 63" and the shift is
          coded directly, using SHLD/SHRD for counts up to 31 and a
          register swap plus a single 32-bit shift for counts above 31;
        - run-time count: the count is loaded into ECX and compared
          against 64 and 32 to select the code path.
      Other left operands: a single SHL/SHR on a 32-bit register, with a
      constant count taken "and 31" or a run-time count in CL.

      NOTE(review): for 64-bit operands a constant count is reduced with
      "and 63", while a run-time count of 64 or more gives 0 - confirm
      that this difference is intended.
    }
    var
      hregisterhigh,hregisterlow : Tregister;
      op : Tasmop;
      v : TConstExprInt;
      l1,l2,l3 : Tasmlabel;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      secondpass(right);
      if codegenerror then
        exit;

      { determine operator }
      if nodetype=shln then
        op:=A_SHL
      else
        op:=A_SHR;

      if is_64bitint(left.resulttype.def) then
        begin
          location_reset(location,LOC_REGISTER,OS_64);

          { load left operator in a register }
          location_force_reg(exprasmlist,left.location,OS_64,false);
          hregisterhigh:=left.location.registerhigh;
          hregisterlow:=left.location.registerlow;

          { shifting by a constant directly coded: }
          if (right.nodetype=ordconstn) then
            begin
              v:=Tordconstnode(right).value and 63;
              if v>31 then
                begin
                  { one half of the result is zero, the other half is the
                    opposite input half shifted by (v and 31); the halves
                    are exchanged by swapping the registers below }
                  if nodetype=shln then
                    begin
                      emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh);
                      if ((v and 31) <> 0) then
                        emit_const_reg(A_SHL,S_L,v and 31,hregisterlow);
                    end
                  else
                    begin
                      emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow);
                      if ((v and 31) <> 0) then
                        emit_const_reg(A_SHR,S_L,v and 31,hregisterhigh);
                    end;
                  location.registerhigh:=hregisterlow;
                  location.registerlow:=hregisterhigh;
                end
              else
                begin
                  if nodetype=shln then
                    begin
                      emit_const_reg_reg(A_SHLD,S_L,v and 31,hregisterlow,hregisterhigh);
                      emit_const_reg(A_SHL,S_L,v and 31,hregisterlow);
                    end
                  else
                    begin
                      emit_const_reg_reg(A_SHRD,S_L,v and 31,hregisterhigh,hregisterlow);
                      emit_const_reg(A_SHR,S_L,v and 31,hregisterhigh);
                    end;
                  location.registerlow:=hregisterlow;
                  location.registerhigh:=hregisterhigh;
                end;
            end
          else
            begin
              { load right operators in a register }
              cg.getexplicitregister(exprasmlist,NR_ECX);
              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
              if right.location.loc<>LOC_CREGISTER then
                location_release(exprasmlist,right.location);

              { left operator is already in a register }
              { hence are both in a register }
              { is it in the case ECX ? }

              { the shift instructions only work for counts below 32,
                so larger counts need some tricks }
              objectlibrary.getlabel(l1);
              objectlibrary.getlabel(l2);
              objectlibrary.getlabel(l3);

              { count >= 64: the result is zero }
              emit_const_reg(A_CMP,S_L,64,NR_ECX);
              cg.a_jmp_flags(exprasmlist,F_L,l1);
              emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow);
              emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh);
              cg.a_jmp_always(exprasmlist,l3);

              { count < 64: counts below 32 continue at l2 }
              cg.a_label(exprasmlist,l1);
              emit_const_reg(A_CMP,S_L,32,NR_ECX);
              cg.a_jmp_flags(exprasmlist,F_L,l2);

              { 32 <= count < 64: shift one half by count-32, move it to
                the other half and clear the first one }
              emit_const_reg(A_SUB,S_L,32,NR_ECX);
              if nodetype=shln then
                begin
                  emit_reg_reg(A_SHL,S_L,NR_CL,hregisterlow);
                  emit_reg_reg(A_MOV,S_L,hregisterlow,hregisterhigh);
                  emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow);
                  cg.a_jmp_always(exprasmlist,l3);
                  { count < 32 }
                  cg.a_label(exprasmlist,l2);
                  emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hregisterlow,hregisterhigh);
                  emit_reg_reg(A_SHL,S_L,NR_CL,hregisterlow);
                end
              else
                begin
                  emit_reg_reg(A_SHR,S_L,NR_CL,hregisterhigh);
                  emit_reg_reg(A_MOV,S_L,hregisterhigh,hregisterlow);
                  emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh);
                  cg.a_jmp_always(exprasmlist,l3);
                  { count < 32 }
                  cg.a_label(exprasmlist,l2);
                  emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hregisterhigh,hregisterlow);
                  emit_reg_reg(A_SHR,S_L,NR_CL,hregisterhigh);
                end;
              cg.a_label(exprasmlist,l3);

              cg.ungetregister(exprasmlist,NR_ECX);
              location.registerlow:=hregisterlow;
              location.registerhigh:=hregisterhigh;
            end;
        end
      else
        begin
          { load left operators in a register }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_INT,false);

          { shifting by a constant directly coded: }
          if (right.nodetype=ordconstn) then
            { l shl 32 should be 0 imho, but neither TP nor Delphi do it
              in this way (FK) }
            emit_const_reg(op,S_L,tordconstnode(right).value and 31,location.register)
          else
            begin
              { load right operators in ECX; the right location is
                released before ECX is allocated }
              if right.location.loc<>LOC_CREGISTER then
                location_release(exprasmlist,right.location);
              cg.getexplicitregister(exprasmlist,NR_ECX);
              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);

              { right operand is in ECX }
              cg.ungetregister(exprasmlist,NR_ECX);
              emit_reg_reg(op,S_L,NR_CL,location.register);
            end;
        end;
    end;
  { unit initialization: install the i386 node classes in the class
    reference variables cmoddivnode, cshlshrnode, cunaryminusnode and
    cnotnode }
  begin
    cmoddivnode:=ti386moddivnode;
    cshlshrnode:=ti386shlshrnode;
    cunaryminusnode:=ti386unaryminusnode;
    cnotnode:=ti386notnode;
  end.
  300. {
  301. $Log$
  302. Revision 1.70 2004-05-23 14:10:17 peter
  303. * fix shl/shr with value > 63
  304. Revision 1.69 2004/01/20 12:59:37 florian
  305. * common addnode code for x86-64 and i386
  306. Revision 1.68 2003/12/26 13:19:16 florian
  307. * rtl and compiler compile with -Cfsse2
  308. Revision 1.67 2003/12/25 01:07:09 florian
  309. + $fputype directive support
  310. + single data type operations with sse unit
  311. * fixed more x86-64 stuff
  312. Revision 1.66 2003/12/10 17:28:41 peter
  313. * int64 shl/shr > 63 returns 0
  314. Revision 1.65 2003/10/10 17:48:14 peter
  315. * old trgobj moved to x86/rgcpu and renamed to trgx86fpu
  316. * tregisteralloctor renamed to trgobj
  317. * removed rgobj from a lot of units
  318. * moved location_* and reference_* to cgobj
  319. * first things for mmx register allocation
  320. Revision 1.64 2003/10/09 21:31:37 daniel
  321. * Register allocator splitted, ans abstract now
  322. Revision 1.63 2003/10/01 20:34:49 peter
  323. * procinfo unit contains tprocinfo
  324. * cginfo renamed to cgbase
  325. * moved cgmessage to verbose
  326. * fixed ppc and sparc compiles
  327. Revision 1.62 2003/09/29 20:58:56 peter
  328. * optimized releasing of registers
  329. Revision 1.61 2003/09/28 21:48:20 peter
  330. * fix register leaks
  331. Revision 1.60 2003/09/03 15:55:01 peter
  332. * NEWRA branch merged
  333. Revision 1.59.2.2 2003/08/31 13:50:16 daniel
  334. * Remove sorting and use pregenerated indexes
  335. * Some work on making things compile
  336. Revision 1.59.2.1 2003/08/29 17:29:00 peter
  337. * next batch of updates
  338. Revision 1.59 2003/07/02 22:18:04 peter
  339. * paraloc splitted in callerparaloc,calleeparaloc
  340. * sparc calling convention updates
  341. Revision 1.58 2003/06/13 21:19:31 peter
  342. * current_procdef removed, use current_procinfo.procdef instead
  343. Revision 1.57 2003/06/03 21:11:09 peter
  344. * cg.a_load_* get a from and to size specifier
  345. * makeregsize only accepts newregister
  346. * i386 uses generic tcgnotnode,tcgunaryminus
  347. Revision 1.56 2003/06/03 13:01:59 daniel
  348. * Register allocator finished
  349. Revision 1.55 2003/05/31 15:04:31 peter
  350. * load_loc_reg update
  351. Revision 1.54 2003/05/22 21:32:29 peter
  352. * removed some unit dependencies
  353. Revision 1.53 2003/04/22 23:50:23 peter
  354. * firstpass uses expectloc
  355. * checks if there are differences between the expectloc and
  356. location.loc from secondpass in EXTDEBUG
  357. Revision 1.52 2003/04/22 14:33:38 peter
  358. * removed some notes/hints
  359. Revision 1.51 2003/04/22 10:09:35 daniel
  360. + Implemented the actual register allocator
  361. + Scratch registers unavailable when new register allocator used
  362. + maybe_save/maybe_restore unavailable when new register allocator used
  363. Revision 1.50 2003/04/21 19:15:26 peter
  364. * when ecx is not available allocated another register
  365. Revision 1.49 2003/04/17 10:02:48 daniel
  366. * Tweaked register allocate/deallocate positition to less interferences
  367. are generated.
  368. Revision 1.48 2003/03/28 19:16:57 peter
  369. * generic constructor working for i386
  370. * remove fixed self register
  371. * esi added as address register for i386
  372. Revision 1.47 2003/03/08 20:36:41 daniel
  373. + Added newra version of Ti386shlshrnode
  374. + Added interference graph construction code
  375. Revision 1.46 2003/03/08 13:59:17 daniel
  376. * Work to handle new register notation in ag386nsm
  377. + Added newra version of Ti386moddivnode
  378. Revision 1.45 2003/02/19 22:00:15 daniel
  379. * Code generator converted to new register notation
  380. - Horribily outdated todo.txt removed
  381. Revision 1.44 2003/01/13 18:37:44 daniel
  382. * Work on register conversion
  383. Revision 1.43 2003/01/13 14:54:34 daniel
  384. * Further work to convert codegenerator register convention;
  385. internalerror bug fixed.
  386. Revision 1.42 2003/01/08 18:43:57 daniel
  387. * Tregister changed into a record
  388. Revision 1.41 2002/11/25 17:43:26 peter
  389. * splitted defbase in defutil,symutil,defcmp
  390. * merged isconvertable and is_equal into compare_defs(_ext)
  391. * made operator search faster by walking the list only once
  392. Revision 1.40 2002/09/07 15:25:10 peter
  393. * old logs removed and tabs fixed
  394. Revision 1.39 2002/08/15 15:15:55 carl
  395. * jmpbuf size allocation for exceptions is now cpu specific (as it should)
  396. * more generic nodes for maths
  397. * several fixes for better m68k support
  398. Revision 1.38 2002/08/14 19:18:16 carl
  399. * bugfix of unaryminus node with left LOC_CREGISTER
  400. Revision 1.37 2002/08/12 15:08:42 carl
  401. + stab register indexes for powerpc (moved from gdb to cpubase)
  402. + tprocessor enumeration moved to cpuinfo
  403. + linker in target_info is now a class
  404. * many many updates for m68k (will soon start to compile)
  405. - removed some ifdef or correct them for correct cpu
  406. Revision 1.36 2002/08/11 14:32:30 peter
  407. * renamed current_library to objectlibrary
  408. Revision 1.35 2002/08/11 13:24:17 peter
  409. * saving of asmsymbols in ppu supported
  410. * asmsymbollist global is removed and moved into a new class
  411. tasmlibrarydata that will hold the info of a .a file which
  412. corresponds with a single module. Added librarydata to tmodule
  413. to keep the library info stored for the module. In the future the
  414. objectfiles will also be stored to the tasmlibrarydata class
  415. * all getlabel/newasmsymbol and friends are moved to the new class
  416. Revision 1.34 2002/08/02 07:44:31 jonas
  417. * made assigned() handling generic
  418. * add nodes now can also evaluate constant expressions at compile time
  419. that contain nil nodes
  420. Revision 1.33 2002/07/20 11:58:02 florian
  421. * types.pas renamed to defbase.pas because D6 contains a types
  422. unit so this would conflicts if D6 programms are compiled
  423. + Willamette/SSE2 instructions to assembler added
  424. Revision 1.32 2002/07/01 18:46:33 peter
  425. * internal linker
  426. * reorganized aasm layer
  427. Revision 1.31 2002/05/18 13:34:25 peter
  428. * readded missing revisions
  429. Revision 1.30 2002/05/16 19:46:51 carl
  430. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  431. + try to fix temp allocation (still in ifdef)
  432. + generic constructor calls
  433. + start of tassembler / tmodulebase class cleanup
  434. Revision 1.28 2002/05/13 19:54:38 peter
  435. * removed n386ld and n386util units
  436. * maybe_save/maybe_restore added instead of the old maybe_push
  437. Revision 1.27 2002/05/12 16:53:17 peter
  438. * moved entry and exitcode to ncgutil and cgobj
  439. * foreach gets extra argument for passing local data to the
  440. iterator function
  441. * -CR checks also class typecasts at runtime by changing them
  442. into as
  443. * fixed compiler to cycle with the -CR option
  444. * fixed stabs with elf writer, finally the global variables can
  445. be watched
  446. * removed a lot of routines from cga unit and replaced them by
  447. calls to cgobj
  448. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  449. u32bit then the other is typecasted also to u32bit without giving
  450. a rangecheck warning/error.
  451. * fixed pascal calling method with reversing also the high tree in
  452. the parast, detected by tcalcst3 test
  453. Revision 1.26 2002/04/04 19:06:12 peter
  454. * removed unused units
  455. * use tlocation.size in cg.a_*loc*() routines
  456. Revision 1.25 2002/04/02 17:11:36 peter
  457. * tlocation,treference update
  458. * LOC_CONSTANT added for better constant handling
  459. * secondadd splitted in multiple routines
  460. * location_force_reg added for loading a location to a register
  461. of a specified size
  462. * secondassignment parses now first the right and then the left node
  463. (this is compatible with Kylix). This saves a lot of push/pop especially
  464. with string operations
  465. * adapted some routines to use the new cg methods
  466. Revision 1.24 2002/03/31 20:26:39 jonas
  467. + a_loadfpu_* and a_loadmm_* methods in tcg
  468. * register allocation is now handled by a class and is mostly processor
  469. independent (+rgobj.pas and i386/rgcpu.pas)
  470. * temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas)
  471. * some small improvements and fixes to the optimizer
  472. * some register allocation fixes
  473. * some fpuvaroffset fixes in the unary minus node
  474. * push/popusedregisters is now called rg.save/restoreusedregisters and
  475. (for i386) uses temps instead of push/pop's when using -Op3 (that code is
  476. also better optimizable)
  477. * fixed and optimized register saving/restoring for new/dispose nodes
  478. * LOC_FPU locations now also require their "register" field to be set to
  479. R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only)
  480. - list field removed of the tnode class because it's not used currently
  481. and can cause hard-to-find bugs
  482. Revision 1.23 2002/03/04 19:10:14 peter
  483. * removed compiler warnings
  484. }