cgx86.pas 76 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132
  1. {
  2. Copyright (c) 1998-2005 by Florian Klaempfl
  3. This unit implements the common parts of the code generator for the i386 and the x86-64.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. { This unit implements the common parts of the code generator for the i386 and the x86-64.
  18. }
  19. unit cgx86;
  20. {$i fpcdefs.inc}
  21. interface
  22. uses
  23. globtype,
  24. cgbase,cgutils,cgobj,
  25. aasmbase,aasmtai,aasmdata,aasmcpu,
  26. cpubase,cpuinfo,rgobj,rgx86,rgcpu,
  27. symconst,symtype,symdef;
  type
    { Code generator class holding the parts shared by the i386 and the
      x86-64 back ends (see the unit header). }
    tcgx86 = class(tcg)
      { allocator object used for FPU registers; freed in done_register_allocators }
      rgfpu : Trgx86fpu;

      { register allocation }
      procedure done_register_allocators;override;
      function getfpuregister(list:TAsmList;size:Tcgsize):Tregister;override;
      function getmmxregister(list:TAsmList):Tregister;
      function getmmregister(list:TAsmList;size:Tcgsize):Tregister;override;
      procedure getcpuregister(list:TAsmList;r:Tregister);override;
      procedure ungetcpuregister(list:TAsmList;r:Tregister);override;
      procedure alloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);override;
      procedure dealloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);override;
      function uses_registers(rt:Tregistertype):boolean;override;
      procedure add_reg_instruction(instr:Tai;r:tregister);override;

      { bookkeeping of rgfpu.fpuvaroffset }
      procedure dec_fpu_stack;
      procedure inc_fpu_stack;

      { calls }
      procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
      procedure a_call_reg(list : TAsmList;reg : tregister);override;
      procedure a_call_ref(list : TAsmList;ref : treference);override;
      procedure a_call_name_static(list : TAsmList;const s : string);override;

      { arithmetic/logical operations }
      procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: aint; reg: TRegister); override;
      procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: aint; const ref: TReference); override;
      procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
      procedure a_op_ref_reg(list : TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister); override;
      procedure a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference); override;

      { move instructions }
      procedure a_load_const_reg(list : TAsmList; tosize: tcgsize; a : aint;reg : tregister);override;
      procedure a_load_const_ref(list : TAsmList; tosize: tcgsize; a : aint;const ref : treference);override;
      procedure a_load_reg_ref(list : TAsmList;fromsize,tosize: tcgsize; reg : tregister;const ref : treference);override;
      procedure a_load_ref_reg(list : TAsmList;fromsize,tosize: tcgsize;const ref : treference;reg : tregister);override;
      procedure a_load_reg_reg(list : TAsmList;fromsize,tosize: tcgsize;reg1,reg2 : tregister);override;
      procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;

      { fpu move instructions }
      procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
      procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
      procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;

      { vector register move instructions }
      procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
      procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
      procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
      procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
      procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle);override;

      { comparison operations }
      procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : aint;reg : tregister;
        l : tasmlabel);override;
      procedure a_cmp_const_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : aint;const ref : treference;
        l : tasmlabel);override;
      procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
      procedure a_cmp_ref_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;const ref: treference; reg : tregister; l : tasmlabel); override;
      procedure a_cmp_reg_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg : tregister; const ref: treference; l : tasmlabel); override;

      { jumps and flag handling }
      procedure a_jmp_name(list : TAsmList;const s : string);override;
      procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
      procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
      procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister); override;
      procedure g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference); override;

      procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : aint);override;

      { entry/exit code helpers }
      procedure g_profilecode(list : TAsmList);override;
      procedure g_stackpointer_alloc(list : TAsmList;localsize : longint);override;
      procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
      procedure g_overflowcheck(list: TAsmList; const l:tlocation;def:tdef);override;
      procedure g_external_wrapper(list: TAsmList; procdef: tprocdef; const externalname: string); override;

      { rewrites ref in place, emitting helper instructions to list where needed }
      procedure make_simple_ref(list:TAsmList;var ref: treference);
    protected
      procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
      { raises an internal error when size and the size of reg map to different operand sizes }
      procedure check_register_size(size:tcgsize;reg:tregister);
      procedure opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;dst: tregister; shuffle : pmmshuffle);
      { returns (creating it on first use) the symbol 'L<s>$stub' }
      function get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
    private
      { selects the move opcode and operand size for a s1 -> s2 transfer }
      procedure sizes2load(s1,s2 : tcgsize;var op: tasmop; var s3: topsize);
      procedure floatload(list: TAsmList; t : tcgsize;const ref : treference);
      procedure floatstore(list: TAsmList; t : tcgsize;const ref : treference);
      procedure floatloadops(t : tcgsize;var op : tasmop;var s : topsize);
      procedure floatstoreops(t : tcgsize;var op : tasmop;var s : topsize);
    end;
  const
    { Maps every tcgsize value to the operand size (topsize) used when an
      instruction is emitted for it.  The x86-64 and the non-x86-64 tables
      are identical except for three entries (the 5th, 10th and 11th), which
      are S_Q when x86_64 is defined and S_L otherwise. }
    TCGSize2OpSize: Array[tcgsize] of topsize =
      (S_NO,S_B,S_W,S_L,
       {$ifdef x86_64}S_Q{$else x86_64}S_L{$endif x86_64},
       S_T,S_B,S_W,S_L,
       {$ifdef x86_64}S_Q,S_Q{$else x86_64}S_L,S_L{$endif x86_64},
       S_FS,S_FL,S_FX,S_IQ,S_FXX,
       S_NO,S_NO,S_NO,S_MD,S_T,
       S_NO,S_NO,S_NO,S_NO,S_T);

{$ifndef NOTARGETWIN}
    { NOTE(review): presumably the stack page size in bytes assumed for
      Windows targets - confirm against the users of this constant }
    winstackpagesize = 4096;
{$endif NOTARGETWIN}
  119. implementation
  120. uses
  121. globals,verbose,systems,cutils,
  122. defutil,paramgr,procinfo,
  123. tgobj,ncgutil,
  124. fmodule;
  const
    { assembler opcode emitted for each generic operation (topcg) }
    TOpCG2AsmOp: Array[topcg] of TAsmOp = (
      A_NONE,
      A_MOV,
      A_ADD,
      A_AND,
      A_DIV,
      A_IDIV,
      A_IMUL,
      A_MUL,
      A_NEG,
      A_NOT,
      A_OR,
      A_SAR,
      A_SHL,
      A_SHR,
      A_SUB,
      A_XOR,
      A_ROL,
      A_ROR
    );

    { condition code used for each generic comparison (topcmp) }
    TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (
      C_NONE,
      C_E,
      C_G,
      C_L,
      C_GE,
      C_LE,
      C_NE,
      C_BE,
      C_B,
      C_AE,
      C_A
    );
  { Frees the integer, MM and MMX register allocators and the FPU allocator
    object, then runs the inherited clean-up. }
  procedure Tcgx86.done_register_allocators;
    begin
      { the rg[] entries first ... }
      rg[R_INTREGISTER].free;
      rg[R_MMREGISTER].free;
      rg[R_MMXREGISTER].free;
      { ... then the separately kept FPU allocator }
      rgfpu.free;

      inherited done_register_allocators;
    end;
  { Returns an FPU register obtained from rgfpu.  The size argument is not
    used by this implementation. }
  function Tcgx86.getfpuregister(list:TAsmList;size:Tcgsize):Tregister;
    begin
      getfpuregister:=rgfpu.getregisterfpu(list);
    end;
  { Returns a register from the MMX register allocator (sub register
    R_SUBNONE).  Raises internal error 2003121214 when no MMX allocator
    has been set up. }
  function Tcgx86.getmmxregister(list:TAsmList):Tregister;
    begin
      if rg[R_MMXREGISTER]=nil then
        internalerror(2003121214);
      getmmxregister:=rg[R_MMXREGISTER].getregister(list,R_SUBNONE);
    end;
  { Returns a register from the MM register allocator.  The sub register
    kind follows the requested size:
      OS_F64  -> R_SUBMMD
      OS_F32  -> R_SUBMMS
      OS_M128 -> R_SUBMMWHOLE
    Any other size raises internal error 200506041; a missing MM allocator
    raises internal error 2003121234. }
  function Tcgx86.getmmregister(list:TAsmList;size:Tcgsize):Tregister;
    var
      subreg : tsubregister;
    begin
      if rg[R_MMREGISTER]=nil then
        internalerror(2003121234);
      { pick the sub register first, allocate once afterwards }
      case size of
        OS_F64:
          subreg:=R_SUBMMD;
        OS_F32:
          subreg:=R_SUBMMS;
        OS_M128:
          subreg:=R_SUBMMWHOLE;
        else
          internalerror(200506041);
      end;
      result:=rg[R_MMREGISTER].getregister(list,subreg);
    end;
  { Forwards the request to the inherited implementation for every
    register type except FPU registers; an FPU register raises internal
    error 2003121210. }
  procedure Tcgx86.getcpuregister(list:TAsmList;r:Tregister);
    begin
      if getregtype(r)<>R_FPUREGISTER then
        inherited getcpuregister(list,r)
      else
        internalerror(2003121210);
    end;
  { Releases register r: FPU registers are handed to rgfpu, every other
    register type goes to the inherited implementation. }
  procedure tcgx86.ungetcpuregister(list:TAsmList;r:Tregister);
    begin
      if getregtype(r)<>R_FPUREGISTER then
        inherited ungetcpuregister(list,r)
      else
        rgfpu.ungetregisterfpu(list,r);
    end;
  { Forwards to the inherited implementation unless rt is R_FPUREGISTER,
    in which case nothing is done. }
  procedure Tcgx86.alloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);
    begin
      if rt=R_FPUREGISTER then
        exit;
      inherited alloccpuregisters(list,rt,r);
    end;
  { Forwards to the inherited implementation unless rt is R_FPUREGISTER,
    in which case nothing is done. }
  procedure Tcgx86.dealloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);
    begin
      if rt=R_FPUREGISTER then
        exit;
      inherited dealloccpuregisters(list,rt,r);
    end;
  { Always answers false for R_FPUREGISTER; every other register type is
    answered by the inherited implementation. }
  function Tcgx86.uses_registers(rt:Tregistertype):boolean;
    begin
      if rt<>R_FPUREGISTER then
        begin
          result:=inherited uses_registers(rt);
          exit;
        end;
      result:=false;
    end;
  { Passes (instr,r) on to the inherited implementation unless r is an
    FPU register, which is silently skipped. }
  procedure tcgx86.add_reg_instruction(instr:Tai;r:tregister);
    begin
      if getregtype(r)=R_FPUREGISTER then
        exit;
      inherited add_reg_instruction(instr,r);
    end;
  { Decrements rgfpu.fpuvaroffset.  Raises internal error 200604201 when
    the counter is not positive on entry. }
  procedure tcgx86.dec_fpu_stack;
    begin
      if not (rgfpu.fpuvaroffset>0) then
        internalerror(200604201);
      dec(rgfpu.fpuvaroffset);
    end;
  { Increments rgfpu.fpuvaroffset (counterpart of dec_fpu_stack). }
  procedure tcgx86.inc_fpu_stack;
    begin
      inc(rgfpu.fpuvaroffset);
    end;
  210. {****************************************************************************
  211. This is private property, keep out! :)
  212. ****************************************************************************}
  { Determines how a value of size s1 is moved into a location of size s2.

    s1, s2 : source and destination size; if one of them is OS_NO it is
             replaced by the other one
    op     : receives A_MOV, A_MOVZX, A_MOVSX or (x86_64 only) A_MOVSXD
    s3     : receives the operand size of the instruction

    Combinations that are not listed below (e.g. a source wider than the
    destination) raise an internal error. }
  procedure tcgx86.sizes2load(s1,s2 : tcgsize; var op: tasmop; var s3: topsize);
    begin
      { ensure to have always valid sizes }
      if s1=OS_NO then
        s1:=s2;
      if s2=OS_NO then
        s2:=s1;

      { step 1: operand size, selected by destination and then by source }
      case s2 of
        OS_8,OS_S8 :
          case s1 of
            OS_8,OS_S8:
              s3:=S_B;
            else
              internalerror(200109221);
          end;
        OS_16,OS_S16:
          case s1 of
            OS_8,OS_S8:
              s3:=S_BW;
            OS_16,OS_S16:
              s3:=S_W;
            else
              internalerror(200109222);
          end;
        OS_32,OS_S32:
          case s1 of
            OS_8,OS_S8:
              s3:=S_BL;
            OS_16,OS_S16:
              s3:=S_WL;
            OS_32,OS_S32:
              s3:=S_L;
            else
              internalerror(200109223);
          end;
{$ifdef x86_64}
        OS_64,OS_S64:
          { unsigned sources use the 32 bit forms (S_BL, S_WL, S_L),
            signed sources the 64 bit forms }
          case s1 of
            OS_8:
              s3:=S_BL;
            OS_S8:
              s3:=S_BQ;
            OS_16:
              s3:=S_WL;
            OS_S16:
              s3:=S_WQ;
            OS_32:
              s3:=S_L;
            OS_S32:
              s3:=S_LQ;
            OS_64,OS_S64:
              s3:=S_Q;
            else
              internalerror(200304302);
          end;
{$endif x86_64}
        else
          internalerror(200109227);
      end;

      { step 2: opcode - plain move for equal widths, zero extension for
        unsigned sources, sign extension otherwise }
      if s3 in [S_B,S_W,S_L,S_Q] then
        op:=A_MOV
      else if s1 in [OS_8,OS_16,OS_32,OS_64] then
        op:=A_MOVZX
      else
        begin
          op:=A_MOVSX;
{$ifdef x86_64}
          if s3=S_LQ then
            op:=A_MOVSXD;
{$endif x86_64}
        end;
    end;
  { Rewrites ref in place into a form that can be used directly as an
    instruction operand, appending any helper instructions to list.

    x86_64:
      - an offset outside the longint range is loaded into a fresh address
        register, which is then merged into the reference;
      - a symbol that is not a local label/function is, when PIC code is
        generated, loaded RIP relative (addr_pic) into a register; on
        Windows targets without PIC it is addressed RIP relative, going
        through LEA into a register when the reference already has a base
        or an index.
    other targets:
      - on i386-darwin external/weak symbols are loaded through
        g_indirect_sym_load, other symbols under PIC are made relative to
        the GOT label of the current procedure;
      - elsewhere, under PIC, the symbol address is loaded via the GOT.

    Whenever a helper register is merged it becomes the base if there is
    none, else the index if there is none, else the old base is added to it
    and it replaces the base. }
  procedure tcgx86.make_simple_ref(list:TAsmList;var ref: treference);
    var
      hreg : tregister;
      href : treference;
{$ifndef x86_64}
      add_hreg: boolean;
{$endif not x86_64}
    begin
{$ifdef x86_64}
      { Only 32bit is allowed }
      if ((ref.offset<low(longint)) or (ref.offset>high(longint))) then
        begin
          { Load constant value to register }
          hreg:=GetAddressRegister(list);
          list.concat(taicpu.op_const_reg(A_MOV,S_Q,ref.offset,hreg));
          ref.offset:=0;
          {if assigned(ref.symbol) then
            begin
              list.concat(taicpu.op_sym_ofs_reg(A_ADD,S_Q,ref.symbol,0,hreg));
              ref.symbol:=nil;
            end;}
          { Add register to reference }
          if ref.index=NR_NO then
            ref.index:=hreg
          else
            begin
              if ref.scalefactor<>0 then
                begin
                  { the index is scaled, so the offset register has to go
                    into the base; only add the old base if there is one,
                    otherwise an ADD with NR_NO as source would be emitted }
                  if ref.base<>NR_NO then
                    list.concat(taicpu.op_reg_reg(A_ADD,S_Q,ref.base,hreg));
                  ref.base:=hreg;
                end
              else
                begin
                  list.concat(taicpu.op_reg_reg(A_ADD,S_Q,ref.index,hreg));
                  ref.index:=hreg;
                end;
            end;
        end;
      if assigned(ref.symbol) and not((ref.symbol.bind=AB_LOCAL) and (ref.symbol.typ in [AT_LABEL,AT_FUNCTION])) then
        begin
          if cs_create_pic in current_settings.moduleswitches then
            begin
              { load the symbol address RIP relative (addr_pic) }
              reference_reset_symbol(href,ref.symbol,0,sizeof(pint));
              hreg:=getaddressregister(list);
              href.refaddr:=addr_pic;
              href.base:=NR_RIP;
              list.concat(taicpu.op_ref_reg(A_MOV,S_Q,href,hreg));
              ref.symbol:=nil;
              if ref.base=NR_NO then
                ref.base:=hreg
              else if ref.index=NR_NO then
                begin
                  ref.index:=hreg;
                  ref.scalefactor:=1;
                end
              else
                begin
                  list.concat(taicpu.op_reg_reg(A_ADD,S_Q,ref.base,hreg));
                  ref.base:=hreg;
                end;
            end
          else
            { Always use RIP relative symbol addressing for Windows targets. }
            if (target_info.system in system_all_windows) and (ref.base<>NR_RIP) then
              begin
                if (ref.refaddr=addr_no) and (ref.base=NR_NO) and (ref.index=NR_NO) then
                  { Set RIP relative addressing for simple symbol references }
                  ref.base:=NR_RIP
                else
                  begin
                    { Use temp register to load calculated 64-bit symbol address for complex references }
                    reference_reset_symbol(href,ref.symbol,0,sizeof(pint));
                    href.base:=NR_RIP;
                    hreg:=GetAddressRegister(list);
                    list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                    ref.symbol:=nil;
                    if ref.base=NR_NO then
                      ref.base:=hreg
                    else if ref.index=NR_NO then
                      begin
                        ref.index:=hreg;
                        ref.scalefactor:=0;
                      end
                    else
                      begin
                        list.concat(taicpu.op_reg_reg(A_ADD,S_Q,ref.base,hreg));
                        ref.base:=hreg;
                      end;
                  end;
              end;
        end;
{$else x86_64}
      { add_hreg records whether hreg has to be merged into ref below }
      add_hreg:=false;
      if (target_info.system=system_i386_darwin) then
        begin
          if assigned(ref.symbol) and
             not(assigned(ref.relsymbol)) and
             ((ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL]) or
              (cs_create_pic in current_settings.moduleswitches)) then
            begin
              if (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL]) or
                 ((cs_create_pic in current_settings.moduleswitches) and
                  (ref.symbol.bind in [AB_COMMON,AB_GLOBAL])) then
                begin
                  hreg:=g_indirect_sym_load(list,ref.symbol.name,ref.symbol.bind=AB_WEAK_EXTERNAL);
                  ref.symbol:=nil;
                end
              else
                begin
                  { keep the symbol, but make it relative to the GOT label }
                  include(current_procinfo.flags,pi_needs_got);
                  hreg:=current_procinfo.got;
                  ref.relsymbol:=current_procinfo.CurrGOTLabel;
                end;
              add_hreg:=true
            end
        end
      else if (cs_create_pic in current_settings.moduleswitches) and
              assigned(ref.symbol) and
              not((ref.symbol.bind=AB_LOCAL) and
                  (ref.symbol.typ in [AT_LABEL,AT_FUNCTION])) then
        begin
          { load the symbol address through the GOT }
          reference_reset_symbol(href,ref.symbol,0,sizeof(pint));
          href.base:=current_procinfo.got;
          href.refaddr:=addr_pic;
          include(current_procinfo.flags,pi_needs_got);
          hreg:=cg.getaddressregister(list);
          list.concat(taicpu.op_ref_reg(A_MOV,S_L,href,hreg));
          ref.symbol:=nil;
          add_hreg:=true;
        end;
      if add_hreg then
        begin
          if ref.base=NR_NO then
            ref.base:=hreg
          else if ref.index=NR_NO then
            begin
              ref.index:=hreg;
              ref.scalefactor:=1;
            end
          else
            begin
              list.concat(taicpu.op_reg_reg(A_ADD,S_L,ref.base,hreg));
              ref.base:=hreg;
            end;
        end;
{$endif x86_64}
    end;
  { Selects opcode and operand size for loading a value of size t onto the
    FPU stack:
      OS_F32 -> FLD  / S_FS
      OS_F64 -> FLD  / S_FL
      OS_F80 -> FLD  / S_FX
      OS_C64 -> FILD / S_IQ
    Any other size raises internal error 200204043. }
  procedure tcgx86.floatloadops(t : tcgsize;var op : tasmop;var s : topsize);
    begin
      case t of
        OS_F32 :
          begin
            s:=S_FS;
            op:=A_FLD;
          end;
        OS_F64 :
          begin
            s:=S_FL;
            op:=A_FLD;
          end;
        OS_F80 :
          begin
            s:=S_FX;
            op:=A_FLD;
          end;
        OS_C64 :
          begin
            s:=S_IQ;
            op:=A_FILD;
          end;
        else
          internalerror(200204043);
      end;
    end;
  { Emits the instruction that loads the value of size t at ref onto the
    FPU stack and records the push via inc_fpu_stack.  The reference is
    simplified on a local copy, the caller's ref stays untouched. }
  procedure tcgx86.floatload(list: TAsmList; t : tcgsize;const ref : treference);
    var
      loadop   : tasmop;
      loadsize : topsize;
      srcref   : treference;
    begin
      srcref:=ref;
      make_simple_ref(list,srcref);

      floatloadops(t,loadop,loadsize);
      list.concat(Taicpu.Op_ref(loadop,loadsize,srcref));

      inc_fpu_stack;
    end;
  { Selects opcode and operand size for storing (and popping) the top of
    the FPU stack as a value of size t:
      OS_F32 -> FSTP  / S_FS
      OS_F64 -> FSTP  / S_FL
      OS_F80 -> FSTP  / S_FX
      OS_C64 -> FISTP / S_IQ
    Any other size raises internal error 200204042. }
  procedure tcgx86.floatstoreops(t : tcgsize;var op : tasmop;var s : topsize);
    begin
      case t of
        OS_F32 :
          begin
            s:=S_FS;
            op:=A_FSTP;
          end;
        OS_F64 :
          begin
            s:=S_FL;
            op:=A_FSTP;
          end;
        OS_F80 :
          begin
            s:=S_FX;
            op:=A_FSTP;
          end;
        OS_C64 :
          begin
            s:=S_IQ;
            op:=A_FISTP;
          end;
        else
          internalerror(200204042);
      end;
    end;
  { Emits the instruction that stores the top of the FPU stack to ref as a
    value of size t and records the pop via dec_fpu_stack.  When the
    cs_fpu_fwait local switch is set, an FWAIT follows every store that is
    not OS_F80. }
  procedure tcgx86.floatstore(list: TAsmList; t : tcgsize;const ref : treference);
    var
      storeop   : tasmop;
      storesize : topsize;
      dstref    : treference;
      needwait  : boolean;
    begin
      dstref:=ref;
      make_simple_ref(list,dstref);

      floatstoreops(t,storeop,storesize);
      list.concat(Taicpu.Op_ref(storeop,storesize,dstref));

      { storing non extended floats can cause a floating point overflow }
      needwait:=(t<>OS_F80) and
                (cs_fpu_fwait in current_settings.localswitches);
      if needwait then
        list.concat(Taicpu.Op_none(A_FWAIT,S_NO));

      dec_fpu_stack;
    end;
  { Sanity check: the operand size belonging to size must be the same as
    the operand size belonging to the size of reg; otherwise internal
    error 200306031 is raised. }
  procedure tcgx86.check_register_size(size:tcgsize;reg:tregister);
    var
      wanted, actual : topsize;
    begin
      wanted:=TCGSize2OpSize[size];
      actual:=TCGSize2OpSize[reg_cgsize(reg)];
      if wanted<>actual then
        internalerror(200306031);
    end;
  516. {****************************************************************************
  517. Assembler code
  518. ****************************************************************************}
  { Emits a JMP to the symbol named s.  On i386-darwin the jump goes to the
    stub returned by get_darwin_call_stub instead of to the symbol itself. }
  procedure tcgx86.a_jmp_name(list : TAsmList;const s : string);
    var
      stubref: treference;
    begin
      if (target_info.system=system_i386_darwin) then
        begin
          reference_reset_symbol(stubref,get_darwin_call_stub(s,false),0,sizeof(pint));
          stubref.refaddr:=addr_full;
          list.concat(taicpu.op_ref(A_JMP,S_NO,stubref));
        end
      else
        list.concat(taicpu.op_sym(A_JMP,S_NO,current_asmdata.RefAsmSymbol(s)));
    end;
  { Jump to label l without a condition: delegates to a_jmp_cond with
    OC_NONE. }
  procedure tcgx86.a_jmp_always(list : TAsmList;l: tasmlabel);
    begin
      a_jmp_cond(list,OC_NONE,l);
    end;
  { Returns the assembler symbol 'L<s>$stub'.

    If the symbol already exists it is returned as is.  Otherwise a stub is
    appended to the al_imports list (the list is created on demand): a
    sec_stub section, the stub symbol, an indirect_symbol directive for s
    and five HLT instructions.  When weak is set, s is additionally
    registered as a weak reference. }
  function tcgx86.get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
    var
      stubname : string;
      imports  : TAsmList;
      i        : longint;
    begin
      stubname:='L'+s+'$stub';
      result:=current_asmdata.getasmsymbol(stubname);
      if not assigned(result) then
        begin
          if current_asmdata.asmlists[al_imports]=nil then
            current_asmdata.asmlists[al_imports]:=TAsmList.create;
          imports:=current_asmdata.asmlists[al_imports];

          imports.concat(Tai_section.create(sec_stub,'',0));
          result:=current_asmdata.RefAsmSymbol(stubname);
          imports.concat(Tai_symbol.Create(result,0));
          { register as a weak symbol if necessary }
          if weak then
            current_asmdata.weakrefasmsymbol(s);
          imports.concat(tai_directive.create(asd_indirect_symbol,s));
          { the stub body consists of five HLT instructions }
          for i:=1 to 5 do
            imports.concat(taicpu.op_none(A_HLT));
        end;
    end;
  { Emits a CALL to the routine named s.

    - i386-darwin: the call goes to the stub from get_darwin_call_stub.
    - other targets: the symbol is referenced directly (as a weak reference
      when weak is set).  With PIC enabled the reference uses addr_pic,
      except on x86_64-darwin; on i386 this also flags the current
      procedure as needing the GOT. }
  procedure tcgx86.a_call_name(list : TAsmList;const s : string; weak: boolean);
    var
      target  : tasmsymbol;
      callref : treference;
    begin
      if (target_info.system=system_i386_darwin) then
        begin
          reference_reset_symbol(callref,get_darwin_call_stub(s,weak),0,sizeof(pint));
          callref.refaddr:=addr_full;
        end
      else
        begin
          if weak then
            target:=current_asmdata.WeakRefAsmSymbol(s)
          else
            target:=current_asmdata.RefAsmSymbol(s);
          reference_reset_symbol(callref,target,0,sizeof(pint));
          if (cs_create_pic in current_settings.moduleswitches) and
             { darwin/x86_64's assembler doesn't want @PLT after call symbols }
             (target_info.system<>system_x86_64_darwin) then
            begin
{$ifdef i386}
              include(current_procinfo.flags,pi_needs_got);
{$endif i386}
              callref.refaddr:=addr_pic
            end
          else
            callref.refaddr:=addr_full;
        end;
      list.concat(taicpu.op_ref(A_CALL,S_NO,callref));
    end;
  { Emits a CALL to the symbol named s using a plain addr_full reference;
    neither PIC nor darwin stubs are considered here. }
  procedure tcgx86.a_call_name_static(list : TAsmList;const s : string);
    var
      callref : treference;
    begin
      reference_reset_symbol(callref,current_asmdata.RefAsmSymbol(s),0,sizeof(pint));
      callref.refaddr:=addr_full;
      list.concat(taicpu.op_ref(A_CALL,S_NO,callref));
    end;
  { Emits an indirect CALL through register reg. }
  procedure tcgx86.a_call_reg(list : TAsmList;reg : tregister);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_reg(A_CALL,S_NO,reg);
      list.concat(callinstr);
    end;
  { Emits an indirect CALL through the memory reference ref.  The
    reference is used as passed in, without going through
    make_simple_ref. }
  procedure tcgx86.a_call_ref(list : TAsmList;ref : treference);
    var
      callinstr : taicpu;
    begin
      callinstr:=taicpu.op_ref(A_CALL,S_NO,ref);
      list.concat(callinstr);
    end;
  608. {********************** load instructions ********************}
  { Loads the constant a into reg with a MOV of the operand size belonging
    to tosize.  reg must match tosize (see check_register_size). }
  procedure tcgx86.a_load_const_reg(list : TAsmList; tosize: TCGSize; a : aint; reg : TRegister);
    var
      opsize : topsize;
    begin
      check_register_size(tosize,reg);
      opsize:=TCGSize2OpSize[tosize];
      { the optimizer will change it to "xor reg,reg" when loading zero, }
      { no need to do it here too (JM)                                   }
      list.concat(taicpu.op_const_reg(A_MOV,opsize,a,reg));
    end;
  { Stores the constant a of size tosize to ref (on a simplified local
    copy of the reference).

    On x86_64 a 64 bit constant outside the longint range is written as two
    32 bit stores: the low 32 bits at the reference, the high 32 bits four
    bytes behind it. }
  procedure tcgx86.a_load_const_ref(list : TAsmList; tosize: tcgsize; a : aint;const ref : treference);
    var
      dstref : treference;
    begin
      dstref:=ref;
      make_simple_ref(list,dstref);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly }
      if (tosize in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          { low half first, then the high half at offset+4 }
          a_load_const_ref(list,OS_32,longint(a and $ffffffff),dstref);
          inc(dstref.offset,4);
          a_load_const_ref(list,OS_32,longint(a shr 32),dstref);
          exit;
        end;
{$endif x86_64}
      list.concat(taicpu.op_const_ref(A_MOV,TCGSize2OpSize[tosize],a,dstref));
    end;
  { Stores register reg (of size fromsize) to ref as a value of size
    tosize.

    When sizes2load reports an extending operand size, the value is first
    extended into a temporary integer register, which is then stored by a
    recursive call; otherwise a single instruction is emitted. }
  procedure tcgx86.a_load_reg_ref(list : TAsmList; fromsize,tosize: TCGSize; reg : tregister;const ref : treference);
    var
      movop   : tasmop;
      opsize  : topsize;
      extsize : tcgsize;
      extreg  : tregister;
      dstref  : treference;
    begin
      dstref:=ref;
      make_simple_ref(list,dstref);
      check_register_size(fromsize,reg);
      sizes2load(fromsize,tosize,movop,opsize);

      { no extension needed: store directly }
      if not (opsize in [S_BW,S_BL,S_WL
{$ifdef x86_64}
                         ,S_BQ,S_WQ,S_LQ
{$endif x86_64}
                        ]) then
        begin
          list.concat(taicpu.op_reg_ref(movop,opsize,reg,dstref));
          exit;
        end;

      extreg:=getintregister(list,tosize);
      extsize:=tosize;
{$ifdef x86_64}
      { zero extensions to 64 bit on the x86_64 are simply done by writing to the lower 32 bit
        which clears the upper 64 bit too, so it could be that s is S_L while the reg is
        64 bit (FK) }
      if opsize in [S_BL,S_WL,S_L] then
        begin
          extreg:=makeregsize(list,extreg,OS_32);
          extsize:=OS_32;
        end;
{$endif x86_64}
      list.concat(taicpu.op_reg_reg(movop,opsize,reg,extreg));
      a_load_reg_ref(list,extsize,tosize,extreg,dstref);
    end;
  { Loads a value of size fromsize from memory location ref into reg as a
    value of size tosize. Opcode and operand size come from sizes2load. }
  procedure tcgx86.a_load_ref_reg(list : TAsmList;fromsize,tosize : tcgsize;const ref: treference;reg : tregister);
    var
      loadop: tasmop;
      loadsize: topsize;
      href : treference;
    begin
      href:=ref;
      make_simple_ref(list,href);
      check_register_size(tosize,reg);
      sizes2load(fromsize,tosize,loadop,loadsize);
{$ifdef x86_64}
      { zero extensions to 64 bit on the x86_64 are simply done by writing
        to the lower 32 bit, which clears the upper 32 bit too, so it could
        be that the operand size is S_L while the reg is 64 bit (FK) }
      if loadsize in [S_BL,S_WL,S_L] then
        reg:=makeregsize(list,reg,OS_32);
{$endif x86_64}
      list.concat(taicpu.op_ref_reg(loadop,loadsize,href,reg));
    end;
  { Moves reg1 (size fromsize) into reg2 (size tosize). A narrowing move is
    done as a plain MOV on the resized source register; otherwise opcode and
    operand size come from sizes2load. No instruction is emitted when both
    registers turn out to be the same. }
  procedure tcgx86.a_load_reg_reg(list : TAsmList;fromsize,tosize : tcgsize;reg1,reg2 : tregister);
    var
      movop: tasmop;
      movsize: topsize;
      mov: Taicpu;
    begin
      check_register_size(fromsize,reg1);
      check_register_size(tosize,reg2);
      if tcgsize2size[fromsize]<=tcgsize2size[tosize] then
        sizes2load(fromsize,tosize,movop,movsize)
      else
        begin
          { source is wider than the destination: copy the low part only }
          reg1:=makeregsize(list,reg1,tosize);
          movsize:=tcgsize2opsize[tosize];
          movop:=A_MOV;
        end;
{$ifdef x86_64}
      { zero extensions to 64 bit on the x86_64 are simply done by writing
        to the lower 32 bit, which clears the upper 32 bit too, so it could
        be that the operand size is S_L while the reg is 64 bit (FK) }
      if movsize in [S_BL,S_WL,S_L] then
        reg2:=makeregsize(list,reg2,OS_32);
{$endif x86_64}
      if (reg1<>reg2) then
        begin
          mov:=taicpu.op_reg_reg(movop,movsize,reg1,reg2);
          { Notify the register allocator that we have written a move
            instruction so it can try to eliminate it. Moves from the frame
            pointer or the stack pointer are not reported. }
          if (reg1<>current_procinfo.framepointer) and (reg1<>NR_STACK_POINTER_REG) then
            add_move_instruction(mov);
          list.concat(mov);
        end;
{$ifdef x86_64}
      { avoid merging of registers and killing the zero extensions (FK) }
      if (tosize in [OS_64,OS_S64]) and (movsize=S_L) then
        list.concat(taicpu.op_const_reg(A_AND,S_L,$ffffffff,reg2));
{$endif x86_64}
    end;
  { Loads the address described by ref into register r.

    References without base and index are handled by symbol kind: a plain
    constant load when there is no symbol, an indirect or GOT based load on
    i386 Darwin, a PIC load through the GOT (RIP relative on x86_64) when
    PIC code is generated, and a full-address MOV otherwise. A reference
    that is just one register becomes a register move; everything else is
    simplified and loaded with LEA.

    A segment override is only accepted for section based threadvars on
    i386 Linux (GS) and i386 Win32 (FS), where the thread local base is
    added to r. Every other use reports cg_e_cant_use_far_pointer_there. }
  procedure tcgx86.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    var
      href : treference;
    begin
      if (ref.base=NR_NO) and (ref.index=NR_NO) then
        begin
          if not assigned(ref.symbol) then
            a_load_const_reg(list,OS_ADDR,ref.offset,r)
          else if (target_info.system=system_i386_darwin) and
                  ((ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL]) or
                   (cs_create_pic in current_settings.moduleswitches)) then
            begin
              if (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL]) or
                 ((cs_create_pic in current_settings.moduleswitches) and
                  (ref.symbol.bind in [AB_COMMON,AB_GLOBAL])) then
                begin
                  { go through the indirect symbol, then add the offset by
                    loading the address of the resulting reference }
                  reference_reset_base(href,
                    g_indirect_sym_load(list,ref.symbol.name,ref.symbol.bind=AB_WEAK_EXTERNAL),
                    ref.offset,sizeof(pint));
                  a_loadaddr_ref_reg(list,href,r);
                end
              else
                begin
                  include(current_procinfo.flags,pi_needs_got);
                  reference_reset_base(href,current_procinfo.got,ref.offset,ref.alignment);
                  href.symbol:=ref.symbol;
                  href.relsymbol:=current_procinfo.CurrGOTLabel;
                  list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],href,r));
                end;
            end
          else if (cs_create_pic in current_settings.moduleswitches) then
            begin
              reference_reset_symbol(href,ref.symbol,0,ref.alignment);
              href.refaddr:=addr_pic;
{$ifdef x86_64}
              href.base:=NR_RIP;
              list.concat(taicpu.op_ref_reg(A_MOV,S_Q,href,r));
{$else x86_64}
              href.base:=current_procinfo.got;
              include(current_procinfo.flags,pi_needs_got);
              list.concat(taicpu.op_ref_reg(A_MOV,S_L,href,r));
{$endif x86_64}
              { the PIC reference was built with offset 0, add it now }
              if ref.offset<>0 then
                a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,r);
            end
          else
            begin
              href:=ref;
              href.refaddr:=ADDR_FULL;
              list.concat(Taicpu.op_ref_reg(A_MOV,tcgsize2opsize[OS_ADDR],href,r));
            end;
        end
      else if (ref.base=NR_NO) and (ref.index<>NR_NO) and
              (ref.offset=0) and (ref.scalefactor=0) and (ref.symbol=nil) then
        a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,r)
      else if (ref.base<>NR_NO) and (ref.index=NR_NO) and
              (ref.offset=0) and (ref.symbol=nil) then
        a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,r)
      else
        begin
          href:=ref;
          make_simple_ref(list,href);
          list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],href,r));
        end;

      if ref.segment<>NR_NO then
        begin
          if not(tf_section_threadvars in target_info.flags) then
            cgmessage(cg_e_cant_use_far_pointer_there)
          else
            begin
              { Convert thread local address to a process global address
                as we cannot handle far pointers. }
              case target_info.system of
                system_i386_linux:
                  if ref.segment=NR_GS then
                    begin
                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol('___fpc_threadvar_offset'),0,ref.alignment);
                      href.segment:=NR_GS;
                      list.concat(Taicpu.op_ref_reg(A_ADD,tcgsize2opsize[OS_ADDR],href,r));
                    end
                  else
                    cgmessage(cg_e_cant_use_far_pointer_there);
                system_i386_win32:
                  if ref.segment=NR_FS then
                    begin
                      allocallcpuregisters(list);
                      a_call_name(list,'GetTls',false);
                      deallocallcpuregisters(list);
                      list.concat(Taicpu.op_reg_reg(A_ADD,tcgsize2opsize[OS_ADDR],NR_EAX,r));
                    end
                  else
                    cgmessage(cg_e_cant_use_far_pointer_there);
                else
                  cgmessage(cg_e_cant_use_far_pointer_there);
              end;
            end;
        end;
    end;
  837. { all fpu load routines expect that R_ST[0-7] means an fpu regvar and }
  838. { R_ST means "the current value at the top of the fpu stack" (JM) }
  { Moves a floating point value from reg1 to reg2 on the x87 stack.
    A source other than NR_ST is pushed with the load opcode for tosize, a
    destination other than NR_ST is written with the store opcode for
    tosize. When both are NR_ST and the value has to be narrowed, it is
    stored to a temp of the target size and reloaded. }
  procedure tcgx86.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
    var
      roundtemp: treference;
      fpuop: tasmop;
      fpusize: topsize;
    begin
      if (reg1<>NR_ST) then
        begin
          floatloadops(tosize,fpuop,fpusize);
          list.concat(taicpu.op_reg(fpuop,fpusize,rgfpu.correct_fpuregister(reg1,rgfpu.fpuvaroffset)));
          inc_fpu_stack;
        end;
      if (reg2<>NR_ST) then
        begin
          floatstoreops(tosize,fpuop,fpusize);
          list.concat(taicpu.op_reg(fpuop,fpusize,rgfpu.correct_fpuregister(reg2,rgfpu.fpuvaroffset)));
          dec_fpu_stack;
        end;

      { only a pure ST -> ST conversion is left to handle }
      if (reg1<>NR_ST) or (reg2<>NR_ST) then
        exit;
      { OS_F80 < OS_C64, but OS_C64 fits perfectly in OS_F80 }
      if (tosize=OS_F80) or (tosize>=fromsize) then
        exit;

      { can't round down to lower precision in x87 :/ }
      tg.gettemp(list,tcgsize2size[tosize],tcgsize2size[tosize],tt_normal,roundtemp);
      a_loadfpu_reg_ref(list,fromsize,tosize,NR_ST,roundtemp);
      a_loadfpu_ref_reg(list,tosize,tosize,roundtemp,NR_ST);
      tg.ungettemp(list,roundtemp);
    end;
  { Loads a floating point value of size fromsize from ref and then moves
    it from NR_ST to reg via a_loadfpu_reg_reg. }
  procedure tcgx86.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
    const
      stacktop = NR_ST;
    begin
      floatload(list,fromsize,ref);
      a_loadfpu_reg_reg(list,fromsize,tosize,stacktop,reg);
    end;
  { Stores the floating point register reg at ref as a value of size tosize.
    A register other than NR_ST is first moved to NR_ST. }
  procedure tcgx86.a_loadfpu_reg_ref(list: TAsmList; fromsize,tosize: tcgsize; reg: tregister; const ref: treference);
    var
      alreadyontop : boolean;
    begin
      alreadyontop:=(reg=NR_ST);
      if not alreadyontop then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,NR_ST);
      floatstore(list,tosize,ref);
    end;
  { Returns the scalar SSE opcode that moves or converts a value of size
    fromsize into a value of size tosize.

    Only sizes in the range OS_F32..OS_F128 are covered by the lookup table.
    Any size outside that range, as well as any combination without a table
    entry, raises internalerror(200312205). }
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    const
      convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_MOVSS,A_CVTSS2SD,A_NONE,A_NONE,A_NONE),
        (A_CVTSD2SS,A_MOVSD,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_MOVQ,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      { the table is only declared for OS_F32..OS_F128; indexing it with any
        other size (e.g. an integer or OS_Mxx size) would read outside of
        the array when the compiler is built without range checking }
      if not(fromsize in [OS_F32..OS_F128]) or
         not(tosize in [OS_F32..OS_F128]) then
        internalerror(200312205);
      result:=convertop[fromsize,tosize];
      if result=A_NONE then
        internalerror(200312205);
    end;
  { Copies multimedia register reg1 into reg2.
    Without a shuffle both sizes must be equal and a MOVAPS (OS_F32) or
    MOVAPD (OS_F64) is used; with a scalar shuffle the opcode comes from
    get_scalar_mm_op. Other shuffles are not supported. When the scalar
    opcode for the size pair is a plain move, the instruction is reported
    to the register allocator as a move. }
  procedure tcgx86.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle);
    var
      mov : taicpu;
    begin
      if shuffle=nil then
        begin
          if fromsize<>tosize then
            internalerror(200312202);
          { needs correct size in case of spilling }
          case fromsize of
            OS_F32:
              mov:=taicpu.op_reg_reg(A_MOVAPS,S_NO,reg1,reg2);
            OS_F64:
              mov:=taicpu.op_reg_reg(A_MOVAPD,S_NO,reg1,reg2);
            else
              internalerror(2006091201);
          end;
        end
      else
        begin
          if not shufflescalar(shuffle) then
            internalerror(200312201);
          mov:=taicpu.op_reg_reg(get_scalar_mm_op(fromsize,tosize),S_NO,reg1,reg2);
        end;
      case get_scalar_mm_op(fromsize,tosize) of
        A_MOVSS,
        A_MOVSD,
        A_MOVQ:
          add_move_instruction(mov);
      end;
      list.concat(mov);
    end;
  { Loads multimedia register reg from memory location ref.
    Without a shuffle, OS_M64 uses MOVQ and everything else a full width
    move (MOVDQA on x86_64, MOVDQU otherwise). With a scalar shuffle the
    opcode comes from get_scalar_mm_op. Other shuffles are not supported. }
  procedure tcgx86.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
    var
      href : treference;
      loadop : tasmop;
    begin
      href:=ref;
      make_simple_ref(list,href);
      if shuffle=nil then
        begin
          if fromsize=OS_M64 then
            loadop:=A_MOVQ
          else
{$ifdef x86_64}
            { x86-64 has always properly aligned data }
            loadop:=A_MOVDQA;
{$else x86_64}
            loadop:=A_MOVDQU;
{$endif x86_64}
          list.concat(taicpu.op_ref_reg(loadop,S_NO,href,reg));
        end
      else if shufflescalar(shuffle) then
        list.concat(taicpu.op_ref_reg(get_scalar_mm_op(fromsize,tosize),S_NO,href,reg))
      else
        internalerror(200312252);
    end;
  { Stores multimedia register reg at memory location ref.
    Without a shuffle, OS_M64 uses MOVQ and everything else a full width
    move (MOVDQA on x86_64, MOVDQU otherwise). With a scalar shuffle a size
    change is done in a temporary mm register before the store. Other
    shuffles are not supported. }
  procedure tcgx86.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle);
    var
      valuereg : tregister;
      href : treference;
      storeop : tasmop;
    begin
      href:=ref;
      make_simple_ref(list,href);
      if shuffle=nil then
        begin
          if fromsize=OS_M64 then
            storeop:=A_MOVQ
          else
{$ifdef x86_64}
            { x86-64 has always properly aligned data }
            storeop:=A_MOVDQA;
{$else x86_64}
            storeop:=A_MOVDQU;
{$endif x86_64}
          list.concat(taicpu.op_reg_ref(storeop,S_NO,reg,href));
        end
      else if shufflescalar(shuffle) then
        begin
          valuereg:=reg;
          if tosize<>fromsize then
            begin
              { convert into a temporary register of the target size }
              valuereg:=getmmregister(list,tosize);
              list.concat(taicpu.op_reg_reg(get_scalar_mm_op(fromsize,tosize),S_NO,reg,valuereg));
            end;
          { at this point the value has size tosize in both cases }
          list.concat(taicpu.op_reg_ref(get_scalar_mm_op(tosize,tosize),S_NO,valuereg,href));
        end
      else
        internalerror(200312252);
    end;
  { Applies Op to reg with the memory operand ref: the reference is wrapped
    in a LOC_REFERENCE location and handed to opmm_loc_reg. }
  procedure tcgx86.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
    var
      srcloc : tlocation;
    begin
      srcloc.loc:=LOC_REFERENCE;
      srcloc.size:=size;
      srcloc.reference:=ref;
      opmm_loc_reg(list,op,size,srcloc,reg,shuffle);
    end;
  { Applies Op to dst with the register operand src: the source register is
    wrapped in a LOC_MMREGISTER location and handed to opmm_loc_reg. }
  procedure tcgx86.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle);
    var
      srcloc : tlocation;
    begin
      srcloc.loc:=LOC_MMREGISTER;
      srcloc.size:=size;
      srcloc.register:=src;
      opmm_loc_reg(list,op,size,srcloc,dst,shuffle);
    end;
  { Emits the SSE instruction for Op with source operand loc and destination
    register dst.

    shuffle=nil selects the packed opcode, a scalar shuffle selects the
    scalar opcode; the opcode is looked up per size (OS_F32/OS_F64) and
    operation. Combinations without an opcode, including all non-scalar
    shuffles (not implemented), raise internalerror(200312216).

    loc must be a (C)REFERENCE or (C)MMREGISTER location whose size equals
    size. }
  procedure tcgx86.opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;dst: tregister; shuffle : pmmshuffle);
    const
      opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
        ( { scalar }
          ( { OS_F32 }
            A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP,A_NOP,A_NOP
          ),
          ( { OS_F64 }
            A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP,A_NOP,A_NOP
          )
        ),
        ( { vectorized/packed }
          { because the logical packed single instructions have shorter op codes, we use always
            these
          }
          ( { OS_F32 }
            A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS,A_NOP,A_NOP
          ),
          ( { OS_F64 }
            A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD,A_NOP,A_NOP
          )
        )
      );
    var
      resultreg : tregister;
      asmop : tasmop;
    begin
      { this is an internally used procedure so the parameters have
        some constraints
      }
      if loc.size<>size then
        internalerror(200312213);
      resultreg:=dst;
      { start from "no opcode": the not yet implemented paths below leave
        asmop untouched and must end up in the internal error instead of
        testing an uninitialised variable }
      asmop:=A_NOP;
      { deshuffle }
      //!!!
      if (shuffle<>nil) and not(shufflescalar(shuffle)) then
        begin
          { not implemented, asmop stays A_NOP }
        end
      else if (shuffle=nil) then
        asmop:=opmm2asmop[1,size,op]
      else if shufflescalar(shuffle) then
        begin
          asmop:=opmm2asmop[0,size,op];
          { no scalar operation available? }
          if asmop=A_NOP then
            begin
              { do vectorized and shuffle finally }
              //!!!
            end;
        end
      else
        internalerror(200312211);
      if asmop=A_NOP then
        internalerror(200312216);
      case loc.loc of
        LOC_CREFERENCE,LOC_REFERENCE:
          begin
            { any instructions needed to simplify the reference have to go
              into the same list as the operation that uses it }
            make_simple_ref(list,loc.reference);
            list.concat(taicpu.op_ref_reg(asmop,S_NO,loc.reference,resultreg));
          end;
        LOC_CMMREGISTER,LOC_MMREGISTER:
          list.concat(taicpu.op_reg_reg(asmop,S_NO,loc.register,resultreg));
        else
          internalerror(200312214);
      end;
      { shuffle }
      if resultreg<>dst then
        begin
          internalerror(200312212);
        end;
    end;
  { Applies "reg := reg Op a" for the constant a.

    The operation is first passed through optimize_op_const. On x86_64,
    64 bit constants that do not fit in a signed 32 bit immediate are loaded
    into a temporary register and the register form is used instead.
    Division is only supported for powers of two (as a shift), unsigned
    multiplication only for powers of two without overflow checking.
    Shift and rotate counts must fit in the count mask of the target. }
  procedure tcgx86.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: aint; reg: TRegister);
    const
{$ifdef x86_64}
      shiftcountmask = 63;
      shiftcountbits = 6;
      badshiftcount  = 200609073;
{$else x86_64}
      shiftcountmask = 31;
      shiftcountbits = 5;
      badshiftcount  = 200609071;
{$endif x86_64}
    var
      shiftop : tasmop;
      log2a : longint;
      opsize : topsize;
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
    begin
      optimize_op_const(op, a);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bit constants directly }
      if not(op in [OP_NONE,OP_MOVE]) and
         (size in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_op_reg_reg(list,op,size,constreg,reg);
          exit;
        end;
{$endif x86_64}
      check_register_size(size,reg);
      opsize:=TCgSize2OpSize[size];
      case op of
        OP_NONE :
          begin
            { Opcode is optimized away }
          end;
        OP_MOVE :
          { Optimized, replaced with a simple load }
          a_load_const_reg(list,size,a,reg);
        OP_DIV, OP_IDIV:
          begin
            { anything but a power of two should be handled specifically in
              the code generator because of the silly register usage
              restraints }
            if not ispowerof2(int64(a),log2a) then
              internalerror(200109224);
            if op=OP_DIV then
              shiftop:=A_SHR
            else
              shiftop:=A_SAR;
            list.concat(taicpu.op_const_reg(shiftop,opsize,log2a,reg));
          end;
        OP_MUL,OP_IMUL:
          begin
            if not(cs_check_overflow in current_settings.localswitches) and
               ispowerof2(int64(a),log2a) then
              list.concat(taicpu.op_const_reg(A_SHL,opsize,log2a,reg))
            else if op = OP_IMUL then
              list.concat(taicpu.op_const_reg(A_IMUL,opsize,a,reg))
            else
              { OP_MUL should be handled specifically in the code
                generator because of the silly register usage restraints }
              internalerror(200109225);
          end;
        OP_ADD, OP_AND, OP_OR, OP_SUB, OP_XOR:
          begin
            if not(cs_check_overflow in current_settings.localswitches) and
               (a = 1) and
               (op in [OP_ADD,OP_SUB]) then
              begin
                if op = OP_ADD then
                  list.concat(taicpu.op_reg(A_INC,opsize,reg))
                else
                  list.concat(taicpu.op_reg(A_DEC,opsize,reg));
              end
            else if (a = 0) then
              begin
                { only "and 0" changes the register, it clears it }
                if (op = OP_AND) then
                  list.concat(taicpu.op_const_reg(A_MOV,opsize,0,reg));
              end
            else if (aword(a) = high(aword)) and
                    (op in [OP_AND,OP_OR,OP_XOR]) then
              begin
                case op of
                  OP_AND:
                    { and with all ones: nothing to do }
                    ;
                  OP_OR:
                    list.concat(taicpu.op_const_reg(A_MOV,opsize,aint(high(aword)),reg));
                  OP_XOR:
                    list.concat(taicpu.op_reg(A_NOT,opsize,reg));
                end;
              end
            else
              list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],opsize,a,reg));
          end;
        OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
          begin
            { a count of zero emits nothing }
            if (a and shiftcountmask) <> 0 Then
              list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],opsize,a and shiftcountmask,reg));
            if (a shr shiftcountbits) <> 0 Then
              internalerror(badshiftcount);
          end
        else
          internalerror(200609072);
      end;
    end;
  { Applies "[ref] := [ref] Op a" for the constant a, directly on memory.

    The operation is first passed through optimize_op_const and the
    reference is copied and simplified. On x86_64, 64 bit constants that do
    not fit in a signed 32 bit immediate are loaded into a temporary
    register and the register form is used instead. Division is only
    supported for powers of two (as a shift); OP_IMUL falls back to the
    inherited implementation because a memory location cannot be multiplied
    with a constant directly.

    Shift and rotate counts are limited to 0..63 on x86_64 and 0..31
    elsewhere, the same limits a_op_const_reg uses; larger counts raise
    internalerror(68991). }
  procedure tcgx86.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: aint; const ref: TReference);
    var
      opcode: tasmop;
      power: longint;
{$ifdef x86_64}
      tmpreg : tregister;
{$endif x86_64}
      tmpref : treference;
    begin
      optimize_op_const(op, a);
      tmpref:=ref;
      make_simple_ref(list,tmpref);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bit constants directly }
      if not(op in [OP_NONE,OP_MOVE]) and
         (size in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          tmpreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,tmpreg);
          a_op_reg_ref(list,op,size,tmpreg,tmpref);
          exit;
        end;
{$endif x86_64}
      Case Op of
        OP_NONE :
          begin
            { Opcode is optimized away }
          end;
        OP_MOVE :
          begin
            { Optimized, replaced with a simple load. The already simplified
              reference is passed on so the simplification is not done a
              second time. }
            a_load_const_ref(list,size,a,tmpref);
          end;
        OP_DIV, OP_IDIV:
          Begin
            if ispowerof2(int64(a),power) then
              begin
                case op of
                  OP_DIV:
                    opcode := A_SHR;
                  OP_IDIV:
                    opcode := A_SAR;
                end;
                list.concat(taicpu.op_const_ref(opcode,
                  TCgSize2OpSize[size],power,tmpref));
                exit;
              end;
            { the rest should be handled specifically in the code }
            { generator because of the silly register usage restraints }
            internalerror(200109231);
          End;
        OP_MUL,OP_IMUL:
          begin
            if not(cs_check_overflow in current_settings.localswitches) and
               ispowerof2(int64(a),power) then
              begin
                list.concat(taicpu.op_const_ref(A_SHL,TCgSize2OpSize[size],
                  power,tmpref));
                exit;
              end;
            { can't multiply a memory location directly with a constant }
            if op = OP_IMUL then
              inherited a_op_const_ref(list,op,size,a,tmpref)
            else
              { OP_MUL should be handled specifically in the code }
              { generator because of the silly register usage restraints }
              internalerror(200109232);
          end;
        OP_ADD, OP_AND, OP_OR, OP_SUB, OP_XOR:
          if not(cs_check_overflow in current_settings.localswitches) and
             (a = 1) and
             (op in [OP_ADD,OP_SUB]) then
            if op = OP_ADD then
              list.concat(taicpu.op_ref(A_INC,TCgSize2OpSize[size],tmpref))
            else
              list.concat(taicpu.op_ref(A_DEC,TCgSize2OpSize[size],tmpref))
          else if (a = 0) then
            if (op <> OP_AND) then
              exit
            else
              a_load_const_ref(list,size,0,tmpref)
          else if (aword(a) = high(aword)) and
                  (op in [OP_AND,OP_OR,OP_XOR]) then
            begin
              case op of
                OP_AND:
                  exit;
                OP_OR:
                  list.concat(taicpu.op_const_ref(A_MOV,TCgSize2OpSize[size],aint(high(aword)),tmpref));
                OP_XOR:
                  list.concat(taicpu.op_ref(A_NOT,TCgSize2OpSize[size],tmpref));
              end
            end
          else
            list.concat(taicpu.op_const_ref(TOpCG2AsmOp[op],
              TCgSize2OpSize[size],a,tmpref));
        OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
          begin
{$ifdef x86_64}
            { 64 bit operands accept counts up to 63; masking with 31 here
              rejected valid shifts by 32..63 on memory operands }
            if (a and 63) <> 0 then
              list.concat(taicpu.op_const_ref(
                TOpCG2AsmOp[op],TCgSize2OpSize[size],a and 63,tmpref));
            if (a shr 6) <> 0 Then
              internalerror(68991);
{$else x86_64}
            if (a and 31) <> 0 then
              list.concat(taicpu.op_const_ref(
                TOpCG2AsmOp[op],TCgSize2OpSize[size],a and 31,tmpref));
            if (a shr 5) <> 0 Then
              internalerror(68991);
{$endif x86_64}
          end
        else internalerror(68992);
      end;
    end;
  { Applies "dst := dst Op src" on two registers of size size.
    OP_NEG and OP_NOT copy src to dst first when they differ and then
    operate on dst alone. Shifts and rotates take the count through ECX/CL.
    OP_MUL, OP_DIV and OP_IDIV are not handled here. }
  procedure tcgx86.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    var
      opsize: topsize;
    begin
      check_register_size(size,src);
      check_register_size(size,dst);
      opsize := tcgsize2opsize[size];
      case op of
        OP_MUL,OP_DIV,OP_IDIV:
          { special stuff, needs separate handling inside code generator }
          internalerror(200109233);
        OP_NEG,OP_NOT:
          begin
            if src<>dst then
              a_load_reg_reg(list,size,size,src,dst);
            list.concat(taicpu.op_reg(TOpCG2AsmOp[op],opsize,dst));
          end;
        OP_SHR,OP_SHL,OP_SAR,OP_ROL,OP_ROR:
          begin
            { Use ecx to load the value, that allows better coalescing }
            getcpuregister(list,NR_ECX);
            a_load_reg_reg(list,size,OS_32,src,NR_ECX);
            list.concat(taicpu.op_reg_reg(Topcg2asmop[op],opsize,NR_CL,dst));
            ungetcpuregister(list,NR_ECX);
          end;
        else
          begin
            { the source register must have the operand size of the
              operation }
            if reg2opsize(src) <> opsize then
              internalerror(200109226);
            list.concat(taicpu.op_reg_reg(TOpCG2AsmOp[op],opsize,src,dst));
          end;
      end;
    end;
  { Applies "reg := reg Op [ref]" with a memory source operand.
    OP_NEG, OP_NOT and OP_IMUL are delegated to the inherited
    implementation; OP_MUL, OP_DIV and OP_IDIV are not handled here. }
  procedure tcgx86.a_op_ref_reg(list : TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister);
    var
      href : treference;
    begin
      href:=ref;
      make_simple_ref(list,href);
      check_register_size(size,reg);
      case op of
        OP_MUL,OP_DIV,OP_IDIV:
          { special stuff, needs separate handling inside code generator }
          internalerror(200109239);
        OP_NEG,OP_NOT,OP_IMUL:
          inherited a_op_ref_reg(list,op,size,href,reg);
        else
          begin
            reg := makeregsize(list,reg,size);
            list.concat(taicpu.op_ref_reg(TOpCG2AsmOp[op],tcgsize2opsize[size],href,reg));
          end;
      end;
    end;
  { Applies "[ref] := [ref] Op reg" with a memory destination operand.

    OP_NEG and OP_NOT operate on the memory location alone and require reg
    to be NR_NO. OP_IMUL has no memory destination form and is delegated to
    the inherited a_op_reg_ref. OP_MUL, OP_DIV and OP_IDIV are not handled
    here. Everything else is emitted as a single reg,mem instruction. }
  procedure tcgx86.a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference);
    var
      tmpref : treference;
    begin
      tmpref:=ref;
      make_simple_ref(list,tmpref);
      check_register_size(size,reg);
      case op of
        OP_NEG,OP_NOT:
          begin
            if reg<>NR_NO then
              internalerror(200109237);
            list.concat(taicpu.op_ref(TOpCG2AsmOp[op],tcgsize2opsize[size],tmpref));
          end;
        OP_IMUL:
          begin
            { this one needs a load/imul/store, which is the default.
              It has to be the inherited reg_ref variant: the ref_reg
              variant would leave the product in reg and never write it
              back to the memory location }
            inherited a_op_reg_ref(list,op,size,reg,tmpref);
          end;
        OP_MUL,OP_DIV,OP_IDIV:
          { special stuff, needs separate handling inside code }
          { generator }
          internalerror(200109238);
        else
          begin
            list.concat(taicpu.op_reg_ref(TOpCG2AsmOp[op],tcgsize2opsize[size],reg,tmpref));
          end;
      end;
    end;
  1374. {*************** compare instructions ****************}
  { Compares reg with the constant a and jumps to l when cmp_op holds.
    A comparison with zero is emitted as "test reg,reg". On x86_64, 64 bit
    constants that do not fit in a signed 32 bit immediate are loaded into
    a temporary register and compared register against register. }
  procedure tcgx86.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : aint;reg : tregister;
    l : tasmlabel);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
{$ifdef x86_64}
      { x86_64 only supports signed 32 bit constants directly }
      if (size in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_cmp_reg_reg_label(list,size,cmp_op,constreg,reg,l);
          exit;
        end;
{$endif x86_64}
      if (a <> 0) then
        list.concat(taicpu.op_const_reg(A_CMP,tcgsize2opsize[size],a,reg))
      else
        list.concat(taicpu.op_reg_reg(A_TEST,tcgsize2opsize[size],reg,reg));
      a_jmp_cond(list,cmp_op,l);
    end;
  { Compares the memory location ref with the constant a and jumps to l
    when cmp_op holds. On x86_64, 64 bit constants that do not fit in a
    signed 32 bit immediate are loaded into a temporary register first. }
  procedure tcgx86.a_cmp_const_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : aint;const ref : treference;
    l : tasmlabel);
    var
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
      href : treference;
    begin
      href:=ref;
      make_simple_ref(list,href);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bit constants directly }
      if (size in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_cmp_reg_ref_label(list,size,cmp_op,constreg,href,l);
          exit;
        end;
{$endif x86_64}
      list.concat(taicpu.op_const_ref(A_CMP,TCgSize2OpSize[size],a,href));
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits "cmp reg1,reg2" for two registers of size size, followed by a
    jump to l on condition cmp_op. }
  procedure tcgx86.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;
    reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      check_register_size(size,reg1);
      check_register_size(size,reg2);
      cmpinstr:=taicpu.op_reg_reg(A_CMP,TCgSize2OpSize[size],reg1,reg2);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits a CMP with the memory location ref as first and reg as second
    operand, followed by a jump to l on condition cmp_op. }
  procedure tcgx86.a_cmp_ref_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;const ref: treference; reg : tregister;l : tasmlabel);
    var
      href : treference;
      cmpinstr : taicpu;
    begin
      href:=ref;
      make_simple_ref(list,href);
      check_register_size(size,reg);
      cmpinstr:=taicpu.op_ref_reg(A_CMP,TCgSize2OpSize[size],href,reg);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits a CMP with reg as first and the memory location ref as second
    operand, followed by a jump to l on condition cmp_op. }
  procedure tcgx86.a_cmp_reg_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg : tregister;const ref: treference; l : tasmlabel);
    var
      href : treference;
      cmpinstr : taicpu;
    begin
      href:=ref;
      make_simple_ref(list,href);
      check_register_size(size,reg);
      cmpinstr:=taicpu.op_reg_ref(A_CMP,TCgSize2OpSize[size],reg,href);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits a jump to label l: an unconditional JMP for OC_None, otherwise a
    Jcc with the condition that corresponds to cond. The instruction is
    flagged as a jump. }
  procedure tcgx86.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      jump : taicpu;
    begin
      if cond<>OC_None then
        begin
          jump:=Taicpu.Op_sym(A_Jcc,S_NO,l);
          jump.SetCondition(TOpCmp2AsmCond[cond]);
        end
      else
        jump:=Taicpu.Op_sym(A_JMP,S_NO,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Emits a Jcc to label l whose condition is derived from the flags f via
    flags_to_cond. The instruction is flagged as a jump. }
  procedure tcgx86.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      jump : taicpu;
    begin
      jump:=Taicpu.op_sym(A_Jcc,S_NO,l);
      jump.is_jmp:=true;
      jump.SetCondition(flags_to_cond(f));
      list.concat(jump);
    end;
  { Materialises the flags f in reg: a SETcc writes the 8 bit version of
    reg, and when that is a different register than reg itself the byte is
    then loaded into reg as a value of size size. }
  procedure tcgx86.g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);
    var
      setinstr : taicpu;
      bytereg : tregister;
    begin
      bytereg:=makeregsize(list,reg,OS_8);
      setinstr:=Taicpu.op_reg(A_SETcc,S_B,bytereg);
      setinstr.setcondition(flags_to_cond(f));
      list.concat(setinstr);
      if (bytereg<>reg) then
        a_load_reg_reg(list,OS_8,size,bytereg,reg);
    end;
  { Materialises the flags f at memory location ref. For sizes other than
    8 bit the whole location is set to zero first, then a SETcc writes the
    byte at the start of the location. }
  procedure tcgx86.g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference);
    var
      setinstr : taicpu;
      href : treference;
    begin
      href:=ref;
      make_simple_ref(list,href);
      { SETcc only writes one byte, clear the rest beforehand }
      if not(size in [OS_8,OS_S8]) then
        a_load_const_ref(list,size,0,href);
      setinstr:=Taicpu.op_ref(A_SETcc,S_B,href);
      setinstr.setcondition(flags_to_cond(f));
      list.concat(setinstr);
    end;
  1499. { ************* concatcopy ************ }
  { Emits code into list that copies len bytes from source to dest.
    One of three strategies is chosen from len and the current settings:
      copy_move   - pairs of integer register loads and stores,
      copy_mmx    - 64-bit loads and stores through up to four MMX registers,
      copy_string - the string instructions MOVSB/MOVSW/MOVSD/MOVSQ, with a
                    REP prefix where a count is loaded into REGCX.
    source and dest are const parameters; modified working copies are kept
    in srcref and dstref. }
  1500. procedure Tcgx86.g_concatcopy(list:TAsmList;const source,dest:Treference;len:aint);
  1501. const
  { count, source and destination registers used by the copy_string path,
    in the width selected by cpu64bitalu }
  1502. {$ifdef cpu64bitalu}
  1503. REGCX=NR_RCX;
  1504. REGSI=NR_RSI;
  1505. REGDI=NR_RDI;
  1506. {$else cpu64bitalu}
  1507. REGCX=NR_ECX;
  1508. REGSI=NR_ESI;
  1509. REGDI=NR_EDI;
  1510. {$endif cpu64bitalu}
  1511. type copymode=(copy_move,copy_mmx,copy_string);
  1512. var srcref,dstref:Treference;
  1513. r,r0,r1,r2,r3:Tregister;
  1514. helpsize:aint;
  1515. copysize:byte;
  1516. cgsize:Tcgsize;
  1517. cm:copymode;
  1518. begin
  { --- strategy selection; later tests override earlier ones --- }
  1519. cm:=copy_move;
  { helpsize is the largest len, in bytes, that is still copied without the
    string instructions (see the len>helpsize test below); the limit is
    lower when optimizing for size }
  1520. helpsize:=3*sizeof(aword);
  1521. if cs_opt_size in current_settings.optimizerswitches then
  1522. helpsize:=2*sizeof(aword);
  { MMX copy: only with cs_mmx enabled, for a procedure that is not flagged
    pi_uses_fpu, and for exactly 8, 16, 24 or 32 bytes.
    NOTE(review): the FPU test is presumably there because MMX and x87
    share register state - confirm. }
  1523. if (cs_mmx in current_settings.localswitches) and
  1524. not(pi_uses_fpu in current_procinfo.flags) and
  1525. ((len=8) or (len=16) or (len=24) or (len=32)) then
  1526. cm:=copy_mmx;
  { evaluated after the MMX test, so it also overrides copy_mmx }
  1527. if (len>helpsize) then
  1528. cm:=copy_string;
  { when optimizing for size everything uses the string instructions,
    except an MMX copy of at most 16 bytes }
  1529. if (cs_opt_size in current_settings.optimizerswitches) and
  1530. not((len<=16) and (cm=copy_mmx)) then
  1531. cm:=copy_string;
  { a segment on either reference forces the string path, which loads the
    segments into DS/ES explicitly }
  1532. if (source.segment<>NR_NO) or
  1533. (dest.segment<>NR_NO) then
  1534. cm:=copy_string;
  1535. case cm of
  1536. copy_move:
  1537. begin
  1538. dstref:=dest;
  1539. srcref:=source;
  { the chunk size starts at sizeof(aint) }
  1540. copysize:=sizeof(aint);
  1541. cgsize:=int_cgsize(copysize);
  1542. while len<>0 do
  1543. begin
  { pick the chunk size from the remaining length:
    1 byte for len<2, 2 bytes for len<4, 4 bytes for len<8 }
  1544. if len<2 then
  1545. begin
  1546. copysize:=1;
  1547. cgsize:=OS_8;
  1548. end
  1549. else if len<4 then
  1550. begin
  1551. copysize:=2;
  1552. cgsize:=OS_16;
  1553. end
  1554. else if len<8 then
  1555. begin
  1556. copysize:=4;
  1557. cgsize:=OS_32;
  1558. end
  { with cpu64bitalu additionally: 8 bytes for len<16 }
  1559. {$ifdef cpu64bitalu}
  1560. else if len<16 then
  1561. begin
  1562. copysize:=8;
  1563. cgsize:=OS_64;
  1564. end
  1565. {$endif}
  { the lone semicolon terminates the if chain; there is no final else, so
    for larger len copysize and cgsize keep their current values }
  1566. ;
  { copy one chunk through a newly requested integer register and advance
    both working references by the chunk size }
  1567. dec(len,copysize);
  1568. r:=getintregister(list,cgsize);
  1569. a_load_ref_reg(list,cgsize,cgsize,srcref,r);
  1570. a_load_reg_ref(list,cgsize,cgsize,r,dstref);
  1571. inc(srcref.offset,copysize);
  1572. inc(dstref.offset,copysize);
  1573. end;
  1574. end;
  1575. copy_mmx:
  1576. begin
  1577. dstref:=dest;
  1578. srcref:=source;
  { all loads are emitted first, then all stores; r1, r2 and r3 are only
    requested and used when len reaches 16, 24 and 32 respectively }
  1579. r0:=getmmxregister(list);
  1580. a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r0,nil);
  1581. if len>=16 then
  1582. begin
  1583. inc(srcref.offset,8);
  1584. r1:=getmmxregister(list);
  1585. a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r1,nil);
  1586. end;
  1587. if len>=24 then
  1588. begin
  1589. inc(srcref.offset,8);
  1590. r2:=getmmxregister(list);
  1591. a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r2,nil);
  1592. end;
  1593. if len>=32 then
  1594. begin
  1595. inc(srcref.offset,8);
  1596. r3:=getmmxregister(list);
  1597. a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r3,nil);
  1598. end;
  { store phase, mirroring the loads above 8 bytes at a time }
  1599. a_loadmm_reg_ref(list,OS_M64,OS_M64,r0,dstref,nil);
  1600. if len>=16 then
  1601. begin
  1602. inc(dstref.offset,8);
  1603. a_loadmm_reg_ref(list,OS_M64,OS_M64,r1,dstref,nil);
  1604. end;
  1605. if len>=24 then
  1606. begin
  1607. inc(dstref.offset,8);
  1608. a_loadmm_reg_ref(list,OS_M64,OS_M64,r2,dstref,nil);
  1609. end;
  1610. if len>=32 then
  1611. begin
  1612. inc(dstref.offset,8);
  1613. a_loadmm_reg_ref(list,OS_M64,OS_M64,r3,dstref,nil);
  1614. end;
  1615. end
  1616. else {copy_string, should be a good fallback in case of unhandled}
  1617. begin
  { destination address goes into REGDI; if dest carries a segment, the
    address is loaded from a copy without the segment, the current ES is
    saved on the stack and ES is loaded with dest.segment via push/pop }
  1618. getcpuregister(list,REGDI);
  1619. if (dest.segment=NR_NO) then
  1620. a_loadaddr_ref_reg(list,dest,REGDI)
  1621. else
  1622. begin
  1623. dstref:=dest;
  1624. dstref.segment:=NR_NO;
  1625. a_loadaddr_ref_reg(list,dstref,REGDI);
  1626. list.concat(taicpu.op_reg(A_PUSH,S_L,NR_ES));
  1627. list.concat(taicpu.op_reg(A_PUSH,S_L,dest.segment));
  1628. list.concat(taicpu.op_reg(A_POP,S_L,NR_ES));
  1629. end;
  { same scheme for the source: address into REGSI, segment into DS }
  1630. getcpuregister(list,REGSI);
  1631. if (source.segment=NR_NO) then
  1632. a_loadaddr_ref_reg(list,source,REGSI)
  1633. else
  1634. begin
  1635. srcref:=source;
  1636. srcref.segment:=NR_NO;
  1637. a_loadaddr_ref_reg(list,srcref,REGSI);
  1638. list.concat(taicpu.op_reg(A_PUSH,S_L,NR_DS));
  1639. list.concat(taicpu.op_reg(A_PUSH,S_L,source.segment));
  1640. list.concat(taicpu.op_reg(A_POP,S_L,NR_DS));
  1641. end;
  1642. getcpuregister(list,REGCX);
  { a CLD is emitted only when i386 is defined }
  1643. {$ifdef i386}
  1644. list.concat(Taicpu.op_none(A_CLD,S_NO));
  1645. {$endif i386}
  { size optimization: for more than one and a half aint, emit a single
    REP MOVSB with the byte count len }
  1646. if (cs_opt_size in current_settings.optimizerswitches) and
  1647. (len>sizeof(aint)+(sizeof(aint) div 2)) then
  1648. begin
  1649. a_load_const_reg(list,OS_INT,len,REGCX);
  1650. list.concat(Taicpu.op_none(A_REP,S_NO));
  1651. list.concat(Taicpu.op_none(A_MOVSB,S_NO));
  1652. end
  1653. else
  1654. begin
  { helpsize is reused as the number of whole aint-sized words; len becomes
    the number of remaining bytes }
  1655. helpsize:=len div sizeof(aint);
  1656. len:=len mod sizeof(aint);
  { the count register and the REP prefix are only needed for more than
    one word; a single word is moved by the plain MOVSQ/MOVSD below }
  1657. if helpsize>1 then
  1658. begin
  1659. a_load_const_reg(list,OS_INT,helpsize,REGCX);
  1660. list.concat(Taicpu.op_none(A_REP,S_NO));
  1661. end;
  1662. if helpsize>0 then
  1663. begin
  1664. {$ifdef cpu64bitalu}
  1665. list.concat(Taicpu.op_none(A_MOVSQ,S_NO))
  1666. {$else}
  1667. list.concat(Taicpu.op_none(A_MOVSD,S_NO));
  1668. {$endif cpu64bitalu}
  1669. end;
  { remaining bytes: at most one MOVSD, one MOVSW and one MOVSB }
  1670. if len>=4 then
  1671. begin
  1672. dec(len,4);
  1673. list.concat(Taicpu.op_none(A_MOVSD,S_NO));
  1674. end;
  1675. if len>=2 then
  1676. begin
  1677. dec(len,2);
  1678. list.concat(Taicpu.op_none(A_MOVSW,S_NO));
  1679. end;
  1680. if len=1 then
  1681. list.concat(Taicpu.op_none(A_MOVSB,S_NO));
  1682. end;
  { release the three registers, then restore the saved segment registers
    in the reverse order of the pushes above (DS first, then ES) }
  1683. ungetcpuregister(list,REGCX);
  1684. ungetcpuregister(list,REGSI);
  1685. ungetcpuregister(list,REGDI);
  1686. if (source.segment<>NR_NO) then
  1687. list.concat(taicpu.op_reg(A_POP,S_L,NR_DS));
  1688. if (dest.segment<>NR_NO) then
  1689. list.concat(taicpu.op_reg(A_POP,S_L,NR_ES));
  1690. end;
  1691. end;
  1692. end;
  1693. {****************************************************************************
  1694. Entry/Exit Code Helpers
  1695. ****************************************************************************}
  { Emits the profiling hook for the current target into list.

    For the i386 win32 (unless NOTARGETWIN is defined), freebsd, netbsd and
    wdosx targets a 32 bit data word with its own label is emitted into a
    data section named after the current procedure; its address is passed
    in EDX (saved and restored around the call) to the target specific
    mcount symbol. The remaining listed targets get a plain call.
    Targets that are not listed get no code at all. }
  procedure tcgx86.g_profilecode(list : TAsmList);
  var
    counterlab : tasmlabel;
    symprefix : String[4];
  begin
    case target_info.system of
{$ifndef NOTARGETWIN}
      system_i386_win32,
{$endif}
      system_i386_freebsd,
      system_i386_netbsd,
      { system_i386_openbsd is intentionally not listed }
      system_i386_wdosx :
        begin
          { target specific prefix of the mcount symbol }
          if target_info.system=system_i386_freebsd then
            symprefix:='.'
          else if target_info.system=system_i386_netbsd then
            symprefix:='__'
          else
            symprefix:='';

          { the per-procedure data word }
          current_asmdata.getaddrlabel(counterlab);
          new_section(list,sec_data,lower(current_procinfo.procdef.mangledname),sizeof(pint));
          list.concat(Tai_label.Create(counterlab));
          list.concat(Tai_const.Create_32bit(0));

          { back to the code: call mcount with the label in EDX }
          new_section(list,sec_code,lower(current_procinfo.procdef.mangledname),0);
          list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EDX));
          list.concat(Taicpu.Op_sym_ofs_reg(A_MOV,S_L,counterlab,0,NR_EDX));
          a_call_name(list,target_info.Cprefix+symprefix+'mcount',false);
          list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDX));
        end;
      system_i386_linux :
        begin
          a_call_name(list,target_info.Cprefix+'mcount',false);
        end;
      system_i386_go32v2,
      system_i386_watcom :
        a_call_name(list,'MCOUNT',false);
      system_x86_64_linux,
      system_x86_64_darwin :
        a_call_name(list,'mcount',false);
    end;
  end;
  { Emits code into list that reserves localsize bytes on the stack by
    lowering the stack pointer. Nothing is emitted when localsize<=0.
    On the Windows targets handled below, an allocation of at least
    winstackpagesize bytes additionally writes to every page it covers
    (see the comment inside for the reason). }
  1740. procedure tcgx86.g_stackpointer_alloc(list : TAsmList;localsize : longint);
  { the locals are only declared when x86 is defined and NOTARGETWIN is not }
  1741. {$ifdef x86}
  1742. {$ifndef NOTARGETWIN}
  1743. var
  1744. href : treference;
  1745. i : integer;
  1746. again : tasmlabel;
  1747. {$endif NOTARGETWIN}
  1748. {$endif x86}
  1749. begin
  1750. if localsize>0 then
  1751. begin
  1752. {$ifdef i386}
  1753. {$ifndef NOTARGETWIN}
  1754. { windows guards only a few pages for stack growing,
  1755. so we have to access every page first }
  1756. if (target_info.system in [system_i386_win32,system_i386_wince]) and
  1757. (localsize>=winstackpagesize) then
  1758. begin
  { up to five pages: unrolled sequence of probes }
  1759. if localsize div winstackpagesize<=5 then
  1760. begin
  { lower ESP by localsize-4, store EAX once per page, then push EAX to
    account for the last 4 bytes }
  1761. list.concat(Taicpu.Op_const_reg(A_SUB,S_L,localsize-4,NR_ESP));
  1762. for i:=1 to localsize div winstackpagesize do
  1763. begin
  1764. reference_reset_base(href,NR_ESP,localsize-i*winstackpagesize,4);
  1765. list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
  1766. end;
  1767. list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
  1768. end
  1769. else
  1770. begin
  { more than five pages: emit a loop with EDI as page counter. EDI is
    pushed first to save it; each iteration lowers ESP by one page
    (winstackpagesize-4 plus the 4 bytes of the PUSH EAX, which is also
    the write to that page) }
  1771. current_asmdata.getjumplabel(again);
  1772. getcpuregister(list,NR_EDI);
  1773. list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI));
  1774. list.concat(Taicpu.op_const_reg(A_MOV,S_L,localsize div winstackpagesize,NR_EDI));
  1775. a_label(list,again);
  1776. list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize-4,NR_ESP));
  1777. list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
  1778. list.concat(Taicpu.op_reg(A_DEC,S_L,NR_EDI));
  1779. a_jmp_cond(list,OC_NE,again);
  { the remainder is reduced by 4 to compensate for the pushed EDI, which
    is then reloaded from its slot at ESP+localsize-4 }
  1780. list.concat(Taicpu.op_const_reg(A_SUB,S_L,localsize mod winstackpagesize - 4,NR_ESP));
  1781. reference_reset_base(href,NR_ESP,localsize-4,4);
  1782. list.concat(Taicpu.op_ref_reg(A_MOV,S_L,href,NR_EDI));
  1783. ungetcpuregister(list,NR_EDI);
  1784. end
  1785. end
  { this else, like the one in the x86_64 part, binds to the plain
    subtraction at the end of the procedure }
  1786. else
  1787. {$endif NOTARGETWIN}
  1788. {$endif i386}
  1789. {$ifdef x86_64}
  1790. {$ifndef NOTARGETWIN}
  1791. { windows guards only a few pages for stack growing,
  1792. so we have to access every page first }
  1793. if (target_info.system=system_x86_64_win64) and
  1794. (localsize>=winstackpagesize) then
  1795. begin
  1796. if localsize div winstackpagesize<=5 then
  1797. begin
  { lower RSP by the full size, store EAX once per page and finally at
    the new top of stack }
  1798. list.concat(Taicpu.Op_const_reg(A_SUB,S_Q,localsize,NR_RSP));
  1799. for i:=1 to localsize div winstackpagesize do
  1800. begin
  1801. reference_reset_base(href,NR_RSP,localsize-i*winstackpagesize+4,4);
  1802. list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
  1803. end;
  1804. reference_reset_base(href,NR_RSP,0,4);
  1805. list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
  1806. end
  1807. else
  1808. begin
  { loop with R10 as page counter: each iteration lowers RSP by one page
    and stores EAX at the new top of stack; the remainder is subtracted
    after the loop }
  1809. current_asmdata.getjumplabel(again);
  1810. getcpuregister(list,NR_R10);
  1811. list.concat(Taicpu.op_const_reg(A_MOV,S_Q,localsize div winstackpagesize,NR_R10));
  1812. a_label(list,again);
  1813. list.concat(Taicpu.op_const_reg(A_SUB,S_Q,winstackpagesize,NR_RSP));
  1814. reference_reset_base(href,NR_RSP,0,4);
  1815. list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
  1816. list.concat(Taicpu.op_reg(A_DEC,S_Q,NR_R10));
  1817. a_jmp_cond(list,OC_NE,again);
  1818. list.concat(Taicpu.op_const_reg(A_SUB,S_Q,localsize mod winstackpagesize,NR_RSP));
  1819. ungetcpuregister(list,NR_R10);
  1820. end
  1821. end
  1822. else
  1823. {$endif NOTARGETWIN}
  1824. {$endif x86_64}
  { default for every other case: a single subtraction from the stack
    pointer }
  1825. list.concat(Taicpu.Op_const_reg(A_SUB,tcgsize2opsize[OS_ADDR],localsize,NR_STACK_POINTER_REG));
  1826. end;
  1827. end;
  { Emits the entry code of the current procedure into list.
    list         - receives the generated instructions
    localsize    - number of bytes to reserve for the stack frame; it may
                   be enlarged below for alignment
    nostackframe - when true, neither the frame pointer setup nor the
                   stack allocation is emitted
    When i386 is defined, interrupt procedures on targets other than
    i386 darwin additionally get their segment and general purpose
    registers pushed first. }
  1828. procedure tcgx86.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
  1829. var
  1830. stackmisalignment: longint;
  1831. begin
  1832. {$ifdef i386}
  1833. { interrupt support for i386 }
  1834. if (po_interrupt in current_procinfo.procdef.procoptions) and
  1835. { this messes up stack alignment }
  1836. (target_info.system <> system_i386_darwin) then
  1837. begin
  1838. { .... also the segment registers }
  1839. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_GS));
  1840. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_FS));
  1841. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_ES));
  1842. list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_DS));
  1843. { save the registers of an interrupt procedure }
  1844. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EDI));
  1845. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_ESI));
  1846. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EDX));
  1847. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_ECX));
  1848. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EBX));
  1849. list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EAX));
  1850. end;
  1851. {$endif i386}
  1852. { save old framepointer }
  1853. if not nostackframe then
  1854. begin
  { stackmisalignment counts the bytes already on the stack at this point;
    it starts with the return address }
  1855. { return address }
  1856. stackmisalignment := sizeof(pint);
  1857. list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
  { when the stack pointer itself serves as frame pointer, only a message
    is issued and no frame pointer setup is emitted }
  1858. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1859. CGmessage(cg_d_stackframe_omited)
  1860. else
  1861. begin
  1862. { push <frame_pointer> }
  1863. inc(stackmisalignment,sizeof(pint));
  { record the frame pointer register in the preserved_by_proc set of the
    integer register allocator }
  1864. include(rg[R_INTREGISTER].preserved_by_proc,RS_FRAME_POINTER_REG);
  1865. list.concat(Taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
  1866. { Return address and FP are both on stack }
  1867. current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
  1868. current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));
  { frame pointer := stack pointer; the CFI information is switched to
    the frame pointer register afterwards }
  1869. list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG));
  1870. current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FRAME_POINTER_REG);
  1871. end;
  1872. { allocate stackframe space }
  { done when localsize is non-zero, or - on the listed targets only -
    when bytes are already on the stack and the procedure either performs
    calls (pi_do_call) or is an assembler procedure }
  1873. if (localsize<>0) or
  1874. ((target_info.system in [system_i386_darwin,system_x86_64_darwin,
  1875. system_x86_64_win64,system_x86_64_linux,system_x86_64_freebsd]) and
  1876. (stackmisalignment <> 0) and
  1877. ((pi_do_call in current_procinfo.flags) or
  1878. (po_assembler in current_procinfo.procdef.procoptions))) then
  1879. begin
  { on the listed targets localsize is enlarged so that localsize plus
    the bytes already on the stack is a multiple of 16 }
  1880. if (target_info.system in [system_i386_darwin,system_x86_64_darwin,
  1881. system_x86_64_win64,system_x86_64_linux,system_x86_64_freebsd]) then
  1882. localsize := align(localsize+stackmisalignment,16)-stackmisalignment;
  1883. cg.g_stackpointer_alloc(list,localsize);
  { without a separate frame pointer the CFA offset has to include the
    newly allocated space in addition to the return address }
  1884. if current_procinfo.framepointer=NR_STACK_POINTER_REG then
  1885. current_asmdata.asmcfi.cfa_def_cfa_offset(list,localsize+sizeof(pint));
  1886. end;
  1887. end;
  1888. end;
  { Emits overflow checking code into list, but only when cs_check_overflow
    is set in the current local switches.

    The generated sequence is a conditional jump over a call to
    FPC_OVERFLOW. The jump condition depends on def: C_NB for pointers and
    for the ordinal types u64bit, u16bit, u32bit, u8bit, uchar and pasbool,
    C_NO for everything else. The parameter l is not used here. }
  procedure tcgx86.g_overflowcheck(list: TAsmList; const l:tlocation;def:tdef);
  var
    oklabel : tasmlabel;
    skipjump : taicpu;
    okcond : TAsmCond;
  begin
    if cs_check_overflow in current_settings.localswitches then
      begin
        current_asmdata.getjumplabel(oklabel);

        { select the condition under which the result is fine }
        if def.typ=pointerdef then
          okcond:=C_NB
        else if (def.typ=orddef) and
                (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,pasbool]) then
          okcond:=C_NB
        else
          okcond:=C_NO;

        { jump over the error call when the condition holds }
        skipjump:=Taicpu.Op_Sym(A_Jcc,S_NO,oklabel);
        skipjump.SetCondition(okcond);
        skipjump.is_jmp:=true;
        list.concat(skipjump);

        a_call_name(list,'FPC_OVERFLOW',false);
        a_label(list,oklabel);
      end;
  end;
  { Emits a wrapper that jumps to the external symbol externalname.

    On i386 darwin the inherited implementation is used. On all other
    targets a JMP through a symbol reference is emitted; the reference is
    marked addr_pic when cs_create_pic is set and the target is not
    x86_64 darwin, and addr_full otherwise. }
  procedure tcgx86.g_external_wrapper(list: TAsmList; procdef: tprocdef; const externalname: string);
  var
    jmpref : treference;
    extsym : tasmsymbol;
  begin
    if target_info.system<>system_i386_darwin then
      begin
        extsym:=current_asmdata.RefAsmSymbol(externalname);
        reference_reset_symbol(jmpref,extsym,0,sizeof(pint));
        { create pic'ed?
          darwin/x86_64's assembler doesn't want @PLT after call symbols }
        if not(cs_create_pic in current_settings.moduleswitches) or
           (target_info.system=system_x86_64_darwin) then
          jmpref.refaddr:=addr_full
        else
          jmpref.refaddr:=addr_pic;
        list.concat(taicpu.op_ref(A_JMP,S_NO,jmpref));
      end
    else
      { a_jmp_name jumps to a stub which is always pic-safe on darwin }
      inherited g_external_wrapper(list,procdef,externalname);
  end;
  1934. end.