cgx86.pas 96 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628
  1. {
  2. Copyright (c) 1998-2005 by Florian Klaempfl
  3. This unit implements the common parts of the code generator for the i386 and the x86-64.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. { This unit implements the common parts of the code generator for the i386 and the x86-64.
  18. }
  19. unit cgx86;
  20. {$i fpcdefs.inc}
  21. interface
  22. uses
  23. globtype,
  24. cgbase,cgutils,cgobj,
  25. aasmbase,aasmtai,aasmdata,aasmcpu,
  26. cpubase,cpuinfo,rgobj,rgx86,rgcpu,
  27. symconst,symtype,symdef;
  28. type
  { tcgx86: code generator class holding the parts shared by the i386 and
    x86-64 back ends (see the unit header).  It derives from tcg and mostly
    overrides its virtual methods; the methods without "override" are
    helpers introduced by this class. }
  tcgx86 = class(tcg)
    { FPU register allocator; kept apart from the rg[] allocators and
      released in done_register_allocators }
    rgfpu : Trgx86fpu;

    { register allocation }
    procedure done_register_allocators;override;
    function getfpuregister(list:TAsmList;size:Tcgsize):Tregister;override;
    function getmmxregister(list:TAsmList):Tregister;
    function getmmregister(list:TAsmList;size:Tcgsize):Tregister;override;
    procedure getcpuregister(list:TAsmList;r:Tregister);override;
    procedure ungetcpuregister(list:TAsmList;r:Tregister);override;
    procedure alloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);override;
    procedure dealloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);override;
    function uses_registers(rt:Tregistertype):boolean;override;
    procedure add_reg_instruction(instr:Tai;r:tregister);override;

    { bookkeeping of rgfpu.fpuvaroffset; both raise an internal error when
      the counter would leave the range 0..7 }
    procedure dec_fpu_stack;
    procedure inc_fpu_stack;

    { calls }
    procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
    procedure a_call_name_near(list : TAsmList;const s : string; weak: boolean);
    procedure a_call_name_static(list : TAsmList;const s : string);override;
    procedure a_call_name_static_near(list : TAsmList;const s : string);
    procedure a_call_reg(list : TAsmList;reg : tregister);override;
    procedure a_call_reg_near(list : TAsmList;reg : tregister);
    procedure a_call_ref(list : TAsmList;ref : treference);override;
    procedure a_call_ref_near(list : TAsmList;ref : treference);

    { integer operations }
    procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
    procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
    procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
    procedure a_op_ref_reg(list : TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister); override;
    procedure a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference); override;

    { move instructions }
    procedure a_load_const_reg(list : TAsmList; tosize: tcgsize; a : tcgint;reg : tregister);override;
    procedure a_load_const_ref(list : TAsmList; tosize: tcgsize; a : tcgint;const ref : treference);override;
    procedure a_load_reg_ref(list : TAsmList;fromsize,tosize: tcgsize; reg : tregister;const ref : treference);override;
    procedure a_load_ref_reg(list : TAsmList;fromsize,tosize: tcgsize;const ref : treference;reg : tregister);override;
    procedure a_load_reg_reg(list : TAsmList;fromsize,tosize: tcgsize;reg1,reg2 : tregister);override;
    procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;

    { bit scan instructions }
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;

    { fpu move instructions }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;

    { vector register move instructions }
    procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
    procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
    procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle);override;
    procedure a_opmm_ref_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;const ref : treference;src,dst : tregister;shuffle : pmmshuffle);override;
    procedure a_opmm_reg_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;src1,src2,dst : tregister;shuffle : pmmshuffle);override;

    { comparison operations }
    procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
      l : tasmlabel);override;
    procedure a_cmp_const_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;const ref : treference;
      l : tasmlabel);override;
    procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
    procedure a_cmp_ref_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;const ref: treference; reg : tregister; l : tasmlabel); override;
    procedure a_cmp_reg_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg : tregister; const ref: treference; l : tasmlabel); override;

    { jumps and flag handling }
    procedure a_jmp_name(list : TAsmList;const s : string);override;
    procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
    procedure a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel); override;
    procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister); override;
    procedure g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference); override;
    procedure g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);override;

    { entry/exit code helpers }
    procedure g_profilecode(list : TAsmList);override;
    procedure g_stackpointer_alloc(list : TAsmList;localsize : longint);override;
    procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
    procedure g_overflowcheck(list: TAsmList; const l:tlocation;def:tdef);override;
    procedure g_external_wrapper(list: TAsmList; procdef: tprocdef; const externalname: string); override;

    { rewrites ref in place (PIC/GOT access, oversized offsets, i8086
      restrictions) and may append helper instructions to list }
    procedure make_simple_ref(list:TAsmList;var ref: treference);
  protected
    procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    { raises an internal error if reg does not have the operand size of size }
    procedure check_register_size(size:tcgsize;reg:tregister);
    procedure opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;dst: tregister; shuffle : pmmshuffle);
    procedure opmm_loc_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;loc : tlocation;src,dst : tregister;shuffle : pmmshuffle);
    function get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
    { picks the move opcode and operand size for a load from size s1 to size s2 }
    procedure sizes2load(s1,s2 : tcgsize;var op: tasmop; var s3: topsize);
    { emit an FPU load/store of ref and adjust the FPU stack counter }
    procedure floatload(list: TAsmList; t : tcgsize;const ref : treference);
    procedure floatstore(list: TAsmList; t : tcgsize;const ref : treference);
    { select the FPU load/store opcode and operand size for size t }
    procedure floatloadops(t : tcgsize;var op : tasmop;var s : topsize);
    procedure floatstoreops(t : tcgsize;var op : tasmop;var s : topsize);
  end;
  const
    { TCGSize2OpSize maps every tcgsize value to the assembler operand size
      (topsize) used for it on the target being compiled.  Entries are
      positional, so they must stay in the declaration order of tcgsize.
      One table is provided per target CPU. }
  {$if defined(x86_64)}
    TCGSize2OpSize: Array[tcgsize] of topsize =
      (S_NO,S_B,S_W,S_L,S_Q,S_XMM,S_B,S_W,S_L,S_Q,S_XMM,
       S_FS,S_FL,S_FX,S_IQ,S_FXX,
       S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,
       S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM);
  {$elseif defined(i386)}
    TCGSize2OpSize: Array[tcgsize] of topsize =
      (S_NO,S_B,S_W,S_L,S_L,S_T,S_B,S_W,S_L,S_L,S_L,
       S_FS,S_FL,S_FX,S_IQ,S_FXX,
       S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,
       S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM);
  {$elseif defined(i8086)}
    TCGSize2OpSize: Array[tcgsize] of topsize =
      (S_NO,S_B,S_W,S_W,S_W,S_T,S_B,S_W,S_W,S_W,S_W,
       S_FS,S_FL,S_FX,S_IQ,S_FXX,
       S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,
       S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM);
  {$endif}
  {$ifndef NOTARGETWIN}
    { NOTE(review): presumably the stack page size in bytes assumed for
      Windows targets; its users are outside this section - confirm }
    winstackpagesize = 4096;
  {$endif NOTARGETWIN}
  { true if the selected FPU type is one of the AVX instruction sets }
  function UseAVX: boolean;
  { true if inc/dec should be generated instead of add/sub by one }
  function UseIncDec: boolean;
  136. implementation
  137. uses
  138. globals,verbose,systems,cutils,
  139. defutil,paramgr,procinfo,
  140. tgobj,ncgutil,
  141. fmodule,symsym;
  { Reports whether the FPU type selected in the current settings is a
    member of fpu_avx_instructionsets. }
  function UseAVX: boolean;
    begin
      UseAVX:=(current_settings.fputype in fpu_avx_instructionsets);
    end;
  { Decides whether inc/dec may be used in place of add/sub.

    Modern CPUs prefer add/sub: those instructions modify all flags and
    therefore break instruction dependencies on the flags.  inc/dec is
    only chosen when optimizing for size or, on the 32 and 16 bit
    targets, when generating code for a CPU up to the 386. }
  function UseIncDec: boolean;
    var
      optimize_for_size : boolean;
    begin
      optimize_for_size:=cs_opt_size in current_settings.optimizerswitches;
  {$if defined(x86_64)}
      UseIncDec:=optimize_for_size;
  {$elseif defined(i386)}
      UseIncDec:=optimize_for_size or
        (current_settings.cputype=cpu_386);
  {$elseif defined(i8086)}
      UseIncDec:=optimize_for_size or
        ((current_settings.cputype>=cpu_8086) and
         (current_settings.cputype<=cpu_386));
  {$endif}
    end;
  const
    { Assembler opcode for each generic operation; entries are positional
      and must follow the declaration order of topcg. }
    TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
      A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
      A_SAR,A_SHL,A_SHR,A_SUB,A_XOR,A_ROL,A_ROR);
    { Condition code for each generic comparison; entries are positional
      and must follow the declaration order of topcmp. }
    TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
      C_E,C_G,C_L,C_GE,C_LE,C_NE,C_BE,C_B,C_AE,C_A);
  { Releases the integer, MM and MMX register allocators as well as the
    separate FPU allocator, then lets the ancestor finish the clean-up. }
  procedure Tcgx86.done_register_allocators;
    begin
      { allocators stored in the generic rg[] table }
      rg[R_INTREGISTER].Free;
      rg[R_MMREGISTER].Free;
      rg[R_MMXREGISTER].Free;

      { the FPU allocator lives in its own field }
      rgfpu.Free;

      inherited done_register_allocators;
    end;
  { Obtains an FPU register from the dedicated FPU allocator.  The size
    argument is not used by this implementation. }
  function Tcgx86.getfpuregister(list:TAsmList;size:Tcgsize):Tregister;
    begin
      getfpuregister:=rgfpu.getregisterfpu(list);
    end;
  { Obtains an MMX register (subregister R_SUBNONE).  Raises internal
    error 2003121214 when no MMX register allocator has been set up. }
  function Tcgx86.getmmxregister(list:TAsmList):Tregister;
    begin
      if rg[R_MMXREGISTER]=nil then
        internalerror(2003121214);
      getmmxregister:=rg[R_MMXREGISTER].getregister(list,R_SUBNONE);
    end;
  { Obtains an MM register whose subregister matches the requested size.
    Only OS_F32, OS_F64, OS_M64 and OS_M128 are accepted; any other size
    raises internal error 200506041.  A missing MM register allocator
    raises internal error 2003121234. }
  function Tcgx86.getmmregister(list:TAsmList;size:Tcgsize):Tregister;
    var
      subreg : tsubregister;
    begin
      if rg[R_MMREGISTER]=nil then
        internalerror(2003121234);

      { translate the size into the subregister to request }
      case size of
        OS_F32:
          subreg:=R_SUBMMS;
        OS_F64:
          subreg:=R_SUBMMD;
        OS_M64:
          subreg:=R_SUBQ;
        OS_M128:
          subreg:=R_SUBMMWHOLE;
        else
          internalerror(200506041);
      end;

      result:=rg[R_MMREGISTER].getregister(list,subreg);
    end;
  { Reserves a specific CPU register.  FPU registers cannot be reserved
    this way: asking for one raises internal error 2003121210. }
  procedure Tcgx86.getcpuregister(list:TAsmList;r:Tregister);
    begin
      if getregtype(r)<>R_FPUREGISTER then
        inherited getcpuregister(list,r)
      else
        internalerror(2003121210);
    end;
  { Releases a CPU register.  FPU registers are handed back to the FPU
    allocator, every other register type goes through the ancestor. }
  procedure tcgx86.ungetcpuregister(list:TAsmList;r:Tregister);
    begin
      case getregtype(r) of
        R_FPUREGISTER:
          rgfpu.ungetregisterfpu(list,r);
        else
          inherited ungetcpuregister(list,r);
      end;
    end;
  { Allocates a set of CPU registers of type rt.  Requests for FPU
    registers are silently ignored. }
  procedure Tcgx86.alloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);
    begin
      if rt=R_FPUREGISTER then
        exit;
      inherited alloccpuregisters(list,rt,r);
    end;
  { Deallocates a set of CPU registers of type rt.  Requests for FPU
    registers are silently ignored. }
  procedure Tcgx86.dealloccpuregisters(list:TAsmList;rt:Tregistertype;const r:Tcpuregisterset);
    begin
      if rt=R_FPUREGISTER then
        exit;
      inherited dealloccpuregisters(list,rt,r);
    end;
  { Reports whether registers of type rt are in use.  FPU registers are
    always reported as unused; other types are answered by the ancestor. }
  function Tcgx86.uses_registers(rt:Tregistertype):boolean;
    begin
      uses_registers:=false;
      if rt<>R_FPUREGISTER then
        uses_registers:=inherited uses_registers(rt);
    end;
  { Forwards the register/instruction pair to the ancestor, except for
    FPU registers, which are skipped. }
  procedure tcgx86.add_reg_instruction(instr:Tai;r:tregister);
    begin
      if getregtype(r)=R_FPUREGISTER then
        exit;
      inherited add_reg_instruction(instr,r);
    end;
  { Decrements the FPU stack counter kept in rgfpu.fpuvaroffset.  Raises
    internal error 200604201 if the counter is not positive. }
  procedure tcgx86.dec_fpu_stack;
    begin
      if rgfpu.fpuvaroffset<1 then
        internalerror(200604201);
      dec(rgfpu.fpuvaroffset);
    end;
  { Increments the FPU stack counter kept in rgfpu.fpuvaroffset.  Raises
    internal error 2012062901 if the counter has already reached 7. }
  procedure tcgx86.inc_fpu_stack;
    begin
      if rgfpu.fpuvaroffset>6 then
        internalerror(2012062901);
      inc(rgfpu.fpuvaroffset);
    end;
  247. {****************************************************************************
  248. This is private property, keep out! :)
  249. ****************************************************************************}
  { Determines how a value of size s1 is loaded into a location of size s2.

    s1,s2 : source and destination size; if one of them is OS_NO it takes
            the value of the other one
    op    : receives the move opcode: A_MOV for same-width moves,
            A_MOVZX when the source is unsigned, otherwise A_MOVSX (or
            A_MOVSXD for a 32->64 bit sign extension on x86-64)
    s3    : receives the operand size of the instruction

    Combinations that are not supported (for instance a destination that
    is narrower than the source) raise an internal error. }
  procedure tcgx86.sizes2load(s1,s2 : tcgsize; var op: tasmop; var s3: topsize);
    begin
      { ensure to have always valid sizes }
      if s1=OS_NO then
        s1:=s2;
      if s2=OS_NO then
        s2:=s1;

      { step 1: operand size, selected by destination and then source size }
      case s2 of
        OS_8,OS_S8:
          case s1 of
            OS_8,OS_S8:
              s3:=S_B;
            else
              internalerror(200109221);
          end;

        OS_16,OS_S16:
          if s1 in [OS_8,OS_S8] then
            s3:=S_BW
          else if s1 in [OS_16,OS_S16] then
            s3:=S_W
          else
            internalerror(200109222);

        OS_32,OS_S32:
          if s1 in [OS_8,OS_S8] then
            s3:=S_BL
          else if s1 in [OS_16,OS_S16] then
            s3:=S_WL
          else if s1 in [OS_32,OS_S32] then
            s3:=S_L
          else
            internalerror(200109223);

  {$ifdef x86_64}
        OS_64,OS_S64:
          case s1 of
            { unsigned sources only need a 32 bit destination operand }
            OS_8:
              s3:=S_BL;
            OS_16:
              s3:=S_WL;
            OS_32:
              s3:=S_L;
            { signed sources are extended to the full 64 bits }
            OS_S8:
              s3:=S_BQ;
            OS_S16:
              s3:=S_WQ;
            OS_S32:
              s3:=S_LQ;
            OS_64,OS_S64:
              s3:=S_Q;
            else
              internalerror(200304302);
          end;
  {$endif x86_64}

        else
          internalerror(200109227);
      end;

      { step 2: opcode, derived from the operand size and source signedness }
      if s3 in [S_B,S_W,S_L,S_Q] then
        op:=A_MOV
      else if s1 in [OS_8,OS_16,OS_32,OS_64] then
        op:=A_MOVZX
      else
        begin
  {$ifdef x86_64}
          if s3=S_LQ then
            op:=A_MOVSXD
          else
  {$endif x86_64}
            op:=A_MOVSX;
        end;
    end;
  { Rewrites ref in place into a form that can be used directly as an
    instruction operand on the current target.  Helper instructions that
    are needed for this are appended to list.

    list : assembler list receiving the helper instructions
    ref  : reference to simplify; modified in place

    A reference whose refaddr is already addr_pic or addr_pic_no_got is
    left untouched, so calling this routine twice is harmless.

    x86-64: offsets that do not fit into a longint, and absolute addresses
            without base, index and symbol, are loaded into a register;
            symbols are accessed RIP-relative or through the GOT when
            creating PIC code, and RIP-relative on Windows and Darwin.
    i386  : external symbols on Darwin and symbols in PIC code are
            accessed indirectly or through the GOT register.
    i8086 : a stack pointer base is copied into an address register, and a
            segment held in a non-segment register is moved to ES.

    Whenever an extra register has to be merged into a reference that
    already has both base and index, LEA is used so the flags survive. }
  procedure tcgx86.make_simple_ref(list:TAsmList;var ref: treference);
    var
      hreg : tregister;
      href : treference;
  {$ifndef x86_64}
      add_hreg: boolean;
  {$endif not x86_64}
    begin
      { make_simple_ref() may have already been called earlier, and in that
        case make sure we don't perform the PIC-simplifications twice }
      if (ref.refaddr in [addr_pic,addr_pic_no_got]) then
        exit;

  {$if defined(x86_64)}
      { Only 32bit is allowed }
      { Note that this isn't entirely correct: for RIP-relative targets/memory models,
        it is actually (offset+@symbol-RIP) that should fit into 32 bits. Since two last
        members aren't known until link time, ABIs place very pessimistic limits
        on offset values, e.g. SysV AMD64 allows +/-$1000000 (16 megabytes) }
      if ((ref.offset<low(longint)) or (ref.offset>high(longint))) or
         { absolute address is not a common thing in x64, but nevertheless a possible one }
         ((ref.base=NR_NO) and (ref.index=NR_NO) and (ref.symbol=nil)) then
        begin
          { Load constant value to register }
          hreg:=GetAddressRegister(list);
          list.concat(taicpu.op_const_reg(A_MOV,S_Q,ref.offset,hreg));
          ref.offset:=0;
          {if assigned(ref.symbol) then
            begin
              list.concat(taicpu.op_sym_ofs_reg(A_ADD,S_Q,ref.symbol,0,hreg));
              ref.symbol:=nil;
            end;}
          { Add register to reference }
          if ref.base=NR_NO then
            ref.base:=hreg
          else if ref.index=NR_NO then
            ref.index:=hreg
          else
            begin
              { don't use add, as the flags may contain a value: both
                variants below fold hreg into the reference with LEA }
              if ref.scalefactor<>0 then
                begin
                  { the index is scaled, so merge hreg into the base }
                  reference_reset_base(href,ref.base,0,8);
                  href.index:=hreg;
                  list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                  ref.base:=hreg;
                end
              else
                begin
                  { unscaled index: merge hreg into the index }
                  reference_reset_base(href,ref.index,0,8);
                  href.index:=hreg;
                  list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                  ref.index:=hreg;
                end;
            end;
        end;

      if assigned(ref.symbol) then
        begin
          if cs_create_pic in current_settings.moduleswitches then
            begin
              { Local symbols must not be accessed via the GOT }
              if (ref.symbol.bind=AB_LOCAL) then
                begin
                  { unfortunately, RIP-based addresses don't support an index }
                  if (ref.base<>NR_NO) or
                     (ref.index<>NR_NO) then
                    begin
                      reference_reset_symbol(href,ref.symbol,0,ref.alignment);
                      hreg:=getaddressregister(list);
                      href.refaddr:=addr_pic_no_got;
                      href.base:=NR_RIP;
                      list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                      ref.symbol:=nil;
                    end
                  else
                    begin
                      ref.refaddr:=addr_pic_no_got;
                      hreg:=NR_NO;
                      ref.base:=NR_RIP;
                    end;
                end
              else
                begin
                  { non-local symbol: fetch its address from the GOT }
                  reference_reset_symbol(href,ref.symbol,0,ref.alignment);
                  hreg:=getaddressregister(list);
                  href.refaddr:=addr_pic;
                  href.base:=NR_RIP;
                  list.concat(taicpu.op_ref_reg(A_MOV,S_Q,href,hreg));
                  ref.symbol:=nil;
                end;

              if ref.base=NR_NO then
                ref.base:=hreg
              else if ref.index=NR_NO then
                begin
                  ref.index:=hreg;
                  ref.scalefactor:=1;
                end
              else
                begin
                  { don't use add, as the flags may contain a value }
                  reference_reset_base(href,ref.base,0,8);
                  href.index:=hreg;
                  list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                  ref.base:=hreg;
                end;
            end
          else
            { Always use RIP relative symbol addressing for Windows and Darwin targets. }
            if (target_info.system in (systems_all_windows+[system_x86_64_darwin])) and (ref.base<>NR_RIP) then
              begin
                if (ref.refaddr=addr_no) and (ref.base=NR_NO) and (ref.index=NR_NO) then
                  begin
                    { Set RIP relative addressing for simple symbol references }
                    ref.base:=NR_RIP;
                    ref.refaddr:=addr_pic_no_got
                  end
                else
                  begin
                    { Use temp register to load calculated 64-bit symbol address for complex references }
                    reference_reset_symbol(href,ref.symbol,0,sizeof(pint));
                    href.base:=NR_RIP;
                    href.refaddr:=addr_pic_no_got;
                    hreg:=GetAddressRegister(list);
                    list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                    ref.symbol:=nil;
                    if ref.base=NR_NO then
                      ref.base:=hreg
                    else if ref.index=NR_NO then
                      begin
                        ref.index:=hreg;
                        ref.scalefactor:=0;
                      end
                    else
                      begin
                        { don't use add, as the flags may contain a value }
                        reference_reset_base(href,ref.base,0,8);
                        href.index:=hreg;
                        list.concat(taicpu.op_ref_reg(A_LEA,S_Q,href,hreg));
                        ref.base:=hreg;
                      end;
                  end;
              end;
        end;
  {$elseif defined(i386)}
      add_hreg:=false;
      if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) then
        begin
          if assigned(ref.symbol) and
             not(assigned(ref.relsymbol)) and
             ((ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN]) or
              (cs_create_pic in current_settings.moduleswitches)) then
            begin
              if ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN] then
                begin
                  hreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
                  ref.symbol:=nil;
                end
              else
                begin
                  include(current_procinfo.flags,pi_needs_got);
                  { make a copy of the got register, hreg can get modified }
                  hreg:=cg.getaddressregister(list);
                  a_load_reg_reg(list,OS_ADDR,OS_ADDR,current_procinfo.got,hreg);
                  ref.relsymbol:=current_procinfo.CurrGOTLabel;
                end;
              add_hreg:=true
            end
        end
      else if (cs_create_pic in current_settings.moduleswitches) and
         assigned(ref.symbol) then
        begin
          reference_reset_symbol(href,ref.symbol,0,sizeof(pint));
          href.base:=current_procinfo.got;
          href.refaddr:=addr_pic;
          include(current_procinfo.flags,pi_needs_got);
          hreg:=cg.getaddressregister(list);
          list.concat(taicpu.op_ref_reg(A_MOV,S_L,href,hreg));
          ref.symbol:=nil;
          add_hreg:=true;
        end;

      if add_hreg then
        begin
          if ref.base=NR_NO then
            ref.base:=hreg
          else if ref.index=NR_NO then
            begin
              ref.index:=hreg;
              ref.scalefactor:=1;
            end
          else
            begin
              { don't use add, as the flags may contain a value }
              reference_reset_base(href,ref.base,0,8);
              href.index:=hreg;
              list.concat(taicpu.op_ref_reg(A_LEA,S_L,href,hreg));
              ref.base:=hreg;
            end;
        end;
  {$elseif defined(i8086)}
      { i8086 does not support stack relative addressing }
      if ref.base = NR_STACK_POINTER_REG then
        begin
          href:=ref;
          href.base:=getaddressregister(list);
          { let the register allocator find a suitable register for the reference }
          list.Concat(Taicpu.op_reg_reg(A_MOV, S_W, NR_SP, href.base));
          ref:=href;
        end;

      { if there is a segment in an int register, move it to ES }
      if (ref.segment<>NR_NO) and (not is_segment_reg(ref.segment)) then
        begin
          list.concat(taicpu.op_reg(A_PUSH,S_W,ref.segment));
          list.concat(taicpu.op_reg(A_POP,S_W,NR_ES));
          ref.segment:=NR_ES;
        end;
  {$endif}
    end;
  { Selects the FPU load instruction for a value of size t.

    t  : OS_F32, OS_F64, OS_F80 or OS_C64; anything else raises internal
         error 200204043
    op : receives A_FLD for the float sizes and A_FILD for OS_C64
    s  : receives the matching operand size }
  procedure tcgx86.floatloadops(t : tcgsize;var op : tasmop;var s : topsize);
    begin
      case t of
        OS_F32,OS_F64,OS_F80:
          begin
            { floating point operands share the opcode, only the size differs }
            op:=A_FLD;
            if t=OS_F32 then
              s:=S_FS
            else if t=OS_F64 then
              s:=S_FL
            else
              s:=S_FX;
          end;
        OS_C64:
          begin
            op:=A_FILD;
            s:=S_IQ;
          end;
        else
          internalerror(200204043);
      end;
    end;
  procedure tcgx86.floatload(list: TAsmList; t : tcgsize;const ref : treference);
    { Appends to list the instruction that loads the value of size t found
      at ref (opcode and operand size come from floatloadops), then records
      the additional FPU stack entry through inc_fpu_stack. }
    var
      loadop  : tasmop;
      opsize  : topsize;
      memref  : treference;
    begin
      { ref is const: simplify a private copy instead }
      memref:=ref;
      make_simple_ref(list,memref);
      floatloadops(t,loadop,opsize);
      list.concat(Taicpu.Op_ref(loadop,opsize,memref));
      inc_fpu_stack;
    end;
  procedure tcgx86.floatstoreops(t : tcgsize;var op : tasmop;var s : topsize);
    { Selects the x87 store-and-pop instruction and operand size for a value
      of cg size t.
        t  : size of the value to store; OS_F32, OS_F64, OS_F80 and OS_C64
             are the only sizes handled
        op : receives A_FSTP for the float sizes and A_FISTP for OS_C64
        s  : receives the matching operand size (S_FS, S_FL, S_FX or S_IQ)
      Every other size ends in internalerror(200204042). }
    begin
      case t of
        OS_F32 :
          begin
            s:=S_FS;
            op:=A_FSTP;
          end;
        OS_F64 :
          begin
            s:=S_FL;
            op:=A_FSTP;
          end;
        OS_F80 :
          begin
            s:=S_FX;
            op:=A_FSTP;
          end;
        OS_C64 :
          begin
            { comp values go through the integer store }
            s:=S_IQ;
            op:=A_FISTP;
          end;
        else
          internalerror(200204042);
      end;
    end;
  procedure tcgx86.floatstore(list: TAsmList; t : tcgsize;const ref : treference);
    { Appends to list the instruction that stores the top of the FPU stack
      to ref with size t (opcode and operand size come from floatstoreops),
      optionally followed by FWAIT, and records the removed FPU stack entry
      through dec_fpu_stack. }
    var
      storeop : tasmop;
      opsize  : topsize;
      memref  : treference;
    begin
      { ref is const: simplify a private copy instead }
      memref:=ref;
      make_simple_ref(list,memref);
      floatstoreops(t,storeop,opsize);
      list.concat(Taicpu.Op_ref(storeop,opsize,memref));
      { storing non extended floats can cause a floating point overflow,
        so emit FWAIT when the user asked for it }
      if t<>OS_F80 then
        if cs_fpu_fwait in current_settings.localswitches then
          list.concat(Taicpu.Op_none(A_FWAIT,S_NO));
      dec_fpu_stack;
    end;
  procedure tcgx86.check_register_size(size:tcgsize;reg:tregister);
    { Sanity check: the operand size implied by size must equal the operand
      size implied by the register's own cg size, otherwise
      internalerror(200306031) is raised. }
    var
      wanted,
      actual : topsize;
    begin
      wanted:=TCGSize2OpSize[size];
      actual:=TCGSize2OpSize[reg_cgsize(reg)];
      if not(wanted=actual) then
        internalerror(200306031);
    end;
  624. {****************************************************************************
  625. Assembler code
  626. ****************************************************************************}
  procedure tcgx86.a_jmp_name(list : TAsmList;const s : string);
    { Appends an unconditional jump to the symbol named s.  On i386 Darwin
      the jump goes through the (non-weak) call stub for s, on every other
      target it references the symbol directly. }
    var
      stubref: treference;
    begin
      if target_info.system=system_i386_darwin then
        begin
          reference_reset_symbol(stubref,get_darwin_call_stub(s,false),0,sizeof(pint));
          stubref.refaddr:=addr_full;
          list.concat(taicpu.op_ref(A_JMP,S_NO,stubref));
        end
      else
        list.concat(taicpu.op_sym(A_JMP,S_NO,current_asmdata.RefAsmSymbol(s)));
    end;
  procedure tcgx86.a_jmp_always(list : TAsmList;l: tasmlabel);
    { Unconditional jump to label l, expressed as a conditional jump with
      the OC_NONE condition. }
    begin
      a_jmp_cond(list,OC_NONE,l);
    end;
  function tcgx86.get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
    { Returns the symbol of the Darwin call stub for routine s, named
      'L'+s+'$stub'.  If that symbol is already known it is returned as is;
      otherwise the stub is appended to the al_imports list (which is
      created on demand): a stub section, the stub symbol, an
      indirect_symbol directive for s and five HLT instructions.
        s    : name of the routine the stub stands for
        weak : when true, s is additionally registered as a weak reference }
    var
      stubname : string;
      imports  : TAsmList;
      i        : integer;
    begin
      stubname:='L'+s+'$stub';
      result:=current_asmdata.getasmsymbol(stubname);
      if not assigned(result) then
        begin
          if current_asmdata.asmlists[al_imports]=nil then
            current_asmdata.asmlists[al_imports]:=TAsmList.create;
          imports:=current_asmdata.asmlists[al_imports];
          new_section(imports,sec_stub,'',0);
          result:=current_asmdata.RefAsmSymbol(stubname);
          imports.concat(Tai_symbol.Create(result,0));
          { register as a weak symbol if necessary }
          if weak then
            current_asmdata.weakrefasmsymbol(s);
          imports.concat(tai_directive.create(asd_indirect_symbol,s));
          { stub body: five HLT placeholders }
          for i:=1 to 5 do
            imports.concat(taicpu.op_none(A_HLT));
        end;
    end;
  procedure tcgx86.a_call_name(list : TAsmList;const s : string; weak: boolean);
    { Calls the routine named s; simply forwards to the near-call variant
      with the same arguments. }
    begin
      a_call_name_near(list,s,weak);
    end;
  procedure tcgx86.a_call_name_near(list : TAsmList;const s : string; weak: boolean);
    { Appends a CALL to the routine named s.
        - i386 Darwin: the call targets the Darwin call stub of s.
        - other targets: the call targets the (weak, if requested) symbol
          itself; when generating PIC the reference is marked addr_pic,
          except on x86_64 Darwin and the i386 iPhone simulator. }
    var
      target  : tasmsymbol;
      callref : treference;
    begin
      if target_info.system=system_i386_darwin then
        begin
          reference_reset_symbol(callref,get_darwin_call_stub(s,weak),0,sizeof(pint));
          callref.refaddr:=addr_full;
        end
      else
        begin
          if weak then
            target:=current_asmdata.WeakRefAsmSymbol(s)
          else
            target:=current_asmdata.RefAsmSymbol(s);
          reference_reset_symbol(callref,target,0,sizeof(pint));
          if (cs_create_pic in current_settings.moduleswitches) and
             { darwin's assembler doesn't want @PLT after call symbols }
             not(target_info.system in [system_x86_64_darwin,system_i386_iphonesim]) then
            begin
{$ifdef i386}
              include(current_procinfo.flags,pi_needs_got);
{$endif i386}
              callref.refaddr:=addr_pic;
            end
          else
            callref.refaddr:=addr_full;
        end;
      list.concat(taicpu.op_ref(A_CALL,S_NO,callref));
    end;
  procedure tcgx86.a_call_name_static(list : TAsmList;const s : string);
    { Static call to the routine named s; simply forwards to the near-call
      variant. }
    begin
      a_call_name_static_near(list,s);
    end;
  procedure tcgx86.a_call_name_static_near(list : TAsmList;const s : string);
    { Appends a CALL that references the symbol named s directly, always
      with addr_full addressing (no stub and no PIC handling). }
    var
      target  : tasmsymbol;
      callref : treference;
    begin
      target:=current_asmdata.RefAsmSymbol(s);
      reference_reset_symbol(callref,target,0,sizeof(pint));
      callref.refaddr:=addr_full;
      list.concat(taicpu.op_ref(A_CALL,S_NO,callref));
    end;
  procedure tcgx86.a_call_reg(list : TAsmList;reg : tregister);
    { Indirect call through register reg; simply forwards to the near-call
      variant. }
    begin
      a_call_reg_near(list,reg);
    end;
  procedure tcgx86.a_call_reg_near(list: TAsmList; reg: tregister);
    { Appends a CALL whose single operand is register reg. }
    begin
      list.concat(taicpu.op_reg(A_CALL,S_NO,reg));
    end;
  procedure tcgx86.a_call_ref(list : TAsmList;ref : treference);
    { Indirect call through memory reference ref; simply forwards to the
      near-call variant. }
    begin
      a_call_ref_near(list,ref);
    end;
  procedure tcgx86.a_call_ref_near(list: TAsmList; ref: treference);
    { Appends a CALL whose single operand is the reference ref, used
      exactly as passed in. }
    begin
      list.concat(taicpu.op_ref(A_CALL,S_NO,ref));
    end;
  732. {********************** load instructions ********************}
  procedure tcgx86.a_load_const_reg(list : TAsmList; tosize: TCGSize; a : tcgint; reg : TRegister);
    { Appends a MOV of the constant a into reg, using the operand size that
      belongs to tosize.  reg must have the size implied by tosize
      (verified by check_register_size). }
    begin
      check_register_size(tosize,reg);
      { loading zero is deliberately not special-cased here: the optimizer
        turns it into "xor reg,reg" later on (JM) }
      list.concat(taicpu.op_const_reg(A_MOV,TCGSize2OpSize[tosize],a,reg));
    end;
  procedure tcgx86.a_load_const_ref(list : TAsmList; tosize: tcgsize; a : tcgint;const ref : treference);
    { Appends the code that stores the constant a, with size tosize, to the
      memory location ref.  On x86_64 a 64 bit constant that does not fit
      in a longint is written as two 32 bit stores: the low half at the
      given offset, the high half four bytes further. }
    var
      dest : treference;
    begin
      { ref is const: simplify a private copy instead }
      dest:=ref;
      make_simple_ref(list,dest);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly }
      if (tosize in [OS_S64,OS_64]) and
         not((a>=low(longint)) and (a<=high(longint))) then
        begin
          a_load_const_ref(list,OS_32,longint(a and $ffffffff),dest);
          inc(dest.offset,4);
          a_load_const_ref(list,OS_32,longint(a shr 32),dest);
        end
      else
{$endif x86_64}
        list.concat(taicpu.op_const_ref(A_MOV,TCGSize2OpSize[tosize],a,dest));
    end;
  procedure tcgx86.a_load_reg_ref(list : TAsmList; fromsize,tosize: TCGSize; reg : tregister;const ref : treference);
    { Appends the code that stores register reg (of size fromsize) to the
      memory location ref (of size tosize).  When sizes2load reports an
      extending move, the value is first extended into a fresh integer
      register and that register is then stored by a recursive call;
      otherwise a single register-to-memory instruction is emitted. }
    var
      movop    : tasmop;
      movsize  : topsize;
      widesize : tcgsize;
      widereg  : tregister;
      dest     : treference;
    begin
      { ref is const: simplify a private copy instead }
      dest:=ref;
      make_simple_ref(list,dest);
      check_register_size(fromsize,reg);
      sizes2load(fromsize,tosize,movop,movsize);
      if movsize in [
{$ifdef x86_64}
           S_BQ,S_WQ,S_LQ,
{$endif x86_64}
           S_BW,S_BL,S_WL] then
        begin
          widereg:=getintregister(list,tosize);
          widesize:=tosize;
{$ifdef x86_64}
          { zero extensions to 64 bit on the x86_64 are simply done by
            writing to the lower 32 bit, which clears the upper 32 bit too,
            so it could be that the operand size is 32 bit while the
            register is 64 bit (FK) }
          if movsize in [S_BL,S_WL,S_L] then
            begin
              widereg:=makeregsize(list,widereg,OS_32);
              widesize:=OS_32;
            end;
{$endif x86_64}
          list.concat(taicpu.op_reg_reg(movop,movsize,reg,widereg));
          a_load_reg_ref(list,widesize,tosize,widereg,dest);
        end
      else
        list.concat(taicpu.op_reg_ref(movop,movsize,reg,dest));
    end;
  procedure tcgx86.a_load_ref_reg(list : TAsmList;fromsize,tosize : tcgsize;const ref: treference;reg : tregister);
    { Appends the instruction that loads the value at ref (size fromsize)
      into register reg (size tosize); opcode and operand size are chosen
      by sizes2load.  reg must have the size implied by tosize. }
    var
      movop   : tasmop;
      movsize : topsize;
      src     : treference;
    begin
      { ref is const: simplify a private copy instead }
      src:=ref;
      make_simple_ref(list,src);
      check_register_size(tosize,reg);
      sizes2load(fromsize,tosize,movop,movsize);
{$ifdef x86_64}
      { zero extensions to 64 bit on the x86_64 are simply done by writing
        to the lower 32 bit, which clears the upper 32 bit too, so it could
        be that the operand size is 32 bit while the register is 64 bit (FK) }
      if movsize in [S_BL,S_WL,S_L] then
        reg:=makeregsize(list,reg,OS_32);
{$endif x86_64}
      list.concat(taicpu.op_ref_reg(movop,movsize,src,reg));
    end;
  procedure tcgx86.a_load_reg_reg(list : TAsmList;fromsize,tosize : tcgsize;reg1,reg2 : tregister);
    { Appends the code that copies reg1 (size fromsize) into reg2 (size
      tosize).  A narrowing copy becomes a plain MOV from reg1 resized to
      tosize; otherwise sizes2load picks the move or extension.  No
      instruction is emitted when source and destination end up being the
      same register.  Moves whose source is neither the frame pointer nor
      the stack pointer are reported through add_move_instruction. }
    var
      movop     : tasmop;
      movsize   : topsize;
      moveinstr : Taicpu;
    begin
      check_register_size(fromsize,reg1);
      check_register_size(tosize,reg2);
      if tcgsize2size[fromsize]<=tcgsize2size[tosize] then
        sizes2load(fromsize,tosize,movop,movsize)
      else
        begin
          { narrowing: copy only the part of the source that fits }
          reg1:=makeregsize(list,reg1,tosize);
          movsize:=tcgsize2opsize[tosize];
          movop:=A_MOV;
        end;
{$ifdef x86_64}
      { zero extensions to 64 bit on the x86_64 are simply done by writing
        to the lower 32 bit, which clears the upper 32 bit too, so it could
        be that the operand size is 32 bit while the register is 64 bit (FK) }
      if movsize in [S_BL,S_WL,S_L] then
        reg2:=makeregsize(list,reg2,OS_32);
{$endif x86_64}
      if reg1<>reg2 then
        begin
          moveinstr:=taicpu.op_reg_reg(movop,movsize,reg1,reg2);
          { Notify the register allocator that we have written a move
            instruction so it can try to eliminate it. }
          if not((reg1=current_procinfo.framepointer) or
                 (reg1=NR_STACK_POINTER_REG)) then
            add_move_instruction(moveinstr);
          list.concat(moveinstr);
        end;
{$ifdef x86_64}
      { avoid merging of registers and killing the zero extensions (FK) }
      if (movsize=S_L) and (tosize in [OS_64,OS_S64]) then
        list.concat(taicpu.op_const_reg(A_AND,S_L,$ffffffff,reg2));
{$endif x86_64}
    end;
  procedure tcgx86.a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);
    { Appends the code that loads the address described by ref into
      register r.

      A reference without base and index register is handled by kind:
        - no symbol: the offset is loaded as a constant
        - i386 Darwin / iPhone simulator, external symbol or PIC:
          either through an indirect symbol load or relative to the GOT
          register
        - PIC (on x86_64 only for non-local symbols): the address is read
          from memory with addr_pic addressing and the offset is added
          afterwards
        - x86_64 on Windows and Darwin: RIP-relative LEA
        - otherwise: MOV of the full address
      A reference that consists of nothing but one register becomes a
      register move; everything else becomes an LEA of the simplified
      reference.

      A segment override is only accepted for GS based thread variables on
      i386 Linux/Android, where the thread variable offset is added to the
      result; any other segment is reported with
      cg_e_cant_use_far_pointer_there. }
    var
      addrref : treference;
    begin
      if (ref.base=NR_NO) and (ref.index=NR_NO) then
        begin
          if not assigned(ref.symbol) then
            a_load_const_reg(list,OS_ADDR,ref.offset,r)
          else if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) and
                  ((ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL]) or
                   (cs_create_pic in current_settings.moduleswitches)) then
            begin
              if (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL]) or
                 ((cs_create_pic in current_settings.moduleswitches) and
                  (ref.symbol.bind in [AB_COMMON,AB_GLOBAL,AB_PRIVATE_EXTERN])) then
                begin
                  { go through the indirect symbol pointer, then add the offset }
                  reference_reset_base(addrref,
                    g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol)),
                    ref.offset,sizeof(pint));
                  a_loadaddr_ref_reg(list,addrref,r);
                end
              else
                begin
                  { address the symbol relative to the GOT register }
                  include(current_procinfo.flags,pi_needs_got);
                  reference_reset_base(addrref,current_procinfo.got,ref.offset,ref.alignment);
                  addrref.symbol:=ref.symbol;
                  addrref.relsymbol:=current_procinfo.CurrGOTLabel;
                  list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],addrref,r));
                end;
            end
          else if (cs_create_pic in current_settings.moduleswitches)
{$ifdef x86_64}
                  and not(ref.symbol.bind=AB_LOCAL)
{$endif x86_64}
            then
            begin
              reference_reset_symbol(addrref,ref.symbol,0,ref.alignment);
              addrref.refaddr:=addr_pic;
{$ifdef x86_64}
              addrref.base:=NR_RIP;
              list.concat(taicpu.op_ref_reg(A_MOV,S_Q,addrref,r));
{$else x86_64}
              addrref.base:=current_procinfo.got;
              include(current_procinfo.flags,pi_needs_got);
              list.concat(taicpu.op_ref_reg(A_MOV,S_L,addrref,r));
{$endif x86_64}
              { the load above ignores the offset, so apply it now }
              if ref.offset<>0 then
                a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,r);
            end
{$ifdef x86_64}
          else if (target_info.system in (systems_all_windows+[system_x86_64_darwin]))
                  or (cs_create_pic in current_settings.moduleswitches)
            then
            begin
              { Win64 and Darwin/x86_64 always require RIP-relative addressing }
              addrref:=ref;
              addrref.base:=NR_RIP;
              addrref.refaddr:=addr_pic_no_got;
              list.concat(Taicpu.op_ref_reg(A_LEA,S_Q,addrref,r));
            end
{$endif x86_64}
          else
            begin
              addrref:=ref;
              addrref.refaddr:=ADDR_FULL;
              list.concat(Taicpu.op_ref_reg(A_MOV,tcgsize2opsize[OS_ADDR],addrref,r));
            end;
        end
      else if (ref.base=NR_NO) and (ref.index<>NR_NO) and
              (ref.offset=0) and (ref.scalefactor=0) and (ref.symbol=nil) then
        { the address is just the index register }
        a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.index,r)
      else if (ref.base<>NR_NO) and (ref.index=NR_NO) and
              (ref.offset=0) and (ref.symbol=nil) then
        { the address is just the base register }
        a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,r)
      else
        begin
          addrref:=ref;
          make_simple_ref(list,addrref);
          list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],addrref,r));
        end;

      if ref.segment<>NR_NO then
        begin
          if (tf_section_threadvars in target_info.flags) and
             (target_info.system in [system_i386_linux,system_i386_android]) and
             (ref.segment=NR_GS) then
            begin
              { Convert thread local address to a process global address
                as we cannot handle far pointers. }
              reference_reset_symbol(addrref,current_asmdata.RefAsmSymbol('___fpc_threadvar_offset'),0,ref.alignment);
              addrref.segment:=NR_GS;
              list.concat(Taicpu.op_ref_reg(A_ADD,tcgsize2opsize[OS_ADDR],addrref,r));
            end
          else
            cgmessage(cg_e_cant_use_far_pointer_there);
        end;
    end;
  967. { all fpu load routines expect that R_ST[0-7] means an fpu regvar and }
  968. { R_ST means "the current value at the top of the fpu stack" (JM) }
  procedure tcgx86.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
    { Appends the code that moves an x87 value from reg1 to reg2.
        - reg1 other than NR_ST: the register is loaded onto the FPU stack
        - reg2 other than NR_ST: the stack top is stored into the register
        - both NR_ST and tosize narrower than fromsize (tosize not
          OS_F80): the value is stored to a temporary of size tosize and
          loaded back, so that it really loses the extra precision }
    var
      roundtmp : treference;
      fpuop    : tasmop;
      fpusize  : topsize;
    begin
      if reg1<>NR_ST then
        begin
          floatloadops(tosize,fpuop,fpusize);
          list.concat(taicpu.op_reg(fpuop,fpusize,rgfpu.correct_fpuregister(reg1,rgfpu.fpuvaroffset)));
          inc_fpu_stack;
        end;
      if reg2<>NR_ST then
        begin
          floatstoreops(tosize,fpuop,fpusize);
          list.concat(taicpu.op_reg(fpuop,fpusize,rgfpu.correct_fpuregister(reg2,rgfpu.fpuvaroffset)));
          dec_fpu_stack;
        end;
      { OS_F80 < OS_C64, but OS_C64 fits perfectly in OS_F80 }
      if (reg1=NR_ST) and (reg2=NR_ST) and
         (tosize<>OS_F80) and (tosize<fromsize) then
        begin
          { can't round down to lower precision in x87 :/ }
          tg.gettemp(list,tcgsize2size[tosize],tcgsize2size[tosize],tt_normal,roundtmp);
          a_loadfpu_reg_ref(list,fromsize,tosize,NR_ST,roundtmp);
          a_loadfpu_ref_reg(list,tosize,tosize,roundtmp,NR_ST);
          tg.ungettemp(list,roundtmp);
        end;
    end;
  procedure tcgx86.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
    { Loads the value of size fromsize at ref onto the FPU stack and then
      moves it from the stack top to reg with a_loadfpu_reg_reg. }
    begin
      floatload(list,fromsize,ref);
      a_loadfpu_reg_reg(list,fromsize,tosize,NR_ST,reg);
    end;
  procedure tcgx86.a_loadfpu_reg_ref(list: TAsmList; fromsize,tosize: tcgsize; reg: tregister; const ref: treference);
    { Stores the x87 value in reg to the memory location ref.  If reg is
      not the stack top it is brought there first; the store itself is done
      by floatstore with the (possibly adjusted) tosize. }
    begin
      { in case a record returned in a floating point register
        (LOC_FPUREGISTER with OS_F32/OS_F64) is stored in memory
        (LOC_REFERENCE with OS_32/OS_64), we have to adjust the
        tosize }
      if (fromsize in [OS_F32,OS_F64]) and
         (tcgsize2size[fromsize]=tcgsize2size[tosize]) then
        begin
          if tosize=OS_32 then
            tosize:=OS_F32
          else if tosize=OS_64 then
            tosize:=OS_F64;
        end;
      if reg<>NR_ST then
        a_loadfpu_reg_reg(list,fromsize,tosize,reg,NR_ST);
      floatstore(list,tosize,ref);
    end;
  function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
    { Returns the instruction that moves or converts one scalar value of
      size fromsize into size tosize in the multimedia (SSE/AVX) unit.
        fromsize, tosize : float sizes in OS_F32..OS_F128, or OS_M64 as
                           source with a destination of equal byte size
      The AVX table is used when UseAVX is set, the SSE table otherwise.
      Raises internalerror(2010060104) for size combinations outside the
      tables and internalerror(200312205) when the table has no
      instruction for the combination. }
    const
      { rows: fromsize, columns: tosize, both OS_F32..OS_F128 }
      convertopsse : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_MOVSS,A_CVTSS2SD,A_NONE,A_NONE,A_NONE),
        (A_CVTSD2SS,A_MOVSD,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_MOVQ,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
      { same layout; every entry is the VEX encoded form.  The OS_C64 copy
        uses A_VMOVQ (not the legacy A_MOVQ) so that AVX code never mixes
        in a non-VEX instruction, matching the OS_M64 branch below. }
      convertopavx : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
        (A_VMOVSS,A_VCVTSS2SD,A_NONE,A_NONE,A_NONE),
        (A_VCVTSD2SS,A_VMOVSD,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_VMOVQ,A_NONE),
        (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
    begin
      { we can have OS_F32/OS_F64 (record in function result/LOC_MMREGISTER) to
        OS_32/OS_64 (record in memory/LOC_REFERENCE) }
      if (fromsize in [OS_F32,OS_F64]) and
         (tcgsize2size[fromsize]=tcgsize2size[tosize]) then
        case tosize of
          OS_32:
            tosize:=OS_F32;
          OS_64:
            tosize:=OS_F64;
        end;
      if (fromsize in [low(convertopsse)..high(convertopsse)]) and
         (tosize in [low(convertopsse)..high(convertopsse)]) then
        begin
          if UseAVX then
            result:=convertopavx[fromsize,tosize]
          else
            result:=convertopsse[fromsize,tosize];
        end
      { we can have OS_M64 (record in function result/LOC_MMREGISTER) to
        OS_64 (record in memory/LOC_REFERENCE) }
      else if (tcgsize2size[fromsize]=tcgsize2size[tosize]) and
              (fromsize=OS_M64) then
        begin
          if UseAVX then
            result:=A_VMOVQ
          else
            result:=A_MOVQ;
        end
      else
        internalerror(2010060104);
      { the tables use A_NONE for unsupported combinations }
      if result=A_NONE then
        internalerror(200312205);
    end;
  procedure tcgx86.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle);
    { Appends the instruction that copies or converts multimedia register
      reg1 into reg2.
        - shuffle=nil: a plain register copy; fromsize must equal tosize
          and be OS_F32, OS_F64 or OS_M64
        - scalar shuffle: the instruction comes from get_scalar_mm_op, with
          scalar moves replaced by their packed counterparts
        - any other shuffle: internalerror(200312201)
      Pure moves are reported through add_move_instruction. }
    var
      moveinstr : taicpu;
      mmop      : TAsmOp;
    begin
      if shuffle=nil then
        begin
          if fromsize<>tosize then
            internalerror(200312202)
          else
            { needs correct size in case of spilling }
            case fromsize of
              OS_F32:
                moveinstr:=taicpu.op_reg_reg(A_MOVAPS,S_NO,reg1,reg2);
              OS_F64:
                moveinstr:=taicpu.op_reg_reg(A_MOVAPD,S_NO,reg1,reg2);
              OS_M64:
                moveinstr:=taicpu.op_reg_reg(A_MOVQ,S_NO,reg1,reg2);
              else
                internalerror(2006091201);
            end;
          add_move_instruction(moveinstr);
        end
      else if shufflescalar(shuffle) then
        begin
          mmop:=get_scalar_mm_op(fromsize,tosize);
          case mmop of
            { MOVAPD/MOVAPS are normally faster }
            A_MOVSD:
              mmop:=A_MOVAPD;
            A_MOVSS:
              mmop:=A_MOVAPS;
            { VMOVSD/SS is not available with two register operands }
            A_VMOVSD:
              mmop:=A_VMOVAPD;
            A_VMOVSS:
              mmop:=A_VMOVAPS;
          end;
          { A_VCVTSD2SS and A_VCVTSS2SD require always three operands }
          if (mmop=A_VCVTSD2SS) or (mmop=A_VCVTSS2SD) then
            moveinstr:=taicpu.op_reg_reg_reg(mmop,S_NO,reg1,reg2,reg2)
          else
            moveinstr:=taicpu.op_reg_reg(mmop,S_NO,reg1,reg2);
          { only real moves are candidates for elimination }
          case mmop of
            A_VMOVAPD,
            A_VMOVAPS,
            A_VMOVSS,
            A_VMOVSD,
            A_VMOVQ,
            A_MOVAPD,
            A_MOVAPS,
            A_MOVSS,
            A_MOVSD,
            A_MOVQ:
              add_move_instruction(moveinstr);
          end;
        end
      else
        internalerror(200312201);
      list.concat(moveinstr);
    end;
  procedure tcgx86.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
    { Appends the instruction that loads multimedia register reg from the
      memory location ref.
        - shuffle=nil: MOVQ for OS_M64, otherwise a full register load
          (MOVDQA on x86_64, MOVDQU elsewhere)
        - scalar shuffle: the instruction comes from get_scalar_mm_op
        - any other shuffle: internalerror(200312252) }
    var
      src  : treference;
      mmop : tasmop;
    begin
      { ref is const: simplify a private copy instead }
      src:=ref;
      make_simple_ref(list,src);
      if shuffle=nil then
        begin
          if fromsize=OS_M64 then
            mmop:=A_MOVQ
          else
{$ifdef x86_64}
            { x86-64 has always properly aligned data }
            mmop:=A_MOVDQA;
{$else x86_64}
            mmop:=A_MOVDQU;
{$endif x86_64}
          list.concat(taicpu.op_ref_reg(mmop,S_NO,src,reg));
        end
      else if shufflescalar(shuffle) then
        begin
          mmop:=get_scalar_mm_op(fromsize,tosize);
          { A_VCVTSD2SS and A_VCVTSS2SD require always three operands }
          if (mmop=A_VCVTSD2SS) or (mmop=A_VCVTSS2SD) then
            list.concat(taicpu.op_ref_reg_reg(mmop,S_NO,src,reg,reg))
          else
            list.concat(taicpu.op_ref_reg(mmop,S_NO,src,reg));
        end
      else
        internalerror(200312252);
    end;
  procedure tcgx86.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle);
    { Appends the code that stores multimedia register reg to the memory
      location ref.
        - shuffle=nil: MOVQ for OS_M64, otherwise a full register store
          (MOVDQA on x86_64, MOVDQU elsewhere)
        - scalar shuffle: a direct store when both sizes occupy the same
          number of bytes; otherwise the value is converted into a fresh
          register of size tosize, which is then stored
        - any other shuffle: internalerror(200312252) }
    var
      convreg : tregister;
      dest    : treference;
      mmop    : tasmop;
    begin
      { ref is const: simplify a private copy instead }
      dest:=ref;
      make_simple_ref(list,dest);
      if shuffle=nil then
        begin
          if fromsize=OS_M64 then
            mmop:=A_MOVQ
          else
{$ifdef x86_64}
            { x86-64 has always properly aligned data }
            mmop:=A_MOVDQA;
{$else x86_64}
            mmop:=A_MOVDQU;
{$endif x86_64}
          list.concat(taicpu.op_reg_ref(mmop,S_NO,reg,dest));
        end
      else if shufflescalar(shuffle) then
        begin
          if tcgsize2size[tosize]=tcgsize2size[fromsize] then
            list.concat(taicpu.op_reg_ref(get_scalar_mm_op(fromsize,tosize),S_NO,reg,dest))
          else
            begin
              convreg:=getmmregister(list,tosize);
              mmop:=get_scalar_mm_op(fromsize,tosize);
              { A_VCVTSD2SS and A_VCVTSS2SD require always three operands }
              if (mmop=A_VCVTSD2SS) or (mmop=A_VCVTSS2SD) then
                list.concat(taicpu.op_reg_reg_reg(mmop,S_NO,reg,convreg,convreg))
              else
                list.concat(taicpu.op_reg_reg(mmop,S_NO,reg,convreg));
              list.concat(taicpu.op_reg_ref(get_scalar_mm_op(tosize,tosize),S_NO,convreg,dest));
            end;
        end
      else
        internalerror(200312252);
    end;
  procedure tcgx86.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
    { Two operand multimedia operation with a memory source: wraps ref in
      a LOC_REFERENCE location of the given size and hands everything to
      opmm_loc_reg. }
    var
      operand : tlocation;
    begin
      operand.size:=size;
      operand.loc:=LOC_REFERENCE;
      operand.reference:=ref;
      opmm_loc_reg(list,op,size,operand,reg,shuffle);
    end;
  procedure tcgx86.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle);
    { Two operand multimedia operation with a register source: wraps src
      in a LOC_MMREGISTER location of the given size and hands everything
      to opmm_loc_reg. }
    var
      operand : tlocation;
    begin
      operand.size:=size;
      operand.loc:=LOC_MMREGISTER;
      operand.register:=src;
      opmm_loc_reg(list,op,size,operand,dst,shuffle);
    end;
  procedure tcgx86.opmm_loc_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;src,dst: tregister; shuffle : pmmshuffle);
    { Appends the three operand (AVX) form of multimedia operation Op:
      first operand loc, second operand src, result in dst.
        size    : element size; used directly as index into the opcode
                  table, whose bounds are OS_F32..OS_F64
        loc     : memory reference or multimedia register; loc.size must
                  equal size
        shuffle : nil selects the packed instruction, a scalar shuffle the
                  scalar instruction; other shuffles are not supported
      Every unsupported combination ends in an internalerror.

      Code needed to simplify a memory operand is appended to list, the
      same list that receives the instruction using that operand. }
    const
      opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
        ( { scalar }
          ( { OS_F32 }
            A_NOP,A_NOP,A_VADDSS,A_NOP,A_VDIVSS,A_NOP,A_NOP,A_VMULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_VSUBSS,A_NOP,A_NOP,A_NOP
          ),
          ( { OS_F64 }
            A_NOP,A_NOP,A_VADDSD,A_NOP,A_VDIVSD,A_NOP,A_NOP,A_VMULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_VSUBSD,A_NOP,A_NOP,A_NOP
          )
        ),
        ( { vectorized/packed }
          { because the logical packed single instructions have shorter op codes, we use always
            these
          }
          ( { OS_F32 }
            A_NOP,A_NOP,A_VADDPS,A_NOP,A_VDIVPS,A_NOP,A_NOP,A_VMULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_VSUBPS,A_VXORPS,A_NOP,A_NOP
          ),
          ( { OS_F64 }
            A_NOP,A_NOP,A_VADDPD,A_NOP,A_VDIVPD,A_NOP,A_NOP,A_VMULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_VSUBPD,A_VXORPD,A_NOP,A_NOP
          )
        )
      );
    var
      resultreg : tregister;
      asmop : tasmop;
    begin
      { this is an internally used procedure so the parameters have
        some constraints
      }
      if loc.size<>size then
        internalerror(2013061108);
      resultreg:=dst;
      { deshuffle }
      //!!!
      if (shuffle<>nil) and not(shufflescalar(shuffle)) then
        begin
          internalerror(2013061107);
        end
      else if (shuffle=nil) then
        asmop:=opmm2asmop[1,size,op]
      else if shufflescalar(shuffle) then
        begin
          asmop:=opmm2asmop[0,size,op];
          { no scalar operation available? }
          if asmop=A_NOP then
            begin
              { do vectorized and shuffle finally }
              internalerror(2010060102);
            end;
        end
      else
        internalerror(2013061106);
      { A_NOP marks operations the table has no instruction for }
      if asmop=A_NOP then
        internalerror(2013061105);
      case loc.loc of
        LOC_CREFERENCE,LOC_REFERENCE:
          begin
            { loc is a by-value copy, so its reference may be rewritten;
              the set-up code must precede the instruction in list }
            make_simple_ref(list,loc.reference);
            list.concat(taicpu.op_ref_reg_reg(asmop,S_NO,loc.reference,src,resultreg));
          end;
        LOC_CMMREGISTER,LOC_MMREGISTER:
          list.concat(taicpu.op_reg_reg_reg(asmop,S_NO,loc.register,src,resultreg));
        else
          internalerror(2013061104);
      end;
      { shuffle }
      if resultreg<>dst then
        begin
          internalerror(2013061103);
        end;
    end;
  procedure tcgx86.a_opmm_reg_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src1,src2,dst: tregister;shuffle : pmmshuffle);
    { Three operand multimedia operation on registers: wraps src1 in a
      LOC_MMREGISTER location of the given size and hands everything to
      opmm_loc_reg_reg. }
    var
      operand : tlocation;
    begin
      operand.size:=size;
      operand.loc:=LOC_MMREGISTER;
      operand.register:=src1;
      opmm_loc_reg_reg(list,op,size,operand,src2,dst,shuffle);
    end;
  procedure tcgx86.a_opmm_ref_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; src,dst: tregister;shuffle : pmmshuffle);
    { Three operand multimedia operation with a memory operand: wraps ref
      in a LOC_REFERENCE location of the given size and hands everything
      to opmm_loc_reg_reg. }
    var
      operand : tlocation;
    begin
      operand.size:=size;
      operand.loc:=LOC_REFERENCE;
      operand.reference:=ref;
      opmm_loc_reg_reg(list,op,size,operand,src,dst,shuffle);
    end;
  procedure tcgx86.opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;dst: tregister; shuffle : pmmshuffle);
    { Appends the two operand (SSE) form of multimedia operation Op:
      source loc, destination dst.
        size    : element size; used directly as index into the opcode
                  table, whose bounds are OS_F32..OS_F64
        loc     : memory reference or multimedia register; loc.size must
                  equal size
        shuffle : nil selects the packed instruction, a scalar shuffle the
                  scalar instruction; other shuffles are not supported
      Every unsupported combination ends in an internalerror.

      Code needed to simplify a memory operand is appended to list, the
      same list that receives the instruction using that operand. }
    const
      opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
        ( { scalar }
          ( { OS_F32 }
            A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP,A_NOP,A_NOP
          ),
          ( { OS_F64 }
            A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP,A_NOP,A_NOP
          )
        ),
        ( { vectorized/packed }
          { because the logical packed single instructions have shorter op codes, we use always
            these
          }
          ( { OS_F32 }
            A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS,A_NOP,A_NOP
          ),
          ( { OS_F64 }
            A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD,A_NOP,A_NOP
          )
        )
      );
    var
      resultreg : tregister;
      asmop : tasmop;
    begin
      { this is an internally used procedure so the parameters have
        some constraints
      }
      if loc.size<>size then
        internalerror(200312213);
      resultreg:=dst;
      { deshuffle }
      //!!!
      if (shuffle<>nil) and not(shufflescalar(shuffle)) then
        begin
          internalerror(2010060101);
        end
      else if (shuffle=nil) then
        asmop:=opmm2asmop[1,size,op]
      else if shufflescalar(shuffle) then
        begin
          asmop:=opmm2asmop[0,size,op];
          { no scalar operation available? }
          if asmop=A_NOP then
            begin
              { do vectorized and shuffle finally }
              internalerror(2010060102);
            end;
        end
      else
        internalerror(200312211);
      { A_NOP marks operations the table has no instruction for }
      if asmop=A_NOP then
        internalerror(200312216);
      case loc.loc of
        LOC_CREFERENCE,LOC_REFERENCE:
          begin
            { loc is a by-value copy, so its reference may be rewritten;
              the set-up code must precede the instruction in list }
            make_simple_ref(list,loc.reference);
            list.concat(taicpu.op_ref_reg(asmop,S_NO,loc.reference,resultreg));
          end;
        LOC_CMMREGISTER,LOC_MMREGISTER:
          list.concat(taicpu.op_reg_reg(asmop,S_NO,loc.register,resultreg));
        else
          internalerror(200312214);
      end;
      { shuffle }
      if resultreg<>dst then
        begin
          internalerror(200312212);
        end;
    end;
  procedure tcgx86.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
    { Appends the code for "reg := reg Op a" with operand size size.
      optimize_op_const may first replace Op and a by a cheaper equivalent.
        - OP_NONE emits nothing, OP_MOVE becomes a constant load
        - OP_DIV/OP_IDIV are only supported for powers of two (SHR/SAR)
        - OP_MUL/OP_IMUL by a power of two become SHL unless overflow
          checking is on; other OP_IMUL use IMUL, other OP_MUL are refused
        - add/sub/and/or/xor get shortcuts for the constants 1, 0 and
          all-ones before falling back to the generic instruction
        - shifts and rotates must have a count below 64 (x86_64) or
          32 (i386, i8086); a count of zero emits nothing
      On x86_64, 64 bit constants outside the longint range are loaded into
      a temporary register and the operation is done register to register. }
    var
      shiftop    : tasmop;
      shiftcount : longint;
{$ifdef x86_64}
      constreg   : tregister;
{$endif x86_64}
    begin
      optimize_op_const(op, a);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly }
      if not(op in [OP_NONE,OP_MOVE]) and
         (size in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_op_reg_reg(list,op,size,constreg,reg);
          exit;
        end;
{$endif x86_64}
      check_register_size(size,reg);
      case op of
        OP_NONE :
          { Opcode is optimized away }
          ;
        OP_MOVE :
          { Optimized, replaced with a simple load }
          a_load_const_reg(list,size,a,reg);
        OP_DIV, OP_IDIV:
          begin
            if ispowerof2(int64(a),shiftcount) then
              begin
                { NOTE(review): SAR rounds towards negative infinity, which
                  differs from a truncating signed division for negative
                  values - presumably callers take care of that; confirm }
                if op=OP_DIV then
                  shiftop:=A_SHR
                else
                  shiftop:=A_SAR;
                list.concat(taicpu.op_const_reg(shiftop,TCgSize2OpSize[size],shiftcount,reg));
              end
            else
              { the rest should be handled specifically in the code }
              { generator because of the silly register usage restraints }
              internalerror(200109224);
          end;
        OP_MUL,OP_IMUL:
          begin
            if not(cs_check_overflow in current_settings.localswitches) and
               ispowerof2(int64(a),shiftcount) then
              list.concat(taicpu.op_const_reg(A_SHL,TCgSize2OpSize[size],shiftcount,reg))
            else if op=OP_IMUL then
              list.concat(taicpu.op_const_reg(A_IMUL,TCgSize2OpSize[size],a,reg))
            else
              { OP_MUL should be handled specifically in the code }
              { generator because of the silly register usage restraints }
              internalerror(200109225);
          end;
        OP_ADD, OP_AND, OP_OR, OP_SUB, OP_XOR:
          begin
            if not(cs_check_overflow in current_settings.localswitches) and
               (a = 1) and
               (op in [OP_ADD,OP_SUB]) and
               UseIncDec then
              begin
                if op=OP_ADD then
                  list.concat(taicpu.op_reg(A_INC,TCgSize2OpSize[size],reg))
                else
                  list.concat(taicpu.op_reg(A_DEC,TCgSize2OpSize[size],reg));
              end
            else if a=0 then
              begin
                { adding, subtracting, or-ing and xor-ing zero changes
                  nothing; and-ing with zero is a plain zero load }
                if op=OP_AND then
                  list.concat(taicpu.op_const_reg(A_MOV,TCgSize2OpSize[size],0,reg));
              end
            else if (aword(a)=high(aword)) and
                    (op in [OP_AND,OP_OR,OP_XOR]) then
              begin
                { all bits set: and changes nothing, or yields all ones,
                  xor is a bitwise not }
                if op=OP_OR then
                  list.concat(taicpu.op_const_reg(A_MOV,TCgSize2OpSize[size],aint(high(aword)),reg))
                else if op=OP_XOR then
                  list.concat(taicpu.op_reg(A_NOT,TCgSize2OpSize[size],reg));
              end
            else
              list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],aint(a),reg));
          end;
        OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
          begin
{$if defined(x86_64)}
            if (a and 63)<>0 then
              list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],a and 63,reg));
            if (a shr 6)<>0 then
              internalerror(200609073);
{$elseif defined(i386)}
            if (a and 31)<>0 then
              list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],a and 31,reg));
            if (a shr 5)<>0 then
              internalerror(200609071);
{$elseif defined(i8086)}
            if (a shr 5)<>0 then
              internalerror(2013043002);
            a:=a and 31;
            if a<>0 then
              begin
                if (current_settings.cputype>=cpu_186) or (a=1) then
                  list.concat(taicpu.op_const_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],a,reg))
                else
                  begin
                    { for cpu types below cpu_186 a count other than 1
                      is passed in CL }
                    getcpuregister(list,NR_CL);
                    a_load_const_reg(list,OS_8,a,NR_CL);
                    list.concat(taicpu.op_reg_reg(TOpCG2AsmOp[op],TCgSize2OpSize[size],NR_CL,reg));
                    ungetcpuregister(list,NR_CL);
                  end;
              end;
{$endif}
          end;
        else
          internalerror(200609072);
      end;
    end;
  { Applies the operation Op with the constant a to the memory operand ref.

    The constant is first passed through optimize_op_const and the reference
    is simplified with make_simple_ref.  Where possible a cheaper encoding is
    chosen: a shift for power-of-two multiplications and divisions, INC/DEC
    for adding or subtracting 1 (if UseIncDec permits and overflow checking is
    off), a plain store or NOT for the trivial AND/OR/XOR cases.

    General DIV/IDIV and OP_MUL cannot be encoded here and raise an internal
    error, as do shift counts that do not fit the operand size. }
  procedure tcgx86.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
    var
      opcode: tasmop;
      power: longint;
      countbits: byte;
{$ifdef x86_64}
      tmpreg : tregister;
{$endif x86_64}
      tmpref : treference;
    begin
      optimize_op_const(op, a);
      tmpref:=ref;
      make_simple_ref(list,tmpref);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly }
      if not(op in [OP_NONE,OP_MOVE]) and
         (size in [OS_S64,OS_64]) and
         ((a<low(longint)) or (a>high(longint))) then
        begin
          tmpreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,tmpreg);
          a_op_reg_ref(list,op,size,tmpreg,tmpref);
          exit;
        end;
{$endif x86_64}
      case op of
        OP_NONE :
          begin
            { Opcode is optimized away }
          end;
        OP_MOVE :
          begin
            { Optimized, replaced with a simple load.  tmpref is passed so
              that the reference simplified above is reused instead of the
              raw one }
            a_load_const_ref(list,size,a,tmpref);
          end;
        OP_DIV, OP_IDIV:
          begin
            if ispowerof2(int64(a),power) then
              begin
                case op of
                  OP_DIV:
                    opcode := A_SHR;
                  OP_IDIV:
                    opcode := A_SAR;
                end;
                list.concat(taicpu.op_const_ref(opcode,
                  TCgSize2OpSize[size],power,tmpref));
                exit;
              end;
            { the rest should be handled specifically in the code      }
            { generator because of the silly register usage restraints }
            internalerror(200109231);
          end;
        OP_MUL,OP_IMUL:
          begin
            if not(cs_check_overflow in current_settings.localswitches) and
               ispowerof2(int64(a),power) then
              begin
                list.concat(taicpu.op_const_ref(A_SHL,TCgSize2OpSize[size],
                  power,tmpref));
                exit;
              end;
            { can't multiply a memory location directly with a constant }
            if op = OP_IMUL then
              inherited a_op_const_ref(list,op,size,a,tmpref)
            else
              { OP_MUL should be handled specifically in the code        }
              { generator because of the silly register usage restraints }
              internalerror(200109232);
          end;
        OP_ADD, OP_AND, OP_OR, OP_SUB, OP_XOR:
          if not(cs_check_overflow in current_settings.localswitches) and
             (a = 1) and
             (op in [OP_ADD,OP_SUB]) and
             UseIncDec then
            begin
              if op = OP_ADD then
                list.concat(taicpu.op_ref(A_INC,TCgSize2OpSize[size],tmpref))
              else
                list.concat(taicpu.op_ref(A_DEC,TCgSize2OpSize[size],tmpref))
            end
          else if (a = 0) then
            begin
              { only "and 0" changes the operand: it clears it }
              if (op <> OP_AND) then
                exit
              else
                a_load_const_ref(list,size,0,tmpref)
            end
          else if (aword(a) = high(aword)) and
                  (op in [OP_AND,OP_OR,OP_XOR]) then
            begin
              { all bits set: "and" is a no-op, "or" stores all ones,
                "xor" is a bitwise not }
              case op of
                OP_AND:
                  exit;
                OP_OR:
                  list.concat(taicpu.op_const_ref(A_MOV,TCgSize2OpSize[size],aint(high(aword)),tmpref));
                OP_XOR:
                  list.concat(taicpu.op_ref(A_NOT,TCgSize2OpSize[size],tmpref));
              end
            end
          else
            list.concat(taicpu.op_const_ref(TOpCG2AsmOp[op],
              TCgSize2OpSize[size],a,tmpref));
        OP_SHL,OP_SHR,OP_SAR,OP_ROL,OP_ROR:
          begin
            { a shift count has 5 significant bits, except for 64 bit
              operands on x86_64 where it has 6 }
            countbits:=5;
{$ifdef x86_64}
            if size in [OS_S64,OS_64] then
              countbits:=6;
{$endif x86_64}
            if (a shr countbits) <> 0 then
              internalerror(68991);
            a:=a and ((tcgint(1) shl countbits)-1);
            if a <> 0 then
              begin
{$ifdef i8086}
                { same handling as in a_op_const_reg: before the 186 only a
                  count of 1 can be given as an immediate, anything else has
                  to go through CL }
                if (current_settings.cputype < cpu_186) and (a <> 1) then
                  begin
                    getcpuregister(list,NR_CL);
                    a_load_const_reg(list,OS_8,a,NR_CL);
                    list.concat(taicpu.op_reg_ref(TOpCG2AsmOp[op],TCgSize2OpSize[size],NR_CL,tmpref));
                    ungetcpuregister(list,NR_CL);
                  end
                else
{$endif i8086}
                  list.concat(taicpu.op_const_ref(
                    TOpCG2AsmOp[op],TCgSize2OpSize[size],a,tmpref));
              end;
          end
        else internalerror(68992);
      end;
    end;
  { Emits the instruction(s) for the operation Op between the registers src
    and dst, with dst receiving the result.

    OP_NEG/OP_NOT first copy src to dst when they differ and then operate on
    dst alone.  Shifts and rotates move the count into the CX register and
    use CL as count operand.  OP_MUL, OP_DIV and OP_IDIV are not handled here
    and raise an internal error. }
  procedure tcgx86.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
    const
{$if defined(cpu64bitalu) or defined(cpu32bitalu)}
      COUNTREG=NR_ECX;
      COUNTREG_SIZE=OS_32;
{$elseif defined(cpu16bitalu)}
      COUNTREG=NR_CX;
      COUNTREG_SIZE=OS_16;
{$endif}
    var
      opsz: topsize;
    begin
      check_register_size(size,src);
      check_register_size(size,dst);
      opsz:=tcgsize2opsize[size];
      case op of
        OP_MUL,OP_DIV,OP_IDIV:
          { special stuff, needs separate handling inside code generator }
          internalerror(200109233);
        OP_NEG,OP_NOT:
          begin
            { unary operation: bring the value into dst, then modify dst }
            if src<>dst then
              a_load_reg_reg(list,size,size,src,dst);
            list.concat(taicpu.op_reg(TOpCG2AsmOp[op],opsz,dst));
          end;
        OP_SHR,OP_SHL,OP_SAR,OP_ROL,OP_ROR:
          begin
            { Use ecx to load the value, that allows better coalescing }
            getcpuregister(list,COUNTREG);
            a_load_reg_reg(list,size,COUNTREG_SIZE,src,COUNTREG);
            list.concat(taicpu.op_reg_reg(TOpCG2AsmOp[op],opsz,NR_CL,dst));
            ungetcpuregister(list,COUNTREG);
          end;
        else
          begin
            { plain two operand instruction; src must already have the
              operand size of the operation }
            if reg2opsize(src)<>opsz then
              internalerror(200109226);
            list.concat(taicpu.op_reg_reg(TOpCG2AsmOp[op],opsz,src,dst));
          end;
      end;
    end;
  { Emits the operation Op with the memory operand ref as source and the
    register reg as destination.

    OP_NEG, OP_NOT and OP_IMUL are delegated to the inherited implementation.
    OP_MUL, OP_DIV and OP_IDIV raise an internal error.  Everything else is
    emitted as a single instruction with a memory source operand. }
  procedure tcgx86.a_op_ref_reg(list : TAsmList; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister);
    var
      simpleref : treference;
    begin
      simpleref:=ref;
      make_simple_ref(list,simpleref);
      check_register_size(size,reg);
      if op in [OP_NEG,OP_NOT,OP_IMUL] then
        inherited a_op_ref_reg(list,op,size,simpleref,reg)
      else if op in [OP_MUL,OP_DIV,OP_IDIV] then
        { special stuff, needs separate handling inside code generator }
        internalerror(200109239)
      else
        begin
          { adjust the register to the operation size before using it }
          reg:=makeregsize(list,reg,size);
          list.concat(taicpu.op_ref_reg(TOpCG2AsmOp[op],tcgsize2opsize[size],simpleref,reg));
        end;
    end;
  { Emits the operation Op with the register reg as source and the memory
    operand ref as destination.

    OP_NEG/OP_NOT operate on the memory operand alone and require reg to be
    NR_NO.  OP_IMUL is delegated to the inherited a_op_reg_ref.  OP_MUL,
    OP_DIV and OP_IDIV raise an internal error.  Everything else is emitted
    as a single instruction with a memory destination operand. }
  procedure tcgx86.a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference);
    var
      tmpref : treference;
    begin
      tmpref:=ref;
      make_simple_ref(list,tmpref);
      check_register_size(size,reg);
      case op of
        OP_NEG,OP_NOT:
          begin
            if reg<>NR_NO then
              internalerror(200109237);
            list.concat(taicpu.op_ref(TOpCG2AsmOp[op],tcgsize2opsize[size],tmpref));
          end;
        OP_IMUL:
          begin
            { this one needs a load/imul/store, which is the default.
              The inherited a_op_reg_ref must be used for that: calling
              a_op_ref_reg instead would leave the product in reg and never
              write it back to the memory operand.
              NOTE(review): the generic implementation lives outside this
              file - confirm it performs the load/op/store sequence }
            inherited a_op_reg_ref(list,op,size,reg,tmpref);
          end;
        OP_MUL,OP_DIV,OP_IDIV:
          { special stuff, needs separate handling inside code }
          { generator                                          }
          internalerror(200109238);
        else
          begin
            list.concat(taicpu.op_reg_ref(TOpCG2AsmOp[op],tcgsize2opsize[size],reg,tmpref));
          end;
      end;
    end;
  { Emits a bit scan of src into dst: BSR when reverse is set, BSF otherwise.
    A conditional jump on OC_NE skips the fallback that follows; when the
    jump is not taken dst is loaded with $ff. }
  procedure tcgx86.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
    var
      scanop : tasmop;
      opsize : topsize;
      donelabel : TAsmLabel;
    begin
      if reverse then
        scanop:=A_BSR
      else
        scanop:=A_BSF;
      opsize:=tcgsize2opsize[size];
      list.concat(taicpu.op_reg_reg(scanop,opsize,src,dst));
      { fallback result when the scan did not yield "not equal" }
      current_asmdata.getjumplabel(donelabel);
      a_jmp_cond(list,OC_NE,donelabel);
      list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,dst));
      a_label(list,donelabel);
    end;
  {*************** compare instructions ****************}
  { Compares the register reg with the constant a and jumps to l when the
    condition cmp_op holds.  A comparison with zero is emitted as TEST
    reg,reg; on x86_64 a 64 bit constant outside the longint range is first
    loaded into a register. }
  procedure tcgx86.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
    l : tasmlabel);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly }
      if (size in [OS_S64,OS_64]) and
         not((a>=low(longint)) and (a<=high(longint))) then
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_cmp_reg_reg_label(list,size,cmp_op,constreg,reg,l);
          exit;
        end;
{$endif x86_64}
      if a<>0 then
        list.concat(taicpu.op_const_reg(A_CMP,tcgsize2opsize[size],a,reg))
      else
        list.concat(taicpu.op_reg_reg(A_TEST,tcgsize2opsize[size],reg,reg));
      a_jmp_cond(list,cmp_op,l);
    end;
  { Compares the memory operand ref with the constant a and jumps to l when
    the condition cmp_op holds.  On x86_64 a 64 bit constant outside the
    longint range is first loaded into a register. }
  procedure tcgx86.a_cmp_const_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;const ref : treference;
    l : tasmlabel);
    var
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
      simpleref : treference;
    begin
      simpleref:=ref;
      make_simple_ref(list,simpleref);
{$ifdef x86_64}
      { x86_64 only supports signed 32 bits constants directly }
      if (size in [OS_S64,OS_64]) and
         not((a>=low(longint)) and (a<=high(longint))) then
        begin
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_cmp_reg_ref_label(list,size,cmp_op,constreg,simpleref,l);
          exit;
        end;
{$endif x86_64}
      list.concat(taicpu.op_const_ref(A_CMP,TCgSize2OpSize[size],a,simpleref));
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits CMP reg1,reg2 followed by a jump to l on the condition cmp_op.
    Both registers are checked against size first. }
  procedure tcgx86.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;
    reg1,reg2 : tregister;l : tasmlabel);
    var
      cmpinstr : taicpu;
    begin
      check_register_size(size,reg1);
      check_register_size(size,reg2);
      cmpinstr:=taicpu.op_reg_reg(A_CMP,TCgSize2OpSize[size],reg1,reg2);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits CMP with the (simplified) memory operand ref as first and the
    register reg as second operand, followed by a jump to l on the condition
    cmp_op. }
  procedure tcgx86.a_cmp_ref_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;const ref: treference; reg : tregister;l : tasmlabel);
    var
      simpleref : treference;
      cmpinstr : taicpu;
    begin
      simpleref:=ref;
      make_simple_ref(list,simpleref);
      check_register_size(size,reg);
      cmpinstr:=taicpu.op_ref_reg(A_CMP,TCgSize2OpSize[size],simpleref,reg);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
    end;
  { Emits CMP with the register reg as first and the (simplified) memory
    operand ref as second operand, followed by a jump to l on the condition
    cmp_op. }
  procedure tcgx86.a_cmp_reg_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg : tregister;const ref: treference; l : tasmlabel);
    var
      simpleref : treference;
      cmpinstr : taicpu;
    begin
      simpleref:=ref;
      make_simple_ref(list,simpleref);
      check_register_size(size,reg);
      cmpinstr:=taicpu.op_reg_ref(A_CMP,TCgSize2OpSize[size],reg,simpleref);
      list.concat(cmpinstr);
      a_jmp_cond(list,cmp_op,l);
    end;
  { Appends a jump to l: an unconditional JMP for OC_None, otherwise a Jcc
    whose condition is looked up in TOpCmp2AsmCond.  The instruction is
    flagged as a jump. }
  procedure tcgx86.a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
    var
      jump : taicpu;
    begin
      if cond<>OC_None then
        begin
          jump:=Taicpu.Op_sym(A_Jcc,S_NO,l);
          jump.SetCondition(TOpCmp2AsmCond[cond]);
        end
      else
        jump:=Taicpu.Op_sym(A_JMP,S_NO,l);
      jump.is_jmp:=true;
      list.concat(jump);
    end;
  { Appends a conditional jump to l whose condition is derived from the
    flags f via flags_to_cond.  The instruction is flagged as a jump. }
  procedure tcgx86.a_jmp_flags(list : TAsmList;const f : TResFlags;l: tasmlabel);
    var
      jump : taicpu;
    begin
      jump:=Taicpu.op_sym(A_Jcc,S_NO,l);
      jump.is_jmp:=true;
      jump.SetCondition(flags_to_cond(f));
      list.concat(jump);
    end;
  { Materialises the flags f in the register reg: a SETcc writes the 8 bit
    version of reg, which is then loaded into reg at the requested size if
    the two registers differ. }
  procedure tcgx86.g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);
    var
      setinstr : taicpu;
      bytereg : tregister;
    begin
      bytereg:=makeregsize(list,reg,OS_8);
      setinstr:=Taicpu.op_reg(A_SETcc,S_B,bytereg);
      setinstr.setcondition(flags_to_cond(f));
      list.concat(setinstr);
      { widen the byte result when reg is larger than 8 bits }
      if bytereg<>reg then
        a_load_reg_reg(list,OS_8,size,bytereg,reg);
    end;
  { Materialises the flags f in the memory operand ref.  For sizes other
    than 8 bit the location is zeroed first, then a SETcc writes its lowest
    byte.  Without a 64 bit ALU, the dword at offset 4 of a 64 bit location
    is zeroed separately afterwards. }
  procedure tcgx86.g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference);
    var
      setinstr : taicpu;
      simpleref : treference;
    begin
      simpleref:=ref;
      make_simple_ref(list,simpleref);
      { clear the whole location before the byte sized SETcc; this store
        is emitted ahead of the SETcc }
      if not(size in [OS_8,OS_S8]) then
        a_load_const_ref(list,size,0,simpleref);
      setinstr:=Taicpu.op_ref(A_SETcc,S_B,simpleref);
      setinstr.setcondition(flags_to_cond(f));
      list.concat(setinstr);
{$ifndef cpu64bitalu}
      if size in [OS_S64,OS_64] then
        begin
          { clear the upper half as well }
          inc(simpleref.offset,4);
          a_load_const_ref(list,OS_32,0,simpleref);
        end;
{$endif cpu64bitalu}
    end;
  1861. { ************* concatcopy ************ }
  { Emits code that copies len bytes from source to dest.

    One of three strategies is selected:
      copy_move   - a sequence of load/store pairs through an integer
                    register (the initial choice);
      copy_mmx    - up to four MMX registers, for len of 8, 16, 24 or 32
                    when cs_mmx is on and the procedure does not use the fpu;
      copy_string - string instructions (REP MOVSx) through the SI/DI/CX
                    registers; used when len exceeds the helpsize threshold,
                    when optimising for size and len is not a plain register
                    size, and always when either reference has a segment
                    override.

    In the copy_string case segment registers that get overwritten (DS for a
    source segment, ES for a destination segment unless volatile_es is
    defined) are saved on the stack and restored afterwards. }
  procedure Tcgx86.g_concatcopy(list:TAsmList;const source,dest:Treference;len:tcgint);
    const
{$if defined(cpu64bitalu)}
      REGCX=NR_RCX;
      REGSI=NR_RSI;
      REGDI=NR_RDI;
      copy_len_sizes = [1, 2, 4, 8];
      push_segment_size = S_L;
{$elseif defined(cpu32bitalu)}
      REGCX=NR_ECX;
      REGSI=NR_ESI;
      REGDI=NR_EDI;
      copy_len_sizes = [1, 2, 4];
      push_segment_size = S_L;
{$elseif defined(cpu16bitalu)}
      REGCX=NR_CX;
      REGSI=NR_SI;
      REGDI=NR_DI;
      copy_len_sizes = [1, 2];
      push_segment_size = S_W;
{$endif}
    type
      copymode=(copy_move,copy_mmx,copy_string);
    var
      srcref,dstref:Treference;
      r,r0,r1,r2,r3:Tregister;
      helpsize:tcgint;
      copysize:byte;
      cgsize:Tcgsize;
      cm:copymode;
    begin
      { select the copy strategy; later tests override earlier ones }
      cm:=copy_move;
      helpsize:=3*sizeof(aword);
      if cs_opt_size in current_settings.optimizerswitches then
        helpsize:=2*sizeof(aword);
      if (cs_mmx in current_settings.localswitches) and
         not(pi_uses_fpu in current_procinfo.flags) and
         ((len=8) or (len=16) or (len=24) or (len=32)) then
        cm:=copy_mmx;
      if (len>helpsize) then
        cm:=copy_string;
      if (cs_opt_size in current_settings.optimizerswitches) and
         not((len<=16) and (cm=copy_mmx)) and
         not(len in copy_len_sizes) then
        cm:=copy_string;
      if (source.segment<>NR_NO) or
         (dest.segment<>NR_NO) then
        cm:=copy_string;
      case cm of
        copy_move:
          begin
            dstref:=dest;
            srcref:=source;
            copysize:=sizeof(aint);
            cgsize:=int_cgsize(copysize);
            while len<>0 do
              begin
                { shrink the chunk size once fewer bytes remain than the
                  next larger chunk would need; otherwise the previous
                  chunk size is kept }
                if len<2 then
                  begin
                    copysize:=1;
                    cgsize:=OS_8;
                  end
                else if len<4 then
                  begin
                    copysize:=2;
                    cgsize:=OS_16;
                  end
{$if defined(cpu32bitalu) or defined(cpu64bitalu)}
                else if len<8 then
                  begin
                    copysize:=4;
                    cgsize:=OS_32;
                  end
{$endif cpu32bitalu or cpu64bitalu}
{$ifdef cpu64bitalu}
                else if len<16 then
                  begin
                    copysize:=8;
                    cgsize:=OS_64;
                  end
{$endif}
                ;
                dec(len,copysize);
                r:=getintregister(list,cgsize);
                a_load_ref_reg(list,cgsize,cgsize,srcref,r);
                a_load_reg_ref(list,cgsize,cgsize,r,dstref);
                inc(srcref.offset,copysize);
                inc(dstref.offset,copysize);
              end;
          end;
        copy_mmx:
          begin
            dstref:=dest;
            srcref:=source;
            { first all loads ... }
            r0:=getmmxregister(list);
            a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r0,nil);
            if len>=16 then
              begin
                inc(srcref.offset,8);
                r1:=getmmxregister(list);
                a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r1,nil);
              end;
            if len>=24 then
              begin
                inc(srcref.offset,8);
                r2:=getmmxregister(list);
                a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r2,nil);
              end;
            if len>=32 then
              begin
                inc(srcref.offset,8);
                r3:=getmmxregister(list);
                a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r3,nil);
              end;
            { ... then all stores }
            a_loadmm_reg_ref(list,OS_M64,OS_M64,r0,dstref,nil);
            if len>=16 then
              begin
                inc(dstref.offset,8);
                a_loadmm_reg_ref(list,OS_M64,OS_M64,r1,dstref,nil);
              end;
            if len>=24 then
              begin
                inc(dstref.offset,8);
                a_loadmm_reg_ref(list,OS_M64,OS_M64,r2,dstref,nil);
              end;
            if len>=32 then
              begin
                inc(dstref.offset,8);
                a_loadmm_reg_ref(list,OS_M64,OS_M64,r3,dstref,nil);
              end;
          end
        else {copy_string, should be a good fallback in case of unhandled}
          begin
            { destination address -> DI, destination segment -> ES }
            getcpuregister(list,REGDI);
            if (dest.segment=NR_NO) then
              begin
                a_loadaddr_ref_reg(list,dest,REGDI);
{$ifdef volatile_es}
                list.concat(taicpu.op_reg(A_PUSH,push_segment_size,NR_DS));
                list.concat(taicpu.op_reg(A_POP,push_segment_size,NR_ES));
{$endif volatile_es}
              end
            else
              begin
                dstref:=dest;
                dstref.segment:=NR_NO;
                a_loadaddr_ref_reg(list,dstref,REGDI);
{$ifndef volatile_es}
                list.concat(taicpu.op_reg(A_PUSH,push_segment_size,NR_ES));
{$endif not volatile_es}
                list.concat(taicpu.op_reg(A_PUSH,push_segment_size,dest.segment));
                list.concat(taicpu.op_reg(A_POP,push_segment_size,NR_ES));
              end;
            { source address -> SI, source segment -> DS }
            getcpuregister(list,REGSI);
            if (source.segment=NR_NO) then
              a_loadaddr_ref_reg(list,source,REGSI)
            else
              begin
                srcref:=source;
                srcref.segment:=NR_NO;
                a_loadaddr_ref_reg(list,srcref,REGSI);
                { the saved DS is restored below with push_segment_size, so
                  the same operand size has to be used for pushing it }
                list.concat(taicpu.op_reg(A_PUSH,push_segment_size,NR_DS));
                list.concat(taicpu.op_reg(A_PUSH,push_segment_size,source.segment));
                list.concat(taicpu.op_reg(A_POP,push_segment_size,NR_DS));
              end;

            getcpuregister(list,REGCX);
{$if defined(i8086) or defined(i386)}
            list.concat(Taicpu.op_none(A_CLD,S_NO));
{$endif i8086 or i386}
            if (cs_opt_size in current_settings.optimizerswitches) and
               (len>sizeof(aint)+(sizeof(aint) div 2)) then
              begin
                { smallest code: one byte-wise rep movsb }
                a_load_const_reg(list,OS_INT,len,REGCX);
                list.concat(Taicpu.op_none(A_REP,S_NO));
                list.concat(Taicpu.op_none(A_MOVSB,S_NO));
              end
            else
              begin
                { helpsize native words, then the remaining len bytes }
                helpsize:=len div sizeof(aint);
                len:=len mod sizeof(aint);
                if helpsize>1 then
                  begin
                    a_load_const_reg(list,OS_INT,helpsize,REGCX);
                    list.concat(Taicpu.op_none(A_REP,S_NO));
                  end;
                if helpsize>0 then
                  begin
{$if defined(cpu64bitalu)}
                    list.concat(Taicpu.op_none(A_MOVSQ,S_NO));
{$elseif defined(cpu32bitalu)}
                    list.concat(Taicpu.op_none(A_MOVSD,S_NO));
{$elseif defined(cpu16bitalu)}
                    list.concat(Taicpu.op_none(A_MOVSW,S_NO));
{$endif}
                  end;
                if len>=4 then
                  begin
                    dec(len,4);
                    list.concat(Taicpu.op_none(A_MOVSD,S_NO));
                  end;
                if len>=2 then
                  begin
                    dec(len,2);
                    list.concat(Taicpu.op_none(A_MOVSW,S_NO));
                  end;
                if len=1 then
                  list.concat(Taicpu.op_none(A_MOVSB,S_NO));
              end;
            ungetcpuregister(list,REGCX);
            ungetcpuregister(list,REGSI);
            ungetcpuregister(list,REGDI);
            { restore the segment registers in reverse push order }
            if (source.segment<>NR_NO) then
              list.concat(taicpu.op_reg(A_POP,push_segment_size,NR_DS));
{$ifndef volatile_es}
            if (dest.segment<>NR_NO) then
              list.concat(taicpu.op_reg(A_POP,push_segment_size,NR_ES));
{$endif not volatile_es}
          end;
      end;
    end;
  2080. {****************************************************************************
  2081. Entry/Exit Code Helpers
  2082. ****************************************************************************}
  { Emits the call to the profiling hook for the current target.

    For the first group of i386 targets a zero-initialised 32 bit counter is
    placed in a data section, its label is loaded into EDX (saved and
    restored around the call) and the target specific mcount symbol is
    called.  The remaining handled targets only get a plain call.  Targets
    not listed produce no code. }
  procedure tcgx86.g_profilecode(list : TAsmList);
    var
      counterlabel : tasmlabel;
      prefix : String[4];
    begin
      case target_info.system of
{$ifndef NOTARGETWIN}
        system_i386_win32,
{$endif}
        system_i386_freebsd,
        system_i386_netbsd,
//      system_i386_openbsd,
        system_i386_wdosx :
          begin
            { target specific decoration of the mcount symbol }
            if target_info.system=system_i386_freebsd then
              prefix:='.'
            else if target_info.system=system_i386_netbsd then
              prefix:='__'
//          else if target_info.system=system_i386_openbsd then
//            prefix:='.'
            else
              prefix:='';
            current_asmdata.getaddrlabel(counterlabel);
            new_section(list,sec_data,lower(current_procinfo.procdef.mangledname),sizeof(pint));
            list.concat(Tai_label.Create(counterlabel));
            list.concat(Tai_const.Create_32bit(0));
            new_section(list,sec_code,lower(current_procinfo.procdef.mangledname),0);
            list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EDX));
            list.concat(Taicpu.Op_sym_ofs_reg(A_MOV,S_L,counterlabel,0,NR_EDX));
            a_call_name(list,target_info.Cprefix+prefix+'mcount',false);
            list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDX));
          end;

        system_i386_linux:
          a_call_name(list,target_info.Cprefix+'mcount',false);

        system_i386_go32v2,
        system_i386_watcom:
          a_call_name(list,'MCOUNT',false);

        system_x86_64_linux,
        system_x86_64_darwin:
          a_call_name(list,'mcount',false);
      end;
    end;
  { Emits code that lowers the stack pointer by localsize bytes; nothing is
    emitted for localsize<=0.

    On the Windows targets handled below, allocations of at least
    winstackpagesize bytes additionally touch every page on the way down:
    unrolled for up to five pages, in a counted loop otherwise. }
  procedure tcgx86.g_stackpointer_alloc(list : TAsmList;localsize : longint);

    { Lowers the stack pointer by a bytes: SUB on i8086, LEA elsewhere }
    procedure decrease_sp(a : tcgint);
{$ifdef i8086}
      begin
        list.concat(Taicpu.Op_const_reg(A_SUB,S_W,a,NR_STACK_POINTER_REG));
      end;
{$else i8086}
      var
        spref : treference;
      begin
        reference_reset_base(spref,NR_STACK_POINTER_REG,-a,0);
        { normally, lea is a better choice than a sub to adjust the stack pointer }
        list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],spref,NR_STACK_POINTER_REG));
      end;
{$endif i8086}

{$ifdef x86}
{$ifndef NOTARGETWIN}
    var
      probe : treference;
      page : integer;
      nextpage : tasmlabel;
{$endif NOTARGETWIN}
{$endif x86}
    begin
      if localsize<=0 then
        exit;
{$ifdef i386}
{$ifndef NOTARGETWIN}
      { windows guards only a few pages for stack growing,
        so we have to access every page first              }
      if (target_info.system in [system_i386_win32,system_i386_wince]) and
         (localsize>=winstackpagesize) then
        begin
          if localsize div winstackpagesize<=5 then
            begin
              { unrolled: allocate all but one dword, touch each page,
                and let the final push allocate the last dword }
              decrease_sp(localsize-4);
              for page:=1 to localsize div winstackpagesize do
                begin
                  reference_reset_base(probe,NR_ESP,localsize-page*winstackpagesize,4);
                  list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,probe));
                end;
              list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
            end
          else
            begin
              { counted loop with EDI as page counter; EDI is pushed first
                and reloaded from its stack slot at the end }
              current_asmdata.getjumplabel(nextpage);
              getcpuregister(list,NR_EDI);
              list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI));
              list.concat(Taicpu.op_const_reg(A_MOV,S_L,localsize div winstackpagesize,NR_EDI));
              a_label(list,nextpage);
              decrease_sp(winstackpagesize-4);
              list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
              if UseIncDec then
                list.concat(Taicpu.op_reg(A_DEC,S_L,NR_EDI))
              else
                list.concat(Taicpu.op_const_reg(A_SUB,S_L,1,NR_EDI));
              a_jmp_cond(list,OC_NE,nextpage);
              decrease_sp(localsize mod winstackpagesize-4);
              reference_reset_base(probe,NR_ESP,localsize-4,4);
              list.concat(Taicpu.op_ref_reg(A_MOV,S_L,probe,NR_EDI));
              ungetcpuregister(list,NR_EDI);
            end
        end
      else
{$endif NOTARGETWIN}
{$endif i386}
{$ifdef x86_64}
{$ifndef NOTARGETWIN}
      { windows guards only a few pages for stack growing,
        so we have to access every page first              }
      if (target_info.system=system_x86_64_win64) and
         (localsize>=winstackpagesize) then
        begin
          if localsize div winstackpagesize<=5 then
            begin
              { unrolled: allocate everything, then touch each page and
                finally the new top of stack }
              decrease_sp(localsize);
              for page:=1 to localsize div winstackpagesize do
                begin
                  reference_reset_base(probe,NR_RSP,localsize-page*winstackpagesize+4,4);
                  list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,probe));
                end;
              reference_reset_base(probe,NR_RSP,0,4);
              list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,probe));
            end
          else
            begin
              { counted loop with R10 as page counter }
              current_asmdata.getjumplabel(nextpage);
              getcpuregister(list,NR_R10);
              list.concat(Taicpu.op_const_reg(A_MOV,S_Q,localsize div winstackpagesize,NR_R10));
              a_label(list,nextpage);
              decrease_sp(winstackpagesize);
              reference_reset_base(probe,NR_RSP,0,4);
              list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,probe));
              if UseIncDec then
                list.concat(Taicpu.op_reg(A_DEC,S_Q,NR_R10))
              else
                list.concat(Taicpu.op_const_reg(A_SUB,S_Q,1,NR_R10));
              a_jmp_cond(list,OC_NE,nextpage);
              decrease_sp(localsize mod winstackpagesize);
              ungetcpuregister(list,NR_R10);
            end
        end
      else
{$endif NOTARGETWIN}
{$endif x86_64}
        { default: a single stack pointer adjustment }
        decrease_sp(localsize);
    end;
  { Emits the entry code of the current procedure.

    For interrupt procedures on i8086 and i386 the registers are pushed
    first.  Unless nostackframe is set, the frame pointer is then set up
    (when it is not the stack pointer itself) together with the matching
    CFI/SEH directives, and finally localsize bytes of stack are allocated,
    rounded for the target stack alignment where required. }
  procedure tcgx86.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
    var
      misalign : longint;
      fppara : tparavarsym;
{$ifdef i8086}
      dgroupref : treference;
{$endif i8086}
    begin
{$ifdef i8086}
      { interrupt support for i8086 }
      if po_interrupt in current_procinfo.procdef.procoptions then
        begin
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_AX));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_BX));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_CX));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_DX));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_SI));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_DI));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_DS));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_ES));
          { load DS with the dgroup address, going through AX }
          reference_reset(dgroupref,0);
          dgroupref.refaddr:=addr_dgroup;
          list.concat(Taicpu.Op_ref_reg(A_MOV,S_W,dgroupref,NR_AX));
          list.concat(Taicpu.Op_reg_reg(A_MOV,S_W,NR_AX,NR_DS));
        end;
{$endif i8086}
{$ifdef i386}
      { interrupt support for i386 }
      if (po_interrupt in current_procinfo.procdef.procoptions) and
         { this messes up stack alignment }
         not(target_info.system in [system_i386_darwin,system_i386_iphonesim,system_i386_android]) then
        begin
          { .... also the segment registers }
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_GS));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_FS));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_ES));
          list.concat(Taicpu.Op_reg(A_PUSH,S_W,NR_DS));
          { save the registers of an interrupt procedure }
          list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EDI));
          list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_ESI));
          list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EDX));
          list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_ECX));
          list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EBX));
          list.concat(Taicpu.Op_reg(A_PUSH,S_L,NR_EAX));
        end;
{$endif i386}

      { nothing more to do when no stack frame is requested }
      if nostackframe then
        exit;

      { save old framepointer }
      { return address }
      misalign:=sizeof(pint);
      list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
      if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
        begin
          { push <frame_pointer> }
          inc(misalign,sizeof(pint));
          include(rg[R_INTREGISTER].preserved_by_proc,RS_FRAME_POINTER_REG);
          list.concat(Taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
          if (target_info.system=system_x86_64_win64) then
            begin
              list.concat(cai_seh_directive.create_reg(ash_pushreg,NR_FRAME_POINTER_REG));
              include(current_procinfo.flags,pi_has_unwind_info);
            end;
          { Return address and FP are both on stack }
          current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
          current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));
          if current_procinfo.procdef.proctypeoption=potype_exceptfilter then
            begin
              { load framepointer from hidden $parentfp parameter }
              fppara:=tparavarsym(current_procinfo.procdef.paras[0]);
              if not (vo_is_parentfp in fppara.varoptions) then
                InternalError(201201142);
              if (fppara.paraloc[calleeside].location^.loc<>LOC_REGISTER) or
                 (fppara.paraloc[calleeside].location^.next<>nil) then
                InternalError(201201143);
              list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],
                fppara.paraloc[calleeside].location^.register,NR_FRAME_POINTER_REG));
              { Need only as much stack space as necessary to do the calls.
                Exception filters don't have own local vars, and temps are 'mapped'
                to the parent procedure.
                maxpushedparasize is already aligned at least on x86_64. }
              localsize:=current_procinfo.maxpushedparasize;
            end
          else
            list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG));
          current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FRAME_POINTER_REG);
          {
            TODO: current framepointer handling is not compatible with Win64 at all:
            Win64 expects FP to point to the top or into the middle of local area.
            In FPC it points to the bottom, making it impossible to generate
            UWOP_SET_FPREG unwind code if local area is > 240 bytes.
            So for now pretend we never have a framepointer.
          }
        end
      else
        CGmessage(cg_d_stackframe_omited);

      { allocate stackframe space }
      if (localsize<>0) or
         ((target_info.stackalign>sizeof(pint)) and
          (misalign <> 0) and
          ((pi_do_call in current_procinfo.flags) or
           (po_assembler in current_procinfo.procdef.procoptions))) then
        begin
          { round up so that the stack is aligned again once the return
            address and the saved frame pointer are accounted for }
          if target_info.stackalign>sizeof(pint) then
            localsize:=align(localsize+misalign,target_info.stackalign)-misalign;
          cg.g_stackpointer_alloc(list,localsize);
          if current_procinfo.framepointer=NR_STACK_POINTER_REG then
            current_asmdata.asmcfi.cfa_def_cfa_offset(list,localsize+sizeof(pint));
          current_procinfo.final_localsize:=localsize;
          if (target_info.system=system_x86_64_win64) then
            begin
              if localsize<>0 then
                list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
              include(current_procinfo.flags,pi_has_unwind_info);
            end;
        end;
    end;
  { Emits the overflow check for a value of type def when overflow checking
    is enabled: a conditional jump skips a call to FPC_OVERFLOW.  Pointers
    and the ordinal types listed below use the condition C_NB, every other
    type uses C_NO. }
  procedure tcgx86.g_overflowcheck(list: TAsmList; const l:tlocation;def:tdef);
    var
      skiplabel : tasmlabel;
      jump : taicpu;
      okcond : TAsmCond;
    begin
      if not(cs_check_overflow in current_settings.localswitches) then
        exit;
      current_asmdata.getjumplabel(skiplabel);
      { pick the condition under which no overflow occurred }
      okcond:=C_NO;
      if (def.typ=pointerdef) or
         ((def.typ=orddef) and
          (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
                                    pasbool8,pasbool16,pasbool32,pasbool64])) then
        okcond:=C_NB;
      jump:=Taicpu.Op_Sym(A_Jcc,S_NO,skiplabel);
      jump.SetCondition(okcond);
      jump.is_jmp:=true;
      list.concat(jump);

      a_call_name(list,'FPC_OVERFLOW',false);
      a_label(list,skiplabel);
    end;
  { Emits a wrapper that jumps to the external symbol externalname.  On
    i386 darwin the inherited implementation is used; elsewhere a direct JMP
    through a symbol reference is emitted, as a PIC reference when PIC code
    is being generated and the target accepts it. }
  procedure tcgx86.g_external_wrapper(list: TAsmList; procdef: tprocdef; const externalname: string);
    var
      target : treference;
      extsym : tasmsymbol;
    begin
      if (target_info.system <> system_i386_darwin) then
        begin
          extsym:=current_asmdata.RefAsmSymbol(externalname);
          reference_reset_symbol(target,extsym,0,sizeof(pint));

          { create pic'ed? }
          target.refaddr:=addr_full;
          if (cs_create_pic in current_settings.moduleswitches) and
             { darwin/x86_64's assembler doesn't want @PLT after call symbols }
             not(target_info.system in [system_x86_64_darwin,system_i386_iphonesim]) then
            target.refaddr:=addr_pic;

          list.concat(taicpu.op_ref(A_JMP,S_NO,target));
        end
      else
        { a_jmp_name jumps to a stub which is always pic-safe on darwin }
        inherited g_external_wrapper(list,procdef,externalname);
    end;
  2399. end.