cgcpu.pas 103 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694
  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. This unit implements the code generator for Xtensa
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,parabase,
  22. cgbase,cgutils,cgobj,
  23. aasmbase,aasmtai,aasmdata,aasmcpu,
  24. cpubase,cpuinfo,
  25. node,symconst,SymType,symdef,
  26. rgcpu,
  27. cg64f32;
type
  { Code generator class for the Xtensa CPU.
    Several methods are still placeholders that only emit a NOP. }
  tcgcpu=class(tcg)
  private
    { rewrites a reference with an (absolute) symbol into a pc-relative
      access through a literal-pool entry loaded with L32R }
    procedure fixref(list : TAsmList; var ref : treference);
  public
    procedure init_register_allocators;override;
    procedure done_register_allocators;override;
    { move instructions }
    procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
    procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
    procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
    procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
    procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
    procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
    procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
    procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
    procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
    procedure a_call_reg(list:TAsmList;Reg:tregister);override;
    procedure a_jmp_name(list: TAsmList; const s: string);override;
    procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
    procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
    { comparison operations }
    procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
    procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
    procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
    procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
{$ifdef dummy}
  protected
    { changes register size without adding register allocation info }
    function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
  public
    { simplifies "ref" so it can be used with "op". If "ref" can be used
      with a different load/Store operation that has the same meaning as the
      original one, "op" will be replaced with the alternative }
    procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
    function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
    procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
    function getmmregister(list:TAsmList;size:tcgsize):tregister;override;
    function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
    { General purpose instructions }
    procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
    procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
    procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
    procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
    { move instructions }
    procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
    procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
    procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
    { fpu move instructions (not used, all floating point is vector unit-based) }
    procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
    procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
    procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
    procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
    { comparison operations }
    procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
    procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
    procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
    procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
    procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
    procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
    procedure g_maybe_got_init(list: TAsmList); override;
    procedure g_restore_registers(list: TAsmList);override;
    procedure g_save_registers(list: TAsmList);override;
    procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
    procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
    procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
    procedure g_profilecode(list: TAsmList);override;
  private
    function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
    procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
{$endif dummy}
  end;

  { 64-bit operations, implemented as pairs of 32-bit operations via tcg64f32 }
  tcg64fxtensa = class(tcg64f32)
    procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
    procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
    //procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
    //procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
    //procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    //procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
    //procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
    //procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
  end;

procedure create_codegen;

const
  { maps the generic code generator operations to Xtensa instructions;
    A_NONE entries are operations without a direct instruction equivalent }
  TOpCG2AsmOp: array[topcg] of TAsmOp = (
    A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MULL,A_MULL,A_NEG,A_NONE,A_OR,A_SRA,A_SLL,A_SRL,A_SUB,A_XOR,A_NONE,A_NONE
  );
{ NOTE(review): leftover tables from the CPU port this file was derived from;
  kept commented out for reference.
);TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
);
TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
);
TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
);
}
  127. implementation
  128. uses
  129. globals,verbose,systems,cutils,
  130. paramgr,fmodule,
  131. symtable,symsym,
  132. tgobj,
  133. procinfo,cpupi;
procedure tcgcpu.init_register_allocators;
  begin
    inherited init_register_allocators;
    { Integer registers A2..A15 are available for allocation; A0/A1 are
      excluded (presumably return address and stack pointer — confirm
      against cpubase). }
    rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
        [RS_A2,RS_A3,RS_A4,RS_A5,RS_A6,RS_A7,RS_A8,RS_A9,
         RS_A10,RS_A11,RS_A12,RS_A13,RS_A14,RS_A15],first_int_imreg,[]);
    { All 16 floating point registers F0..F15 are allocatable }
    rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
        [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7,RS_F8,RS_F9,
         RS_F10,RS_F11,RS_F12,RS_F13,RS_F14,RS_F15],first_fpu_imreg,[]);
  end;
procedure tcgcpu.done_register_allocators;
  begin
    { release the allocators created in init_register_allocators before
      letting the inherited code do its cleanup }
    rg[R_INTREGISTER].free;
    rg[R_FPUREGISTER].free;
    inherited done_register_allocators;
  end;
  150. procedure tcgcpu.a_load_reg_reg(list : TAsmList; fromsize,tosize : tcgsize;
  151. reg1,reg2 : tregister);
  152. var
  153. conv_done : Boolean;
  154. instr : taicpu;
  155. begin
  156. if (tcgsize2size[fromsize]>32) or (tcgsize2size[tosize]>32) or (fromsize=OS_NO) or (tosize=OS_NO) then
  157. internalerror(2020030710);
  158. conv_done:=false;
  159. if tosize<>fromsize then
  160. begin
  161. conv_done:=true;
  162. if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
  163. fromsize:=tosize;
  164. case fromsize of
  165. OS_8:
  166. list.concat(taicpu.op_reg_reg_const_const(A_EXTUI,reg2,reg1,0,8));
  167. OS_S8:
  168. begin
  169. list.concat(taicpu.op_reg_reg_const(A_SEXT,reg2,reg1,7));
  170. if tosize=OS_16 then
  171. list.concat(taicpu.op_reg_reg_const_const(A_EXTUI,reg2,reg2,0,16));
  172. end;
  173. OS_16:
  174. list.concat(taicpu.op_reg_reg_const_const(A_EXTUI,reg2,reg1,0,16));
  175. OS_S16:
  176. list.concat(taicpu.op_reg_reg_const(A_SEXT,reg2,reg1,15));
  177. else
  178. conv_done:=false;
  179. end;
  180. end;
  181. if not conv_done and (reg1<>reg2) then
  182. begin
  183. { same size, only a register mov required }
  184. instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
  185. list.Concat(instr);
  186. { Notify the register allocator that we have written a move instruction so
  187. it can try to eliminate it. }
  188. add_move_instruction(instr);
  189. end;
  190. end;
procedure tcgcpu.a_load_reg_ref(list : TAsmList; fromsize,tosize : tcgsize;
  reg : tregister; const ref : TReference);
  begin
    { TODO: register-to-memory store is not implemented yet; a NOP is
      emitted as a placeholder, so generated code is not functional }
    list.Concat(taicpu.op_none(A_NOP));
  end;
procedure tcgcpu.a_load_ref_reg(list : TAsmList; fromsize,tosize : tcgsize;
  const ref : TReference; reg : tregister);
  begin
    { TODO: memory-to-register load is not implemented yet; a NOP is
      emitted as a placeholder, so generated code is not functional }
    list.Concat(taicpu.op_none(A_NOP));
  end;
  201. procedure tcgcpu.a_load_const_reg(list : TAsmList; size : tcgsize;
  202. a : tcgint; reg : tregister);
  203. var
  204. hr : treference;
  205. l : TAsmLabel;
  206. begin
  207. if (a>=-2048) and (a<=2047) then
  208. list.Concat(taicpu.op_reg_const(A_MOVI,reg,a))
  209. else
  210. begin
  211. reference_reset(hr,4,[]);
  212. current_asmdata.getjumplabel(l);
  213. cg.a_label(current_procinfo.aktlocaldata,l);
  214. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
  215. hr.symbol:=l;
  216. list.concat(taicpu.op_reg_ref(A_L32R,reg,hr));
  217. end;
  218. end;
  219. procedure tcgcpu.fixref(list : TAsmList;var ref : treference);
  220. var
  221. tmpreg, tmpreg2 : tregister;
  222. tmpref : treference;
  223. l : tasmlabel;
  224. begin
  225. { absolute symbols can't be handled directly, we've to store the symbol reference
  226. in the text segment and access it pc relative
  227. For now, we assume that references where base or index equals to PC are already
  228. relative, all other references are assumed to be absolute and thus they need
  229. to be handled extra.
  230. A proper solution would be to change refoptions to a set and store the information
  231. if the symbol is absolute or relative there.
  232. }
  233. { create consts entry }
  234. reference_reset(tmpref,4,[]);
  235. current_asmdata.getjumplabel(l);
  236. cg.a_label(current_procinfo.aktlocaldata,l);
  237. tmpreg:=NR_NO;
  238. if assigned(ref.symbol) then
  239. current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
  240. else
  241. current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
  242. { load consts entry }
  243. tmpreg:=getintregister(list,OS_INT);
  244. tmpref.symbol:=l;
  245. list.concat(taicpu.op_reg_ref(A_L32R,tmpreg,tmpref));
  246. if ref.base<>NR_NO then
  247. begin
  248. if ref.index<>NR_NO then
  249. begin
  250. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
  251. ref.base:=tmpreg;
  252. end
  253. else
  254. ref.base:=tmpreg;
  255. end
  256. else
  257. ref.base:=tmpreg;
  258. if ref.index<>NR_NO then
  259. begin
  260. list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.index,tmpreg));
  261. ref.index:=NR_NO;
  262. end;
  263. ref.offset:=0;
  264. ref.symbol:=nil;
  265. end;
  266. procedure tcgcpu.a_loadaddr_ref_reg(list : TAsmList;
  267. const ref : TReference; r : tregister);
  268. var
  269. b : byte;
  270. tmpref : treference;
  271. instr : taicpu;
  272. begin
  273. tmpref:=ref;
  274. { Be sure to have a base register }
  275. if tmpref.base=NR_NO then
  276. begin
  277. tmpref.base:=tmpref.index;
  278. tmpref.index:=NR_NO;
  279. end;
  280. if assigned(tmpref.symbol) then
  281. fixref(list,tmpref);
  282. { expect a base here if there is an index }
  283. if (tmpref.base=NR_NO) and (tmpref.index<>NR_NO) then
  284. internalerror(200312022);
  285. if tmpref.index<>NR_NO then
  286. begin
  287. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,tmpref.base,tmpref.index,r);
  288. if tmpref.offset<>0 then
  289. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,r,r);
  290. end
  291. else
  292. begin
  293. if tmpref.base=NR_NO then
  294. a_load_const_reg(list,OS_ADDR,tmpref.offset,r)
  295. else
  296. if tmpref.offset<>0 then
  297. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tmpref.offset,tmpref.base,r)
  298. else
  299. begin
  300. instr:=taicpu.op_reg_reg(A_MOV,r,tmpref.base);
  301. list.concat(instr);
  302. add_move_instruction(instr);
  303. end;
  304. end;
  305. end;
procedure tcgcpu.a_op_reg_reg(list : TAsmList; op : topcg; size : tcgsize;
  src,dst : tregister);
  begin
    { TODO: not implemented yet; a NOP is emitted as a placeholder, so
      generated code is not functional for this operation }
    list.Concat(taicpu.op_none(A_NOP));
  end;
procedure tcgcpu.a_op_const_reg(list : TAsmList; op : topcg;
  size : tcgsize; a : tcgint; reg : tregister);
  begin
    { TODO: not implemented yet; a NOP is emitted as a placeholder, so
      generated code is not functional for this operation }
    list.Concat(taicpu.op_none(A_NOP));
  end;
  316. procedure tcgcpu.a_op_reg_reg_reg(list : TAsmList; op : topcg;
  317. size : tcgsize; src1,src2,dst : tregister);
  318. var
  319. tmpreg : TRegister;
  320. begin
  321. if op=OP_NOT then
  322. begin
  323. tmpreg:=getintregister(list,size);
  324. list.concat(taicpu.op_reg_const(A_MOVI,tmpreg,-1));
  325. maybeadjustresult(list,op,size,dst);
  326. end
  327. else if op=OP_NEG then
  328. begin
  329. list.concat(taicpu.op_reg_reg(A_NEG,dst,src1));
  330. maybeadjustresult(list,op,size,dst);
  331. end
  332. else if op in [OP_SAR,OP_SHL,OP_SHR] then
  333. begin
  334. if op=OP_SHL then
  335. list.concat(taicpu.op_reg(A_SSL,src1))
  336. else
  337. list.concat(taicpu.op_reg(A_SSR,src1));
  338. list.concat(taicpu.op_reg_reg(TOpCG2AsmOp[op],dst,src2));
  339. maybeadjustresult(list,op,size,dst);
  340. end
  341. else
  342. case op of
  343. OP_MOVE:
  344. a_load_reg_reg(list,size,size,src1,dst);
  345. else
  346. begin
  347. list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOp[op],dst,src2,src1));
  348. maybeadjustresult(list,op,size,dst);
  349. end;
  350. end;
  351. end;
  352. procedure tcgcpu.a_call_name(list : TAsmList; const s : string;
  353. weak : boolean);
  354. begin
  355. if not weak then
  356. list.concat(taicpu.op_sym(txtensaprocinfo(current_procinfo).callins,current_asmdata.RefAsmSymbol(s,AT_FUNCTION)))
  357. else
  358. list.concat(taicpu.op_sym(txtensaprocinfo(current_procinfo).callins,current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)));
  359. end;
procedure tcgcpu.a_call_reg(list : TAsmList; Reg : tregister);
  begin
    { indirect call through a register, using the ABI-selected CALLX
      instruction stored in the current procinfo }
    list.concat(taicpu.op_reg(txtensaprocinfo(current_procinfo).callxins,reg));
  end;
  364. procedure tcgcpu.a_jmp_name(list : TAsmList; const s : string);
  365. var
  366. ai : taicpu;
  367. begin
  368. ai:=TAiCpu.op_sym(A_J,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
  369. ai.is_jmp:=true;
  370. list.Concat(ai);
  371. end;
procedure tcgcpu.g_proc_entry(list : TAsmList; localsize : longint;
  nostackframe : boolean);
  begin
    { windowed ABI: ENTRY rotates the register window and reserves stack.
      NOTE(review): the frame size is hard-coded to 32 and localsize is
      ignored — presumably still incomplete; confirm before relying on
      locals. No prologue is emitted for the call0 ABI. }
    if target_info.abi=abi_xtensa_windowed then
      list.Concat(taicpu.op_reg_const(A_ENTRY,NR_STACK_POINTER_REG,32));
  end;
  378. procedure tcgcpu.g_proc_exit(list : TAsmList; parasize : longint;
  379. nostackframe : boolean);
  380. begin
  381. if target_info.abi=abi_xtensa_windowed then
  382. list.Concat(taicpu.op_none(A_RETW))
  383. else
  384. list.Concat(taicpu.op_none(A_RET));
  385. end;
procedure tcgcpu.a_cmp_reg_reg_label(list : TAsmList; size : tcgsize;
  cmp_op : topcmp; reg1,reg2 : tregister; l : tasmlabel);
  begin
    { TODO: compare-and-branch is not implemented yet; a NOP is emitted as
      a placeholder, so generated code is not functional }
    list.Concat(taicpu.op_none(A_NOP));
  end;
procedure tcgcpu.a_jmp_always(list : TAsmList; l : TAsmLabel);
  var
    ai : taicpu;
  begin
    { unconditional jump to a label; mark the instruction as a jump for
      the register allocator/optimizer }
    ai:=taicpu.op_sym(A_J,l);
    ai.is_jmp:=true;
    list.concat(ai);
  end;
procedure tcgcpu.g_concatcopy(list : TAsmList; const source,
  dest : treference; len : tcgint);
  begin
    { TODO: block copy is not implemented yet; a NOP is emitted as a
      placeholder, so generated code is not functional }
    list.Concat(taicpu.op_none(A_NOP));
  end;
procedure tcgcpu.maybeadjustresult(list : TAsmList; op : TOpCg;
  size : tcgsize; dst : tregister);
  begin
    { intentionally empty for now: no truncation/extension of sub-word
      results is performed after an operation — presumably still to be
      implemented; confirm whether callers rely on adjusted results }
  end;
procedure tcg64fxtensa.a_op64_reg_reg(list : TAsmList; op : TOpCG;
  size : tcgsize; regsrc,regdst : tregister64);
  begin
    { TODO: 64-bit reg/reg operations are not implemented yet; a NOP is
      emitted as a placeholder, so generated code is not functional }
    list.Concat(taicpu.op_none(A_NOP));
  end;
procedure tcg64fxtensa.a_op64_const_reg_reg(list : TAsmList; op : TOpCG;
  size : tcgsize; value : int64; regsrc,regdst : tregister64);
  begin
    { TODO: 64-bit const/reg/reg operations are not implemented yet; a NOP
      is emitted as a placeholder, so generated code is not functional }
    list.Concat(taicpu.op_none(A_NOP));
  end;
procedure tcg64fxtensa.a_op64_const_reg(list : TAsmList; op : TOpCG;
  size : tcgsize; value : int64; reg : tregister64);
  begin
    { TODO: 64-bit const/reg operations are not implemented yet; a NOP is
      emitted as a placeholder, so generated code is not functional }
    list.Concat(taicpu.op_none(A_NOP));
  end;
  423. {$ifdef dummy}
  424. procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
  425. var
  426. href: treference;
  427. so: tshifterop;
  428. accesssize: longint;
  429. begin
  430. if (ref.base=NR_NO) then
  431. begin
  432. if ref.shiftmode<>SM_None then
  433. internalerror(2014110701);
  434. ref.base:=ref.index;
  435. ref.index:=NR_NO;
  436. end;
  437. { no abitrary scale factor support (the generic code doesn't set it,
  438. AArch-specific code shouldn't either) }
  439. if not(ref.scalefactor in [0,1]) then
  440. internalerror(2014111002);
  441. case simple_ref_type(op,size,oppostfix,ref) of
  442. sr_simple:
  443. exit;
  444. sr_internal_illegal:
  445. internalerror(2014121702);
  446. sr_complex:
  447. { continue } ;
  448. end;
  449. if assigned(ref.symbol) then
  450. begin
  451. { internal "load symbol" instructions should already be valid }
  452. if assigned(ref.symboldata) or
  453. (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
  454. internalerror(2014110802);
  455. { no relative symbol support (needed) yet }
  456. if assigned(ref.relsymbol) then
  457. internalerror(2014111001);
  458. { loading a symbol address (whether it's in the GOT or not) consists
  459. of two parts: first load the page on which it is located, then
  460. either the offset in the page or load the value at that offset in
  461. the page. This final GOT-load can be relaxed by the linker in case
  462. the variable itself can be stored directly in the GOT }
  463. if (preferred_newbasereg=NR_NO) or
  464. (ref.base=preferred_newbasereg) or
  465. (ref.index=preferred_newbasereg) then
  466. preferred_newbasereg:=getaddressregister(list);
  467. { load the (GOT) page }
  468. reference_reset_symbol(href,ref.symbol,0,8,[]);
  469. if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
  470. (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
  471. ((ref.symbol.typ=AT_DATA) and
  472. (ref.symbol.bind=AB_LOCAL)) then
  473. href.refaddr:=addr_page
  474. else
  475. href.refaddr:=addr_gotpage;
  476. list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
  477. { load the GOT entry (= address of the variable) }
  478. reference_reset_base(href,preferred_newbasereg,0,ctempposinvalid,sizeof(pint),[]);
  479. href.symbol:=ref.symbol;
  480. { code symbols defined in the current compilation unit do not
  481. have to be accessed via the GOT }
  482. if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
  483. (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
  484. ((ref.symbol.typ=AT_DATA) and
  485. (ref.symbol.bind=AB_LOCAL)) then
  486. begin
  487. href.base:=NR_NO;
  488. href.refaddr:=addr_pageoffset;
  489. list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
  490. end
  491. else
  492. begin
  493. href.refaddr:=addr_gotpageoffset;
  494. { use a_load_ref_reg() rather than directly encoding the LDR,
  495. so that we'll check the validity of the reference }
  496. a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
  497. end;
  498. { set as new base register }
  499. if ref.base=NR_NO then
  500. ref.base:=preferred_newbasereg
  501. else if ref.index=NR_NO then
  502. ref.index:=preferred_newbasereg
  503. else
  504. begin
  505. { make sure it's valid in case ref.base is SP -> make it
  506. the second operand}
  507. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
  508. ref.base:=preferred_newbasereg
  509. end;
  510. ref.symbol:=nil;
  511. end;
  512. { base & index }
  513. if (ref.base<>NR_NO) and
  514. (ref.index<>NR_NO) then
  515. begin
  516. case op of
  517. A_LDR, A_STR:
  518. begin
  519. if (ref.shiftmode=SM_None) and
  520. (ref.shiftimm<>0) then
  521. internalerror(2014110805);
  522. { wrong shift? (possible in case of something like
  523. array_of_2byte_rec[x].bytefield -> shift will be set 1, but
  524. the final load is a 1 byte -> can't use shift after all }
  525. if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
  526. ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
  527. (ref.offset<>0)) then
  528. begin
  529. if preferred_newbasereg=NR_NO then
  530. preferred_newbasereg:=getaddressregister(list);
  531. { "add" supports a superset of the shift modes supported by
  532. load/store instructions }
  533. shifterop_reset(so);
  534. so.shiftmode:=ref.shiftmode;
  535. so.shiftimm:=ref.shiftimm;
  536. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
  537. reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
  538. { possibly still an invalid offset -> fall through }
  539. end
  540. else if ref.offset<>0 then
  541. begin
  542. if (preferred_newbasereg=NR_NO) or
  543. { we keep ref.index, so it must not be overwritten }
  544. (ref.index=preferred_newbasereg) then
  545. preferred_newbasereg:=getaddressregister(list);
  546. { add to the base and not to the index, because the index
  547. may be scaled; this works even if the base is SP }
  548. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
  549. ref.offset:=0;
  550. ref.base:=preferred_newbasereg;
  551. { finished }
  552. exit;
  553. end
  554. else
  555. { valid -> exit }
  556. exit;
  557. end;
  558. { todo }
  559. A_LD1,A_LD2,A_LD3,A_LD4,
  560. A_ST1,A_ST2,A_ST3,A_ST4:
  561. internalerror(2014110704);
  562. { these don't support base+index }
  563. A_LDUR,A_STUR,
  564. A_LDP,A_STP:
  565. begin
  566. { these either don't support pre-/post-indexing, or don't
  567. support it with base+index }
  568. if ref.addressmode<>AM_OFFSET then
  569. internalerror(2014110911);
  570. if preferred_newbasereg=NR_NO then
  571. preferred_newbasereg:=getaddressregister(list);
  572. if ref.shiftmode<>SM_None then
  573. begin
  574. { "add" supports a superset of the shift modes supported by
  575. load/store instructions }
  576. shifterop_reset(so);
  577. so.shiftmode:=ref.shiftmode;
  578. so.shiftimm:=ref.shiftimm;
  579. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
  580. end
  581. else
  582. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
  583. reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
  584. { fall through to the handling of base + offset, since the
  585. offset may still be too big }
  586. end;
  587. else
  588. internalerror(2014110901);
  589. end;
  590. end;
  591. { base + offset }
  592. if ref.base<>NR_NO then
  593. begin
  594. { valid offset for LDUR/STUR -> use that }
  595. if (ref.addressmode=AM_OFFSET) and
  596. (op in [A_LDR,A_STR]) and
  597. (ref.offset>=-256) and
  598. (ref.offset<=255) then
  599. begin
  600. if op=A_LDR then
  601. op:=A_LDUR
  602. else
  603. op:=A_STUR
  604. end
  605. { if it's not a valid LDUR/STUR, use LDR/STR }
  606. else if (op in [A_LDUR,A_STUR]) and
  607. ((ref.offset<-256) or
  608. (ref.offset>255) or
  609. (ref.addressmode<>AM_OFFSET)) then
  610. begin
  611. if op=A_LDUR then
  612. op:=A_LDR
  613. else
  614. op:=A_STR
  615. end;
  616. case op of
  617. A_LDR,A_STR:
  618. begin
  619. case ref.addressmode of
  620. AM_PREINDEXED:
  621. begin
  622. { since the loaded/stored register cannot be the same
  623. as the base register, we can safely add the
  624. offset to the base if it doesn't fit}
  625. if (ref.offset<-256) or
  626. (ref.offset>255) then
  627. begin
  628. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
  629. ref.offset:=0;
  630. end;
  631. end;
  632. AM_POSTINDEXED:
  633. begin
  634. { cannot emulate post-indexing if we have to fold the
  635. offset into the base register }
  636. if (ref.offset<-256) or
  637. (ref.offset>255) then
  638. internalerror(2014110909);
  639. { ok }
  640. end;
  641. AM_OFFSET:
  642. begin
  643. { unsupported offset -> fold into base register }
  644. accesssize:=1 shl tcgsizep2size[size];
  645. if (ref.offset<0) or
  646. (ref.offset>(((1 shl 12)-1)*accesssize)) or
  647. ((ref.offset mod accesssize)<>0) then
  648. begin
  649. if preferred_newbasereg=NR_NO then
  650. preferred_newbasereg:=getaddressregister(list);
  651. { can we split the offset beween an
  652. "add/sub (imm12 shl 12)" and the load (also an
  653. imm12)?
  654. -- the offset from the load will always be added,
  655. that's why the lower bound has a smaller range
  656. than the upper bound; it must also be a multiple
  657. of the access size }
  658. if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
  659. (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
  660. ((ref.offset mod accesssize)=0) then
  661. begin
  662. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
  663. ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
  664. end
  665. else
  666. begin
  667. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
  668. ref.offset:=0;
  669. end;
  670. reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
  671. end;
  672. end
  673. end;
  674. end;
  675. A_LDP,A_STP:
  676. begin
  677. { unsupported offset -> fold into base register (these
  678. instructions support all addressmodes) }
  679. if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
  680. (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
  681. begin
  682. case ref.addressmode of
  683. AM_POSTINDEXED:
  684. { don't emulate post-indexing if we have to fold the
  685. offset into the base register }
  686. internalerror(2014110910);
  687. AM_PREINDEXED:
  688. { this means the offset must be added to the current
  689. base register }
  690. preferred_newbasereg:=ref.base;
  691. AM_OFFSET:
  692. if preferred_newbasereg=NR_NO then
  693. preferred_newbasereg:=getaddressregister(list);
  694. end;
  695. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
  696. reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,ref.alignment,ref.volatility);
  697. end
  698. end;
  699. A_LDUR,A_STUR:
  700. begin
  701. { valid, checked above }
  702. end;
  703. { todo }
  704. A_LD1,A_LD2,A_LD3,A_LD4,
  705. A_ST1,A_ST2,A_ST3,A_ST4:
  706. internalerror(2014110908);
  707. else
  708. internalerror(2014110708);
  709. end;
  710. { done }
  711. exit;
  712. end;
  713. { only an offset -> change to base (+ offset 0) }
  714. if preferred_newbasereg=NR_NO then
  715. preferred_newbasereg:=getaddressregister(list);
  716. a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
  717. reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,newalignment(8,ref.offset),ref.volatility);
  718. end;
  719. function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
  720. var
  721. subreg:Tsubregister;
  722. begin
  723. subreg:=cgsize2subreg(getregtype(reg),size);
  724. result:=reg;
  725. setsubreg(result,subreg);
  726. end;
function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
  begin
    { this code generator handles floating point values via the mm
      register methods (see a_loadmm_* below); asking for a classic FPU
      register is therefore always an internal error }
    internalerror(2014122110);
    { squash warning }
    result:=NR_NO;
  end;
  733. function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
  734. begin
  735. make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
  736. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  737. result:=ref;
  738. end;
{ Emits "op" with an immediate operand "a", encoding it either as a plain
  imm12, as an imm12 shifted left by 12, or -- when neither fits -- by
  materializing "a" in a temporary register first. If "usedest" is false
  the two-operand form (src,imm) is emitted instead of (dst,src,imm).
  "tmpreg" may be NR_NO, in which case a fresh register is allocated when
  needed. When "setflags" is set, the S suffix is added to the opcode. }
procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
  var
    instr: taicpu;
    so: tshifterop;
    hadtmpreg: boolean;
  begin
    { imm12 }
    if (a>=0) and
       (a<=((1 shl 12)-1)) then
      if usedest then
        instr:=taicpu.op_reg_reg_const(op,dst,src,a)
      else
        instr:=taicpu.op_reg_const(op,src,a)
    { imm12 lsl 12 }
    else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
      begin
        { NOTE(review): "so" is not cleared with shifterop_reset here;
          this relies on tshifterop having no fields besides
          shiftmode/shiftimm on AArch64 -- confirm }
        so.shiftmode:=SM_LSL;
        so.shiftimm:=12;
        if usedest then
          instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
        else
          instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
      end
    else
      begin
        { todo: other possible optimizations (e.g. load 16 bit constant in
          register and then add/sub/cmp/cmn shifted the rest) }
        if tmpreg=NR_NO then
          begin
            hadtmpreg:=false;
            tmpreg:=getintregister(list,size);
          end
        else
          begin
            { caller-supplied temp: reserve it for the duration of its use }
            hadtmpreg:=true;
            getcpuregister(list,tmpreg);
          end;
        a_load_const_reg(list,size,a,tmpreg);
        if usedest then
          instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
        else
          instr:=taicpu.op_reg_reg(op,src,tmpreg);
        if hadtmpreg then
          ungetcpuregister(list,tmpreg);
      end;
    if setflags then
      setoppostfix(instr,PF_S);
    list.concat(instr);
  end;
  788. {****************************************************************************
  789. Assembler code
  790. ****************************************************************************}
procedure tcgaarch64.init_register_allocators;
  begin
    inherited init_register_allocators;
    { integer registers available for allocation: X0..X17 and X19..X28.
      Not listed: X18 (presumably reserved as the platform register on
      some targets -- TODO confirm against AAPCS64), X29 (frame pointer,
      see note below) and X30 (link register) }
    rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
        [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
         RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
         RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
         { maybe we can enable this in the future for leaf functions (it's
           the frame pointer)
          ,RS_X29 }],
        first_int_imreg,[]);
    { all 32 SIMD&FP registers are allocatable; R_SUBMMD (64 bit double
      view) is the default subregister }
    rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
        [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
         RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
         RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
         RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
        first_mm_imreg,[]);
  end;
  809. procedure tcgaarch64.done_register_allocators;
  810. begin
  811. rg[R_INTREGISTER].free;
  812. rg[R_FPUREGISTER].free;
  813. rg[R_MMREGISTER].free;
  814. inherited done_register_allocators;
  815. end;
  816. function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
  817. begin
  818. case size of
  819. OS_F32:
  820. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
  821. OS_F64:
  822. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
  823. else
  824. internalerror(2014102701);
  825. end;
  826. end;
  827. procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
  828. begin
  829. if not weak then
  830. list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s,AT_FUNCTION)))
  831. else
  832. list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)));
  833. end;
procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
  begin
    { indirect call: branch-and-link through the address held in reg }
    list.concat(taicpu.op_reg(A_BLR,reg));
  end;
  838. {********************** load instructions ********************}
{ Loads the constant "a" into "reg", picking a short instruction sequence
  with cheap checks: a single MOVN (possibly shifted) for values whose top
  bits are all ones, a single ORR when the remainder is a valid logical
  "shifter constant", and otherwise a MOVZ followed by MOVK instructions
  for each non-zero 16 bit chunk. }
procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
  var
    preva: tcgint;
    opc: tasmop;
    shift,maxshift: byte;
    so: tshifterop;
    reginited: boolean;
    mask: tcgint;
  begin
    { if we load a value into a 32 bit register, it is automatically
      zero-extended to 64 bit }
    if (hi(a)=0) and
       (size in [OS_64,OS_S64]) then
      begin
        size:=OS_32;
        reg:=makeregsize(reg,size);
      end;
    { values <= 32 bit are stored in a 32 bit register }
    if not(size in [OS_64,OS_S64]) then
      a:=cardinal(a);
    if size in [OS_64,OS_S64] then
      begin
        mask:=-1;
        maxshift:=64;
      end
    else
      begin
        mask:=$ffffffff;
        maxshift:=32;
      end;
    { single movn enough? (to be extended)
      MOVN writes not(imm16) shifted into place and sets all other bits
      to 1, so it fits values whose bits above "shift" are all ones }
    shift:=16;
    preva:=a;
    repeat
      if (a shr shift)=(mask shr shift) then
        begin
          if shift=16 then
            list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
          else
            begin
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=shift-16;
              list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
            end;
          exit;
        end;
      { only try the next 16 bits if the current one is all 1 bits, since
        the movn will set all lower bits to 1 }
      if word(a shr (shift-16))<>$ffff then
        break;
      inc(shift,16);
    until shift=maxshift;
    reginited:=false;
    shift:=0;
    { can be optimized later to use more movn }
    repeat
      { leftover is shifterconst? (don't check if we can represent it just
        as effectively with movz/movk, as this check is expensive)
        -- only attempted while we are in the lower half of the value's
        bit width and more than one chunk remains }
      if ((shift<tcgsize2size[size]*(8 div 2)) and
          (word(a)<>0) and
          ((a shr 16)<>0)) and
         is_shifter_const(a shl shift,size) then
        begin
          if reginited then
            list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
          else
            { ORR with the zero register doubles as a plain load }
            list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
          exit;
        end;
      { set all 16 bit parts <> 0 (the shift=0/a=0 special case emits a
        MOVZ #0 so that loading zero still produces an instruction) }
      if (word(a)<>0) or
         ((shift=0) and
          (a=0)) then
        if shift=0 then
          begin
            list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
            reginited:=true;
          end
        else
          begin
            shifterop_reset(so);
            so.shiftmode:=SM_LSL;
            so.shiftimm:=shift;
            { the first emitted chunk must be MOVZ (clears the rest of the
              register); subsequent chunks are merged in with MOVK }
            if not reginited then
              begin
                opc:=A_MOVZ;
                reginited:=true;
              end
            else
              opc:=A_MOVK;
            list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
          end;
      preva:=a;
      a:=a shr 16;
      inc(shift,16);
    until word(preva)=preva;
    if not reginited then
      internalerror(2014102702);
  end;
  939. procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
  940. var
  941. reg: tregister;
  942. begin
  943. { use the zero register if possible }
  944. if a=0 then
  945. begin
  946. if size in [OS_64,OS_S64] then
  947. reg:=NR_XZR
  948. else
  949. reg:=NR_WZR;
  950. a_load_reg_ref(list,size,size,reg,ref);
  951. end
  952. else
  953. inherited;
  954. end;
{ Stores "reg" (of size fromsize) to "ref" as a value of size tosize,
  first reconciling the register width with the store width and routing
  under-aligned references to the unaligned helper. }
procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  var
    oppostfix:toppostfix;
    hreg: tregister;
  begin
    { storing never needs more bits than the destination: truncate the
      source view to the store size }
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      begin
        fromsize:=tosize;
        reg:=makeregsize(list,reg,fromsize);
      end
    { have a 32 bit register but need a 64 bit one? }
    else if tosize in [OS_64,OS_S64] then
      begin
        { sign extend if necessary }
        if fromsize in [OS_S8,OS_S16,OS_S32] then
          begin
            { can't overwrite reg, may be a constant reg }
            hreg:=getintregister(list,tosize);
            a_load_reg_reg(list,fromsize,tosize,reg,hreg);
            reg:=hreg;
          end
        else
          { top 32 bit are zero by default }
          reg:=makeregsize(reg,OS_64);
        fromsize:=tosize;
      end;
    { alignment=0 means "unknown/default" and is not treated as unaligned }
    if (ref.alignment<>0) and
       (ref.alignment<tcgsize2size[tosize]) then
      begin
        a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
      end
    else
      begin
        { pick the STR width suffix matching the store size }
        case tosize of
          { signed integer registers }
          OS_8,
          OS_S8:
            oppostfix:=PF_B;
          OS_16,
          OS_S16:
            oppostfix:=PF_H;
          OS_32,
          OS_S32,
          OS_64,
          OS_S64:
            oppostfix:=PF_None;
          else
            InternalError(200308299);
        end;
        handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
      end;
  end;
{ Loads a value of size fromsize from "ref" into "reg" (of size tosize),
  selecting the LDR variant (byte/halfword/signed/...) and routing
  under-aligned references to the unaligned helper. }
procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  var
    oppostfix:toppostfix;
  begin
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      fromsize:=tosize;
    { ensure that all bits of the 32/64 register are always correctly set:
      * default behaviour is always to zero-extend to the entire (64 bit)
        register -> unsigned 8/16/32 bit loads only exist with a 32 bit
        target register, as the upper 32 bit will be zeroed implicitly
        -> always make target register 32 bit
      * signed loads exist both with 32 and 64 bit target registers,
        depending on whether the value should be sign extended to 32 or
        to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
        corresponding 64 bit register are again zeroed) -> no need to
        change anything (we only have 32 and 64 bit registers), except that
        when loading an OS_S32 to a 32 bit register, we don't need/can't
        use sign extension
    }
    if fromsize in [OS_8,OS_16,OS_32] then
      reg:=makeregsize(reg,OS_32);
    { alignment=0 means "unknown/default" and is not treated as unaligned }
    if (ref.alignment<>0) and
       (ref.alignment<tcgsize2size[fromsize]) then
      begin
        a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
        exit;
      end;
    case fromsize of
      { signed integer registers }
      OS_8:
        oppostfix:=PF_B;
      OS_S8:
        oppostfix:=PF_SB;
      OS_16:
        oppostfix:=PF_H;
      OS_S16:
        oppostfix:=PF_SH;
      OS_S32:
        { loading signed 32 bit into a 32 bit register needs (and has) no
          sign-extending form }
        if getsubreg(reg)=R_SUBD then
          oppostfix:=PF_NONE
        else
          oppostfix:=PF_SW;
      OS_32,
      OS_64,
      OS_S64:
        oppostfix:=PF_None;
      else
        InternalError(200308297);
    end;
    handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
    { clear upper 16 bits if the value was negative }
    if (fromsize=OS_S8) and (tosize=OS_16) then
      a_load_reg_reg(list,fromsize,tosize,reg,reg);
  end;
{ Loads a value from a reference whose alignment is smaller than the
  access size, by assembling it from narrower loads (32/16/8 bit pieces)
  and BFI bit-field inserts. Offset walking direction depends on the
  target's endianness. }
procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
  var
    href: treference;
    hreg1, hreg2, tmpreg,tmpreg2: tregister;
    i : Integer;
  begin
    case fromsize of
      OS_64,OS_S64:
        begin
          { split into two 32 bit loads }
          hreg1:=getintregister(list,OS_32);
          hreg2:=getintregister(list,OS_32);
          { on big endian the first (lower-address) word is the high half }
          if target_info.endian=endian_big then
            begin
              tmpreg:=hreg1;
              hreg1:=hreg2;
              hreg2:=tmpreg;
            end;
          { can we use LDP? }
          if (ref.alignment=4) and
             (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
            list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
          else
            begin
              a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
              href:=ref;
              inc(href.offset,4);
              a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
            end;
          { combine: low word zero-extended, high word inserted in bits 32..63 }
          a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
          list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
        end;
      OS_16,OS_S16,
      OS_32,OS_S32:
        begin
          { 2-byte aligned: assemble from 16 bit pieces }
          if ref.alignment=2 then
            begin
              href:=ref;
              { big endian: start at the last (least significant) piece
                and walk down }
              if target_info.endian=endian_big then
                inc(href.offset,tcgsize2size[fromsize]-2);
              tmpreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_16,OS_32,href,tmpreg);
              tmpreg2:=getintregister(list,OS_32);
              for i:=1 to (tcgsize2size[fromsize]-1) div 2 do
                begin
                  if target_info.endian=endian_big then
                    dec(href.offset,2)
                  else
                    inc(href.offset,2);
                  a_load_ref_reg(list,OS_16,OS_32,href,tmpreg2);
                  list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*16,16));
                end;
              a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
            end
          else
            { byte aligned (or odd alignment): assemble from 8 bit pieces }
            begin
              href:=ref;
              if target_info.endian=endian_big then
                inc(href.offset,tcgsize2size[fromsize]-1);
              tmpreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_8,OS_32,href,tmpreg);
              tmpreg2:=getintregister(list,OS_32);
              for i:=1 to tcgsize2size[fromsize]-1 do
                begin
                  if target_info.endian=endian_big then
                    dec(href.offset)
                  else
                    inc(href.offset);
                  a_load_ref_reg(list,OS_8,OS_32,href,tmpreg2);
                  list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*8,8));
                end;
              a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
            end;
        end;
      else
        inherited;
    end;
  end;
{ Register-to-register move with the size conversion (truncation,
  zero/sign extension) implied by fromsize/tosize. Plain moves are
  reported to the register allocator so it can coalesce them away. }
procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
  var
    instr: taicpu;
  begin
    { we use both 32 and 64 bit registers -> insert conversion when
      we have to truncate/sign extend inside the (32 or 64 bit) register
      holding the value, and when we sign extend from a 32 to a 64 bit
      register }
    if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
       ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
        (fromsize<>tosize) and
        not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
       ((fromsize in [OS_S8,OS_S16,OS_S32]) and
        (tosize in [OS_64,OS_S64])) or
       { needs to mask out the sign in the top 16 bits }
       ((fromsize=OS_S8) and
        (tosize=OS_16)) then
      begin
        case tosize of
          OS_8:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
          OS_16:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
          OS_S8:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
          OS_S16:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
          { while "mov wN, wM" automatically inserts a zero-extension and
            hence we could encode a 64->32 bit move like that, the problem
            is that we then can't distinguish 64->32 from 32->32 moves, and
            the 64->32 truncation could be removed altogether... So use a
            different instruction }
          OS_32,
          OS_S32:
            { in theory, reg1 should be 64 bit here (since fromsize>tosize),
              but because of the way location_force_register() tries to
              avoid superfluous zero/sign extensions, it's not always the
              case -> also force reg1 to to 64 bit }
            list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
          OS_64,
          OS_S64:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
          else
            internalerror(2002090901);
        end;
      end
    else
      begin
        { 32 -> 32 bit move implies zero extension (sign extensions have
          been handled above) -> also use for 32 <-> 64 bit moves }
        if not(fromsize in [OS_64,OS_S64]) or
           not(tosize in [OS_64,OS_S64]) then
          instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
        else
          instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
        list.Concat(instr);
        { Notify the register allocator that we have written a move instruction so
          it can try to eliminate it. }
        add_move_instruction(instr);
      end;
  end;
{ Loads the *address* described by "ref" into register "r": the reference
  is first simplified as if for a 64 bit load, after which the remaining
  base/index/offset (or label) is combined with ADR/ADD/MOV. }
procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
  var
    href: treference;
    so: tshifterop;
    op: tasmop;
  begin
    op:=A_LDR;
    href:=ref;
    { simplify as if we're going to perform a regular 64 bit load, using
      "r" as the new base register if possible/necessary }
    make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
    { load literal? }
    if assigned(href.symbol) then
      begin
        { after simplification only pc-relative label references (with
          symboldata) may remain symbolic }
        if (href.base<>NR_NO) or
           (href.index<>NR_NO) or
           not assigned(href.symboldata) then
          internalerror(2014110912);
        list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
      end
    else
      begin
        if href.index<>NR_NO then
          begin
            if href.shiftmode<>SM_None then
              begin
                { "add" supports a superset of the shift modes supported by
                  load/store instructions }
                shifterop_reset(so);
                so.shiftmode:=href.shiftmode;
                so.shiftimm:=href.shiftimm;
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
              end
            else
              a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
          end
        else if href.offset<>0 then
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
        else
          { plain base register: a simple move suffices }
          a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
      end;
  end;
procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
  begin
    { floating point values are handled via the mm register methods
      (a_loadmm_*); this entry point must never be reached }
    internalerror(2014122107)
  end;
procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  begin
    { floating point values are handled via the mm register methods
      (a_loadmm_*); this entry point must never be reached }
    internalerror(2014122108)
  end;
procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  begin
    { floating point values are handled via the mm register methods
      (a_loadmm_*); this entry point must never be reached }
    internalerror(2014122109)
  end;
{ Moves/converts between two mm (floating point) registers: FMOV for a
  same-size move (reported to the register allocator for elimination),
  FCVT for a precision conversion. Only scalar shuffles are supported. }
procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
  var
    instr: taicpu;
  begin
    if assigned(shuffle) and
       not shufflescalar(shuffle) then
      internalerror(2014122104);
    if fromsize=tosize then
      begin
        instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
        { Notify the register allocator that we have written a move
          instruction so it can try to eliminate it. }
        add_move_instruction(instr);
        { FMOV cannot generate a floating point exception }
      end
    else
      begin
        { sanity check: register widths must match the requested sizes }
        if (reg_cgsize(reg1)<>fromsize) or
           (reg_cgsize(reg2)<>tosize) then
          internalerror(2014110913);
        instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
        { a conversion may raise an FPU exception -> emit a check if the
          current settings require it }
        maybe_check_for_fpu_exception(list);
      end;
    list.Concat(instr);
  end;
  1279. procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  1280. var
  1281. tmpreg: tregister;
  1282. begin
  1283. if assigned(shuffle) and
  1284. not shufflescalar(shuffle) then
  1285. internalerror(2014122105);
  1286. tmpreg:=NR_NO;
  1287. if (fromsize<>tosize) then
  1288. begin
  1289. tmpreg:=reg;
  1290. reg:=getmmregister(list,fromsize);
  1291. end;
  1292. handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
  1293. if (fromsize<>tosize) then
  1294. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
  1295. end;
  1296. procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  1297. var
  1298. tmpreg: tregister;
  1299. begin
  1300. if assigned(shuffle) and
  1301. not shufflescalar(shuffle) then
  1302. internalerror(2014122106);
  1303. if (fromsize<>tosize) then
  1304. begin
  1305. tmpreg:=getmmregister(list,tosize);
  1306. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
  1307. reg:=tmpreg;
  1308. end;
  1309. handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
  1310. end;
{ Transfers an integer register into an mm register (INS). Only 4/8 byte
  scalar transfers with equal source/destination size are supported. }
procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  begin
    if not shufflescalar(shuffle) then
      internalerror(2014122801);
    if not(tcgsize2size[fromsize] in [4,8]) or
       (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
      internalerror(2014122803);
    list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
  end;
  1320. procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
  1321. var
  1322. r : tregister;
  1323. begin
  1324. if not shufflescalar(shuffle) then
  1325. internalerror(2014122802);
  1326. if not(tcgsize2size[fromsize] in [4,8]) or
  1327. (tcgsize2size[fromsize]>tcgsize2size[tosize]) then
  1328. internalerror(2014122804);
  1329. if tcgsize2size[fromsize]<tcgsize2size[tosize] then
  1330. r:=makeregsize(intreg,fromsize)
  1331. else
  1332. r:=intreg;
  1333. list.concat(taicpu.op_reg_reg(A_UMOV,r,mmreg));
  1334. end;
{ In-place mm register operation. Currently only supports the self-XOR
  used to zero a register (emitted as MOVI #0 on the 64 bit view). }
procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
  begin
    case op of
      { "xor Vx,Vx" is used to initialize global regvars to 0 }
      OP_XOR:
        begin
          { only the zeroing pattern (src=dst, matching size, no shuffle)
            is implemented }
          if (src<>dst) or
             (reg_cgsize(src)<>size) or
             assigned(shuffle) then
            internalerror(2015011401);
          case size of
            OS_F32,
            OS_F64:
              list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
            else
              internalerror(2015011402);
          end;
        end
      else
        internalerror(2015011403);
    end;
  end;
{ Bit scan (forward or reverse) of src into dst. Forward scan uses
  RBIT+CLZ, reverse scan uses CLZ+EOR; a zero source yields 255 in dst
  (CSINV writes NOT(zero-reg) = -1 when the compare found zero, and the
  final UXTB masks that -1 down to 255). }
procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
  var
    bitsize: longint;
  begin
    if srcsize in [OS_64,OS_S64] then
      begin
        bitsize:=64;
      end
    else
      begin
        bitsize:=32;
      end;
    { source is 0 -> dst will have to become 255 }
    list.concat(taicpu.op_reg_const(A_CMP,src,0));
    if reverse then
      begin
        list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
        { xor 31/63 is the same as setting the lower 5/6 bits to
          "31/63-(lower 5/6 bits of dst)" }
        list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
      end
    else
      begin
        { count leading zeros of the bit-reversed value = index of the
          lowest set bit }
        list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
        list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
      end;
    { set dst to -1 if src was 0 }
    list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
    { mask the -1 to 255 if src was 0 (anyone find a two-instruction
      branch-free version? All of mine are 3...) }
    list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,makeregsize(dst,OS_32),makeregsize(dst,OS_32)),PF_B));
  end;
    { Stores "register" to a potentially unaligned reference. 64 bit values
      are split into two 32 bit stores (or a single 32 bit STP when the
      reference allows it); all other sizes are handled by the inherited
      generic implementation. }
    procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
      var
        href: treference;
        hreg1, hreg2, tmpreg: tregister;
      begin
        if fromsize in [OS_64,OS_S64] then
          begin
            { split into two 32 bit stores }
            hreg1:=getintregister(list,OS_32);
            hreg2:=getintregister(list,OS_32);
            { hreg1 := low 32 bits, hreg2 := high 32 bits }
            a_load_reg_reg(list,OS_32,OS_32,makeregsize(register,OS_32),hreg1);
            a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
            if target_info.endian=endian_big then
              begin
                { big endian: the high half is stored at the lower address }
                tmpreg:=hreg1;
                hreg1:=hreg2;
                hreg2:=tmpreg;
              end;
            { can we use STP? }
            if (ref.alignment=4) and
               (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
              list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
            else
              begin
                a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
                href:=ref;
                inc(href.offset,4);
                a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
              end;
          end
        else
          inherited;
      end;
  1422. procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
  1423. const
  1424. overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
  1425. begin
  1426. if (op in overflowops) and
  1427. (size in [OS_8,OS_S8,OS_16,OS_S16]) then
  1428. a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
  1429. end;
  1430. procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
  1431. begin
  1432. optimize_op_const(size,op,a);
  1433. case op of
  1434. OP_NONE:
  1435. exit;
  1436. OP_MOVE:
  1437. a_load_const_reg(list,size,a,reg);
  1438. OP_NEG,OP_NOT:
  1439. internalerror(200306011);
  1440. else
  1441. a_op_const_reg_reg(list,op,size,a,reg,reg);
  1442. end;
  1443. end;
  1444. procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
  1445. begin
  1446. Case op of
  1447. OP_NEG,
  1448. OP_NOT:
  1449. begin
  1450. list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
  1451. maybeadjustresult(list,op,size,dst);
  1452. end
  1453. else
  1454. a_op_reg_reg_reg(list,op,size,src,dst,dst);
  1455. end;
  1456. end;
  1457. procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
  1458. var
  1459. l: tlocation;
  1460. begin
  1461. a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
  1462. end;
    { Emits dst := src2 op src1 for register operands. }
    procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
      var
        hreg: tregister;
      begin
        { no ROLV opcode... }
        if op=OP_ROL then
          begin
            case size of
              OS_32,OS_S32,
              OS_64,OS_S64:
                begin
                  { "rol x,y" = "ror x,bitsize-y" }
                  hreg:=getintregister(list,size);
                  a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
                  a_op_reg_reg(list,OP_SUB,size,src1,hreg);
                  a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
                  exit;
                end;
              else
                internalerror(2014111005);
            end;
          end
        else if (op=OP_ROR) and
           not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
          internalerror(2014111006);
        if TOpCG2AsmOpReg[op]=A_NONE then
          internalerror(2014111007);
        { note the operand order: dst := src2 op src1 }
        list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
        maybeadjustresult(list,op,size,dst);
      end;
    { Emits dst := src op a; if setflags is true, ovloc receives the
      location/condition that signals overflow afterwards (LOC_VOID if the
      operation cannot set it). }
    procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
      var
        shiftcountmask: longint;
        constreg: tregister;
      begin
        { add/sub instructions have only positive immediate operands }
        if (op in [OP_ADD,OP_SUB]) and
           (a<0) then
          begin
            { negate the constant and flip the operation }
            if op=OP_ADD then
              op:=op_SUB
            else
              op:=OP_ADD;
            { avoid range/overflow error in case a = low(tcgint) }
{$push}{$r-}{$q-}
            a:=-a;
{$pop}
          end;
        ovloc.loc:=LOC_VOID;
        { may reduce the operation to OP_NONE or OP_MOVE }
        optimize_op_const(size,op,a);
        case op of
          OP_NONE:
            begin
              { no-op: just copy src to dst }
              a_load_reg_reg(list,size,size,src,dst);
              exit;
            end;
          OP_MOVE:
            begin
              { result is the constant itself }
              a_load_const_reg(list,size,a,dst);
              exit;
            end;
          else
            ;
        end;
        case op of
          OP_ADD,
          OP_SUB:
            begin
              handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
              { on a 64 bit target, overflows with smaller data types
                are handled via range errors }
              if setflags and
                 (size in [OS_64,OS_S64]) then
                begin
                  location_reset(ovloc,LOC_FLAGS,OS_8);
                  { unsigned overflow -> carry flag, signed -> overflow flag }
                  if size=OS_64 then
                    if op=OP_ADD then
                      ovloc.resflags:=F_CS
                    else
                      ovloc.resflags:=F_CC
                  else
                    ovloc.resflags:=F_VS;
                end;
            end;
          OP_OR,
          OP_AND,
          OP_XOR:
            begin
              { zero-extend the constant for 32 bit logical operations }
              if not(size in [OS_64,OS_S64]) then
                a:=cardinal(a);
              if is_shifter_const(a,size) then
                list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
              else
                begin
                  { constant not encodable -> load it into a register first }
                  constreg:=getintregister(list,size);
                  a_load_const_reg(list,size,a,constreg);
                  a_op_reg_reg_reg(list,op,size,constreg,src,dst);
                end;
            end;
          OP_SHL,
          OP_SHR,
          OP_SAR:
            begin
              if size in [OS_64,OS_S64] then
                shiftcountmask:=63
              else
                shiftcountmask:=31;
              if (a and shiftcountmask)<>0 Then
                list.concat(taicpu.op_reg_reg_const(
                  TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
              else
                { shift by 0 -> plain move }
                a_load_reg_reg(list,size,size,src,dst);
              { shift counts beyond the register width are invalid }
              if (a and not(tcgint(shiftcountmask)))<>0 then
                internalError(2014112101);
            end;
          OP_ROL,
          OP_ROR:
            begin
              case size of
                OS_32,OS_S32:
                  if (a and not(tcgint(31)))<>0 then
                    internalError(2014112102);
                OS_64,OS_S64:
                  if (a and not(tcgint(63)))<>0 then
                    internalError(2014112103);
                else
                  internalError(2014112104);
              end;
              { there's only a ror opcode }
              if op=OP_ROL then
                a:=(tcgsize2size[size]*8)-a;
              list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
            end;
          OP_MUL,
          OP_IMUL,
          OP_DIV,
          OP_IDIV:
            begin
              { no immediate forms exist -> load the constant into a register }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
          else
            internalerror(2014111403);
        end;
        maybeadjustresult(list,op,size,dst);
      end;
    { Emits dst := src2 op src1; if setflags is true, ovloc receives the
      location/condition that signals overflow afterwards (LOC_VOID if no
      overflow checking was performed). }
    procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
      var
        tmpreg1, tmpreg2: tregister;
      begin
        ovloc.loc:=LOC_VOID;
        { overflow can only occur with 64 bit calculations on 64 bit cpus }
        if setflags and
           (size in [OS_64,OS_S64]) then
          begin
            case op of
              OP_ADD,
              OP_SUB:
                begin
                  { use the flag-setting form (adds/subs) }
                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
                  ovloc.loc:=LOC_FLAGS;
                  { unsigned overflow -> carry flag, signed -> overflow flag }
                  if size=OS_64 then
                    if op=OP_ADD then
                      ovloc.resflags:=F_CS
                    else
                      ovloc.resflags:=F_CC
                  else
                    ovloc.resflags:=F_VS;
                  { finished }
                  exit;
                end;
              OP_MUL:
                begin
                  { check whether the upper 64 bit of the 128 bit product is 0 }
                  tmpreg1:=getintregister(list,OS_64);
                  list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
                  list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
                  ovloc.loc:=LOC_FLAGS;
                  ovloc.resflags:=F_NE;
                  { still have to perform the actual multiplication }
                end;
              OP_IMUL:
                begin
                  { check whether the upper 64 bits of the 128 bit multiplication
                    result have the same value as the replicated sign bit of the
                    lower 64 bits }
                  tmpreg1:=getintregister(list,OS_64);
                  list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
                  { calculate lower 64 bits (afterwards, because dst may be
                    equal to src1 or src2) }
                  a_op_reg_reg_reg(list,op,size,src1,src2,dst);
                  { replicate sign bit }
                  tmpreg2:=getintregister(list,OS_64);
                  a_op_const_reg_reg(list,OP_SAR,OS_S64,63,dst,tmpreg2);
                  list.concat(taicpu.op_reg_reg(A_CMP,tmpreg1,tmpreg2));
                  ovloc.loc:=LOC_FLAGS;
                  ovloc.resflags:=F_NE;
                  { finished }
                  exit;
                end;
              OP_IDIV,
              OP_DIV:
                begin
                  { not handled here, needs div-by-zero check (dividing by zero
                    just gives a 0 result on aarch64), and low(int64) div -1
                    check for overflow) }
                  internalerror(2014122101);
                end;
              else
                internalerror(2019050936);
            end;
          end;
        a_op_reg_reg_reg(list,op,size,src1,src2,dst);
      end;
{*************** compare instructions ****************}
    { Compares reg against the constant a and jumps to l if cmp_op holds. }
    procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
      var
        op: tasmop;
      begin
        { negative constants are handled by comparing against the negated
          value with cmn (compare negative) instead }
        if a>=0 then
          op:=A_CMP
        else
          op:=A_CMN;
        { avoid range/overflow error in case a=low(tcgint) }
{$push}{$r-}{$q-}
        handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
{$pop}
        a_jmp_cond(list,cmp_op,l);
      end;
    { Compares two registers and jumps to l if cmp_op holds. Note the
      operand order: the flags must reflect "reg2 cmp_op reg1". }
    procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
      begin
        list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
        a_jmp_cond(list,cmp_op,l);
      end;
  1697. procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
  1698. var
  1699. ai: taicpu;
  1700. begin
  1701. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name,AT_FUNCTION));
  1702. ai.is_jmp:=true;
  1703. list.Concat(ai);
  1704. end;
  1705. procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
  1706. var
  1707. ai: taicpu;
  1708. begin
  1709. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
  1710. ai.is_jmp:=true;
  1711. list.Concat(ai);
  1712. end;
  1713. procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
  1714. var
  1715. ai: taicpu;
  1716. begin
  1717. ai:=TAiCpu.op_sym(A_B,l);
  1718. ai.is_jmp:=true;
  1719. ai.SetCondition(TOpCmp2AsmCond[cond]);
  1720. list.Concat(ai);
  1721. end;
  1722. procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
  1723. var
  1724. ai : taicpu;
  1725. begin
  1726. ai:=Taicpu.op_sym(A_B,l);
  1727. ai.is_jmp:=true;
  1728. ai.SetCondition(flags_to_cond(f));
  1729. list.Concat(ai);
  1730. end;
    { Materialises the flags into a register: cset sets reg to 1 if the
      condition derived from f holds, and to 0 otherwise. }
    procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
      begin
        list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
      end;
    { Deliberately unsupported on this target; use g_overflowcheck_loc with
      an explicit overflow location instead. }
    procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
      begin
        { we need an explicit overflow location, because there are many
          possibilities (not just the overflow flag, which is only used for
          signed add/sub) }
        internalerror(2014112303);
      end;
  1742. procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
  1743. var
  1744. hl : tasmlabel;
  1745. hflags : tresflags;
  1746. begin
  1747. if not(cs_check_overflow in current_settings.localswitches) then
  1748. exit;
  1749. current_asmdata.getjumplabel(hl);
  1750. case ovloc.loc of
  1751. LOC_FLAGS:
  1752. begin
  1753. hflags:=ovloc.resflags;
  1754. inverse_flags(hflags);
  1755. cg.a_jmp_flags(list,hflags,hl);
  1756. end;
  1757. else
  1758. internalerror(2014112304);
  1759. end;
  1760. a_call_name(list,'FPC_OVERFLOW',false);
  1761. a_label(list,hl);
  1762. end;
  1763. { *********** entry/exit code and address loading ************ }
    { Stores the registers of type rt in [lowsr..highsr] that are modified
      by the current procedure on the stack, pairwise via pre-indexed stp
      with writeback, and returns the number of bytes pushed. The stack
      stays 16 byte aligned throughout. }
    function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
      var
        ref: treference;
        sr: tsuperregister;
        pairreg: tregister;
      begin
        result:=0;
        { [sp,#-16]! -> push 16 bytes per stp }
        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
        ref.addressmode:=AM_PREINDEXED;
        pairreg:=NR_NO;
        { store all used registers pairwise }
        for sr:=lowsr to highsr do
          if sr in rg[rt].used_in_proc then
            if pairreg=NR_NO then
              { remember the first register of the next pair }
              pairreg:=newreg(rt,sr,sub)
            else
              begin
                inc(result,16);
                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
                pairreg:=NR_NO
              end;
        { one left -> store twice (stack must be 16 bytes aligned) }
        if pairreg<>NR_NO then
          begin
            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
            inc(result,16);
          end;
      end;
    { ForEachCall callback: rebases SP-relative parameters and locals onto
      the frame pointer, subtracting the total stack frame size (passed via
      arg). Used for routines containing nested procedures; see the comment
      in g_proc_entry. }
    procedure FixupOffsets(p:TObject;arg:pointer);
      var
        sym: tabstractnormalvarsym absolute p;
      begin
        if (tsym(p).typ in [paravarsym,localvarsym]) and
           (sym.localloc.loc=LOC_REFERENCE) and
           (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
          begin
            sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
            dec(sym.localloc.reference.offset,PLongint(arg)^);
          end;
      end;
  1804. procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
  1805. var
  1806. ref: treference;
  1807. totalstackframesize: longint;
  1808. begin
  1809. if nostackframe then
  1810. exit;
  1811. { stack pointer has to be aligned to 16 bytes at all times }
  1812. localsize:=align(localsize,16);
  1813. { save stack pointer and return address }
  1814. reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
  1815. ref.addressmode:=AM_PREINDEXED;
  1816. list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
  1817. { initialise frame pointer }
  1818. a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
  1819. totalstackframesize:=localsize;
  1820. { save modified integer registers }
  1821. inc(totalstackframesize,
  1822. save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
  1823. { only the lower 64 bits of the modified vector registers need to be
  1824. saved; if the caller needs the upper 64 bits, it has to save them
  1825. itself }
  1826. inc(totalstackframesize,
  1827. save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
  1828. { allocate stack space }
  1829. if localsize<>0 then
  1830. begin
  1831. localsize:=align(localsize,16);
  1832. current_procinfo.final_localsize:=localsize;
  1833. handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
  1834. end;
  1835. { By default, we use the frame pointer to access parameters passed via
  1836. the stack and the stack pointer to address local variables and temps
  1837. because
  1838. a) we can use bigger positive than negative offsets (so accessing
  1839. locals via negative offsets from the frame pointer would be less
  1840. efficient)
  1841. b) we don't know the local size while generating the code, so
  1842. accessing the parameters via the stack pointer is not possible
  1843. without copying them
  1844. The problem with this is the get_frame() intrinsic:
  1845. a) it must return the same value as what we pass as parentfp
  1846. parameter, since that's how it's used in the TP-style objects unit
  1847. b) its return value must usable to access all local data from a
  1848. routine (locals and parameters), since it's all the nested
  1849. routines have access to
  1850. c) its return value must be usable to construct a backtrace, as it's
  1851. also used by the exception handling routines
  1852. The solution we use here, based on something similar that's done in
  1853. the MIPS port, is to generate all accesses to locals in the routine
  1854. itself SP-relative, and then after the code is generated and the local
  1855. size is known (namely, here), we change all SP-relative variables/
  1856. parameters into FP-relative ones. This means that they'll be accessed
  1857. less efficiently from nested routines, but those accesses are indirect
  1858. anyway and at least this way they can be accessed at all
  1859. }
  1860. if current_procinfo.has_nestedprocs then
  1861. begin
  1862. current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
  1863. current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
  1864. end;
  1865. end;
    { Emits GOT pointer initialisation where a target requires it. }
    procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
      begin
        { nothing to do on Darwin or Linux }
      end;
    { Intentionally empty: register restoring is handled by g_proc_exit. }
    procedure tcgaarch64.g_restore_registers(list:TAsmList);
      begin
        { done in g_proc_exit }
      end;
    { Restores the registers saved by save_regs; must mirror its layout
      exactly. Registers are reloaded in reverse order via post-indexed
      ldp/ldr with writeback, each popping 16 bytes. }
    procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
      var
        ref: treference;
        sr, highestsetsr: tsuperregister;
        pairreg: tregister;
        regcount: longint;
      begin
        { [sp],#16 -> pop 16 bytes per load }
        reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
        ref.addressmode:=AM_POSTINDEXED;
        { highest reg stored twice? }
        regcount:=0;
        highestsetsr:=RS_NO;
        for sr:=lowsr to highsr do
          if sr in rg[rt].used_in_proc then
            begin
              inc(regcount);
              highestsetsr:=sr;
            end;
        { an odd number of registers means save_regs stored the highest one
          twice -> reload it with a single ldr (still popping 16 bytes) }
        if odd(regcount) then
          begin
            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
            highestsetsr:=pred(highestsetsr);
          end;
        { load all (other) used registers pairwise }
        pairreg:=NR_NO;
        for sr:=highestsetsr downto lowsr do
          if sr in rg[rt].used_in_proc then
            if pairreg=NR_NO then
              pairreg:=newreg(rt,sr,sub)
            else
              begin
                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
                pairreg:=NR_NO
              end;
        { There can't be any register left }
        if pairreg<>NR_NO then
          internalerror(2014112602);
      end;
    { Generates the procedure exit code: frees the local stack space,
      restores the callee-saved registers and FP/LR, and returns. }
    procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
      var
        ref: treference;
        regsstored: boolean;
        sr: tsuperregister;
      begin
        if not(nostackframe) and
           { we do not need an exit stack frame when we never return
             * the final ret is left so the peephole optimizer can easily do call/ret -> jmp or call conversions
             * the entry stack frame must be normally generated because the subroutine could be still left by
               an exception and then the unwinding code might need to restore the registers stored by the entry code
           }
           not(po_noreturn in current_procinfo.procdef.procoptions) then
          begin
            { if no registers have been stored, we don't have to subtract the
              allocated temp space from the stack pointer }
            regsstored:=false;
            for sr:=RS_X19 to RS_X28 do
              if sr in rg[R_INTREGISTER].used_in_proc then
                begin
                  regsstored:=true;
                  break;
                end;
            if not regsstored then
              for sr:=RS_D8 to RS_D15 do
                if sr in rg[R_MMREGISTER].used_in_proc then
                  begin
                    regsstored:=true;
                    break;
                  end;
            { restore registers (and stack pointer) }
            if regsstored then
              begin
                { free the local stack space first so the register save area
                  is at the top of the stack }
                if current_procinfo.final_localsize<>0 then
                  handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
                { reverse order compared to g_proc_entry }
                load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
                load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
              end
            else if current_procinfo.final_localsize<>0 then
              { restore stack pointer }
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
            { restore framepointer and return address }
            reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
            ref.addressmode:=AM_POSTINDEXED;
            list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
          end;
        { return }
        list.concat(taicpu.op_none(A_RET));
      end;
    { Intentionally empty: register saving is handled by g_proc_entry. }
    procedure tcgaarch64.g_save_registers(list : TAsmList);
      begin
        { done in g_proc_entry }
      end;
  1965. { ************* concatcopy ************ }
    { Copies len bytes from source to dest by calling the RTL Move routine
      (FPC_MOVE). }
    procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
      var
        paraloc1,paraloc2,paraloc3 : TCGPara;
        pd : tprocdef;
      begin
        pd:=search_system_proc('MOVE');
        paraloc1.init;
        paraloc2.init;
        paraloc3.init;
        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
        { parameters are loaded in reverse order (presumably so earlier
          parameter locations are not clobbered while materialising later
          ones -- NOTE(review): confirm against the generic code generator) }
        a_load_const_cgpara(list,OS_SINT,len,paraloc3);
        a_loadaddr_ref_cgpara(list,dest,paraloc2);
        a_loadaddr_ref_cgpara(list,source,paraloc1);
        paramanager.freecgpara(list,paraloc3);
        paramanager.freecgpara(list,paraloc2);
        paramanager.freecgpara(list,paraloc1);
        { mark the volatile registers as clobbered around the RTL call }
        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
        a_call_name(list,'FPC_MOVE',false);
        dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        paraloc3.done;
        paraloc2.done;
        paraloc1.done;
      end;
  1993. procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
  1994. var
  1995. sourcebasereplaced, destbasereplaced: boolean;
  1996. { get optimal memory operation to use for loading/storing data
  1997. in an unrolled loop }
  1998. procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
  1999. begin
  2000. if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
  2001. (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
  2002. begin
  2003. memop:=unscaledop;
  2004. needsimplify:=true;
  2005. end
  2006. else if (unscaledop<>A_NONE) and
  2007. (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
  2008. (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
  2009. begin
  2010. memop:=unscaledop;
  2011. needsimplify:=false;
  2012. end
  2013. else
  2014. begin
  2015. memop:=scaledop;
  2016. needsimplify:=true;
  2017. end;
  2018. end;
  2019. { adjust the offset and/or addressing mode after a load/store so it's
  2020. correct for the next one of the same size }
  2021. procedure updaterefafterloadstore(var ref: treference; oplen: longint);
  2022. begin
  2023. case ref.addressmode of
  2024. AM_OFFSET:
  2025. inc(ref.offset,oplen);
  2026. AM_POSTINDEXED:
  2027. { base register updated by instruction, next offset can remain
  2028. the same }
  2029. ;
  2030. AM_PREINDEXED:
  2031. begin
  2032. { base register updated by instruction -> next instruction can
  2033. use post-indexing with offset = sizeof(operation) }
  2034. ref.offset:=0;
  2035. ref.addressmode:=AM_OFFSET;
  2036. end;
  2037. end;
  2038. end;
  2039. { generate a load/store and adjust the reference offset to the next
  2040. memory location if necessary }
  2041. procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
  2042. begin
  2043. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
  2044. updaterefafterloadstore(ref,tcgsize2size[opsize]);
  2045. end;
  2046. { generate a dual load/store (ldp/stp) and adjust the reference offset to
  2047. the next memory location if necessary }
  2048. procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
  2049. begin
  2050. list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
  2051. updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
  2052. end;
  2053. { turn a reference into a pre- or post-indexed reference for use in a
  2054. load/store of a particular size }
  2055. procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
  2056. var
  2057. tmpreg: tregister;
  2058. scaledoffset: longint;
  2059. orgaddressmode: taddressmode;
  2060. begin
  2061. scaledoffset:=tcgsize2size[opsize];
  2062. if scaledop in [A_LDP,A_STP] then
  2063. scaledoffset:=scaledoffset*2;
  2064. { can we use the reference as post-indexed without changes? }
  2065. if forcepostindexing then
  2066. begin
  2067. orgaddressmode:=ref.addressmode;
  2068. ref.addressmode:=AM_POSTINDEXED;
  2069. if (orgaddressmode=AM_POSTINDEXED) or
  2070. ((ref.offset=0) and
  2071. (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
  2072. begin
  2073. { just change the post-indexed offset to the access size }
  2074. ref.offset:=scaledoffset;
  2075. { and replace the base register if that didn't happen yet
  2076. (could be sp or a regvar) }
  2077. if not basereplaced then
  2078. begin
  2079. tmpreg:=getaddressregister(list);
  2080. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
  2081. ref.base:=tmpreg;
  2082. basereplaced:=true;
  2083. end;
  2084. exit;
  2085. end;
  2086. ref.addressmode:=orgaddressmode;
  2087. end;
  2088. {$ifdef dummy}
  2089. This could in theory be useful in case you have a concatcopy from
  2090. e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
  2091. very unlikely. Disabled because it still needs fixes, as it
  2092. also generates pre-indexed loads right now at the very end for the
  2093. left-over gencopies
  2094. { can we turn it into a pre-indexed reference for free? (after the
  2095. first operation, it will be turned into an offset one) }
  2096. if not forcepostindexing and
  2097. (ref.offset<>0) then
  2098. begin
  2099. orgaddressmode:=ref.addressmode;
  2100. ref.addressmode:=AM_PREINDEXED;
  2101. tmpreg:=ref.base;
  2102. if not basereplaced and
  2103. (ref.base=tmpreg) then
  2104. begin
  2105. tmpreg:=getaddressregister(list);
  2106. a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
  2107. ref.base:=tmpreg;
  2108. basereplaced:=true;
  2109. end;
  2110. if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
  2111. make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
  2112. exit;
  2113. end;
  2114. {$endif dummy}
  2115. if not forcepostindexing then
  2116. begin
  2117. ref.addressmode:=AM_OFFSET;
  2118. make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
  2119. { this may still cause problems if the final offset is no longer
  2120. a simple ref; it's a bit complicated to pass all information
  2121. through at all places and check that here, so play safe: we
  2122. currently never generate unrolled copies for more than 64
  2123. bytes (32 with non-double-register copies) }
  2124. if ref.index=NR_NO then
  2125. begin
  2126. if ((scaledop in [A_LDP,A_STP]) and
  2127. (ref.offset<((64-8)*tcgsize2size[opsize]))) or
  2128. ((scaledop in [A_LDUR,A_STUR]) and
  2129. (ref.offset<(255-8*tcgsize2size[opsize]))) or
  2130. ((scaledop in [A_LDR,A_STR]) and
  2131. (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
  2132. exit;
  2133. end;
  2134. end;
  2135. tmpreg:=getaddressregister(list);
  2136. a_loadaddr_ref_reg(list,ref,tmpreg);
  2137. basereplaced:=true;
  2138. if forcepostindexing then
  2139. begin
  2140. reference_reset_base(ref,tmpreg,scaledoffset,ref.temppos,ref.alignment,ref.volatility);
  2141. ref.addressmode:=AM_POSTINDEXED;
  2142. end
  2143. else
  2144. begin
  2145. reference_reset_base(ref,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
  2146. ref.addressmode:=AM_OFFSET;
  2147. end
  2148. end;
  2149. { prepare a reference for use by gencopy. This is done both after the
  2150. unrolled and regular copy loop -> get rid of post-indexing mode, make
  2151. sure ref is valid }
  2152. procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
  2153. var
  2154. simplify: boolean;
  2155. begin
  2156. if ref.addressmode=AM_POSTINDEXED then
  2157. ref.offset:=tcgsize2size[opsize];
  2158. getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
  2159. if simplify then
  2160. begin
  2161. makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
  2162. op:=scaledop;
  2163. end;
  2164. end;
  2165. { generate a copy from source to dest of size opsize/postfix }
  2166. procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
  2167. var
  2168. reg: tregister;
  2169. loadop, storeop: tasmop;
  2170. begin
  2171. preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
  2172. preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
  2173. reg:=getintregister(list,opsize);
  2174. genloadstore(list,loadop,reg,source,postfix,opsize);
  2175. genloadstore(list,storeop,reg,dest,postfix,opsize);
  2176. end;
  2177. { copy the leftovers after an unrolled or regular copy loop }
  2178. procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
  2179. begin
  2180. { stop post-indexing if we did so in the loop, since in that case all
  2181. offsets definitely can be represented now }
  2182. if source.addressmode=AM_POSTINDEXED then
  2183. begin
  2184. source.addressmode:=AM_OFFSET;
  2185. source.offset:=0;
  2186. end;
  2187. if dest.addressmode=AM_POSTINDEXED then
  2188. begin
  2189. dest.addressmode:=AM_OFFSET;
  2190. dest.offset:=0;
  2191. end;
  2192. { transfer the leftovers }
  2193. if len>=8 then
  2194. begin
  2195. dec(len,8);
  2196. gencopy(list,source,dest,PF_NONE,OS_64);
  2197. end;
  2198. if len>=4 then
  2199. begin
  2200. dec(len,4);
  2201. gencopy(list,source,dest,PF_NONE,OS_32);
  2202. end;
  2203. if len>=2 then
  2204. begin
  2205. dec(len,2);
  2206. gencopy(list,source,dest,PF_H,OS_16);
  2207. end;
  2208. if len>=1 then
  2209. begin
  2210. dec(len);
  2211. gencopy(list,source,dest,PF_B,OS_8);
  2212. end;
  2213. end;
  const
    { estimated code bytes of loop control: load_length + loop dec + cbnz }
    loopoverhead=12;
    { loop overhead + load + store }
    { NOTE(review): totallooplen is not referenced anywhere in the visible
      body -- possibly a leftover from an earlier size heuristic; confirm
      before removing }
    totallooplen=loopoverhead + 8;
  var
    totalalign: longint;
    maxlenunrolled: tcgint;
    loadop, storeop: tasmop;
    opsize: tcgsize;
    postfix: toppostfix;
    tmpsource, tmpdest: treference;
    scaledstoreop, unscaledstoreop,
    scaledloadop, unscaledloadop: tasmop;
    regs: array[1..8] of tregister;
    countreg: tregister;
    i, regcount: longint;
    hl: tasmlabel;
    simplifysource, simplifydest: boolean;
  begin
    if len=0 then
      exit;
    sourcebasereplaced:=false;
    destbasereplaced:=false;
    { maximum common alignment }
    totalalign:=max(1,newalignment(source.alignment,dest.alignment));
    { use a simple load/store? }
    if (len in [1,2,4,8]) and
       ((totalalign>=(len div 2)) or
        (source.alignment=len) or
        (dest.alignment=len)) then
      begin
        opsize:=int_cgsize(len);
        a_load_ref_ref(list,opsize,opsize,source,dest);
        exit;
      end;
    { alignment > length is not useful, and would break some checks below }
    while totalalign>len do
      totalalign:=totalalign div 2;
    { operation sizes to use based on common alignment }
    case totalalign of
      1:
        begin
          postfix:=PF_B;
          opsize:=OS_8;
        end;
      2:
        begin
          postfix:=PF_H;
          opsize:=OS_16;
        end;
      4:
        begin
          postfix:=PF_None;
          opsize:=OS_32;
        end
      else
        begin
          totalalign:=8;
          postfix:=PF_None;
          opsize:=OS_64;
        end;
    end;
    { maximum length to be handled with an unrolled loop (4 loads + 4 stores) }
    maxlenunrolled:=min(totalalign,8)*4;
    { ldp/stp -> 2 registers per instruction }
    if (totalalign>=4) and
       (len>=totalalign*2) then
      begin
        maxlenunrolled:=maxlenunrolled*2;
        scaledstoreop:=A_STP;
        scaledloadop:=A_LDP;
        unscaledstoreop:=A_NONE;
        unscaledloadop:=A_NONE;
      end
    else
      begin
        scaledstoreop:=A_STR;
        scaledloadop:=A_LDR;
        unscaledstoreop:=A_STUR;
        unscaledloadop:=A_LDUR;
      end;
    { we only need 4 instructions extra to call FPC_MOVE, so halve the
      unrolling threshold when optimising for size }
    if cs_opt_size in current_settings.optimizerswitches then
      maxlenunrolled:=maxlenunrolled div 2;
    if (len>maxlenunrolled) and
       (len>totalalign*8) then
      begin
        g_concatcopy_move(list,source,dest,len);
        exit;
      end;
    simplifysource:=true;
    simplifydest:=true;
    tmpsource:=source;
    tmpdest:=dest;
    { can we directly encode all offsets in an unrolled loop? }
    if len<=maxlenunrolled then
      begin
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
{$endif extdebug}
        { the leftovers will be handled separately -> -(len mod opsize) }
        inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
        { additionally, the last regular load/store will be at
          offset+len-opsize (if len>=opsize, i.e. the increment above was
          non-zero) }
        if tmpsource.offset>source.offset then
          dec(tmpsource.offset,tcgsize2size[opsize]);
        getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
        inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
        if tmpdest.offset>dest.offset then
          dec(tmpdest.offset,tcgsize2size[opsize]);
        getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
        tmpsource:=source;
        tmpdest:=dest;
        { if we can't directly encode all offsets, simplify }
        if simplifysource then
          begin
            loadop:=scaledloadop;
            makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
          end;
        if simplifydest then
          begin
            storeop:=scaledstoreop;
            makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
          end;
        regcount:=len div tcgsize2size[opsize];
        { in case we transfer two registers at a time, we copy an even
          number of registers }
        if loadop=A_LDP then
          regcount:=regcount and not(1);
        { initialise for dfa }
        regs[low(regs)]:=NR_NO;
        { max 4 loads/stores -> max 8 registers (in case of ldp/stdp) }
        for i:=1 to regcount do
          regs[i]:=getintregister(list,opsize);
        if loadop=A_LDP then
          begin
            { load registers }
            for i:=1 to (regcount div 2) do
              gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
            { store registers }
            for i:=1 to (regcount div 2) do
              gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
          end
        else
          begin
            for i:=1 to regcount do
              genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
            for i:=1 to regcount do
              genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
          end;
        { leftover }
        len:=len-regcount*tcgsize2size[opsize];
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
{$endif extdebug}
      end
    else
      begin
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
{$endif extdebug}
        { regular loop -> definitely use post-indexing }
        loadop:=scaledloadop;
        makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
        storeop:=scaledstoreop;
        makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
        current_asmdata.getjumplabel(hl);
        countreg:=getintregister(list,OS_32);
        { an ldp/stp pair transfers two registers (2*opsize bytes) per
          iteration, so the iteration count is len div (2*opsize).
          Note: "len div tcgsize2size[opsize]*2" would associate left
          ((len div opsize)*2) and yield four times too many iterations,
          copying past the end of the buffers }
        if loadop=A_LDP then
          a_load_const_reg(list,OS_32,len div (tcgsize2size[opsize]*2),countreg)
        else
          a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
        a_label(list,hl);
        a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
        if loadop=A_LDP then
          begin
            regs[1]:=getintregister(list,opsize);
            regs[2]:=getintregister(list,opsize);
            gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
            gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
          end
        else
          begin
            regs[1]:=getintregister(list,opsize);
            genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
            genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
          end;
        list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
        { leftover bytes not covered by the loop; with ldp/stp each
          iteration consumed 2*opsize bytes, leaving up to 2*opsize-1
          (at most 15) bytes for gencopyleftovers }
        if loadop=A_LDP then
          len:=len mod (tcgsize2size[opsize]*2)
        else
          len:=len mod tcgsize2size[opsize];
      end;
    gencopyleftovers(list,tmpsource,tmpdest,len);
  end;
{ Would adjust the self pointer by ioffset for an interface wrapper; on this
  target the adjustment is emitted inline by g_intf_wrapper, so reaching this
  method is a compiler bug. }
procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  begin
    { This method is integrated into g_intf_wrapper and shouldn't be called separately }
    InternalError(2013020102);
  end;
{ Emit a run-time check of the FPU's cumulative exception flags and a call to
  FPC_THROWFPUEXCEPTION when any relevant flag is set.  Only emitted when
  cs_check_fpu_exceptions is active and either force is true or the current
  procedure was flagged as needing the check; when clear is true, the
  "check needed" flag is reset afterwards. }
procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
  var
    r : TRegister;
    ai: taicpu;
    l1,l2: TAsmLabel;
  begin
    { so far, we assume all flavours of AArch64 need explicit floating point exception checking }
    if ((cs_check_fpu_exceptions in current_settings.localswitches) and
        (force or current_procinfo.FPUExceptionCheckNeeded)) then
      begin
        { read the floating-point status register }
        r:=getintregister(list,OS_INT);
        list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
        { $1f = FPSR bits 0-4: the IOC/DZC/OFC/UFC/IXC cumulative exception
          flags (invalid op, divide by zero, overflow, underflow, inexact) }
        list.concat(taicpu.op_reg_const(A_TST,r,$1f));
        current_asmdata.getjumplabel(l1);
        current_asmdata.getjumplabel(l2);
        { any of those set -> branch to the throw path at l1 }
        ai:=taicpu.op_sym(A_B,l1);
        ai.is_jmp:=true;
        ai.condition:=C_NE;
        list.concat(ai);
        { $80 = FPSR bit 7 (IDC, input denormal); if it is clear too, skip
          the throw by branching to l2, otherwise fall through into l1 }
        list.concat(taicpu.op_reg_const(A_TST,r,$80));
        ai:=taicpu.op_sym(A_B,l2);
        ai.is_jmp:=true;
        ai.condition:=C_EQ;
        list.concat(ai);
        a_label(list,l1);
        { the helper call clobbers the default set of volatile int registers }
        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        a_label(list,l2);
        if clear then
          current_procinfo.FPUExceptionCheckNeeded:=false;
      end;
  end;
  2445. procedure tcgaarch64.g_profilecode(list : TAsmList);
  2446. begin
  2447. if target_info.system = system_aarch64_linux then
  2448. begin
  2449. list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
  2450. a_call_name(list,'_mcount',false);
  2451. end
  2452. else
  2453. internalerror(2020021901);
  2454. end;
  2455. {$endif dummy}
  2456. {$warnings off}
{ Instantiate the global code generator objects (cg / cg64) for this target.
  NOTE(review): tcg64fxtensa indicates this unit targets Xtensa; the
  aarch64-style code above appears to be compiled out via the "dummy"
  conditional define -- confirm against the unit header. }
procedure create_codegen;
  begin
    cg:=tcgcpu.Create;
    cg64:=tcg64fxtensa.Create;
  end;
  2462. end.