{
    Copyright (c) 2014 by Jonas Maebe

    This unit implements the code generator for AArch64

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit cgcpu;

{$i fpcdefs.inc}

  interface

    uses
       globtype,parabase,
       cgbase,cgutils,cgobj,
       aasmbase,aasmtai,aasmdata,aasmcpu,
       cpubase,cpuinfo,
       node,symconst,symtype,symdef,
       rgcpu;

    type
      tcgaarch64=class(tcg)
       protected
        { changes register size without adding register allocation info }
        function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
       public
        { simplifies "ref" so it can be used with "op". If "ref" can be used
          with a different load/store operation that has the same meaning as
          the original one, "op" will be replaced with the alternative }
        procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
        function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
        procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
        procedure init_register_allocators;override;
        procedure done_register_allocators;override;
        function getmmregister(list:TAsmList;size:tcgsize):tregister;override;
        function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
        procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
        procedure a_call_reg(list:TAsmList;Reg:tregister);override;
        { General purpose instructions }
        procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
        procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
        procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
        procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
        procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
        procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
        procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
        { move instructions }
        procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
        procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
        procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
        procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
        procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
        procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
        procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
        procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
        { fpu move instructions (not used, all floating point is vector unit-based) }
        procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
        procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
        procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
        procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
        procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
        procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
        procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
        procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
        procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
        { comparison operations }
        procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
        procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
        procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
        procedure a_jmp_name(list: TAsmList; const s: string);override;
        procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
        procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
        procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
        procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
        procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
        procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
        procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
        procedure g_maybe_got_init(list: TAsmList); override;
        procedure g_restore_registers(list: TAsmList);override;
        procedure g_save_registers(list: TAsmList);override;
        procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
        procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
        procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
       private
        function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
        procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
      end;

    procedure create_codegen;

    const
      TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
      );

      TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
      );

      TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
        C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
      );
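
    { Illustrative note (not part of the original source): the two opcode
      maps differ only in their shift/rotate entries. With a register shift
      amount, AArch64 uses the ASRV/LSLV/LSRV/RORV forms (e.g.
      "lslv x0,x1,x2"), while with an immediate the ASR/LSL/LSR/ROR aliases
      are used instead (e.g. "lsl x0,x1,#3"). }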
  implementation

    uses
       globals,verbose,systems,cutils,
       paramgr,fmodule,
       symtable,symsym,
       tgobj,
       procinfo,cpupi;


    procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
      var
        href: treference;
        so: tshifterop;
        accesssize: longint;
      begin
        if (ref.base=NR_NO) then
          begin
            if ref.shiftmode<>SM_None then
              internalerror(2014110701);
            ref.base:=ref.index;
            ref.index:=NR_NO;
          end;
        { no arbitrary scale factor support (the generic code doesn't set it,
          AArch64-specific code shouldn't either) }
        if not(ref.scalefactor in [0,1]) then
          internalerror(2014111002);
        case simple_ref_type(op,size,oppostfix,ref) of
          sr_simple:
            exit;
          sr_internal_illegal:
            internalerror(2014121702);
          sr_complex:
            { continue } ;
        end;
        if assigned(ref.symbol) then
          begin
            { internal "load symbol" instructions should already be valid }
            if assigned(ref.symboldata) or
               (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
              internalerror(2014110802);
            { no relative symbol support (needed) yet }
            if assigned(ref.relsymbol) then
              internalerror(2014111001);
            { on Darwin: load the address from the GOT. There does not appear to
              be a non-GOT variant. This consists of first loading the address
              of the page containing the GOT entry for this variable, and then
              the address of the entry itself from that page (can be relaxed by
              the linker in case the variable itself can be stored directly in
              the GOT) }
            if target_info.system in systems_darwin then
              begin
                if (preferred_newbasereg=NR_NO) or
                   (ref.base=preferred_newbasereg) or
                   (ref.index=preferred_newbasereg) then
                  preferred_newbasereg:=getaddressregister(list);
                { load the (GOT) page }
                reference_reset_symbol(href,ref.symbol,0,8);
                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
                   ((ref.symbol.typ=AT_DATA) and
                    (ref.symbol.bind=AB_LOCAL)) then
                  href.refaddr:=addr_page
                else
                  href.refaddr:=addr_gotpage;
                list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
                { load the GOT entry (= address of the variable) }
                reference_reset_base(href,preferred_newbasereg,0,sizeof(pint));
                href.symbol:=ref.symbol;
                { code symbols defined in the current compilation unit do not
                  have to be accessed via the GOT }
                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
                   ((ref.symbol.typ=AT_DATA) and
                    (ref.symbol.bind=AB_LOCAL)) then
                  begin
                    href.base:=NR_NO;
                    href.refaddr:=addr_pageoffset;
                    list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
                  end
                else
                  begin
                    href.refaddr:=addr_gotpageoffset;
                    { use a_load_ref_reg() rather than directly encoding the LDR,
                      so that we'll check the validity of the reference }
                    a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
                  end;
                { set as new base register }
                if ref.base=NR_NO then
                  ref.base:=preferred_newbasereg
                else if ref.index=NR_NO then
                  ref.index:=preferred_newbasereg
                else
                  begin
                    { make sure it's valid in case ref.base is SP -> make it
                      the second operand }
                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
                    ref.base:=preferred_newbasereg
                  end;
                ref.symbol:=nil;
              end
            else
              { todo }
              internalerror(2014111003);
          end;
        { base & index }
        if (ref.base<>NR_NO) and
           (ref.index<>NR_NO) then
          begin
            case op of
              A_LDR, A_STR:
                begin
                  if (ref.shiftmode=SM_None) and
                     (ref.shiftimm<>0) then
                    internalerror(2014110805);
                  { wrong shift? (possible in case of something like
                    array_of_2byte_rec[x].bytefield -> the shift will be set
                    to 1, but the final load is only 1 byte wide -> can't use
                    the shift after all) }
                  if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
                     ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
                      (ref.offset<>0)) then
                    begin
                      if preferred_newbasereg=NR_NO then
                        preferred_newbasereg:=getaddressregister(list);
                      { "add" supports a superset of the shift modes supported by
                        load/store instructions }
                      shifterop_reset(so);
                      so.shiftmode:=ref.shiftmode;
                      so.shiftimm:=ref.shiftimm;
                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
                      { possibly still an invalid offset -> fall through }
                    end
                  else if ref.offset<>0 then
                    begin
                      if (preferred_newbasereg=NR_NO) or
                         { we keep ref.index, so it must not be overwritten }
                         (ref.index=preferred_newbasereg) then
                        preferred_newbasereg:=getaddressregister(list);
                      { add to the base and not to the index, because the index
                        may be scaled; this works even if the base is SP }
                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                      ref.offset:=0;
                      ref.base:=preferred_newbasereg;
                      { finished }
                      exit;
                    end
                  else
                    { valid -> exit }
                    exit;
                end;
              { todo }
              A_LD1,A_LD2,A_LD3,A_LD4,
              A_ST1,A_ST2,A_ST3,A_ST4:
                internalerror(2014110704);
              { these don't support base+index }
              A_LDUR,A_STUR,
              A_LDP,A_STP:
                begin
                  { these either don't support pre-/post-indexing, or don't
                    support it with base+index }
                  if ref.addressmode<>AM_OFFSET then
                    internalerror(2014110911);
                  if preferred_newbasereg=NR_NO then
                    preferred_newbasereg:=getaddressregister(list);
                  if ref.shiftmode<>SM_None then
                    begin
                      { "add" supports a superset of the shift modes supported by
                        load/store instructions }
                      shifterop_reset(so);
                      so.shiftmode:=ref.shiftmode;
                      so.shiftimm:=ref.shiftimm;
                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
                    end
                  else
                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
                  { fall through to the handling of base + offset, since the
                    offset may still be too big }
                end;
              else
                internalerror(2014110901);
            end;
          end;
        { base + offset }
        if ref.base<>NR_NO then
          begin
            { valid offset for LDUR/STUR -> use that }
            if (ref.addressmode=AM_OFFSET) and
               (op in [A_LDR,A_STR]) and
               (ref.offset>=-256) and
               (ref.offset<=255) then
              begin
                if op=A_LDR then
                  op:=A_LDUR
                else
                  op:=A_STUR
              end
            { if it's not a valid LDUR/STUR, use LDR/STR }
            else if (op in [A_LDUR,A_STUR]) and
               ((ref.offset<-256) or
                (ref.offset>255) or
                (ref.addressmode<>AM_OFFSET)) then
              begin
                if op=A_LDUR then
                  op:=A_LDR
                else
                  op:=A_STR
              end;
            case op of
              A_LDR,A_STR:
                begin
                  case ref.addressmode of
                    AM_PREINDEXED:
                      begin
                        { since the loaded/stored register cannot be the same
                          as the base register, we can safely add the
                          offset to the base if it doesn't fit }
                        if (ref.offset<-256) or
                           (ref.offset>255) then
                          begin
                            a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
                            ref.offset:=0;
                          end;
                      end;
                    AM_POSTINDEXED:
                      begin
                        { cannot emulate post-indexing if we have to fold the
                          offset into the base register }
                        if (ref.offset<-256) or
                           (ref.offset>255) then
                          internalerror(2014110909);
                        { ok }
                      end;
                    AM_OFFSET:
                      begin
                        { unsupported offset -> fold into base register }
                        accesssize:=1 shl tcgsizep2size[size];
                        if (ref.offset<0) or
                           (ref.offset>(((1 shl 12)-1)*accesssize)) or
                           ((ref.offset mod accesssize)<>0) then
                          begin
                            if preferred_newbasereg=NR_NO then
                              preferred_newbasereg:=getaddressregister(list);
                            { can we split the offset between an
                              "add/sub (imm12 shl 12)" and the load (also an
                              imm12)?
                              -- the offset from the load will always be added,
                              that's why the lower bound has a smaller range
                              than the upper bound; it must also be a multiple
                              of the access size }
                            if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
                               (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
                               ((ref.offset mod accesssize)=0) then
                              begin
                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
                                ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
                              end
                            else
                              begin
                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                                ref.offset:=0;
                              end;
                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
                          end;
                      end
                    else
                      internalerror(2014110904);
                  end;
                end;
              A_LDP,A_STP:
                begin
                  { unsupported offset -> fold into base register (these
                    instructions support all addressmodes) }
                  if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
                     (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
                    begin
                      case ref.addressmode of
                        AM_POSTINDEXED:
                          { don't emulate post-indexing if we have to fold the
                            offset into the base register }
                          internalerror(2014110910);
                        AM_PREINDEXED:
                          { this means the offset must be added to the current
                            base register }
                          preferred_newbasereg:=ref.base;
                        AM_OFFSET:
                          if preferred_newbasereg=NR_NO then
                            preferred_newbasereg:=getaddressregister(list);
                      end;
                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                      reference_reset_base(ref,preferred_newbasereg,0,ref.alignment);
                    end
                end;
              A_LDUR,A_STUR:
                begin
                  { valid, checked above }
                end;
              { todo }
              A_LD1,A_LD2,A_LD3,A_LD4,
              A_ST1,A_ST2,A_ST3,A_ST4:
                internalerror(2014110908);
              else
                internalerror(2014110708);
            end;
            { done }
            exit;
          end;
        { only an offset -> change to base (+ offset 0) }
        if preferred_newbasereg=NR_NO then
          preferred_newbasereg:=getaddressregister(list);
        a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
        reference_reset_base(ref,preferred_newbasereg,0,newalignment(8,ref.offset));
      end;
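
    { Illustrative example (not part of the original source): for a 32 bit
      "ldr w0,[x1,#65540]" the offset exceeds the scaled imm12 range, so the
      AM_OFFSET case above splits it into an imm12-shifted add plus an imm12
      load offset:
          add x2,x1,#16,lsl #12   // x2 := x1 + 65536
          ldr w0,[x2,#4]
      while a small unscaled offset such as #-20 is instead handled by
      switching LDR to LDUR, whose signed 9 bit range is -256..255. }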
    function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
      var
        subreg: tsubregister;
      begin
        subreg:=cgsize2subreg(getregtype(reg),size);
        result:=reg;
        setsubreg(result,subreg);
      end;


    function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
      begin
        internalerror(2014122110);
        { squash warning }
        result:=NR_NO;
      end;


    function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference): treference;
      begin
        make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
        result:=ref;
      end;


    procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
      var
        instr: taicpu;
        so: tshifterop;
        hadtmpreg: boolean;
      begin
        { imm12 }
        if (a>=0) and
           (a<=((1 shl 12)-1)) then
          if usedest then
            instr:=taicpu.op_reg_reg_const(op,dst,src,a)
          else
            instr:=taicpu.op_reg_const(op,src,a)
        { imm12 lsl 12 }
        else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
          begin
            so.shiftmode:=SM_LSL;
            so.shiftimm:=12;
            if usedest then
              instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
            else
              instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
          end
        else
          begin
            { todo: other possible optimizations (e.g. load 16 bit constant in
              register and then add/sub/cmp/cmn shifted the rest) }
            if tmpreg=NR_NO then
              begin
                hadtmpreg:=false;
                tmpreg:=getintregister(list,size);
              end
            else
              begin
                hadtmpreg:=true;
                getcpuregister(list,tmpreg);
              end;
            a_load_const_reg(list,size,a,tmpreg);
            if usedest then
              instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
            else
              instr:=taicpu.op_reg_reg(op,src,tmpreg);
            if hadtmpreg then
              ungetcpuregister(list,tmpreg);
          end;
        if setflags then
          setoppostfix(instr,PF_S);
        list.concat(instr);
      end;
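
    { Illustrative examples (not part of the original source) of the three
      encodings chosen above for op=A_ADD:
          a = 42      -> add x0,x1,#42             // plain imm12
          a = $123000 -> add x0,x1,#$123,lsl #12   // imm12 shl 12
          a = $12345  -> the constant is first loaded into a temporary
                         register, then "add x0,x1,xT" (xT is hypothetical) }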
{****************************************************************************
                            Assembler code
****************************************************************************}

    procedure tcgaarch64.init_register_allocators;
      begin
        inherited init_register_allocators;
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
             RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
             RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
             { maybe we can enable this in the future for leaf functions (it's
               the frame pointer)
              ,RS_X29 }],
            first_int_imreg,[]);
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
            [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
             RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
             RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
             RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
            first_mm_imreg,[]);
      end;


    procedure tcgaarch64.done_register_allocators;
      begin
        rg[R_INTREGISTER].free;
        rg[R_FPUREGISTER].free;
        rg[R_MMREGISTER].free;
        inherited done_register_allocators;
      end;


    function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize): tregister;
      begin
        case size of
          OS_F32:
            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
          OS_F64:
            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
          else
            internalerror(2014102701);
        end;
      end;


    procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
      begin
        if not weak then
          list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s)))
        else
          list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s)));
      end;


    procedure tcgaarch64.a_call_reg(list: TAsmList; Reg: tregister);
      begin
        list.concat(taicpu.op_reg(A_BLR,reg));
      end;


{********************** load instructions ********************}

    procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);
      var
        preva: tcgint;
        opc: tasmop;
        shift,maxshift: byte;
        so: tshifterop;
        reginited: boolean;
        mask: tcgint;
      begin
        { if we load a value into a 32 bit register, it is automatically
          zero-extended to 64 bit }
        if (high(a)=0) and
           (size in [OS_64,OS_S64]) then
          begin
            size:=OS_32;
            reg:=makeregsize(reg,size);
          end;
        { values <= 32 bit are stored in a 32 bit register }
        if not(size in [OS_64,OS_S64]) then
          a:=cardinal(a);
        if size in [OS_64,OS_S64] then
          begin
            mask:=-1;
            maxshift:=64;
          end
        else
          begin
            mask:=$ffffffff;
            maxshift:=32;
          end;
        { single movn enough? (to be extended) }
        shift:=16;
        preva:=a;
        repeat
          if (a shr shift)=(mask shr shift) then
            begin
              if shift=16 then
                list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
              else
                begin
                  shifterop_reset(so);
                  so.shiftmode:=SM_LSL;
                  so.shiftimm:=shift-16;
                  list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
                end;
              exit;
            end;
          { only try the next 16 bits if the current one is all 1 bits, since
            the movn will set all lower bits to 1 }
          if word(a shr (shift-16))<>$ffff then
            break;
          inc(shift,16);
        until shift=maxshift;
        reginited:=false;
        shift:=0;
        { can be optimized later to use more movn }
        repeat
          { leftover is shifterconst? (don't check if we can represent it just
            as effectively with movz/movk, as this check is expensive) }
          if ((shift<tcgsize2size[size]*(8 div 2)) and
              (word(a)<>0) and
              ((a shr 16)<>0)) and
             is_shifter_const(a shl shift,size) then
            begin
              if reginited then
                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
              else
                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
              exit;
            end;
          { set all 16 bit parts <> 0 }
          if (word(a)<>0) or
             ((shift=0) and
              (a=0)) then
            if shift=0 then
              begin
                list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
                reginited:=true;
              end
            else
              begin
                shifterop_reset(so);
                so.shiftmode:=SM_LSL;
                so.shiftimm:=shift;
                if not reginited then
                  begin
                    opc:=A_MOVZ;
                    reginited:=true;
                  end
                else
                  opc:=A_MOVK;
                list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
              end;
          preva:=a;
          a:=a shr 16;
          inc(shift,16);
        until word(preva)=preva;
        if not reginited then
          internalerror(2014102702);
      end;
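
    { Illustrative examples (not part of the original source) of the
      sequences the constant loader above produces:
          a = $12345678   -> movz w0,#$5678
                             movk w0,#$1234,lsl #16
          a = -2 (32 bit) -> movn w0,#1
          a = 0           -> movz w0,#0
      Each MOVZ/MOVK handles one 16 bit chunk; all-ones upper chunks are
      collapsed into a single MOVN where possible. }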
    procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
      var
        reg: tregister;
      begin
        { use the zero register if possible }
        if a=0 then
          begin
            if size in [OS_64,OS_S64] then
              reg:=NR_XZR
            else
              reg:=NR_WZR;
            a_load_reg_ref(list,size,size,reg,ref);
          end
        else
          inherited;
      end;
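
    { Illustrative example (not part of the original source): storing the
      constant 0 needs no scratch register, e.g.
          str xzr,[x0]
      instead of a movz/str pair. }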
    procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
      var
        oppostfix: toppostfix;
        hreg: tregister;
      begin
        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
          fromsize:=tosize
        { have a 32 bit register but need a 64 bit one? }
        else if tosize in [OS_64,OS_S64] then
          begin
            { sign extend if necessary }
            if fromsize in [OS_S8,OS_S16,OS_S32] then
              begin
                { can't overwrite reg, may be a constant reg }
                hreg:=getintregister(list,tosize);
                a_load_reg_reg(list,fromsize,tosize,reg,hreg);
                reg:=hreg;
              end
            else
              { top 32 bit are zero by default }
              reg:=makeregsize(reg,OS_64);
            fromsize:=tosize;
          end;
        if (ref.alignment<>0) and
           (ref.alignment<tcgsize2size[tosize]) then
          begin
            a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
          end
        else
          begin
            case tosize of
              { signed integer registers }
              OS_8,
              OS_S8:
                oppostfix:=PF_B;
              OS_16,
              OS_S16:
                oppostfix:=PF_H;
              OS_32,
              OS_S32,
              OS_64,
              OS_S64:
                oppostfix:=PF_None;
              else
                InternalError(200308299);
            end;
            handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
          end;
      end;


    procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
      var
        oppostfix: toppostfix;
      begin
        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
          fromsize:=tosize;
        { ensure that all bits of the 32/64 register are always correctly set:
          * default behaviour is always to zero-extend to the entire (64 bit)
            register -> unsigned 8/16/32 bit loads only exist with a 32 bit
            target register, as the upper 32 bit will be zeroed implicitly
            -> always make target register 32 bit
          * signed loads exist both with 32 and 64 bit target registers,
            depending on whether the value should be sign extended to 32 or
            to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
            corresponding 64 bit register are again zeroed) -> no need to
            change anything (we only have 32 and 64 bit registers), except that
            when loading an OS_S32 to a 32 bit register, we don't need/can't
            use sign extension
        }
        if fromsize in [OS_8,OS_16,OS_32] then
          reg:=makeregsize(reg,OS_32);
        if (ref.alignment<>0) and
           (ref.alignment<tcgsize2size[fromsize]) then
          begin
            a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
            exit;
          end;
        case fromsize of
          { signed integer registers }
          OS_8:
            oppostfix:=PF_B;
          OS_S8:
            oppostfix:=PF_SB;
          OS_16:
            oppostfix:=PF_H;
          OS_S16:
            oppostfix:=PF_SH;
          OS_S32:
            if getsubreg(reg)=R_SUBD then
              oppostfix:=PF_None
            else
              oppostfix:=PF_SW;
          OS_32,
          OS_64,
          OS_S64:
            oppostfix:=PF_None;
          else
            InternalError(200308297);
        end;
        handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
        { clear upper 16 bits if the value was negative }
        if (fromsize=OS_S8) and (tosize=OS_16) then
          a_load_reg_reg(list,fromsize,tosize,reg,reg);
      end;
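
    { Illustrative examples (not part of the original source) of the postfix
      selection above: an OS_S8 value loaded into a 32 bit register becomes
      "ldrsb w0,[...]", an OS_16 value "ldrh w0,[...]" (implicitly zeroing
      the upper bits), and an OS_S32 value loaded into a 64 bit register
      "ldrsw x0,[...]". }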
    procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
      var
        href: treference;
        hreg1, hreg2, tmpreg: tregister;
      begin
        if fromsize in [OS_64,OS_S64] then
          begin
            { split into two 32 bit loads }
            hreg1:=makeregsize(register,OS_32);
            hreg2:=getintregister(list,OS_32);
            if target_info.endian=endian_big then
              begin
                tmpreg:=hreg1;
                hreg1:=hreg2;
                hreg2:=tmpreg;
              end;
            { can we use LDP? }
            if (ref.alignment=4) and
               (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
              list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
            else
              begin
                a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
                href:=ref;
                inc(href.offset,4);
                a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
              end;
            list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
          end
        else
          inherited;
      end;
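
    { Illustrative example (not part of the original source): a 64 bit load
      from a 4-byte-aligned reference ends up as
          ldp w0,w1,[x2]
          bfi x0,x1,#32,#32
      i.e. two 32 bit halves recombined with a bitfield insert. }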
    procedure tcgaarch64.a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
      var
        instr: taicpu;
      begin
        { we use both 32 and 64 bit registers -> insert conversion when
          we have to truncate/sign extend inside the (32 or 64 bit) register
          holding the value, and when we sign extend from a 32 to a 64 bit
          register }
        if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
           ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
            (fromsize<>tosize) and
            not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
           ((fromsize in [OS_S8,OS_S16,OS_S32]) and
            (tosize in [OS_64,OS_S64])) or
           { needs to mask out the sign in the top 16 bits }
           ((fromsize=OS_S8) and
            (tosize=OS_16)) then
          begin
            case tosize of
              OS_8:
                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
              OS_16:
                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
              OS_S8:
                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
              OS_S16:
                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
              { while "mov wN, wM" automatically inserts a zero-extension and
                hence we could encode a 64->32 bit move like that, the problem
                is that we then can't distinguish 64->32 from 32->32 moves, and
                the 64->32 truncation could be removed altogether... So use a
                different instruction }
              OS_32,
              OS_S32:
                { in theory, reg1 should be 64 bit here (since fromsize>tosize),
                  but because of the way location_force_register() tries to
                  avoid superfluous zero/sign extensions, it's not always the
                  case -> also force reg1 to 64 bit }
                list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
              OS_64,
              OS_S64:
                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
              else
                internalerror(2002090901);
            end;
          end
        else
          begin
            { 32 -> 32 bit move implies zero extension (sign extensions have
              been handled above) -> also use for 32 <-> 64 bit moves }
            if not(fromsize in [OS_64,OS_S64]) or
               not(tosize in [OS_64,OS_S64]) then
              instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
            else
              instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
            list.Concat(instr);
            { Notify the register allocator that we have written a move instruction so
              it can try to eliminate it. }
            add_move_instruction(instr);
          end;
      end;
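
    { Illustrative examples (not part of the original source) of the
      conversions chosen above:
          OS_S32 -> OS_64 : sxtw x0,w1            // sign extend to 64 bit
          OS_S16 -> OS_16 : uxth w0,w1            // mask out the sign bits
          OS_64  -> OS_32 : ubfiz x0,x1,#0,#32    // explicit truncation
      A plain "mov w0,w1" is only used when no extension or truncation is
      needed, so the register allocator can coalesce it away. }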
    procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
      var
        href: treference;
        so: tshifterop;
        op: tasmop;
      begin
        op:=A_LDR;
        href:=ref;
        { simplify as if we're going to perform a regular 64 bit load, using
          "r" as the new base register if possible/necessary }
        make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
        { load literal? }
        if assigned(href.symbol) then
          begin
            if (href.base<>NR_NO) or
               (href.index<>NR_NO) or
               not assigned(href.symboldata) then
              internalerror(2014110912);
            list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
          end
        else
          begin
            if href.index<>NR_NO then
              begin
                if href.shiftmode<>SM_None then
                  begin
                    { "add" supports a superset of the shift modes supported by
                      load/store instructions }
                    shifterop_reset(so);
                    so.shiftmode:=href.shiftmode;
                    so.shiftimm:=href.shiftimm;
                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
                  end
                else
                  a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
              end
            else if href.offset<>0 then
              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
            else
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
          end;
      end;
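
    { Illustrative example (not part of the original source): taking the
      address of a scaled array element, i.e. base x1 plus index x2 shifted
      left by 3, yields a single
          add x0,x1,x2,lsl #3
      rather than materialising the address in several steps. }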
    procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
      begin
        internalerror(2014122107)
      end;


    procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
      begin
        internalerror(2014122108)
      end;


    procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
      begin
        internalerror(2014122109)
      end;


    procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
      var
        instr: taicpu;
      begin
        if assigned(shuffle) and
           not shufflescalar(shuffle) then
          internalerror(2014122104);
        if fromsize=tosize then
          begin
            instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
            { Notify the register allocator that we have written a move
              instruction so it can try to eliminate it. }
            add_move_instruction(instr);
          end
        else
          begin
            if (reg_cgsize(reg1)<>fromsize) or
               (reg_cgsize(reg2)<>tosize) then
              internalerror(2014110913);
            instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
          end;
        list.Concat(instr);
      end;


    procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
      var
        tmpreg: tregister;
      begin
        if assigned(shuffle) and
           not shufflescalar(shuffle) then
          internalerror(2014122105);
        tmpreg:=NR_NO;
        if (fromsize<>tosize) then
          begin
            tmpreg:=reg;
            reg:=getmmregister(list,fromsize);
          end;
        handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
        if (fromsize<>tosize) then
          a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
      end;


    procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
      var
        tmpreg: tregister;
      begin
        if assigned(shuffle) and
           not shufflescalar(shuffle) then
          internalerror(2014122106);
        if (fromsize<>tosize) then
          begin
            tmpreg:=getmmregister(list,tosize);
            a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
            reg:=tmpreg;
          end;
        handle_load_store(list,A_STR,tosize,PF_None,reg,ref);
      end;


    procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
      begin
        if not shufflescalar(shuffle) then
          internalerror(2014122801);
        if not(tcgsize2size[fromsize] in [4,8]) or
           (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
          internalerror(2014122803);
        list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
      end;


    procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
      begin
        if not shufflescalar(shuffle) then
          internalerror(2014122802);
        if not(tcgsize2size[fromsize] in [4,8]) or
           (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
          internalerror(2014122804);
        list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
      end;


    procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
      begin
        case op of
          { "xor Vx,Vx" is used to initialize global regvars to 0 }
          OP_XOR:
            begin
              if (src<>dst) or
                 (reg_cgsize(src)<>size) or
                 assigned(shuffle) then
                internalerror(2015011401);
              case size of
                OS_F32,
                OS_F64:
                  list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
                else
                  internalerror(2015011402);
              end;
            end
          else
            internalerror(2015011403);
        end;
      end;


    procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
      var
        bitsize,
        signbit: longint;
      begin
        if srcsize in [OS_64,OS_S64] then
          begin
            bitsize:=64;
            signbit:=6;
          end
        else
          begin
            bitsize:=32;
            signbit:=5;
          end;
        { source is 0 -> dst will have to become 255 }
        list.concat(taicpu.op_reg_const(A_CMP,src,0));
        if reverse then
          begin
            list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
            { xor 31/63 is the same as setting the lower 5/6 bits to
              "31/63-(lower 5/6 bits of dst)" }
            list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
          end
        else
          begin
            list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
          end;
        { set dst to -1 if src was 0 }
        list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
        { mask the -1 to 255 if src was 0 (anyone find a two-instruction
          branch-free version? All of mine are 3...) }
        list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,dst,dst),PF_B));
      end;
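
    { Illustrative expansion (not part of the original source) of a forward
      bit scan on a 32 bit source; AArch64 has no direct "count trailing
      zeros" instruction, so:
          cmp   w1,#0
          rbit  w0,w1         // reverse bits, so CLZ counts trailing zeros
          clz   w0,w0
          csinv w0,w0,wzr,ne  // src was 0 -> -1 ...
          uxtb  w0,w0         // ... masked to 255 }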
    procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
      var
        href: treference;
        hreg1, hreg2, tmpreg: tregister;
      begin
        if fromsize in [OS_64,OS_S64] then
          begin
            { split into two 32 bit stores }
            hreg1:=makeregsize(register,OS_32);
            hreg2:=getintregister(list,OS_32);
            a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
            if target_info.endian=endian_big then
              begin
                tmpreg:=hreg1;
                hreg1:=hreg2;
                hreg2:=tmpreg;
              end;
            { can we use STP? }
            if (ref.alignment=4) and
               (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
              list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
            else
              begin
                a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
                href:=ref;
                inc(href.offset,4);
                a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
              end;
          end
        else
          inherited;
      end;


    procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
      const
        overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
      begin
        if (op in overflowops) and
           (size in [OS_8,OS_S8,OS_16,OS_S16]) then
          a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
      end;


    procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
      begin
        optimize_op_const(size,op,a);
        case op of
          OP_NONE:
            exit;
          OP_MOVE:
            a_load_const_reg(list,size,a,reg);
          OP_NEG,OP_NOT:
            internalerror(200306011);
          else
            a_op_const_reg_reg(list,op,size,a,reg,reg);
        end;
      end;


    procedure tcgaarch64.a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);
      begin
        case op of
          OP_NEG,
          OP_NOT:
            begin
              list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
              maybeadjustresult(list,op,size,dst);
            end
          else
            a_op_reg_reg_reg(list,op,size,src,dst,dst);
        end;
      end;


    procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
      var
        l: tlocation;
      begin
        a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
      end;


    procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
      var
        hreg: tregister;
      begin
        { no ROLV opcode... }
        if op=OP_ROL then
          begin
            case size of
              OS_32,OS_S32,
              OS_64,OS_S64:
                begin
                  hreg:=getintregister(list,size);
                  a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
                  a_op_reg_reg(list,OP_SUB,size,src1,hreg);
                  a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
                  exit;
                end;
              else
                internalerror(2014111005);
            end;
          end
        else if (op=OP_ROR) and
           not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
          internalerror(2014111006);
        if TOpCG2AsmOpReg[op]=A_NONE then
          internalerror(2014111007);
        list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
        maybeadjustresult(list,op,size,dst);
      end;
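
    { Illustrative expansion (not part of the original source): since there
      is no ROLV instruction, a 32 bit "rol" of w1 by w2 into w0 is emitted
      as a rotate right by the complementary amount (wT is a hypothetical
      temporary):
          movz wT,#32
          sub  wT,wT,w2
          rorv w0,w1,wT }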
    procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags: boolean; var ovloc: tlocation);
      var
        shiftcountmask: longint;
        constreg: tregister;
      begin
        { add/sub instructions have only positive immediate operands }
        if (op in [OP_ADD,OP_SUB]) and
           (a<0) then
          begin
            if op=OP_ADD then
              op:=OP_SUB
            else
              op:=OP_ADD;
            { avoid range/overflow error in case a = low(tcgint) }
{$push}{$r-}{$q-}
            a:=-a;
{$pop}
          end;
        ovloc.loc:=LOC_VOID;
        optimize_op_const(size,op,a);
        case op of
          OP_NONE:
            begin
              a_load_reg_reg(list,size,size,src,dst);
              exit;
            end;
          OP_MOVE:
            begin
              a_load_const_reg(list,size,a,dst);
              exit;
            end;
        end;
        case op of
          OP_ADD,
          OP_SUB:
            begin
              handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
              { on a 64 bit target, overflows with smaller data types
                are handled via range errors }
              if setflags and
                 (size in [OS_64,OS_S64]) then
                begin
                  location_reset(ovloc,LOC_FLAGS,OS_8);
                  if size=OS_64 then
                    if op=OP_ADD then
                      ovloc.resflags:=F_CS
                    else
                      ovloc.resflags:=F_CC
                  else
                    ovloc.resflags:=F_VS;
                end;
            end;
          OP_OR,
          OP_AND,
          OP_XOR:
            begin
              if not(size in [OS_64,OS_S64]) then
                a:=cardinal(a);
              if is_shifter_const(a,size) then
                list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
              else
                begin
                  constreg:=getintregister(list,size);
                  a_load_const_reg(list,size,a,constreg);
                  a_op_reg_reg_reg(list,op,size,constreg,src,dst);
                end;
            end;
          OP_SHL,
          OP_SHR,
          OP_SAR:
            begin
              if size in [OS_64,OS_S64] then
                shiftcountmask:=63
              else
                shiftcountmask:=31;
              if (a and shiftcountmask)<>0 then
                list.concat(taicpu.op_reg_reg_const(
                  TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
              else
                a_load_reg_reg(list,size,size,src,dst);
              if (a and not(tcgint(shiftcountmask)))<>0 then
                internalError(2014112101);
            end;
          OP_ROL,
          OP_ROR:
            begin
              case size of
                OS_32,OS_S32:
                  if (a and not(tcgint(31)))<>0 then
                    internalError(2014112102);
                OS_64,OS_S64:
                  if (a and not(tcgint(63)))<>0 then
                    internalError(2014112103);
                else
                  internalError(2014112104);
              end;
              { there's only a ror opcode }
              if op=OP_ROL then
                a:=(tcgsize2size[size]*8)-a;
              list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
            end;
          OP_MUL,
          OP_IMUL,
          OP_DIV,
          OP_IDIV:
            begin
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
            end;
          else
            internalerror(2014111403);
        end;
        maybeadjustresult(list,op,size,dst);
      end;
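
    { Illustrative examples (not part of the original source):
          OP_ADD with a=-1           -> sub x0,x1,#1    // constant negated,
                                                        // operation flipped
          OP_ROL by 8 on a 32 bit value
                                     -> ror w0,w1,#24   // only ROR exists
          OP_AND with a=$ff          -> and x0,x1,#255  // valid bitmask
                                                        // immediate }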
  1250. procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
  1251. var
  1252. tmpreg1: tregister;
  1253. begin
  1254. ovloc.loc:=LOC_VOID;
  1255. { overflow can only occur with 64 bit calculations on 64 bit cpus }
  1256. if setflags and
  1257. (size in [OS_64,OS_S64]) then
  1258. begin
  1259. case op of
  1260. OP_ADD,
  1261. OP_SUB:
  1262. begin
  1263. list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
  1264. ovloc.loc:=LOC_FLAGS;
  1265. if size=OS_64 then
  1266. if op=OP_ADD then
  1267. ovloc.resflags:=F_CS
  1268. else
  1269. ovloc.resflags:=F_CC
  1270. else
  1271. ovloc.resflags:=F_VS;
  1272. { finished; since we won't call through to a_op_reg_reg_reg,
  1273. adjust the result here if necessary }
  1274. maybeadjustresult(list,op,size,dst);
  1275. exit;
  1276. end;
              OP_MUL:
                begin
                  { check whether the upper 64 bit of the 128 bit product is 0 }
                  tmpreg1:=getintregister(list,OS_64);
                  list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
                  list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
                  ovloc.loc:=LOC_FLAGS;
                  ovloc.resflags:=F_NE;
                  { still have to perform the actual multiplication }
                end;
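              { Illustrative sketch (hypothetical registers): the emitted
                check is roughly
                    umulh x2, x1, x0   // upper 64 bits of the 128 bit product
                    cmp   x2, #0
                    mul   x3, x1, x0   // actual product, emitted further down
                with a non-zero upper half (F_NE) signalling overflow. }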
              OP_IMUL:
                begin
                  { check whether the sign bit of the (128 bit) result is the
                    same as "sign bit of src1" xor "sign bit of src2" (if so, no
                    overflow and the xor-product of all sign bits is 0) }
                  tmpreg1:=getintregister(list,OS_64);
                  list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src1));
                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src2));
                  { the sign bit of a 64 bit register is bit 63 }
                  list.concat(taicpu.op_reg_const(A_TST,tmpreg1,tcgint($8000000000000000)));
                  ovloc.loc:=LOC_FLAGS;
                  ovloc.resflags:=F_NE;
                  { still have to perform the actual multiplication }
                end;
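              { Illustrative sketch (hypothetical registers): for a signed
                multiplication the check emitted above is roughly
                    smulh x2, x1, x0   // upper 64 bits of the 128 bit product
                    eor   x2, x2, x1
                    eor   x2, x2, x0
                    tst   x2, #0x8000000000000000
                so F_NE is set when the 128 bit sign bit differs from the xor
                of the operands' sign bits. }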
              OP_IDIV,
              OP_DIV:
                begin
                  { not handled here; needs a div-by-zero check (dividing by
                    zero just gives a 0 result on aarch64) and a
                    low(int64) div -1 check for overflow }
                  internalerror(2014122101);
                end;
            end;
          end;
        a_op_reg_reg_reg(list,op,size,src1,src2,dst);
      end;

{*************** compare instructions ****************}

    procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
      var
        op: tasmop;
      begin
        if a>=0 then
          op:=A_CMP
        else
          op:=A_CMN;
        { avoid range/overflow error in case a=low(tcgint) }
{$push}{$r-}{$q-}
        handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
{$pop}
        a_jmp_cond(list,cmp_op,l);
      end;
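
    { Illustrative example (hypothetical operands): comparing a register with
      -5 is emitted as a compare-negative, since CMP only takes unsigned
      12 bit immediates:
          cmn   x0, #5       // sets the flags for x0 - (-5)
      The $r-/$q- block above keeps abs(low(tcgint)) from raising a range or
      overflow error inside the compiler itself. }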

    procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
      begin
        list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
        a_jmp_cond(list,cmp_op,l);
      end;


    procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
      var
        ai: taicpu;
      begin
        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name));
        ai.is_jmp:=true;
        list.Concat(ai);
      end;

    procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
      var
        ai: taicpu;
      begin
        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s));
        ai.is_jmp:=true;
        list.Concat(ai);
      end;


    procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
      var
        ai: taicpu;
      begin
        ai:=TAiCpu.op_sym(A_B,l);
        ai.is_jmp:=true;
        ai.SetCondition(TOpCmp2AsmCond[cond]);
        list.Concat(ai);
      end;


    procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
      var
        ai : taicpu;
      begin
        ai:=Taicpu.op_sym(A_B,l);
        ai.is_jmp:=true;
        ai.SetCondition(flags_to_cond(f));
        list.Concat(ai);
      end;


    procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
      begin
        list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
      end;

    procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
      begin
        { we need an explicit overflow location, because there are many
          possibilities (not just the overflow flag, which is only used for
          signed add/sub) }
        internalerror(2014112303);
      end;


    procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
      var
        hl : tasmlabel;
        hflags : tresflags;
      begin
        if not(cs_check_overflow in current_settings.localswitches) then
          exit;
        current_asmdata.getjumplabel(hl);
        case ovloc.loc of
          LOC_FLAGS:
            begin
              hflags:=ovloc.resflags;
              inverse_flags(hflags);
              cg.a_jmp_flags(list,hflags,hl);
            end;
          else
            internalerror(2014112304);
        end;
        a_call_name(list,'FPC_OVERFLOW',false);
        a_label(list,hl);
      end;
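
    { Illustrative sketch of the code produced by g_overflowcheck_loc for a
      signed addition (label name hypothetical): the flags are inverted so
      the no-overflow path jumps over the helper call:
          b.vc  .Lnoovfl
          bl    FPC_OVERFLOW
        .Lnoovfl:
    }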

{ *********** entry/exit code and address loading ************ }

    function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
      var
        ref: treference;
        sr: tsuperregister;
        pairreg: tregister;
      begin
        result:=0;
        reference_reset_base(ref,NR_SP,-16,16);
        ref.addressmode:=AM_PREINDEXED;
        pairreg:=NR_NO;
        { store all used registers pairwise }
        for sr:=lowsr to highsr do
          if sr in rg[rt].used_in_proc then
            if pairreg=NR_NO then
              pairreg:=newreg(rt,sr,sub)
            else
              begin
                inc(result,16);
                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
                pairreg:=NR_NO
              end;
        { one left -> store twice (stack must be 16 bytes aligned) }
        if pairreg<>NR_NO then
          begin
            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
            inc(result,16);
          end;
      end;
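
    { Illustrative example (assuming x19, x20 and x21 are modified in the
      routine): save_regs emits
          stp   x19, x20, [sp, #-16]!
          stp   x21, x21, [sp, #-16]!   // odd count: stored twice to keep
                                        // the stack 16 byte aligned
      and returns 32 as the amount of stack space used. }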

    procedure FixupOffsets(p:TObject;arg:pointer);
      var
        sym: tabstractnormalvarsym absolute p;
      begin
        if (tsym(p).typ in [paravarsym,localvarsym]) and
           (sym.localloc.loc=LOC_REFERENCE) and
           (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
          begin
            sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
            dec(sym.localloc.reference.offset,PLongint(arg)^);
          end;
      end;

    procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
      var
        ref: treference;
        totalstackframesize: longint;
      begin
        if nostackframe then
          exit;
        { stack pointer has to be aligned to 16 bytes at all times }
        localsize:=align(localsize,16);
        { save stack pointer and return address }
        reference_reset_base(ref,NR_SP,-16,16);
        ref.addressmode:=AM_PREINDEXED;
        list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
        { initialise frame pointer }
        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
        totalstackframesize:=localsize;
        { save modified integer registers }
        inc(totalstackframesize,
          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
        { only the lower 64 bits of the modified vector registers need to be
          saved; if the caller needs the upper 64 bits, it has to save them
          itself }
        inc(totalstackframesize,
          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
        { allocate stack space }
        if localsize<>0 then
          begin
            localsize:=align(localsize,16);
            current_procinfo.final_localsize:=localsize;
            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
          end;
        { By default, we use the frame pointer to access parameters passed via
          the stack and the stack pointer to address local variables and temps
          because
           a) we can use bigger positive than negative offsets (so accessing
              locals via negative offsets from the frame pointer would be less
              efficient)
           b) we don't know the local size while generating the code, so
              accessing the parameters via the stack pointer is not possible
              without copying them
          The problem with this is the get_frame() intrinsic:
           a) it must return the same value as what we pass as parentfp
              parameter, since that's how it's used in the TP-style objects unit
           b) its return value must be usable to access all local data from a
              routine (locals and parameters), since that's all the nested
              routines have access to
           c) its return value must be usable to construct a backtrace, as it's
              also used by the exception handling routines
          The solution we use here, based on something similar that's done in
          the MIPS port, is to generate all accesses to locals in the routine
          itself SP-relative, and then after the code is generated and the local
          size is known (namely, here), we change all SP-relative variables/
          parameters into FP-relative ones. This means that they'll be accessed
          less efficiently from nested routines, but those accesses are indirect
          anyway and at least this way they can be accessed at all
        }
        if current_procinfo.has_nestedprocs then
          begin
            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
          end;
      end;
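
    { Illustrative prologue for a routine with 32 bytes of locals that
      modifies x19/x20 (offsets and registers hypothetical):
          stp   x29, x30, [sp, #-16]!   // save FP and LR
          mov   x29, sp                 // set up frame pointer
          stp   x19, x20, [sp, #-16]!   // save callee-saved registers
          sub   sp, sp, #32             // allocate locals
      With nested routines, an SP-relative local at [sp+8] and
      totalstackframesize=48 is afterwards rewritten by FixupOffsets to the
      FP-relative [x29-40]. }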

    procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
      begin
        { nothing to do on Darwin; check on ELF targets }
        if not(target_info.system in systems_darwin) then
          internalerror(2014112601);
      end;


    procedure tcgaarch64.g_restore_registers(list:TAsmList);
      begin
        { done in g_proc_exit }
      end;

    procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
      var
        ref: treference;
        sr, highestsetsr: tsuperregister;
        pairreg: tregister;
        regcount: longint;
      begin
        reference_reset_base(ref,NR_SP,16,16);
        ref.addressmode:=AM_POSTINDEXED;
        { highest reg stored twice? }
        regcount:=0;
        highestsetsr:=RS_NO;
        for sr:=lowsr to highsr do
          if sr in rg[rt].used_in_proc then
            begin
              inc(regcount);
              highestsetsr:=sr;
            end;
        if odd(regcount) then
          begin
            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
            highestsetsr:=pred(highestsetsr);
          end;
        { load all (other) used registers pairwise }
        pairreg:=NR_NO;
        for sr:=highestsetsr downto lowsr do
          if sr in rg[rt].used_in_proc then
            if pairreg=NR_NO then
              pairreg:=newreg(rt,sr,sub)
            else
              begin
                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
                pairreg:=NR_NO
              end;
        { There can't be any register left }
        if pairreg<>NR_NO then
          internalerror(2014112602);
      end;
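
    { Illustrative example (assuming x19, x20 and x21 were saved): load_regs
      mirrors save_regs in reverse order,
          ldr   x21, [sp], #16          // odd register, stored twice
          ldp   x19, x20, [sp], #16
      using post-indexed loads so SP ends up back above the register save
      area. }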

    procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
      var
        ref: treference;
        regsstored: boolean;
        sr: tsuperregister;
      begin
        if not nostackframe then
          begin
            { if no registers have been stored, we don't have to subtract the
              allocated temp space from the stack pointer }
            regsstored:=false;
            for sr:=RS_X19 to RS_X28 do
              if sr in rg[R_INTREGISTER].used_in_proc then
                begin
                  regsstored:=true;
                  break;
                end;
            if not regsstored then
              for sr:=RS_D8 to RS_D15 do
                if sr in rg[R_MMREGISTER].used_in_proc then
                  begin
                    regsstored:=true;
                    break;
                  end;
            { restore registers (and stack pointer) }
            if regsstored then
              begin
                if current_procinfo.final_localsize<>0 then
                  handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
                load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
                load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
              end
            else if current_procinfo.final_localsize<>0 then
              { restore stack pointer }
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
            { restore framepointer and return address }
            reference_reset_base(ref,NR_SP,16,16);
            ref.addressmode:=AM_POSTINDEXED;
            list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
          end;
        { return }
        list.concat(taicpu.op_none(A_RET));
      end;

    procedure tcgaarch64.g_save_registers(list : TAsmList);
      begin
        { done in g_proc_entry }
      end;

{ ************* concatcopy ************ }

    procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
      var
        paraloc1,paraloc2,paraloc3 : TCGPara;
        pd : tprocdef;
      begin
        pd:=search_system_proc('MOVE');
        paraloc1.init;
        paraloc2.init;
        paraloc3.init;
        paramanager.getintparaloc(list,pd,1,paraloc1);
        paramanager.getintparaloc(list,pd,2,paraloc2);
        paramanager.getintparaloc(list,pd,3,paraloc3);
        a_load_const_cgpara(list,OS_SINT,len,paraloc3);
        a_loadaddr_ref_cgpara(list,dest,paraloc2);
        a_loadaddr_ref_cgpara(list,source,paraloc1);
        paramanager.freecgpara(list,paraloc3);
        paramanager.freecgpara(list,paraloc2);
        paramanager.freecgpara(list,paraloc1);
        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
        a_call_name(list,'FPC_MOVE',false);
        dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        paraloc3.done;
        paraloc2.done;
        paraloc1.done;
      end;

    procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
      var
        sourcebasereplaced, destbasereplaced: boolean;

        { get optimal memory operation to use for loading/storing data
          in an unrolled loop }
        procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
          begin
            if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
               (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
              begin
                { the scaled opcode can directly encode both the first and
                  the last offset }
                memop:=scaledop;
                needsimplify:=false;
              end
            else if (unscaledop<>A_NONE) and
                    (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
                    (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
              begin
                memop:=unscaledop;
                needsimplify:=false;
              end
            else
              begin
                memop:=scaledop;
                needsimplify:=true;
              end;
          end;

        { adjust the offset and/or addressing mode after a load/store so it's
          correct for the next one of the same size }
        procedure updaterefafterloadstore(var ref: treference; oplen: longint);
          begin
            case ref.addressmode of
              AM_OFFSET:
                inc(ref.offset,oplen);
              AM_POSTINDEXED:
                { base register updated by instruction, next offset can remain
                  the same }
                ;
              AM_PREINDEXED:
                begin
                  { base register updated by instruction -> next instruction can
                    use post-indexing with offset = sizeof(operation) }
                  ref.offset:=0;
                  ref.addressmode:=AM_OFFSET;
                end;
            end;
          end;

        { generate a load/store and adjust the reference offset to the next
          memory location if necessary }
        procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
          begin
            list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
            updaterefafterloadstore(ref,tcgsize2size[opsize]);
          end;


        { generate a dual load/store (ldp/stp) and adjust the reference offset
          to the next memory location if necessary }
        procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
          begin
            list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
            updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
          end;

        { turn a reference into a pre- or post-indexed reference for use in a
          load/store of a particular size }
        procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
          var
            tmpreg: tregister;
            scaledoffset: longint;
            orgaddressmode: taddressmode;
          begin
            scaledoffset:=tcgsize2size[opsize];
            if scaledop in [A_LDP,A_STP] then
              scaledoffset:=scaledoffset*2;
            { can we use the reference as post-indexed without changes? }
            if forcepostindexing then
              begin
                orgaddressmode:=ref.addressmode;
                ref.addressmode:=AM_POSTINDEXED;
                if (orgaddressmode=AM_POSTINDEXED) or
                   ((ref.offset=0) and
                    (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
                  begin
                    { just change the post-indexed offset to the access size }
                    ref.offset:=scaledoffset;
                    { and replace the base register if that didn't happen yet
                      (could be sp or a regvar) }
                    if not basereplaced then
                      begin
                        tmpreg:=getaddressregister(list);
                        a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
                        ref.base:=tmpreg;
                        basereplaced:=true;
                      end;
                    exit;
                  end;
                ref.addressmode:=orgaddressmode;
              end;
{$ifdef dummy}
            This could in theory be useful in case you have a concatcopy from
            e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
            very unlikely. Disabled because it still needs fixes, as it
            also generates pre-indexed loads right now at the very end for the
            left-over gencopies

            { can we turn it into a pre-indexed reference for free? (after the
              first operation, it will be turned into an offset one) }
            if not forcepostindexing and
               (ref.offset<>0) then
              begin
                orgaddressmode:=ref.addressmode;
                ref.addressmode:=AM_PREINDEXED;
                tmpreg:=ref.base;
                if not basereplaced and
                   (ref.base=tmpreg) then
                  begin
                    tmpreg:=getaddressregister(list);
                    a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
                    ref.base:=tmpreg;
                    basereplaced:=true;
                  end;
                if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
                  make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
                exit;
              end;
{$endif dummy}
            if not forcepostindexing then
              begin
                ref.addressmode:=AM_OFFSET;
                make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
                { this may still cause problems if the final offset is no longer
                  a simple ref; it's a bit complicated to pass all information
                  through at all places and check that here, so play safe: we
                  currently never generate unrolled copies for more than 64
                  bytes (32 with non-double-register copies) }
                if ref.index=NR_NO then
                  begin
                    if ((scaledop in [A_LDP,A_STP]) and
                        (ref.offset<((64-8)*tcgsize2size[opsize]))) or
                       ((scaledop in [A_LDUR,A_STUR]) and
                        (ref.offset<(255-8*tcgsize2size[opsize]))) or
                       ((scaledop in [A_LDR,A_STR]) and
                        (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
                      exit;
                  end;
              end;
            tmpreg:=getaddressregister(list);
            a_loadaddr_ref_reg(list,ref,tmpreg);
            basereplaced:=true;
            if forcepostindexing then
              begin
                reference_reset_base(ref,tmpreg,scaledoffset,ref.alignment);
                ref.addressmode:=AM_POSTINDEXED;
              end
            else
              begin
                reference_reset_base(ref,tmpreg,0,ref.alignment);
                ref.addressmode:=AM_OFFSET;
              end
          end;
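
        { Illustrative example (hypothetical reference): an ldp copy from
          x1+4000 exceeds the scaled 7 bit pair-offset range (+-512 for 64 bit
          pairs), so the address is materialised first and the loop then uses
          post-indexing, roughly:
              add   x2, x1, #4000       // via a_loadaddr_ref_reg
              ldp   x3, x4, [x2], #16 }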

        { prepare a reference for use by gencopy. This is done both after the
          unrolled and regular copy loop -> get rid of post-indexing mode, make
          sure ref is valid }
        procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
          var
            simplify: boolean;
          begin
            if ref.addressmode=AM_POSTINDEXED then
              ref.offset:=tcgsize2size[opsize];
            getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
            if simplify then
              begin
                makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
                op:=scaledop;
              end;
          end;


        { generate a copy from source to dest of size opsize/postfix }
        procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
          var
            reg: tregister;
            loadop, storeop: tasmop;
          begin
            preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
            preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
            reg:=getintregister(list,opsize);
            genloadstore(list,loadop,reg,source,postfix,opsize);
            genloadstore(list,storeop,reg,dest,postfix,opsize);
          end;

        { copy the leftovers after an unrolled or regular copy loop }
        procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
          begin
            { stop post-indexing if we did so in the loop, since in that case all
              offsets definitely can be represented now }
            if source.addressmode=AM_POSTINDEXED then
              begin
                source.addressmode:=AM_OFFSET;
                source.offset:=0;
              end;
            if dest.addressmode=AM_POSTINDEXED then
              begin
                dest.addressmode:=AM_OFFSET;
                dest.offset:=0;
              end;
            { transfer the leftovers }
            if len>=8 then
              begin
                dec(len,8);
                gencopy(list,source,dest,PF_NONE,OS_64);
              end;
            if len>=4 then
              begin
                dec(len,4);
                gencopy(list,source,dest,PF_NONE,OS_32);
              end;
            if len>=2 then
              begin
                dec(len,2);
                gencopy(list,source,dest,PF_H,OS_16);
              end;
            if len>=1 then
              begin
                dec(len);
                gencopy(list,source,dest,PF_B,OS_8);
              end;
          end;
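
        { Illustrative example (registers and offsets hypothetical): a
          leftover of 7 bytes is transferred with one copy per power of two:
              ldr   w2, [x0, #16]        // 4 bytes
              str   w2, [x1, #16]
              ldrh  w2, [x0, #20]        // 2 bytes
              strh  w2, [x1, #20]
              ldrb  w2, [x0, #22]        // 1 byte
              strb  w2, [x1, #22] }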

      const
        { load_length + loop dec + cbnz }
        loopoverhead=12;
        { loop overhead + load + store }
        totallooplen=loopoverhead + 8;
      var
        totalalign: longint;
        maxlenunrolled: tcgint;
        loadop, storeop: tasmop;
        opsize: tcgsize;
        postfix: toppostfix;
        tmpsource, tmpdest: treference;
        scaledstoreop, unscaledstoreop,
        scaledloadop, unscaledloadop: tasmop;
        regs: array[1..8] of tregister;
        countreg: tregister;
        i, regcount: longint;
        hl: tasmlabel;
        simplifysource, simplifydest: boolean;
      begin
        if len=0 then
          exit;
        sourcebasereplaced:=false;
        destbasereplaced:=false;
        { maximum common alignment }
        totalalign:=max(1,newalignment(source.alignment,dest.alignment));
        { use a simple load/store? }
        if (len in [1,2,4,8]) and
           ((totalalign>=(len div 2)) or
            (source.alignment=len) or
            (dest.alignment=len)) then
          begin
            opsize:=int_cgsize(len);
            a_load_ref_ref(list,opsize,opsize,source,dest);
            exit;
          end;
        { alignment > length is not useful, and would break some checks below }
        while totalalign>len do
          totalalign:=totalalign div 2;
        { operation sizes to use based on common alignment }
        case totalalign of
          1:
            begin
              postfix:=PF_B;
              opsize:=OS_8;
            end;
          2:
            begin
              postfix:=PF_H;
              opsize:=OS_16;
            end;
          4:
            begin
              postfix:=PF_None;
              opsize:=OS_32;
            end
          else
            begin
              totalalign:=8;
              postfix:=PF_None;
              opsize:=OS_64;
            end;
        end;
        { maximum length to handle with an unrolled loop (4 loads + 4 stores) }
        maxlenunrolled:=min(totalalign,8)*4;
        { ldp/stp -> 2 registers per instruction }
        if (totalalign>=4) and
           (len>=totalalign*2) then
          begin
            maxlenunrolled:=maxlenunrolled*2;
            scaledstoreop:=A_STP;
            scaledloadop:=A_LDP;
            unscaledstoreop:=A_NONE;
            unscaledloadop:=A_NONE;
          end
        else
          begin
            scaledstoreop:=A_STR;
            scaledloadop:=A_LDR;
            unscaledstoreop:=A_STUR;
            unscaledloadop:=A_LDUR;
          end;
        { we only need 4 instructions extra to call FPC_MOVE }
        if cs_opt_size in current_settings.optimizerswitches then
          maxlenunrolled:=maxlenunrolled div 2;
        if (len>maxlenunrolled) and
           (len>totalalign*8) then
          begin
            g_concatcopy_move(list,source,dest,len);
            exit;
          end;
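        { Illustrative numbers: for 8 byte aligned operands ldp/stp is used,
          so maxlenunrolled = min(8,8)*4*2 = 64 bytes. A 48 byte copy is
          unrolled below; a 100 byte copy fails both tests (100>64 and
          100>8*8) and is delegated to FPC_MOVE above. }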
        simplifysource:=true;
        simplifydest:=true;
        tmpsource:=source;
        tmpdest:=dest;
        { can we directly encode all offsets in an unrolled loop? }
        if len<=maxlenunrolled then
          begin
{$ifdef extdebug}
            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
{$endif extdebug}
            { the leftovers will be handled separately -> -(len mod opsize) }
            inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
            { additionally, the last regular load/store will be at
              offset+len-opsize (if len-(len mod opsize)>0) }
            if tmpsource.offset>source.offset then
              dec(tmpsource.offset,tcgsize2size[opsize]);
            getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
            inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
            if tmpdest.offset>dest.offset then
              dec(tmpdest.offset,tcgsize2size[opsize]);
            getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
            tmpsource:=source;
            tmpdest:=dest;
            { if we can't directly encode all offsets, simplify }
            if simplifysource then
              begin
                loadop:=scaledloadop;
                makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
              end;
            if simplifydest then
              begin
                storeop:=scaledstoreop;
                makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
              end;
            regcount:=len div tcgsize2size[opsize];
            { in case we transfer two registers at a time, we copy an even
              number of registers }
            if loadop=A_LDP then
              regcount:=regcount and not(1);
            { initialise for dfa }
            regs[low(regs)]:=NR_NO;
            { max 4 loads/stores -> max 8 registers (in case of ldp/stp) }
            for i:=1 to regcount do
              regs[i]:=getintregister(list,opsize);
            if loadop=A_LDP then
              begin
                { load registers }
                for i:=1 to (regcount div 2) do
                  gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
                { store registers }
                for i:=1 to (regcount div 2) do
                  gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
              end
            else
              begin
                for i:=1 to regcount do
                  genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
                for i:=1 to regcount do
                  genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
              end;
            { leftover }
            len:=len-regcount*tcgsize2size[opsize];
{$ifdef extdebug}
            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
{$endif extdebug}
          end
        else
          begin
{$ifdef extdebug}
            list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
{$endif extdebug}
            { regular loop -> definitely use post-indexing }
            loadop:=scaledloadop;
            makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
            storeop:=scaledstoreop;
            makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
            current_asmdata.getjumplabel(hl);
            countreg:=getintregister(list,OS_32);
            if loadop=A_LDP then
              { each ldp/stp iteration transfers two elements of size opsize }
              a_load_const_reg(list,OS_32,len div (tcgsize2size[opsize]*2),countreg)
            else
              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
            a_label(list,hl);
            a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
            if loadop=A_LDP then
              begin
                regs[1]:=getintregister(list,opsize);
                regs[2]:=getintregister(list,opsize);
                gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
                gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
              end
            else
              begin
                regs[1]:=getintregister(list,opsize);
                genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
                genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
              end;
            list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
            if loadop=A_LDP then
              { the loop transferred pairs -> up to 2*opsize-1 bytes remain }
              len:=len mod (tcgsize2size[opsize]*2)
            else
              len:=len mod tcgsize2size[opsize];
          end;
        gencopyleftovers(list,tmpsource,tmpdest,len);
      end;
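
    { Illustrative output for a 24 byte copy of 8 byte aligned data
      (registers hypothetical), using the unrolled path with ldp/stp and a
      single-register copy for the leftover 8 bytes:
          ldp   x2, x3, [x0]
          stp   x2, x3, [x1]
          ldr   x4, [x0, #16]
          str   x4, [x1, #16]
    }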

    procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
      begin
        { This method is integrated into g_intf_wrapper and shouldn't be called separately }
        InternalError(2013020102);
      end;


    procedure create_codegen;
      begin
        cg:=tcgaarch64.Create;
        cg128:=tcg128.Create;
      end;

end.