cgcpu.pas 89 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282
  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. This unit implements the code generator for AArch64
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,parabase,
  22. cgbase,cgutils,cgobj,
  23. aasmbase,aasmtai,aasmdata,aasmcpu,
  24. cpubase,cpuinfo,
  25. node,symconst,SymType,symdef,
  26. rgcpu;
  27. type
{ AArch64 code generator class. Overrides the generic tcg entry points
  (a_load_*, a_op_*, g_proc_entry/exit, ...) with AArch64-specific code
  emission; all floating point is done in the vector (mm) registers. }
  28. tcgaarch64=class(tcg)
  29. protected
  30. { simplifies "ref" so it can be used with "op". If "ref" can be used
  31. with a different load/Store operation that has the same meaning as the
  32. original one, "op" will be replaced with the alternative }
  33. procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
  34. { changes register size without adding register allocation info }
  35. function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
  36. public
{ classic FPU registers are not used on AArch64; the implementation of
  getfpuregister below always triggers an internal error }
  37. function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
{ emits "op" with immediate "a", using the imm12 / imm12-lsl-12
  arithmetic-immediate encodings, or a temp register when out of range }
  38. procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
  39. procedure init_register_allocators;override;
  40. procedure done_register_allocators;override;
  41. function getmmregister(list:TAsmList;size:tcgsize):tregister;override;
  42. function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
  43. procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
  44. procedure a_call_reg(list:TAsmList;Reg:tregister);override;
  45. { General purpose instructions }
  46. procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
  47. procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
  48. procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
  49. procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
  50. procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
  51. procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
  52. procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
  53. { move instructions }
  54. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
  55. procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
  56. procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
  57. procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
  58. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
  59. procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
  60. procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
  61. procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
  62. { fpu move instructions (not used, all floating point is vector unit-based) }
  63. procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  64. procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  65. procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
  66. procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
  67. procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
  68. procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
  69. procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  70. procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
  71. procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
  72. procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
  73. { comparison operations }
  74. procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
  75. procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
  76. procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
  77. procedure a_jmp_name(list: TAsmList; const s: string);override;
  78. procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
  79. procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
  80. procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
  81. procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
  82. procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
  83. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
  84. procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
  85. procedure g_maybe_got_init(list: TAsmList); override;
  86. procedure g_restore_registers(list: TAsmList);override;
  87. procedure g_save_registers(list: TAsmList);override;
  88. procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
  89. procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
  90. procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
  91. procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
  92. private
{ save/restore helpers used by g_proc_entry/g_proc_exit for the callee-saved
  registers of one register type; save_regs returns the number of bytes used }
  93. function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
  94. procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
  95. end;
  96. procedure create_codegen;
  97. const
{ maps the generic code generator operations (topcg) to the AArch64 opcode
  used when the second operand is a register; note the register-shift
  variants ASRV/LSLV/LSRV/RORV for the shift/rotate operations }
  98. TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
  99. A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
  100. );
{ same mapping for an immediate second operand; here the shifts/rotates use
  the immediate aliases ASR/LSL/LSR/ROR instead }
  101. TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
  102. A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
  103. );
{ maps generic comparison operators to the AArch64 condition codes used in
  conditional branches/selects }
  104. TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  105. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
  106. );
  107. implementation
  108. uses
  109. globals,verbose,systems,cutils,
  110. paramgr,fmodule,
  111. symtable,symsym,
  112. tgobj,
  113. procinfo,cpupi;
{ Legalises "ref" so it can be encoded with "op"/"size"/"oppostfix".
  Works in phases:
    1) normalise (index-only -> base-only), reject unsupported scale factors,
       and exit immediately if simple_ref_type() says the reference is
       already encodable;
    2) resolve symbol references (currently only for Darwin, via
       ADRP + page-offset / GOT load);
    3) fold base+index into a single base register when "op" cannot encode
       base+index (or the shift/offset combination is invalid);
    4) fold out-of-range offsets into the base register, switching between
       LDR/STR (scaled unsigned imm12) and LDUR/STUR (signed imm9) where
       that alone makes the offset encodable;
    5) if only an offset remains, materialise it in a register.
  "op" is a var parameter and may be replaced with an equivalent opcode;
  "preferred_newbasereg" is used as scratch base register when usable,
  otherwise a new address register is allocated. }
  114. procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
  115. var
  116. href: treference;
  117. so: tshifterop;
  118. accesssize: longint;
  119. begin
{ index register only -> treat it as the base register }
  120. if (ref.base=NR_NO) then
  121. begin
  122. if ref.shiftmode<>SM_None then
  123. internalerror(2014110701);
  124. ref.base:=ref.index;
  125. ref.index:=NR_NO;
  126. end;
  127. { no abitrary scale factor support (the generic code doesn't set it,
  128. AArch-specific code shouldn't either) }
  129. if not(ref.scalefactor in [0,1]) then
  130. internalerror(2014111002);
  131. case simple_ref_type(op,size,oppostfix,ref) of
  132. sr_simple:
  133. exit;
  134. sr_internal_illegal:
  135. internalerror(2014121702);
  136. sr_complex:
  137. { continue } ;
  138. end;
  139. if assigned(ref.symbol) then
  140. begin
  141. { internal "load symbol" instructions should already be valid }
  142. if assigned(ref.symboldata) or
  143. (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
  144. internalerror(2014110802);
  145. { no relative symbol support (needed) yet }
  146. if assigned(ref.relsymbol) then
  147. internalerror(2014111001);
  148. { on Darwin: load the address from the GOT. There does not appear to
  149. be a non-GOT variant. This consists of first loading the address
  150. of the page containing the GOT entry for this variable, and then
  151. the address of the entry itself from that page (can be relaxed by
  152. the linker in case the variable itself can be stored directly in
  153. the GOT) }
  154. if target_info.system in systems_darwin then
  155. begin
{ make sure the scratch register does not clash with registers that are
  still needed to form the final address }
  156. if (preferred_newbasereg=NR_NO) or
  157. (ref.base=preferred_newbasereg) or
  158. (ref.index=preferred_newbasereg) then
  159. preferred_newbasereg:=getaddressregister(list);
  160. { load the (GOT) page }
  161. reference_reset_symbol(href,ref.symbol,0,8);
  162. if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
  163. (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
  164. ((ref.symbol.typ=AT_DATA) and
  165. (ref.symbol.bind=AB_LOCAL)) then
  166. href.refaddr:=addr_page
  167. else
  168. href.refaddr:=addr_gotpage;
  169. list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
  170. { load the GOT entry (= address of the variable) }
  171. reference_reset_base(href,preferred_newbasereg,0,sizeof(pint));
  172. href.symbol:=ref.symbol;
  173. { code symbols defined in the current compilation unit do not
  174. have to be accessed via the GOT }
  175. if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
  176. (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
  177. ((ref.symbol.typ=AT_DATA) and
  178. (ref.symbol.bind=AB_LOCAL)) then
  179. begin
  180. href.base:=NR_NO;
  181. href.refaddr:=addr_pageoffset;
  182. list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
  183. end
  184. else
  185. begin
  186. href.refaddr:=addr_gotpageoffset;
  187. { use a_load_ref_reg() rather than directly encoding the LDR,
  188. so that we'll check the validity of the reference }
  189. a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
  190. end;
  191. { set as new base register }
  192. if ref.base=NR_NO then
  193. ref.base:=preferred_newbasereg
  194. else if ref.index=NR_NO then
  195. ref.index:=preferred_newbasereg
  196. else
  197. begin
  198. { make sure it's valid in case ref.base is SP -> make it
  199. the second operand}
  200. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
  201. ref.base:=preferred_newbasereg
  202. end;
  203. ref.symbol:=nil;
  204. end
  205. else
  206. { todo }
  207. internalerror(2014111003);
  208. end;
  209. { base & index }
  210. if (ref.base<>NR_NO) and
  211. (ref.index<>NR_NO) then
  212. begin
  213. case op of
  214. A_LDR, A_STR:
  215. begin
  216. if (ref.shiftmode=SM_None) and
  217. (ref.shiftimm<>0) then
  218. internalerror(2014110805);
  219. { wrong shift? (possible in case of something like
  220. array_of_2byte_rec[x].bytefield -> shift will be set 1, but
  221. the final load is a 1 byte -> can't use shift after all }
  222. if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
  223. ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
  224. (ref.offset<>0)) then
  225. begin
  226. if preferred_newbasereg=NR_NO then
  227. preferred_newbasereg:=getaddressregister(list);
  228. { "add" supports a superset of the shift modes supported by
  229. load/store instructions }
  230. shifterop_reset(so);
  231. so.shiftmode:=ref.shiftmode;
  232. so.shiftimm:=ref.shiftimm;
  233. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
  234. reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
  235. { possibly still an invalid offset -> fall through }
  236. end
  237. else if ref.offset<>0 then
  238. begin
  239. if (preferred_newbasereg=NR_NO) or
  240. { we keep ref.index, so it must not be overwritten }
  241. (ref.index=preferred_newbasereg) then
  242. preferred_newbasereg:=getaddressregister(list);
  243. { add to the base and not to the index, because the index
  244. may be scaled; this works even if the base is SP }
  245. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
  246. ref.offset:=0;
  247. ref.base:=preferred_newbasereg;
  248. { finished }
  249. exit;
  250. end
  251. else
  252. { valid -> exit }
  253. exit;
  254. end;
  255. { todo }
  256. A_LD1,A_LD2,A_LD3,A_LD4,
  257. A_ST1,A_ST2,A_ST3,A_ST4:
  258. internalerror(2014110704);
  259. { these don't support base+index }
  260. A_LDUR,A_STUR,
  261. A_LDP,A_STP:
  262. begin
  263. { these either don't support pre-/post-indexing, or don't
  264. support it with base+index }
  265. if ref.addressmode<>AM_OFFSET then
  266. internalerror(2014110911);
  267. if preferred_newbasereg=NR_NO then
  268. preferred_newbasereg:=getaddressregister(list);
  269. if ref.shiftmode<>SM_None then
  270. begin
  271. { "add" supports a superset of the shift modes supported by
  272. load/store instructions }
  273. shifterop_reset(so);
  274. so.shiftmode:=ref.shiftmode;
  275. so.shiftimm:=ref.shiftimm;
  276. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
  277. end
  278. else
  279. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
  280. reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
  281. { fall through to the handling of base + offset, since the
  282. offset may still be too big }
  283. end;
  284. else
  285. internalerror(2014110901);
  286. end;
  287. end;
  288. { base + offset }
  289. if ref.base<>NR_NO then
  290. begin
  291. { valid offset for LDUR/STUR -> use that }
  292. if (ref.addressmode=AM_OFFSET) and
  293. (op in [A_LDR,A_STR]) and
  294. (ref.offset>=-256) and
  295. (ref.offset<=255) then
  296. begin
  297. if op=A_LDR then
  298. op:=A_LDUR
  299. else
  300. op:=A_STUR
  301. end
  302. { if it's not a valid LDUR/STUR, use LDR/STR }
  303. else if (op in [A_LDUR,A_STUR]) and
  304. ((ref.offset<-256) or
  305. (ref.offset>255) or
  306. (ref.addressmode<>AM_OFFSET)) then
  307. begin
  308. if op=A_LDUR then
  309. op:=A_LDR
  310. else
  311. op:=A_STR
  312. end;
  313. case op of
  314. A_LDR,A_STR:
  315. begin
  316. case ref.addressmode of
  317. AM_PREINDEXED:
  318. begin
  319. { since the loaded/stored register cannot be the same
  320. as the base register, we can safely add the
  321. offset to the base if it doesn't fit}
  322. if (ref.offset<-256) or
  323. (ref.offset>255) then
  324. begin
  325. a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
  326. ref.offset:=0;
  327. end;
  328. end;
  329. AM_POSTINDEXED:
  330. begin
  331. { cannot emulate post-indexing if we have to fold the
  332. offset into the base register }
  333. if (ref.offset<-256) or
  334. (ref.offset>255) then
  335. internalerror(2014110909);
  336. { ok }
  337. end;
  338. AM_OFFSET:
  339. begin
  340. { unsupported offset -> fold into base register }
  341. accesssize:=1 shl tcgsizep2size[size];
  342. if (ref.offset<0) or
  343. (ref.offset>(((1 shl 12)-1)*accesssize)) or
  344. ((ref.offset mod accesssize)<>0) then
  345. begin
  346. if preferred_newbasereg=NR_NO then
  347. preferred_newbasereg:=getaddressregister(list);
  348. { can we split the offset beween an
  349. "add/sub (imm12 shl 12)" and the load (also an
  350. imm12)?
  351. -- the offset from the load will always be added,
  352. that's why the lower bound has a smaller range
  353. than the upper bound; it must also be a multiple
  354. of the access size }
  355. if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
  356. (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
  357. ((ref.offset mod accesssize)=0) then
  358. begin
  359. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
  360. ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
  361. end
  362. else
  363. begin
  364. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
  365. ref.offset:=0;
  366. end;
  367. reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
  368. end;
  369. end
  370. else
  371. internalerror(2014110904);
  372. end;
  373. end;
  374. A_LDP,A_STP:
  375. begin
  376. { unsupported offset -> fold into base register (these
  377. instructions support all addressmodes) }
  378. if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
  379. (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
  380. begin
  381. case ref.addressmode of
  382. AM_POSTINDEXED:
  383. { don't emulate post-indexing if we have to fold the
  384. offset into the base register }
  385. internalerror(2014110910);
  386. AM_PREINDEXED:
  387. { this means the offset must be added to the current
  388. base register }
  389. preferred_newbasereg:=ref.base;
  390. AM_OFFSET:
  391. if preferred_newbasereg=NR_NO then
  392. preferred_newbasereg:=getaddressregister(list);
  393. end;
  394. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
  395. reference_reset_base(ref,preferred_newbasereg,0,ref.alignment);
  396. end
  397. end;
  398. A_LDUR,A_STUR:
  399. begin
  400. { valid, checked above }
  401. end;
  402. { todo }
  403. A_LD1,A_LD2,A_LD3,A_LD4,
  404. A_ST1,A_ST2,A_ST3,A_ST4:
  405. internalerror(2014110908);
  406. else
  407. internalerror(2014110708);
  408. end;
  409. { done }
  410. exit;
  411. end;
  412. { only an offset -> change to base (+ offset 0) }
  413. if preferred_newbasereg=NR_NO then
  414. preferred_newbasereg:=getaddressregister(list);
  415. a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
  416. reference_reset_base(ref,preferred_newbasereg,0,newalignment(8,ref.offset));
  417. end;
  418. function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
  419. var
  420. subreg:Tsubregister;
  421. begin
  422. subreg:=cgsize2subreg(getregtype(reg),size);
  423. result:=reg;
  424. setsubreg(result,subreg);
  425. end;
  426. function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
  427. begin
  428. internalerror(2014122110);
  429. { squash warning }
  430. result:=NR_NO;
  431. end;
  432. function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
  433. begin
  434. make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
  435. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  436. result:=ref;
  437. end;
  438. procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
  439. var
  440. instr: taicpu;
  441. so: tshifterop;
  442. hadtmpreg: boolean;
  443. begin
  444. { imm12 }
  445. if (a>=0) and
  446. (a<=((1 shl 12)-1)) then
  447. if usedest then
  448. instr:=taicpu.op_reg_reg_const(op,dst,src,a)
  449. else
  450. instr:=taicpu.op_reg_const(op,src,a)
  451. { imm12 lsl 12 }
  452. else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
  453. begin
  454. so.shiftmode:=SM_LSL;
  455. so.shiftimm:=12;
  456. if usedest then
  457. instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
  458. else
  459. instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
  460. end
  461. else
  462. begin
  463. { todo: other possible optimizations (e.g. load 16 bit constant in
  464. register and then add/sub/cmp/cmn shifted the rest) }
  465. if tmpreg=NR_NO then
  466. begin
  467. hadtmpreg:=false;
  468. tmpreg:=getintregister(list,size);
  469. end
  470. else
  471. begin
  472. hadtmpreg:=true;
  473. getcpuregister(list,tmpreg);
  474. end;
  475. a_load_const_reg(list,size,a,tmpreg);
  476. if usedest then
  477. instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
  478. else
  479. instr:=taicpu.op_reg_reg(op,src,tmpreg);
  480. if hadtmpreg then
  481. ungetcpuregister(list,tmpreg);
  482. end;
  483. if setflags then
  484. setoppostfix(instr,PF_S);
  485. list.concat(instr);
  486. end;
  487. {****************************************************************************
  488. Assembler code
  489. ****************************************************************************}
  490. procedure tcgaarch64.init_register_allocators;
  491. begin
  492. inherited init_register_allocators;
{ allocatable integer registers: X0..X17 and X19..X28. X18 is deliberately
  absent (presumably reserved as the platform register on some ABIs --
  confirm), and X29/X30 (frame pointer / link register) are never handed
  out by the allocator }
  493. rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
  494. [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
  495. RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
  496. RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
  497. { maybe we can enable this in the future for leaf functions (it's
  498. the frame pointer)
  499. ,RS_X29 }],
  500. first_int_imreg,[]);
{ all 32 vector registers are allocatable; the default subregister is the
  64 bit (double) view }
  501. rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
  502. [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
  503. RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
  504. RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
  505. RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
  506. first_mm_imreg,[]);
  507. end;
  508. procedure tcgaarch64.done_register_allocators;
  509. begin
  510. rg[R_INTREGISTER].free;
{ init_register_allocators above never creates rg[R_FPUREGISTER] for this
  target; TObject.Free is a no-op on a nil reference, so freeing it
  unconditionally here is harmless }
  511. rg[R_FPUREGISTER].free;
  512. rg[R_MMREGISTER].free;
  513. inherited done_register_allocators;
  514. end;
  515. function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
  516. begin
  517. case size of
  518. OS_F32:
  519. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
  520. OS_F64:
  521. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
  522. else
  523. internalerror(2014102701);
  524. end;
  525. end;
  526. procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
  527. begin
  528. if not weak then
  529. list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s)))
  530. else
  531. list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s)));
  532. end;
  533. procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
  534. begin
  535. list.concat(taicpu.op_reg(A_BLR,reg));
  536. end;
  537. {********************** load instructions ********************}
  538. procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
  539. var
  540. preva: tcgint;
  541. opc: tasmop;
  542. shift,maxshift: byte;
  543. so: tshifterop;
  544. reginited: boolean;
  545. mask: tcgint;
  546. begin
  547. { if we load a value into a 32 bit register, it is automatically
  548. zero-extended to 64 bit }
  549. if (high(a)=0) and
  550. (size in [OS_64,OS_S64]) then
  551. begin
  552. size:=OS_32;
  553. reg:=makeregsize(reg,size);
  554. end;
  555. { values <= 32 bit are stored in a 32 bit register }
  556. if not(size in [OS_64,OS_S64]) then
  557. a:=cardinal(a);
  558. if size in [OS_64,OS_S64] then
  559. begin
  560. mask:=-1;
  561. maxshift:=64;
  562. end
  563. else
  564. begin
  565. mask:=$ffffffff;
  566. maxshift:=32;
  567. end;
  568. { single movn enough? (to be extended) }
  569. shift:=16;
  570. preva:=a;
  571. repeat
  572. if (a shr shift)=(mask shr shift) then
  573. begin
  574. if shift=16 then
  575. list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
  576. else
  577. begin
  578. shifterop_reset(so);
  579. so.shiftmode:=SM_LSL;
  580. so.shiftimm:=shift-16;
  581. list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
  582. end;
  583. exit;
  584. end;
  585. { only try the next 16 bits if the current one is all 1 bits, since
  586. the movn will set all lower bits to 1 }
  587. if word(a shr (shift-16))<>$ffff then
  588. break;
  589. inc(shift,16);
  590. until shift=maxshift;
  591. reginited:=false;
  592. shift:=0;
  593. { can be optimized later to use more movn }
  594. repeat
  595. { leftover is shifterconst? (don't check if we can represent it just
  596. as effectively with movz/movk, as this check is expensive) }
  597. if ((shift<tcgsize2size[size]*(8 div 2)) and
  598. (word(a)<>0) and
  599. ((a shr 16)<>0)) and
  600. is_shifter_const(a shl shift,size) then
  601. begin
  602. if reginited then
  603. list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
  604. else
  605. list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
  606. exit;
  607. end;
  608. { set all 16 bit parts <> 0 }
  609. if (word(a)<>0) or
  610. ((shift=0) and
  611. (a=0)) then
  612. if shift=0 then
  613. begin
  614. list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
  615. reginited:=true;
  616. end
  617. else
  618. begin
  619. shifterop_reset(so);
  620. so.shiftmode:=SM_LSL;
  621. so.shiftimm:=shift;
  622. if not reginited then
  623. begin
  624. opc:=A_MOVZ;
  625. reginited:=true;
  626. end
  627. else
  628. opc:=A_MOVK;
  629. list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
  630. end;
  631. preva:=a;
  632. a:=a shr 16;
  633. inc(shift,16);
  634. until word(preva)=preva;
  635. if not reginited then
  636. internalerror(2014102702);
  637. end;
  638. procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
  639. var
  640. reg: tregister;
  641. begin
  642. { use the zero register if possible }
  643. if a=0 then
  644. begin
  645. if size in [OS_64,OS_S64] then
  646. reg:=NR_XZR
  647. else
  648. reg:=NR_WZR;
  649. a_load_reg_ref(list,size,size,reg,ref);
  650. end
  651. else
  652. inherited;
  653. end;
{ Stores reg (logical size fromsize) to memory at ref (size tosize).
  A register smaller than the store size is first sign/zero extended
  into a fresh register; insufficiently aligned destinations are
  delegated to a_load_reg_ref_unaligned. The actual store width is
  selected via the STR instruction postfix. }
procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  var
    oppostfix:toppostfix;
    hreg: tregister;
  begin
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      fromsize:=tosize
    { have a 32 bit register but need a 64 bit one? }
    else if tosize in [OS_64,OS_S64] then
      begin
        { sign extend if necessary }
        if fromsize in [OS_S8,OS_S16,OS_S32] then
          begin
            { can't overwrite reg, may be a constant reg }
            hreg:=getintregister(list,tosize);
            a_load_reg_reg(list,fromsize,tosize,reg,hreg);
            reg:=hreg;
          end
        else
          { top 32 bit are zero by default }
          reg:=makeregsize(reg,OS_64);
        fromsize:=tosize;
      end;
    { alignment 0 means "default/unknown", which is treated as aligned }
    if (ref.alignment<>0) and
       (ref.alignment<tcgsize2size[tosize]) then
      begin
        a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
      end
    else
      begin
        case tosize of
          { signed integer registers }
          OS_8,
          OS_S8:
            oppostfix:=PF_B;
          OS_16,
          OS_S16:
            oppostfix:=PF_H;
          OS_32,
          OS_S32,
          OS_64,
          OS_S64:
            oppostfix:=PF_None;
          else
            InternalError(200308299);
        end;
        handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
      end;
  end;
{ Loads the value at ref (size fromsize) into reg (logical size tosize),
  choosing the LDR variant (postfix) that performs the required zero or
  sign extension in one instruction where possible. Insufficiently
  aligned sources are delegated to a_load_ref_reg_unaligned. }
procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  var
    oppostfix:toppostfix;
  begin
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      fromsize:=tosize;
    { ensure that all bits of the 32/64 register are always correctly set:
      * default behaviour is always to zero-extend to the entire (64 bit)
        register -> unsigned 8/16/32 bit loads only exist with a 32 bit
        target register, as the upper 32 bit will be zeroed implicitly
        -> always make target register 32 bit
      * signed loads exist both with 32 and 64 bit target registers,
        depending on whether the value should be sign extended to 32 or
        to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
        corresponding 64 bit register are again zeroed) -> no need to
        change anything (we only have 32 and 64 bit registers), except that
        when loading an OS_S32 to a 32 bit register, we don't need/can't
        use sign extension
    }
    if fromsize in [OS_8,OS_16,OS_32] then
      reg:=makeregsize(reg,OS_32);
    if (ref.alignment<>0) and
       (ref.alignment<tcgsize2size[fromsize]) then
      begin
        a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
        exit;
      end;
    { the postfix selects both the load width and the extension mode }
    case fromsize of
      { signed integer registers }
      OS_8:
        oppostfix:=PF_B;
      OS_S8:
        oppostfix:=PF_SB;
      OS_16:
        oppostfix:=PF_H;
      OS_S16:
        oppostfix:=PF_SH;
      OS_S32:
        if getsubreg(reg)=R_SUBD then
          oppostfix:=PF_NONE
        else
          oppostfix:=PF_SW;
      OS_32,
      OS_64,
      OS_S64:
        oppostfix:=PF_None;
      else
        InternalError(200308297);
    end;
    handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
    { clear upper 16 bits if the value was negative }
    if (fromsize=OS_S8) and (tosize=OS_16) then
      a_load_reg_reg(list,fromsize,tosize,reg,reg);
  end;
{ Loads a 64 bit value from a location that is not sufficiently aligned
  for a single 64 bit load: performs two 32 bit loads (an LDP when the
  reference shape allows it) and merges the upper half into the target
  with BFI. Smaller sizes fall back to the generic implementation. }
procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
  var
    href: treference;
    hreg1, hreg2, tmpreg: tregister;
  begin
    if fromsize in [OS_64,OS_S64] then
      begin
        { split into two 32 bit loads }
        hreg1:=makeregsize(register,OS_32);
        hreg2:=getintregister(list,OS_32);
        { on big endian the halves are stored in the opposite order }
        if target_info.endian=endian_big then
          begin
            tmpreg:=hreg1;
            hreg1:=hreg2;
            hreg2:=tmpreg;
          end;
        { can we use LDP? }
        if (ref.alignment=4) and
           (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
          list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
        else
          begin
            a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
            href:=ref;
            inc(href.offset,4);
            a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
          end;
        { insert the second 32 bit half into bits 32..63 of the result }
        list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
      end
    else
      inherited;
  end;
{ Register-to-register move with the sign/zero extension implied by the
  fromsize -> tosize combination. Plain moves are reported to the
  register allocator so it can coalesce them away; extension/truncation
  cases emit UXT/SXT/UBFIZ instead. }
procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
  var
    instr: taicpu;
  begin
    { we use both 32 and 64 bit registers -> insert conversion when when
      we have to truncate/sign extend inside the (32 or 64 bit) register
      holding the value, and when we sign extend from a 32 to a 64 bit
      register }
    if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
       ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
        (fromsize<>tosize) and
        not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
       ((fromsize in [OS_S8,OS_S16,OS_S32]) and
        (tosize in [OS_64,OS_S64])) or
       { needs to mask out the sign in the top 16 bits }
       ((fromsize=OS_S8) and
        (tosize=OS_16)) then
      begin
        case tosize of
          OS_8:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
          OS_16:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
          OS_S8:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
          OS_S16:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
          { while "mov wN, wM" automatically inserts a zero-extension and
            hence we could encode a 64->32 bit move like that, the problem
            is that we then can't distinguish 64->32 from 32->32 moves, and
            the 64->32 truncation could be removed altogether... So use a
            different instruction }
          OS_32,
          OS_S32:
            { in theory, reg1 should be 64 bit here (since fromsize>tosize),
              but because of the way location_force_register() tries to
              avoid superfluous zero/sign extensions, it's not always the
              case -> also force reg1 to to 64 bit }
            list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
          OS_64,
          OS_S64:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
          else
            internalerror(2002090901);
        end;
      end
    else
      begin
        { 32 -> 32 bit move implies zero extension (sign extensions have
          been handled above) -> also use for 32 <-> 64 bit moves }
        if not(fromsize in [OS_64,OS_S64]) or
           not(tosize in [OS_64,OS_S64]) then
          instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
        else
          instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
        list.Concat(instr);
        { Notify the register allocator that we have written a move instruction so
          it can try to eliminate it. }
        add_move_instruction(instr);
      end;
  end;
{ Loads the address of ref into r. The reference is first simplified as
  if a 64 bit load were going to be performed (which may emit helper
  instructions using r as scratch); the remaining base/index/offset or
  literal-symbol components are then combined with ADR/ADD/MOV. }
procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
  var
    href: treference;
    so: tshifterop;
    op: tasmop;
  begin
    op:=A_LDR;
    href:=ref;
    { simplify as if we're going to perform a regular 64 bit load, using
      "r" as the new base register if possible/necessary }
    make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
    { load literal? }
    if assigned(href.symbol) then
      begin
        { after simplification a symbol may only remain for local literal
          data (symboldata assigned), addressable with ADR }
        if (href.base<>NR_NO) or
           (href.index<>NR_NO) or
           not assigned(href.symboldata) then
          internalerror(2014110912);
        list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
      end
    else
      begin
        if href.index<>NR_NO then
          begin
            if href.shiftmode<>SM_None then
              begin
                { "add" supports a superset of the shift modes supported by
                  load/store instructions }
                shifterop_reset(so);
                so.shiftmode:=href.shiftmode;
                so.shiftimm:=href.shiftimm;
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
              end
            else
              a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
          end
        else if href.offset<>0 then
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
        else
          a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
      end;
  end;
{ Not used on this target: floating point values are handled via the mm
  register methods (a_loadmm_*) — reaching this is an internal error. }
procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
  begin
    internalerror(2014122107)
  end;
{ Not used on this target: floating point values are handled via the mm
  register methods (a_loadmm_*) — reaching this is an internal error. }
procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  begin
    internalerror(2014122108)
  end;
{ Not used on this target: floating point values are handled via the mm
  register methods (a_loadmm_*) — reaching this is an internal error. }
procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  begin
    internalerror(2014122109)
  end;
  904. procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
  905. var
  906. instr: taicpu;
  907. begin
  908. if assigned(shuffle) and
  909. not shufflescalar(shuffle) then
  910. internalerror(2014122104);
  911. if fromsize=tosize then
  912. begin
  913. instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
  914. { Notify the register allocator that we have written a move
  915. instruction so it can try to eliminate it. }
  916. add_move_instruction(instr);
  917. end
  918. else
  919. begin
  920. if (reg_cgsize(reg1)<>fromsize) or
  921. (reg_cgsize(reg2)<>tosize) then
  922. internalerror(2014110913);
  923. instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
  924. end;
  925. list.Concat(instr);
  926. end;
  927. procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  928. var
  929. tmpreg: tregister;
  930. begin
  931. if assigned(shuffle) and
  932. not shufflescalar(shuffle) then
  933. internalerror(2014122105);
  934. tmpreg:=NR_NO;
  935. if (fromsize<>tosize) then
  936. begin
  937. tmpreg:=reg;
  938. reg:=getmmregister(list,fromsize);
  939. end;
  940. handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
  941. if (fromsize<>tosize) then
  942. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
  943. end;
  944. procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  945. var
  946. tmpreg: tregister;
  947. begin
  948. if assigned(shuffle) and
  949. not shufflescalar(shuffle) then
  950. internalerror(2014122106);
  951. if (fromsize<>tosize) then
  952. begin
  953. tmpreg:=getmmregister(list,tosize);
  954. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
  955. reg:=tmpreg;
  956. end;
  957. handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
  958. end;
  959. procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  960. begin
  961. if not shufflescalar(shuffle) then
  962. internalerror(2014122801);
  963. if not(tcgsize2size[fromsize] in [4,8]) or
  964. (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
  965. internalerror(2014122803);
  966. list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
  967. end;
  968. procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
  969. begin
  970. if not shufflescalar(shuffle) then
  971. internalerror(2014122802);
  972. if not(tcgsize2size[fromsize] in [4,8]) or
  973. (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
  974. internalerror(2014122804);
  975. list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
  976. end;
  977. procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
  978. begin
  979. case op of
  980. { "xor Vx,Vx" is used to initialize global regvars to 0 }
  981. OP_XOR:
  982. begin
  983. if (src<>dst) or
  984. (reg_cgsize(src)<>size) or
  985. assigned(shuffle) then
  986. internalerror(2015011401);
  987. case size of
  988. OS_F32,
  989. OS_F64:
  990. list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
  991. else
  992. internalerror(2015011402);
  993. end;
  994. end
  995. else
  996. internalerror(2015011403);
  997. end;
  998. end;
  999. procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
  1000. var
  1001. bitsize,
  1002. signbit: longint;
  1003. begin
  1004. if srcsize in [OS_64,OS_S64] then
  1005. begin
  1006. bitsize:=64;
  1007. signbit:=6;
  1008. end
  1009. else
  1010. begin
  1011. bitsize:=32;
  1012. signbit:=5;
  1013. end;
  1014. { source is 0 -> dst will have to become 255 }
  1015. list.concat(taicpu.op_reg_const(A_CMP,src,0));
  1016. if reverse then
  1017. begin
  1018. list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
  1019. { xor 31/63 is the same as setting the lower 5/6 bits to
  1020. "31/63-(lower 5/6 bits of dst)" }
  1021. list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
  1022. end
  1023. else
  1024. begin
  1025. list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
  1026. list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
  1027. end;
  1028. { set dst to -1 if src was 0 }
  1029. list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
  1030. { mask the -1 to 255 if src was 0 (anyone find a two-instruction
  1031. branch-free version? All of mine are 3...) }
  1032. list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,dst,dst),PF_B));
  1033. end;
{ Stores a 64 bit value to a location that is not sufficiently aligned
  for a single 64 bit store: the value is split into two 32 bit halves
  (the upper half obtained by shifting right by 32) and stored with an
  STP when possible, otherwise with two separate 32 bit stores. Smaller
  sizes fall back to the generic implementation. }
procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
  var
    href: treference;
    hreg1, hreg2, tmpreg: tregister;
  begin
    if fromsize in [OS_64,OS_S64] then
      begin
        { split into two 32 bit stores }
        hreg1:=makeregsize(register,OS_32);
        hreg2:=getintregister(list,OS_32);
        { extract the upper 32 bits into hreg2 }
        a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
        { on big endian the halves are stored in the opposite order }
        if target_info.endian=endian_big then
          begin
            tmpreg:=hreg1;
            hreg1:=hreg2;
            hreg2:=tmpreg;
          end;
        { can we use STP? }
        if (ref.alignment=4) and
           (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
          list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
        else
          begin
            a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
            href:=ref;
            inc(href.offset,4);
            a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
          end;
      end
    else
      inherited;
  end;
  1066. procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
  1067. const
  1068. overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
  1069. begin
  1070. if (op in overflowops) and
  1071. (size in [OS_8,OS_S8,OS_16,OS_S16]) then
  1072. a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
  1073. end;
  1074. procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
  1075. begin
  1076. optimize_op_const(size,op,a);
  1077. case op of
  1078. OP_NONE:
  1079. exit;
  1080. OP_MOVE:
  1081. a_load_const_reg(list,size,a,reg);
  1082. OP_NEG,OP_NOT:
  1083. internalerror(200306011);
  1084. else
  1085. a_op_const_reg_reg(list,op,size,a,reg,reg);
  1086. end;
  1087. end;
  1088. procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
  1089. begin
  1090. Case op of
  1091. OP_NEG,
  1092. OP_NOT:
  1093. begin
  1094. list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
  1095. maybeadjustresult(list,op,size,dst);
  1096. end
  1097. else
  1098. a_op_reg_reg_reg(list,op,size,src,dst,dst);
  1099. end;
  1100. end;
{ Emits "dst := src op a" without overflow checking; thin wrapper around
  the checkoverflow variant with setflags=false. }
procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
  var
    l: tlocation; { dummy: receives the (unused) overflow location }
  begin
    a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
  end;
{ Three-operand operation "dst := src2 op src1". OP_ROL has no
  register-count instruction (no ROLV) and is rewritten as a ROR by
  "bitsize - src1"; only 32/64 bit rotates exist. }
procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
  var
    hreg: tregister;
  begin
    { no ROLV opcode... }
    if op=OP_ROL then
      begin
        case size of
          OS_32,OS_S32,
          OS_64,OS_S64:
            begin
              { rol(x,n) = ror(x,bitsize-n) }
              hreg:=getintregister(list,size);
              a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
              a_op_reg_reg(list,OP_SUB,size,src1,hreg);
              a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
              exit;
            end;
          else
            internalerror(2014111005);
        end;
      end
    else if (op=OP_ROR) and
       not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
      internalerror(2014111006);
    if TOpCG2AsmOpReg[op]=A_NONE then
      internalerror(2014111007);
    list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
    maybeadjustresult(list,op,size,dst);
  end;
{ Emits "dst := src op a", optionally setting flags for a subsequent
  overflow check (ovloc describes where the overflow indication lives;
  LOC_VOID when none is produced). Handles constant-specific encodings:
  imm12 forms for add/sub, logical immediates for or/and/xor, immediate
  shift/rotate forms, and falls back to materialising the constant in a
  register for mul/div. }
procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
  var
    shiftcountmask: longint;
    constreg: tregister;
  begin
    { add/sub instructions have only positive immediate operands }
    if (op in [OP_ADD,OP_SUB]) and
       (a<0) then
      begin
        if op=OP_ADD then
          op:=op_SUB
        else
          op:=OP_ADD;
        { avoid range/overflow error in case a = low(tcgint) }
{$push}{$r-}{$q-}
        a:=-a;
{$pop}
      end;
    ovloc.loc:=LOC_VOID;
    { the operation may be strength-reduced to a no-op or a plain move }
    optimize_op_const(size,op,a);
    case op of
      OP_NONE:
        begin
          a_load_reg_reg(list,size,size,src,dst);
          exit;
        end;
      OP_MOVE:
        begin
          a_load_const_reg(list,size,a,dst);
          exit;
        end;
    end;
    case op of
      OP_ADD,
      OP_SUB:
        begin
          handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
          { on a 64 bit target, overflows with smaller data types
            are handled via range errors }
          if setflags and
             (size in [OS_64,OS_S64]) then
            begin
              location_reset(ovloc,LOC_FLAGS,OS_8);
              if size=OS_64 then
                { unsigned: overflow via carry set (add) / clear (sub) }
                if op=OP_ADD then
                  ovloc.resflags:=F_CS
                else
                  ovloc.resflags:=F_CC
              else
                { signed: overflow flag }
                ovloc.resflags:=F_VS;
            end;
        end;
      OP_OR,
      OP_AND,
      OP_XOR:
        begin
          { normalise to an unsigned 32 bit pattern for sub-64 bit sizes }
          if not(size in [OS_64,OS_S64]) then
            a:=cardinal(a);
          if is_shifter_const(a,size) then
            list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
          else
            begin
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg(list,op,size,constreg,src,dst);
            end;
        end;
      OP_SHL,
      OP_SHR,
      OP_SAR:
        begin
          if size in [OS_64,OS_S64] then
            shiftcountmask:=63
          else
            shiftcountmask:=31;
          if (a and shiftcountmask)<>0 Then
            list.concat(taicpu.op_reg_reg_const(
              TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
          else
            a_load_reg_reg(list,size,size,src,dst);
          { shift counts outside the register width are invalid }
          if (a and not(tcgint(shiftcountmask)))<>0 then
            internalError(2014112101);
        end;
      OP_ROL,
      OP_ROR:
        begin
          case size of
            OS_32,OS_S32:
              if (a and not(tcgint(31)))<>0 then
                internalError(2014112102);
            OS_64,OS_S64:
              if (a and not(tcgint(63)))<>0 then
                internalError(2014112103);
            else
              internalError(2014112104);
          end;
          { there's only a ror opcode }
          if op=OP_ROL then
            a:=(tcgsize2size[size]*8)-a;
          list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
        end;
      OP_MUL,
      OP_IMUL,
      OP_DIV,
      OP_IDIV:
        begin
          { no immediate forms: load the constant into a register first }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
        end;
      else
        internalerror(2014111403);
    end;
    maybeadjustresult(list,op,size,dst);
  end;
  1251. procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
  1252. var
  1253. tmpreg1: tregister;
  1254. begin
  1255. ovloc.loc:=LOC_VOID;
  1256. { overflow can only occur with 64 bit calculations on 64 bit cpus }
  1257. if setflags and
  1258. (size in [OS_64,OS_S64]) then
  1259. begin
  1260. case op of
  1261. OP_ADD,
  1262. OP_SUB:
  1263. begin
  1264. list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
  1265. ovloc.loc:=LOC_FLAGS;
  1266. if size=OS_64 then
  1267. if op=OP_ADD then
  1268. ovloc.resflags:=F_CS
  1269. else
  1270. ovloc.resflags:=F_CC
  1271. else
  1272. ovloc.resflags:=F_VS;
  1273. { finished; since we won't call through to a_op_reg_reg_reg,
  1274. adjust the result here if necessary }
  1275. maybeadjustresult(list,op,size,dst);
  1276. exit;
  1277. end;
  1278. OP_MUL:
  1279. begin
  1280. { check whether the upper 64 bit of the 128 bit product is 0 }
  1281. tmpreg1:=getintregister(list,OS_64);
  1282. list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
  1283. list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
  1284. ovloc.loc:=LOC_FLAGS;
  1285. ovloc.resflags:=F_NE;
  1286. { still have to perform the actual multiplication }
  1287. end;
  1288. OP_IMUL:
  1289. begin
  1290. { check whether the sign bit of the (128 bit) result is the
  1291. same as "sign bit of src1" xor "signbit of src2" (if so, no
  1292. overflow and the xor-product of all sign bits is 0) }
  1293. tmpreg1:=getintregister(list,OS_64);
  1294. list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
  1295. list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src1));
  1296. list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src2));
  1297. list.concat(taicpu.op_reg_const(A_TST,tmpreg1,$80000000));
  1298. ovloc.loc:=LOC_FLAGS;
  1299. ovloc.resflags:=F_NE;
  1300. { still have to perform the actual multiplication }
  1301. end;
  1302. OP_IDIV,
  1303. OP_DIV:
  1304. begin
  1305. { not handled here, needs div-by-zero check (dividing by zero
  1306. just gives a 0 result on aarch64), and low(int64) div -1
  1307. check for overflow) }
  1308. internalerror(2014122101);
  1309. end;
  1310. end;
  1311. end;
  1312. a_op_reg_reg_reg(list,op,size,src1,src2,dst);
  1313. end;
  1314. {*************** compare instructructions ****************}
{ Compares reg against the constant a and jumps to l when cmp_op holds.
  Negative constants are compared via CMN (compare-negative) with the
  absolute value, since the immediate forms only accept positive
  operands. }
procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
  var
    op: tasmop;
  begin
    if a>=0 then
      op:=A_CMP
    else
      op:=A_CMN;
    { avoid range/overflow error in case a=low(tcgint) }
{$push}{$r-}{$q-}
    handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
{$pop}
    a_jmp_cond(list,cmp_op,l);
  end;
{ Compares two registers and jumps to l when cmp_op holds. Note the
  operand order: the comparison performed is "reg2 cmp_op reg1". }
procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
  begin
    list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
    a_jmp_cond(list,cmp_op,l);
  end;
  1334. procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
  1335. var
  1336. ai: taicpu;
  1337. begin
  1338. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name));
  1339. ai.is_jmp:=true;
  1340. list.Concat(ai);
  1341. end;
  1342. procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
  1343. var
  1344. ai: taicpu;
  1345. begin
  1346. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s));
  1347. ai.is_jmp:=true;
  1348. list.Concat(ai);
  1349. end;
  1350. procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
  1351. var
  1352. ai: taicpu;
  1353. begin
  1354. ai:=TAiCpu.op_sym(A_B,l);
  1355. ai.is_jmp:=true;
  1356. ai.SetCondition(TOpCmp2AsmCond[cond]);
  1357. list.Concat(ai);
  1358. end;
  1359. procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
  1360. var
  1361. ai : taicpu;
  1362. begin
  1363. ai:=Taicpu.op_sym(A_B,l);
  1364. ai.is_jmp:=true;
  1365. ai.SetCondition(flags_to_cond(f));
  1366. list.Concat(ai);
  1367. end;
{ Materialises the condition described by f as 0/1 in reg (CSET). }
procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
  begin
    list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
  end;
{ Not supported on this target: callers must use g_overflowcheck_loc
  and pass the explicit overflow location instead. }
procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
  begin
    { we need an explicit overflow location, because there are many
      possibilities (not just the overflow flag, which is only used for
      signed add/sub) }
    internalerror(2014112303);
  end;
  1379. procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
  1380. var
  1381. hl : tasmlabel;
  1382. hflags : tresflags;
  1383. begin
  1384. if not(cs_check_overflow in current_settings.localswitches) then
  1385. exit;
  1386. current_asmdata.getjumplabel(hl);
  1387. case ovloc.loc of
  1388. LOC_FLAGS:
  1389. begin
  1390. hflags:=ovloc.resflags;
  1391. inverse_flags(hflags);
  1392. cg.a_jmp_flags(list,hflags,hl);
  1393. end;
  1394. else
  1395. internalerror(2014112304);
  1396. end;
  1397. a_call_name(list,'FPC_OVERFLOW',false);
  1398. a_label(list,hl);
  1399. end;
  1400. { *********** entry/exit code and address loading ************ }
{ Pushes all registers of type rt in the superregister range
  lowsr..highsr (with subregister sub) that are used in the current
  procedure, two at a time with pre-indexed STP so the stack stays
  16-byte aligned. An unpaired last register is stored twice to keep
  the alignment. Returns the number of bytes pushed. }
function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
  var
    ref: treference;
    sr: tsuperregister;
    pairreg: tregister;
  begin
    result:=0;
    { each STP writes 16 bytes below SP and pre-decrements it }
    reference_reset_base(ref,NR_SP,-16,16);
    ref.addressmode:=AM_PREINDEXED;
    pairreg:=NR_NO;
    { store all used registers pairwise }
    for sr:=lowsr to highsr do
      if sr in rg[rt].used_in_proc then
        if pairreg=NR_NO then
          { remember the first register of the pair }
          pairreg:=newreg(rt,sr,sub)
        else
          begin
            inc(result,16);
            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
            pairreg:=NR_NO
          end;
    { one left -> store twice (stack must be 16 bytes aligned) }
    if pairreg<>NR_NO then
      begin
        list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
        inc(result,16);
      end;
  end;
  1429. procedure FixupOffsets(p:TObject;arg:pointer);
  1430. var
  1431. sym: tabstractnormalvarsym absolute p;
  1432. begin
  1433. if (tsym(p).typ in [paravarsym,localvarsym]) and
  1434. (sym.localloc.loc=LOC_REFERENCE) and
  1435. (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
  1436. begin
  1437. sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
  1438. dec(sym.localloc.reference.offset,PLongint(arg)^);
  1439. end;
  1440. end;
{ Emits the procedure prologue: pushes FP/LR with a pre-indexed STP,
  sets up the frame pointer, stores the modified callee-saved integer
  (x19-x28) and vector (d8-d15) registers, and allocates the 16-byte
  aligned local stack space. For routines containing nested procedures
  it additionally rewrites all SP-relative locals/parameters into
  FP-relative ones (rationale in the long comment below). }
procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
  var
    ref: treference;
    totalstackframesize: longint;
  begin
    if nostackframe then
      exit;
    { stack pointer has to be aligned to 16 bytes at all times }
    localsize:=align(localsize,16);
    { save stack pointer and return address }
    reference_reset_base(ref,NR_SP,-16,16);
    ref.addressmode:=AM_PREINDEXED;
    list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
    { initialise frame pointer }
    a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
    { accumulate locals + saved-register space: this is the distance
      between FP and the final SP, needed by FixupOffsets below }
    totalstackframesize:=localsize;
    { save modified integer registers }
    inc(totalstackframesize,
      save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
    { only the lower 64 bits of the modified vector registers need to be
      saved; if the caller needs the upper 64 bits, it has to save them
      itself }
    inc(totalstackframesize,
      save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
    { allocate stack space }
    if localsize<>0 then
      begin
        { NOTE(review): localsize was already 16-byte aligned above,
          so this second align is redundant but harmless }
        localsize:=align(localsize,16);
        current_procinfo.final_localsize:=localsize;
        handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
      end;
    { By default, we use the frame pointer to access parameters passed via
      the stack and the stack pointer to address local variables and temps
      because
       a) we can use bigger positive than negative offsets (so accessing
          locals via negative offsets from the frame pointer would be less
          efficient)
       b) we don't know the local size while generating the code, so
          accessing the parameters via the stack pointer is not possible
          without copying them
      The problem with this is the get_frame() intrinsic:
       a) it must return the same value as what we pass as parentfp
          parameter, since that's how it's used in the TP-style objects unit
       b) its return value must usable to access all local data from a
          routine (locals and parameters), since it's all the nested
          routines have access to
       c) its return value must be usable to construct a backtrace, as it's
          also used by the exception handling routines
      The solution we use here, based on something similar that's done in
      the MIPS port, is to generate all accesses to locals in the routine
      itself SP-relative, and then after the code is generated and the local
      size is known (namely, here), we change all SP-relative variables/
      parameters into FP-relative ones. This means that they'll be accessed
      less efficiently from nested routines, but those accesses are indirect
      anyway and at least this way they can be accessed at all
    }
    if current_procinfo.has_nestedprocs then
      begin
        current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
        current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
      end;
  end;
  1503. procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
  1504. begin
  1505. { nothing to do on Darwin; check on ELF targets }
  1506. if not(target_info.system in systems_darwin) then
  1507. internalerror(2014112601);
  1508. end;
{ Intentionally a no-op: on AArch64 the callee-saved registers are
  restored by g_proc_exit instead, so nothing is emitted here. }
procedure tcgaarch64.g_restore_registers(list:TAsmList);
  begin
    { done in g_proc_exit }
  end;
{ Reloads the callee-saved registers in [lowsr..highsr] (with subregister
  'sub') that were used in this procedure, mirroring save_regs: registers
  are popped pairwise with post-indexed LDP, in reverse order of how they
  were pushed. If an odd number was saved, save_regs stored the highest
  one twice to keep SP 16-byte aligned, so it is reloaded first with a
  single post-indexed LDR that also discards the duplicate slot. }
procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
  var
    ref: treference;
    sr, highestsetsr: tsuperregister;
    pairreg: tregister;
    regcount: longint;
  begin
    reference_reset_base(ref,NR_SP,16,16);
    ref.addressmode:=AM_POSTINDEXED;
    { highest reg stored twice? }
    regcount:=0;
    highestsetsr:=RS_NO;
    for sr:=lowsr to highsr do
      if sr in rg[rt].used_in_proc then
        begin
          inc(regcount);
          highestsetsr:=sr;
        end;
    if odd(regcount) then
      begin
        list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
        { skip the already-restored register in the pairwise loop below }
        highestsetsr:=pred(highestsetsr);
      end;
    { load all (other) used registers pairwise }
    pairreg:=NR_NO;
    for sr:=highestsetsr downto lowsr do
      if sr in rg[rt].used_in_proc then
        if pairreg=NR_NO then
          pairreg:=newreg(rt,sr,sub)
        else
          begin
            list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
            pairreg:=NR_NO
          end;
    { There can't be any register left }
    if pairreg<>NR_NO then
      internalerror(2014112602);
  end;
{ Emits the procedure epilogue: releases the local stack space, reloads
  the callee-saved integer/vector registers stored by g_proc_entry
  (in reverse order), restores FP/LR with a post-indexed LDP and
  returns. With nostackframe, only the RET is emitted. }
procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
  var
    ref: treference;
    regsstored: boolean;
    sr: tsuperregister;
  begin
    if not nostackframe then
      begin
        { if no registers have been stored, we don't have to subtract the
          allocated temp space from the stack pointer }
        regsstored:=false;
        for sr:=RS_X19 to RS_X28 do
          if sr in rg[R_INTREGISTER].used_in_proc then
            begin
              regsstored:=true;
              break;
            end;
        if not regsstored then
          for sr:=RS_D8 to RS_D15 do
            if sr in rg[R_MMREGISTER].used_in_proc then
              begin
                regsstored:=true;
                break;
              end;
        { restore registers (and stack pointer) }
        if regsstored then
          begin
            { point SP at the saved-register area before popping it }
            if current_procinfo.final_localsize<>0 then
              handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
            load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
            load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
          end
        else if current_procinfo.final_localsize<>0 then
          { restore stack pointer }
          a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
        { restore framepointer and return address }
        reference_reset_base(ref,NR_SP,16,16);
        ref.addressmode:=AM_POSTINDEXED;
        list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
      end;
    { return }
    list.concat(taicpu.op_none(A_RET));
  end;
{ Intentionally a no-op: on AArch64 the callee-saved registers are
  stored by g_proc_entry instead, so nothing is emitted here. }
procedure tcgaarch64.g_save_registers(list : TAsmList);
  begin
    { done in g_proc_entry }
  end;
  1598. { ************* concatcopy ************ }
  1599. procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  1600. var
  1601. paraloc1,paraloc2,paraloc3 : TCGPara;
  1602. pd : tprocdef;
  1603. begin
  1604. pd:=search_system_proc('MOVE');
  1605. paraloc1.init;
  1606. paraloc2.init;
  1607. paraloc3.init;
  1608. paramanager.getintparaloc(pd,1,paraloc1);
  1609. paramanager.getintparaloc(pd,2,paraloc2);
  1610. paramanager.getintparaloc(pd,3,paraloc3);
  1611. a_load_const_cgpara(list,OS_SINT,len,paraloc3);
  1612. a_loadaddr_ref_cgpara(list,dest,paraloc2);
  1613. a_loadaddr_ref_cgpara(list,source,paraloc1);
  1614. paramanager.freecgpara(list,paraloc3);
  1615. paramanager.freecgpara(list,paraloc2);
  1616. paramanager.freecgpara(list,paraloc1);
  1617. alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  1618. alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
  1619. a_call_name(list,'FPC_MOVE',false);
  1620. dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
  1621. dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  1622. paraloc3.done;
  1623. paraloc2.done;
  1624. paraloc1.done;
  1625. end;
  1626. procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
  1627. var
  1628. sourcebasereplaced, destbasereplaced: boolean;
  1629. { get optimal memory operation to use for loading/storing data
  1630. in an unrolled loop }
  1631. procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
  1632. begin
  1633. if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
  1634. (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
  1635. begin
  1636. memop:=unscaledop;
  1637. needsimplify:=true;
  1638. end
  1639. else if (unscaledop<>A_NONE) and
  1640. (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
  1641. (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
  1642. begin
  1643. memop:=unscaledop;
  1644. needsimplify:=false;
  1645. end
  1646. else
  1647. begin
  1648. memop:=scaledop;
  1649. needsimplify:=true;
  1650. end;
  1651. end;
    { adjust the offset and/or addressing mode after a load/store so it's
      correct for the next one of the same size }
    procedure updaterefafterloadstore(var ref: treference; oplen: longint);
      begin
        case ref.addressmode of
          AM_OFFSET:
            { plain offset -> advance by the bytes just transferred }
            inc(ref.offset,oplen);
          AM_POSTINDEXED:
            { base register updated by instruction, next offset can remain
              the same }
            ;
          AM_PREINDEXED:
            begin
              { base register updated by instruction -> next instruction can
                use post-indexing with offset = sizeof(operation) }
              ref.offset:=0;
              ref.addressmode:=AM_OFFSET;
            end;
        end;
      end;
    { generate a load/store and adjust the reference offset to the next
      memory location if necessary }
    procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
      begin
        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
        updaterefafterloadstore(ref,tcgsize2size[opsize]);
      end;
    { generate a dual load/store (ldp/stp) and adjust the reference offset to
      the next memory location if necessary (a pair moves twice opsize) }
    procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
      begin
        list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
        updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
      end;
    { turn a reference into a pre- or post-indexed reference for use in a
      load/store of a particular size. When forcepostindexing is set (copy
      loop), the result is a post-indexed reference whose writeback step
      equals the access size; otherwise a plain offset reference. The base
      register may be copied into a temp (recorded via 'basereplaced') so
      the original base - possibly SP or a regvar - is never clobbered by
      the indexing writeback. }
    procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
      var
        tmpreg: tregister;
        scaledoffset: longint;
        orgaddressmode: taddressmode;
      begin
        { pair instructions transfer two registers per operation }
        scaledoffset:=tcgsize2size[opsize];
        if scaledop in [A_LDP,A_STP] then
          scaledoffset:=scaledoffset*2;
        { can we use the reference as post-indexed without changes? }
        if forcepostindexing then
          begin
            orgaddressmode:=ref.addressmode;
            ref.addressmode:=AM_POSTINDEXED;
            if (orgaddressmode=AM_POSTINDEXED) or
               ((ref.offset=0) and
                (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
              begin
                { just change the post-indexed offset to the access size }
                ref.offset:=scaledoffset;
                { and replace the base register if that didn't happen yet
                  (could be sp or a regvar) }
                if not basereplaced then
                  begin
                    tmpreg:=getaddressregister(list);
                    a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
                    ref.base:=tmpreg;
                    basereplaced:=true;
                  end;
                exit;
              end;
            ref.addressmode:=orgaddressmode;
          end;
{$ifdef dummy}
        This could in theory be useful in case you have a concatcopy from
        e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
        very unlikely. Disabled because it still needs fixes, as it
        also generates pre-indexed loads right now at the very end for the
        left-over gencopies
        { can we turn it into a pre-indexed reference for free? (after the
          first operation, it will be turned into an offset one) }
        if not forcepostindexing and
           (ref.offset<>0) then
          begin
            orgaddressmode:=ref.addressmode;
            ref.addressmode:=AM_PREINDEXED;
            tmpreg:=ref.base;
            if not basereplaced and
               (ref.base=tmpreg) then
              begin
                tmpreg:=getaddressregister(list);
                a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
                ref.base:=tmpreg;
                basereplaced:=true;
              end;
            if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
              make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
            exit;
          end;
{$endif dummy}
        if not forcepostindexing then
          begin
            ref.addressmode:=AM_OFFSET;
            make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
            { this may still cause problems if the final offset is no longer
              a simple ref; it's a bit complicated to pass all information
              through at all places and check that here, so play safe: we
              currently never generate unrolled copies for more than 64
              bytes (32 with non-double-register copies) }
            if ref.index=NR_NO then
              begin
                if ((scaledop in [A_LDP,A_STP]) and
                    (ref.offset<((64-8)*tcgsize2size[opsize]))) or
                   ((scaledop in [A_LDUR,A_STUR]) and
                    (ref.offset<(255-8*tcgsize2size[opsize]))) or
                   ((scaledop in [A_LDR,A_STR]) and
                    (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
                  exit;
              end;
          end;
        { fall back: materialise the full address in a fresh base register }
        tmpreg:=getaddressregister(list);
        a_loadaddr_ref_reg(list,ref,tmpreg);
        basereplaced:=true;
        if forcepostindexing then
          begin
            reference_reset_base(ref,tmpreg,scaledoffset,ref.alignment);
            ref.addressmode:=AM_POSTINDEXED;
          end
        else
          begin
            reference_reset_base(ref,tmpreg,0,ref.alignment);
            ref.addressmode:=AM_OFFSET;
          end
      end;
    { prepare a reference for use by gencopy. This is done both after the
      unrolled and regular copy loop -> get rid of post-indexing mode, make
      sure ref is valid }
    procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
      var
        simplify: boolean;
      begin
        { a post-indexed ref still owes the writeback of the last access }
        if ref.addressmode=AM_POSTINDEXED then
          ref.offset:=tcgsize2size[opsize];
        getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
        if simplify then
          begin
            makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
            op:=scaledop;
          end;
      end;
    { generate a copy from source to dest of size opsize/postfix }
    procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
      var
        reg: tregister;
        loadop, storeop: tasmop;
      begin
        { make sure both references are encodable for this access size }
        preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
        preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
        reg:=getintregister(list,opsize);
        genloadstore(list,loadop,reg,source,postfix,opsize);
        genloadstore(list,storeop,reg,dest,postfix,opsize);
      end;
    { copy the leftovers after an unrolled or regular copy loop: at most
      one copy each of 8, 4, 2 and 1 bytes (len < 2 * loop access size) }
    procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
      begin
        { stop post-indexing if we did so in the loop, since in that case all
          offsets definitely can be represented now }
        if source.addressmode=AM_POSTINDEXED then
          begin
            source.addressmode:=AM_OFFSET;
            source.offset:=0;
          end;
        if dest.addressmode=AM_POSTINDEXED then
          begin
            dest.addressmode:=AM_OFFSET;
            dest.offset:=0;
          end;
        { transfer the leftovers }
        if len>=8 then
          begin
            dec(len,8);
            gencopy(list,source,dest,PF_NONE,OS_64);
          end;
        if len>=4 then
          begin
            dec(len,4);
            gencopy(list,source,dest,PF_NONE,OS_32);
          end;
        if len>=2 then
          begin
            dec(len,2);
            gencopy(list,source,dest,PF_H,OS_16);
          end;
        if len>=1 then
          begin
            dec(len);
            gencopy(list,source,dest,PF_B,OS_8);
          end;
      end;
  { main body of g_concatcopy: chooses between a single load/store,
    an unrolled copy, a post-indexed copy loop, or a call to FPC_MOVE,
    based on the length and the common alignment of source and dest }
  const
    { load_length + loop dec + cbnz }
    loopoverhead=12;
    { loop overhead + load + store }
    { NOTE(review): loopoverhead/totallooplen appear unused in this
      routine - possibly leftovers; confirm before removing }
    totallooplen=loopoverhead + 8;
  var
    totalalign: longint;
    maxlenunrolled: tcgint;
    loadop, storeop: tasmop;
    opsize: tcgsize;
    postfix: toppostfix;
    tmpsource, tmpdest: treference;
    scaledstoreop, unscaledstoreop,
    scaledloadop, unscaledloadop: tasmop;
    regs: array[1..8] of tregister;
    countreg: tregister;
    i, regcount: longint;
    hl: tasmlabel;
    simplifysource, simplifydest: boolean;
  begin
    if len=0 then
      exit;
    sourcebasereplaced:=false;
    destbasereplaced:=false;
    { maximum common alignment }
    totalalign:=max(1,newalignment(source.alignment,dest.alignment));
    { use a simple load/store? }
    if (len in [1,2,4,8]) and
       ((totalalign>=(len div 2)) or
        (source.alignment=len) or
        (dest.alignment=len)) then
      begin
        opsize:=int_cgsize(len);
        a_load_ref_ref(list,opsize,opsize,source,dest);
        exit;
      end;
    { alignment > length is not useful, and would break some checks below }
    while totalalign>len do
      totalalign:=totalalign div 2;
    { operation sizes to use based on common alignment }
    case totalalign of
      1:
        begin
          postfix:=PF_B;
          opsize:=OS_8;
        end;
      2:
        begin
          postfix:=PF_H;
          opsize:=OS_16;
        end;
      4:
        begin
          postfix:=PF_None;
          opsize:=OS_32;
        end
      else
        begin
          totalalign:=8;
          postfix:=PF_None;
          opsize:=OS_64;
        end;
    end;
    { maximum length to handled with an unrolled loop (4 loads + 4 stores) }
    maxlenunrolled:=min(totalalign,8)*4;
    { ldp/stp -> 2 registers per instruction }
    if (totalalign>=4) and
       (len>=totalalign*2) then
      begin
        maxlenunrolled:=maxlenunrolled*2;
        scaledstoreop:=A_STP;
        scaledloadop:=A_LDP;
        unscaledstoreop:=A_NONE;
        unscaledloadop:=A_NONE;
      end
    else
      begin
        scaledstoreop:=A_STR;
        scaledloadop:=A_LDR;
        unscaledstoreop:=A_STUR;
        unscaledloadop:=A_LDUR;
      end;
    { we only need 4 instructions extra to call FPC_MOVE }
    if cs_opt_size in current_settings.optimizerswitches then
      maxlenunrolled:=maxlenunrolled div 2;
    { too long even for the copy loop -> delegate to the RTL helper }
    if (len>maxlenunrolled) and
       (len>totalalign*8) then
      begin
        g_concatcopy_move(list,source,dest,len);
        exit;
      end;
    simplifysource:=true;
    simplifydest:=true;
    tmpsource:=source;
    tmpdest:=dest;
    { can we directly encode all offsets in an unrolled loop? }
    if len<=maxlenunrolled then
      begin
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
{$endif extdebug}
        { the leftovers will be handled separately -> -(len mod opsize) }
        inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
        { additionally, the last regular load/store will be at
          offset+len-opsize (if len-(len mod opsize)>0) }
        if tmpsource.offset>source.offset then
          dec(tmpsource.offset,tcgsize2size[opsize]);
        getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
        inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
        if tmpdest.offset>dest.offset then
          dec(tmpdest.offset,tcgsize2size[opsize]);
        getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
        tmpsource:=source;
        tmpdest:=dest;
        { if we can't directly encode all offsets, simplify }
        if simplifysource then
          begin
            loadop:=scaledloadop;
            makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
          end;
        if simplifydest then
          begin
            storeop:=scaledstoreop;
            makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
          end;
        regcount:=len div tcgsize2size[opsize];
        { in case we transfer two registers at a time, we copy an even
          number of registers }
        if loadop=A_LDP then
          regcount:=regcount and not(1);
        { max 4 loads/stores -> max 8 registers (in case of ldp/stdp) }
        for i:=1 to regcount do
          regs[i]:=getintregister(list,opsize);
        if loadop=A_LDP then
          begin
            { load registers }
            for i:=1 to (regcount div 2) do
              gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
            { store registers }
            for i:=1 to (regcount div 2) do
              gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
          end
        else
          begin
            for i:=1 to regcount do
              genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
            for i:=1 to regcount do
              genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
          end;
        { leftover }
        len:=len-regcount*tcgsize2size[opsize];
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
{$endif extdebug}
      end
    else
      begin
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
{$endif extdebug}
        { regular loop -> definitely use post-indexing }
        loadop:=scaledloadop;
        makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
        storeop:=scaledstoreop;
        makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
        current_asmdata.getjumplabel(hl);
        countreg:=getintregister(list,OS_32);
        { ldp/stp transfers two elements per iteration -> half the count }
        if loadop=A_LDP then
          a_load_const_reg(list,OS_32,len div tcgsize2size[opsize]*2,countreg)
        else
          a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
        a_label(list,hl);
        a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
        if loadop=A_LDP then
          begin
            regs[1]:=getintregister(list,opsize);
            regs[2]:=getintregister(list,opsize);
            gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
            gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
          end
        else
          begin
            regs[1]:=getintregister(list,opsize);
            genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
            genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
          end;
        { loop while the counter is non-zero }
        list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
        len:=len mod tcgsize2size[opsize];
      end;
    gencopyleftovers(list,tmpsource,tmpdest,len);
  end;
{ Not supported standalone on this target: the self adjustment is
  emitted inline by g_intf_wrapper, so any direct call is a bug. }
procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  begin
    { This method is integrated into g_intf_wrapper and shouldn't be called separately }
    InternalError(2013020102);
  end;
{ Generates an interface thunk with symbol 'labelname': adjusts the
  self pointer (passed in a register) by -ioffset and tail-jumps to the
  wrapped method - indirectly through the VMT for virtual methods,
  directly by name otherwise. }
procedure tcgaarch64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
  var
    make_global: boolean;
    href: treference;
    hsym: tsym;
    paraloc: pcgparalocation;
    op: tasmop;
  begin
    if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
      Internalerror(200006137);
    if not assigned(procdef.struct) or
       (procdef.procoptions*[po_classmethod, po_staticmethod,
         po_methodpointer, po_interrupt, po_iocheck]<>[]) then
      Internalerror(200006138);
    if procdef.owner.symtabletype<>ObjectSymtable then
      Internalerror(200109191);
    make_global:=false;
    if (not current_module.is_unit) or create_smartlink_library or
       (procdef.owner.defowner.owner.symtabletype=globalsymtable) then
      make_global:=true;
    if make_global then
      list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
    else
      list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));
    { set param1 interface to self }
    procdef.init_paraloc_info(callerside);
    hsym:=tsym(procdef.parast.Find('self'));
    if not(assigned(hsym) and
      (hsym.typ=paravarsym)) then
      internalerror(2010103101);
    paraloc:=tparavarsym(hsym).paraloc[callerside].location;
    { self must live in a single register location }
    if assigned(paraloc^.next) then
      InternalError(2013020101);
    case paraloc^.loc of
      LOC_REGISTER:
        handle_reg_imm12_reg(list,A_SUB,paraloc^.size,paraloc^.register,ioffset,paraloc^.register,NR_IP0,false,true);
      else
        internalerror(2010103102);
    end;
    if (po_virtualmethod in procdef.procoptions) and
       not is_objectpascal_helper(procdef.struct) then
      begin
        if (procdef.extnumber=$ffff) then
          Internalerror(200006139);
        { load the VMT pointer from self into the scratch register }
        reference_reset_base(href,paraloc^.register,0,sizeof(pint));
        getcpuregister(list,NR_IP0);
        a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_IP0);
        { load the method address from its VMT slot and branch to it }
        reference_reset_base(href,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
        op:=A_LDR;
        make_simple_ref(list,op,OS_ADDR,PF_None,href,NR_IP0);
        list.concat(taicpu.op_reg_ref(op,NR_IP0,href));
        ungetcpuregister(list,NR_IP0);
        list.concat(taicpu.op_reg(A_BR,NR_IP0));
      end
    else
      a_jmp_name(list,procdef.mangledname);
    list.concat(Tai_symbol_end.Createname(labelname));
  end;
{ Instantiates the global code generator objects for this target: the
  AArch64 code generator and the 128-bit (register-pair) helper. }
procedure create_codegen;
  begin
    cg:=tcgaarch64.Create;
    cg128:=tcg128.Create;
  end;
  2108. end.