{ cgcpu.pas }
{
    Copyright (c) 2014 by Jonas Maebe

    This unit implements the code generator for Xtensa

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit cgcpu;

{$i fpcdefs.inc}

interface

    uses
       globtype,parabase,
       cgbase,cgutils,cgobj,
       aasmbase,aasmtai,aasmdata,aasmcpu,
       cpubase,cpuinfo,
       node,symconst,SymType,symdef,
       rgcpu,
       cg64f32;

    type
      { Code generator for the Xtensa CPU.
        NOTE(review): this backend is in an early bring-up state — nearly all
        overrides in the implementation section only emit a placeholder NOP. }
      tcgcpu=class(tcg)
        public
          { set up the integer and FPU register allocators }
          procedure init_register_allocators;override;
          procedure done_register_allocators;override;
          { move instructions }
          procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
          procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
          procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
          procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
          procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
          procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
          procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
          procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
          procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
          procedure a_call_reg(list:TAsmList;Reg:tregister);override;
          procedure a_jmp_name(list: TAsmList; const s: string);override;
          procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
          procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
          { comparison operations }
          procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
          procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
          procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
{$ifdef dummy}
          { NOTE(review): everything in this disabled section was apparently
            copied from the AArch64 code generator as a template — the matching
            implementations further down are even qualified "tcgaarch64", so
            this section cannot compile as-is if "dummy" were ever defined. }
        protected
          { changes register size without adding register allocation info }
          function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
        public
          { simplifies "ref" so it can be used with "op". If "ref" can be used
            with a different load/Store operation that has the same meaning as the
            original one, "op" will be replaced with the alternative }
          procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
          function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
          procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
          function getmmregister(list:TAsmList;size:tcgsize):tregister;override;
          function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
          { General purpose instructions }
          procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
          procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
          procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
          procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
          { move instructions }
          procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
          procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
          procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
          { fpu move instructions (not used, all floating point is vector unit-based) }
          procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
          procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
          procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
          { comparison operations }
          procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
          procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
          procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
          procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
          procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
          procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
          procedure g_maybe_got_init(list: TAsmList); override;
          procedure g_restore_registers(list: TAsmList);override;
          procedure g_save_registers(list: TAsmList);override;
          procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
          procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
          procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
          procedure g_profilecode(list: TAsmList);override;
        private
          function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
          procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
{$endif dummy}
      end;

      { 64-bit helper operations implemented on top of the 32-bit code
        generator (Xtensa registers are 32 bit) }
      tcg64fxtensa = class(tcg64f32)
        procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
        procedure a_op64_const_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64);override;
        procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
        //procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
        //procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
        //procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
        //procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
        //procedure a_loadmm_intreg64_reg(list: TAsmList; mmsize: tcgsize; intreg: tregister64; mmreg: tregister);override;
        //procedure a_loadmm_reg_intreg64(list: TAsmList; mmsize: tcgsize; mmreg: tregister; intreg: tregister64);override;
      end;

    procedure create_codegen;

{ NOTE(review): opcode/condition mapping tables, still commented out and
  still containing the AArch64 mnemonics they were copied with.
const
      TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
      );
      TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
      );
      TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
        C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
      );
}

implementation

    uses
       globals,verbose,systems,cutils,
       paramgr,fmodule,
       symtable,symsym,
       tgobj,
       procinfo,cpupi;
  128. procedure tcgcpu.init_register_allocators;
  129. begin
  130. inherited init_register_allocators;
  131. rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
  132. [RS_A2,RS_A3,RS_A4,RS_A5,RS_A6,RS_A7,RS_A8,RS_A9,
  133. RS_A10,RS_A11,RS_A12,RS_A13,RS_A14,RS_A15],first_int_imreg,[]);
  134. rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
  135. [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7,RS_F8,RS_F9,
  136. RS_F10,RS_F11,RS_F12,RS_F13,RS_F14,RS_F15],first_fpu_imreg,[]);
  137. end;
  138. procedure tcgcpu.done_register_allocators;
  139. begin
  140. rg[R_INTREGISTER].free;
  141. rg[R_FPUREGISTER].free;
  142. inherited done_register_allocators;
  143. end;
  144. procedure tcgcpu.a_load_reg_reg(list : TAsmList; fromsize,tosize : tcgsize;
  145. reg1,reg2 : tregister);
  146. begin
  147. list.Concat(taicpu.op_none(A_NOP));
  148. end;
  149. procedure tcgcpu.a_load_reg_ref(list : TAsmList; fromsize,tosize : tcgsize;
  150. reg : tregister; const ref : TReference);
  151. begin
  152. list.Concat(taicpu.op_none(A_NOP));
  153. end;
  154. procedure tcgcpu.a_load_ref_reg(list : TAsmList; fromsize,tosize : tcgsize;
  155. const ref : TReference; reg : tregister);
  156. begin
  157. list.Concat(taicpu.op_none(A_NOP));
  158. end;
  159. procedure tcgcpu.a_load_const_reg(list : TAsmList; size : tcgsize;
  160. a : tcgint; reg : tregister);
  161. begin
  162. list.Concat(taicpu.op_none(A_NOP));
  163. end;
  164. procedure tcgcpu.a_loadaddr_ref_reg(list : TAsmList;
  165. const ref : TReference; r : tregister);
  166. begin
  167. list.Concat(taicpu.op_none(A_NOP));
  168. end;
  169. procedure tcgcpu.a_op_reg_reg(list : TAsmList; op : topcg; size : tcgsize;
  170. src,dst : tregister);
  171. begin
  172. list.Concat(taicpu.op_none(A_NOP));
  173. end;
  174. procedure tcgcpu.a_op_const_reg(list : TAsmList; op : topcg;
  175. size : tcgsize; a : tcgint; reg : tregister);
  176. begin
  177. list.Concat(taicpu.op_none(A_NOP));
  178. end;
  179. procedure tcgcpu.a_op_reg_reg_reg(list : TAsmList; op : topcg;
  180. size : tcgsize; src1,src2,dst : tregister);
  181. begin
  182. list.Concat(taicpu.op_none(A_NOP));
  183. end;
  184. procedure tcgcpu.a_call_name(list : TAsmList; const s : string;
  185. weak : boolean);
  186. begin
  187. list.Concat(taicpu.op_none(A_NOP));
  188. end;
  189. procedure tcgcpu.a_call_reg(list : TAsmList; Reg : tregister);
  190. begin
  191. list.Concat(taicpu.op_none(A_NOP));
  192. end;
  193. procedure tcgcpu.a_jmp_name(list : TAsmList; const s : string);
  194. begin
  195. list.Concat(taicpu.op_none(A_NOP));
  196. end;
  197. procedure tcgcpu.g_proc_entry(list : TAsmList; localsize : longint;
  198. nostackframe : boolean);
  199. begin
  200. list.Concat(taicpu.op_reg_const(A_ENTRY,NR_STACK_POINTER_REG,32));
  201. end;
  202. procedure tcgcpu.g_proc_exit(list : TAsmList; parasize : longint;
  203. nostackframe : boolean);
  204. begin
  205. list.Concat(taicpu.op_none(A_RETW));
  206. end;
  207. procedure tcgcpu.a_cmp_reg_reg_label(list : TAsmList; size : tcgsize;
  208. cmp_op : topcmp; reg1,reg2 : tregister; l : tasmlabel);
  209. begin
  210. list.Concat(taicpu.op_none(A_NOP));
  211. end;
  212. procedure tcgcpu.a_jmp_always(list : TAsmList; l : TAsmLabel);
  213. begin
  214. list.Concat(taicpu.op_none(A_NOP));
  215. end;
  216. procedure tcgcpu.g_concatcopy(list : TAsmList; const source,
  217. dest : treference; len : tcgint);
  218. begin
  219. list.Concat(taicpu.op_none(A_NOP));
  220. end;
  221. procedure tcg64fxtensa.a_op64_reg_reg(list : TAsmList; op : TOpCG;
  222. size : tcgsize; regsrc,regdst : tregister64);
  223. begin
  224. list.Concat(taicpu.op_none(A_NOP));
  225. end;
  226. procedure tcg64fxtensa.a_op64_const_reg_reg(list : TAsmList; op : TOpCG;
  227. size : tcgsize; value : int64; regsrc,regdst : tregister64);
  228. begin
  229. list.Concat(taicpu.op_none(A_NOP));
  230. end;
  231. procedure tcg64fxtensa.a_op64_const_reg(list : TAsmList; op : TOpCG;
  232. size : tcgsize; value : int64; reg : tregister64);
  233. begin
  234. list.Concat(taicpu.op_none(A_NOP));
  235. end;
  236. {$ifdef dummy}
{ Legalizes "ref" into an addressing form that "op" can encode, emitting the
  required address computations into "list"; may replace "op" with an
  equivalent instruction (e.g. LDR <-> LDUR, STR <-> STUR) when that makes
  the offset encodable. "preferred_newbasereg", when not NR_NO, is used as
  the scratch/base register for the rewritten reference.
  NOTE(review): this is AArch64 ("tcgaarch64") code kept inside the disabled
  "ifdef dummy" region of this Xtensa unit — apparently retained as a
  template; it is not compiled for Xtensa. }
procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
  var
    href: treference;
    so: tshifterop;
    accesssize: longint;
  begin
    { canonicalize: an index without a base becomes the base (a shifted
      index cannot be promoted, though) }
    if (ref.base=NR_NO) then
      begin
        if ref.shiftmode<>SM_None then
          internalerror(2014110701);
        ref.base:=ref.index;
        ref.index:=NR_NO;
      end;
    { no abitrary scale factor support (the generic code doesn't set it,
      AArch-specific code shouldn't either) }
    if not(ref.scalefactor in [0,1]) then
      internalerror(2014111002);
    case simple_ref_type(op,size,oppostfix,ref) of
      sr_simple:
        { already directly encodable }
        exit;
      sr_internal_illegal:
        internalerror(2014121702);
      sr_complex:
        { continue } ;
    end;
    { --- symbol part: resolve it into a register --- }
    if assigned(ref.symbol) then
      begin
        { internal "load symbol" instructions should already be valid }
        if assigned(ref.symboldata) or
           (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
          internalerror(2014110802);
        { no relative symbol support (needed) yet }
        if assigned(ref.relsymbol) then
          internalerror(2014111001);
        { loading a symbol address (whether it's in the GOT or not) consists
          of two parts: first load the page on which it is located, then
          either the offset in the page or load the value at that offset in
          the page. This final GOT-load can be relaxed by the linker in case
          the variable itself can be stored directly in the GOT }
        if (preferred_newbasereg=NR_NO) or
           (ref.base=preferred_newbasereg) or
           (ref.index=preferred_newbasereg) then
          preferred_newbasereg:=getaddressregister(list);
        { load the (GOT) page }
        reference_reset_symbol(href,ref.symbol,0,8,[]);
        if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
            (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
           ((ref.symbol.typ=AT_DATA) and
            (ref.symbol.bind=AB_LOCAL)) then
          href.refaddr:=addr_page
        else
          href.refaddr:=addr_gotpage;
        list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
        { load the GOT entry (= address of the variable) }
        reference_reset_base(href,preferred_newbasereg,0,ctempposinvalid,sizeof(pint),[]);
        href.symbol:=ref.symbol;
        { code symbols defined in the current compilation unit do not
          have to be accessed via the GOT }
        if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
            (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
           ((ref.symbol.typ=AT_DATA) and
            (ref.symbol.bind=AB_LOCAL)) then
          begin
            href.base:=NR_NO;
            href.refaddr:=addr_pageoffset;
            list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
          end
        else
          begin
            href.refaddr:=addr_gotpageoffset;
            { use a_load_ref_reg() rather than directly encoding the LDR,
              so that we'll check the validity of the reference }
            a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
          end;
        { set as new base register }
        if ref.base=NR_NO then
          ref.base:=preferred_newbasereg
        else if ref.index=NR_NO then
          ref.index:=preferred_newbasereg
        else
          begin
            { make sure it's valid in case ref.base is SP -> make it
              the second operand}
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
            ref.base:=preferred_newbasereg
          end;
        ref.symbol:=nil;
      end;
    { --- base & index --- }
    if (ref.base<>NR_NO) and
       (ref.index<>NR_NO) then
      begin
        case op of
          A_LDR, A_STR:
            begin
              if (ref.shiftmode=SM_None) and
                 (ref.shiftimm<>0) then
                internalerror(2014110805);
              { wrong shift? (possible in case of something like
                array_of_2byte_rec[x].bytefield -> shift will be set 1, but
                the final load is a 1 byte -> can't use shift after all }
              if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
                 ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
                  (ref.offset<>0)) then
                begin
                  if preferred_newbasereg=NR_NO then
                    preferred_newbasereg:=getaddressregister(list);
                  { "add" supports a superset of the shift modes supported by
                    load/store instructions }
                  shifterop_reset(so);
                  so.shiftmode:=ref.shiftmode;
                  so.shiftimm:=ref.shiftimm;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                  { possibly still an invalid offset -> fall through }
                end
              else if ref.offset<>0 then
                begin
                  if (preferred_newbasereg=NR_NO) or
                     { we keep ref.index, so it must not be overwritten }
                     (ref.index=preferred_newbasereg) then
                    preferred_newbasereg:=getaddressregister(list);
                  { add to the base and not to the index, because the index
                    may be scaled; this works even if the base is SP }
                  a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                  ref.offset:=0;
                  ref.base:=preferred_newbasereg;
                  { finished }
                  exit;
                end
              else
                { valid -> exit }
                exit;
            end;
          { todo }
          A_LD1,A_LD2,A_LD3,A_LD4,
          A_ST1,A_ST2,A_ST3,A_ST4:
            internalerror(2014110704);
          { these don't support base+index }
          A_LDUR,A_STUR,
          A_LDP,A_STP:
            begin
              { these either don't support pre-/post-indexing, or don't
                support it with base+index }
              if ref.addressmode<>AM_OFFSET then
                internalerror(2014110911);
              if preferred_newbasereg=NR_NO then
                preferred_newbasereg:=getaddressregister(list);
              if ref.shiftmode<>SM_None then
                begin
                  { "add" supports a superset of the shift modes supported by
                    load/store instructions }
                  shifterop_reset(so);
                  so.shiftmode:=ref.shiftmode;
                  so.shiftimm:=ref.shiftimm;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
                end
              else
                a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
              reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
              { fall through to the handling of base + offset, since the
                offset may still be too big }
            end;
          else
            internalerror(2014110901);
        end;
      end;
    { --- base + offset --- }
    if ref.base<>NR_NO then
      begin
        { valid offset for LDUR/STUR -> use that }
        if (ref.addressmode=AM_OFFSET) and
           (op in [A_LDR,A_STR]) and
           (ref.offset>=-256) and
           (ref.offset<=255) then
          begin
            if op=A_LDR then
              op:=A_LDUR
            else
              op:=A_STUR
          end
        { if it's not a valid LDUR/STUR, use LDR/STR }
        else if (op in [A_LDUR,A_STUR]) and
           ((ref.offset<-256) or
            (ref.offset>255) or
            (ref.addressmode<>AM_OFFSET)) then
          begin
            if op=A_LDUR then
              op:=A_LDR
            else
              op:=A_STR
          end;
        case op of
          A_LDR,A_STR:
            begin
              case ref.addressmode of
                AM_PREINDEXED:
                  begin
                    { since the loaded/stored register cannot be the same
                      as the base register, we can safely add the
                      offset to the base if it doesn't fit}
                    if (ref.offset<-256) or
                       (ref.offset>255) then
                      begin
                        a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
                        ref.offset:=0;
                      end;
                  end;
                AM_POSTINDEXED:
                  begin
                    { cannot emulate post-indexing if we have to fold the
                      offset into the base register }
                    if (ref.offset<-256) or
                       (ref.offset>255) then
                      internalerror(2014110909);
                    { ok }
                  end;
                AM_OFFSET:
                  begin
                    { unsupported offset -> fold into base register }
                    accesssize:=1 shl tcgsizep2size[size];
                    if (ref.offset<0) or
                       (ref.offset>(((1 shl 12)-1)*accesssize)) or
                       ((ref.offset mod accesssize)<>0) then
                      begin
                        if preferred_newbasereg=NR_NO then
                          preferred_newbasereg:=getaddressregister(list);
                        { can we split the offset beween an
                          "add/sub (imm12 shl 12)" and the load (also an
                          imm12)?
                          -- the offset from the load will always be added,
                          that's why the lower bound has a smaller range
                          than the upper bound; it must also be a multiple
                          of the access size }
                        if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
                           (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
                           ((ref.offset mod accesssize)=0) then
                          begin
                            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
                            ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
                          end
                        else
                          begin
                            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                            ref.offset:=0;
                          end;
                        reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                      end;
                  end
              end;
            end;
          A_LDP,A_STP:
            begin
              { unsupported offset -> fold into base register (these
                instructions support all addressmodes) }
              if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
                 (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
                begin
                  case ref.addressmode of
                    AM_POSTINDEXED:
                      { don't emulate post-indexing if we have to fold the
                        offset into the base register }
                      internalerror(2014110910);
                    AM_PREINDEXED:
                      { this means the offset must be added to the current
                        base register }
                      preferred_newbasereg:=ref.base;
                    AM_OFFSET:
                      if preferred_newbasereg=NR_NO then
                        preferred_newbasereg:=getaddressregister(list);
                  end;
                  a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                  reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,ref.alignment,ref.volatility);
                end
            end;
          A_LDUR,A_STUR:
            begin
              { valid, checked above }
            end;
          { todo }
          A_LD1,A_LD2,A_LD3,A_LD4,
          A_ST1,A_ST2,A_ST3,A_ST4:
            internalerror(2014110908);
          else
            internalerror(2014110708);
        end;
        { done }
        exit;
      end;
    { only an offset -> change to base (+ offset 0) }
    if preferred_newbasereg=NR_NO then
      preferred_newbasereg:=getaddressregister(list);
    a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
    reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,newalignment(8,ref.offset),ref.volatility);
  end;
  532. function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
  533. var
  534. subreg:Tsubregister;
  535. begin
  536. subreg:=cgsize2subreg(getregtype(reg),size);
  537. result:=reg;
  538. setsubreg(result,subreg);
  539. end;
  540. function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
  541. begin
  542. internalerror(2014122110);
  543. { squash warning }
  544. result:=NR_NO;
  545. end;
  546. function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
  547. begin
  548. make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
  549. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  550. result:=ref;
  551. end;
{ Emits "op" with an arbitrary constant operand a: the constant is
  encoded directly when it fits the AArch64 imm12 or "imm12 lsl 12"
  forms, otherwise it is materialised in tmpreg (a fresh register is
  allocated when tmpreg=NR_NO) and the register-register form is used.
  usedest selects the three-operand (e.g. add dst,src,#a) vs the
  two-operand (e.g. cmp src,#a) form; setflags appends the S postfix. }
  552. procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
  553. var
  554. instr: taicpu;
  555. so: tshifterop;
  556. hadtmpreg: boolean;
  557. begin
  558. { imm12 }
  559. if (a>=0) and
  560. (a<=((1 shl 12)-1)) then
  561. if usedest then
  562. instr:=taicpu.op_reg_reg_const(op,dst,src,a)
  563. else
  564. instr:=taicpu.op_reg_const(op,src,a)
  565. { imm12 lsl 12 }
  566. else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
  567. begin
  568. so.shiftmode:=SM_LSL;
  569. so.shiftimm:=12;
  570. if usedest then
  571. instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
  572. else
  573. instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
  574. end
  575. else
  576. begin
  577. { todo: other possible optimizations (e.g. load 16 bit constant in
  578. register and then add/sub/cmp/cmn shifted the rest) }
{ a caller-supplied tmpreg is a fixed cpu register and must be explicitly
  allocated/deallocated; an internally allocated one is left to the
  register allocator }
  579. if tmpreg=NR_NO then
  580. begin
  581. hadtmpreg:=false;
  582. tmpreg:=getintregister(list,size);
  583. end
  584. else
  585. begin
  586. hadtmpreg:=true;
  587. getcpuregister(list,tmpreg);
  588. end;
  589. a_load_const_reg(list,size,a,tmpreg);
  590. if usedest then
  591. instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
  592. else
  593. instr:=taicpu.op_reg_reg(op,src,tmpreg);
  594. if hadtmpreg then
  595. ungetcpuregister(list,tmpreg);
  596. end;
  597. if setflags then
  598. setoppostfix(instr,PF_S);
  599. list.concat(instr);
  600. end;
  601. {****************************************************************************
  602. Assembler code
  603. ****************************************************************************}
{ Creates the register allocators for the integer and mm (fp/vector)
  register files.  X29 (frame pointer, see the comment below) and X30
  are not offered to the allocator; NOTE(review): X18 is also omitted -
  presumably reserved as a platform register, verify against the ABI
  support code. }
  604. procedure tcgaarch64.init_register_allocators;
  605. begin
  606. inherited init_register_allocators;
  607. rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
  608. [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
  609. RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
  610. RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
  611. { maybe we can enable this in the future for leaf functions (it's
  612. the frame pointer)
  613. ,RS_X29 }],
  614. first_int_imreg,[]);
{ all 32 q registers are handed to the allocator }
  615. rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
  616. [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
  617. RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
  618. RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
  619. RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
  620. first_mm_imreg,[]);
  621. end;
{ Frees the per-registerfile allocators again.
  NOTE(review): rg[R_FPUREGISTER] is freed here although its creation is
  not visible in this unit's init_register_allocators - confirm it is
  created elsewhere (e.g. by the inherited implementation). }
  622. procedure tcgaarch64.done_register_allocators;
  623. begin
  624. rg[R_INTREGISTER].free;
  625. rg[R_FPUREGISTER].free;
  626. rg[R_MMREGISTER].free;
  627. inherited done_register_allocators;
  628. end;
  629. function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
  630. begin
  631. case size of
  632. OS_F32:
  633. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
  634. OS_F64:
  635. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
  636. else
  637. internalerror(2014102701);
  638. end;
  639. end;
  640. procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
  641. begin
  642. if not weak then
  643. list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s,AT_FUNCTION)))
  644. else
  645. list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)));
  646. end;
{ Emits an indirect call: branch-and-link through the given register. }
  647. procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
  648. begin
  649. list.concat(taicpu.op_reg(A_BLR,reg));
  650. end;
  651. {********************** load instructions ********************}
{ Loads the constant a into reg with as few instructions as possible:
  first tries a single (possibly shifted) movn, then a single orr with a
  logical-immediate ("shifter") constant, and otherwise builds the value
  16 bits at a time with a movz followed by movk instructions. }
  652. procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
  653. var
  654. preva: tcgint;
  655. opc: tasmop;
  656. shift,maxshift: byte;
  657. so: tshifterop;
  658. reginited: boolean;
  659. mask: tcgint;
  660. begin
  661. { if we load a value into a 32 bit register, it is automatically
  662. zero-extended to 64 bit }
  663. if (hi(a)=0) and
  664. (size in [OS_64,OS_S64]) then
  665. begin
  666. size:=OS_32;
  667. reg:=makeregsize(reg,size);
  668. end;
  669. { values <= 32 bit are stored in a 32 bit register }
  670. if not(size in [OS_64,OS_S64]) then
  671. a:=cardinal(a);
  672. if size in [OS_64,OS_S64] then
  673. begin
  674. mask:=-1;
  675. maxshift:=64;
  676. end
  677. else
  678. begin
  679. mask:=$ffffffff;
  680. maxshift:=32;
  681. end;
  682. { single movn enough? (to be extended) }
  683. shift:=16;
  684. preva:=a;
  685. repeat
{ all bits above the current 16 bit chunk set -> a movn of the inverted
  low 16 bits (shifted into place) reproduces the value }
  686. if (a shr shift)=(mask shr shift) then
  687. begin
  688. if shift=16 then
  689. list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
  690. else
  691. begin
  692. shifterop_reset(so);
  693. so.shiftmode:=SM_LSL;
  694. so.shiftimm:=shift-16;
  695. list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
  696. end;
  697. exit;
  698. end;
  699. { only try the next 16 bits if the current one is all 1 bits, since
  700. the movn will set all lower bits to 1 }
  701. if word(a shr (shift-16))<>$ffff then
  702. break;
  703. inc(shift,16);
  704. until shift=maxshift;
  705. reginited:=false;
  706. shift:=0;
  707. { can be optimized later to use more movn }
  708. repeat
  709. { leftover is shifterconst? (don't check if we can represent it just
  710. as effectively with movz/movk, as this check is expensive) }
  711. if ((shift<tcgsize2size[size]*(8 div 2)) and
  712. (word(a)<>0) and
  713. ((a shr 16)<>0)) and
  714. is_shifter_const(a shl shift,size) then
  715. begin
  716. if reginited then
  717. list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
  718. else
{ orr with the zero register = plain load of the shifter constant }
  719. list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
  720. exit;
  721. end;
  722. { set all 16 bit parts <> 0 }
  723. if (word(a)<>0) or
  724. ((shift=0) and
  725. (a=0)) then
  726. if shift=0 then
  727. begin
  728. list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
  729. reginited:=true;
  730. end
  731. else
  732. begin
  733. shifterop_reset(so);
  734. so.shiftmode:=SM_LSL;
  735. so.shiftimm:=shift;
{ first emitted chunk uses movz (zeroes the rest of the register),
  subsequent ones use movk to keep what was already set }
  736. if not reginited then
  737. begin
  738. opc:=A_MOVZ;
  739. reginited:=true;
  740. end
  741. else
  742. opc:=A_MOVK;
  743. list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
  744. end;
  745. preva:=a;
  746. a:=a shr 16;
  747. inc(shift,16);
  748. until word(preva)=preva;
  749. if not reginited then
  750. internalerror(2014102702);
  751. end;
  752. procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
  753. var
  754. reg: tregister;
  755. begin
  756. { use the zero register if possible }
  757. if a=0 then
  758. begin
  759. if size in [OS_64,OS_S64] then
  760. reg:=NR_XZR
  761. else
  762. reg:=NR_WZR;
  763. a_load_reg_ref(list,size,size,reg,ref);
  764. end
  765. else
  766. inherited;
  767. end;
{ Stores reg to ref: the register is first narrowed, or sign/zero
  extended to 64 bit, so that its size matches tosize; insufficiently
  aligned references are delegated to a_load_reg_ref_unaligned.  The
  store itself is an STR with the size-dependent postfix (strb/strh). }
  768. procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  769. var
  770. oppostfix:toppostfix;
  771. hreg: tregister;
  772. begin
  773. if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
  774. begin
  775. fromsize:=tosize;
  776. reg:=makeregsize(list,reg,fromsize);
  777. end
  778. { have a 32 bit register but need a 64 bit one? }
  779. else if tosize in [OS_64,OS_S64] then
  780. begin
  781. { sign extend if necessary }
  782. if fromsize in [OS_S8,OS_S16,OS_S32] then
  783. begin
  784. { can't overwrite reg, may be a constant reg }
  785. hreg:=getintregister(list,tosize);
  786. a_load_reg_reg(list,fromsize,tosize,reg,hreg);
  787. reg:=hreg;
  788. end
  789. else
  790. { top 32 bit are zero by default }
  791. reg:=makeregsize(reg,OS_64);
  792. fromsize:=tosize;
  793. end;
  794. if (ref.alignment<>0) and
  795. (ref.alignment<tcgsize2size[tosize]) then
  796. begin
  797. a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
  798. end
  799. else
  800. begin
{ pick the store-instruction postfix for the destination size }
  801. case tosize of
  802. { signed integer registers }
  803. OS_8,
  804. OS_S8:
  805. oppostfix:=PF_B;
  806. OS_16,
  807. OS_S16:
  808. oppostfix:=PF_H;
  809. OS_32,
  810. OS_S32,
  811. OS_64,
  812. OS_S64:
  813. oppostfix:=PF_None;
  814. else
  815. InternalError(200308299);
  816. end;
  817. handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
  818. end;
  819. end;
{ Loads ref into reg as an LDR with the size/signedness-dependent
  postfix (ldrb/ldrsb/ldrh/ldrsh/ldrsw); insufficiently aligned
  references are delegated to a_load_ref_reg_unaligned. }
  820. procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  821. var
  822. oppostfix:toppostfix;
  823. begin
  824. if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
  825. fromsize:=tosize;
  826. { ensure that all bits of the 32/64 register are always correctly set:
  827. * default behaviour is always to zero-extend to the entire (64 bit)
  828. register -> unsigned 8/16/32 bit loads only exist with a 32 bit
  829. target register, as the upper 32 bit will be zeroed implicitly
  830. -> always make target register 32 bit
  831. * signed loads exist both with 32 and 64 bit target registers,
  832. depending on whether the value should be sign extended to 32 or
  833. to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
  834. corresponding 64 bit register are again zeroed) -> no need to
  835. change anything (we only have 32 and 64 bit registers), except that
  836. when loading an OS_S32 to a 32 bit register, we don't need/can't
  837. use sign extension
  838. }
  839. if fromsize in [OS_8,OS_16,OS_32] then
  840. reg:=makeregsize(reg,OS_32);
  841. if (ref.alignment<>0) and
  842. (ref.alignment<tcgsize2size[fromsize]) then
  843. begin
  844. a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
  845. exit;
  846. end;
{ pick the load-instruction postfix for source size and signedness }
  847. case fromsize of
  848. { signed integer registers }
  849. OS_8:
  850. oppostfix:=PF_B;
  851. OS_S8:
  852. oppostfix:=PF_SB;
  853. OS_16:
  854. oppostfix:=PF_H;
  855. OS_S16:
  856. oppostfix:=PF_SH;
  857. OS_S32:
  858. if getsubreg(reg)=R_SUBD then
  859. oppostfix:=PF_NONE
  860. else
  861. oppostfix:=PF_SW;
  862. OS_32,
  863. OS_64,
  864. OS_S64:
  865. oppostfix:=PF_None;
  866. else
  867. InternalError(200308297);
  868. end;
  869. handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
  870. { clear upper 16 bits if the value was negative }
  871. if (fromsize=OS_S8) and (tosize=OS_16) then
  872. a_load_reg_reg(list,fromsize,tosize,reg,reg);
  873. end;
{ Loads from a reference whose alignment is smaller than the access
  size, assembling the value from aligned 32/16/8 bit pieces:
  - 64 bit loads become two 32 bit loads (or one ldp when possible),
    combined with bfi;
  - 16/32 bit loads become a sequence of 16 bit (alignment 2) or 8 bit
    loads combined with bfi.  Big-endian targets read the pieces in the
    opposite order. }
  874. procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
  875. var
  876. href: treference;
  877. hreg1, hreg2, tmpreg,tmpreg2: tregister;
  878. i : Integer;
  879. begin
  880. case fromsize of
  881. OS_64,OS_S64:
  882. begin
  883. { split into two 32 bit loads }
  884. hreg1:=getintregister(list,OS_32);
  885. hreg2:=getintregister(list,OS_32);
  886. if target_info.endian=endian_big then
  887. begin
  888. tmpreg:=hreg1;
  889. hreg1:=hreg2;
  890. hreg2:=tmpreg;
  891. end;
  892. { can we use LDP? }
  893. if (ref.alignment=4) and
  894. (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
  895. list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
  896. else
  897. begin
  898. a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
  899. href:=ref;
  900. inc(href.offset,4);
  901. a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
  902. end;
{ combine: low half via move/zero-extend, high half inserted with bfi }
  903. a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
  904. list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
  905. end;
  906. OS_16,OS_S16,
  907. OS_32,OS_S32:
  908. begin
  909. if ref.alignment=2 then
  910. begin
  911. href:=ref;
{ on big endian, start at the most significant piece and walk down }
  912. if target_info.endian=endian_big then
  913. inc(href.offset,tcgsize2size[fromsize]-2);
  914. tmpreg:=getintregister(list,OS_32);
  915. a_load_ref_reg(list,OS_16,OS_32,href,tmpreg);
  916. tmpreg2:=getintregister(list,OS_32);
  917. for i:=1 to (tcgsize2size[fromsize]-1) div 2 do
  918. begin
  919. if target_info.endian=endian_big then
  920. dec(href.offset,2)
  921. else
  922. inc(href.offset,2);
  923. a_load_ref_reg(list,OS_16,OS_32,href,tmpreg2);
  924. list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*16,16));
  925. end;
  926. a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
  927. end
  928. else
  929. begin
  930. href:=ref;
  931. if target_info.endian=endian_big then
  932. inc(href.offset,tcgsize2size[fromsize]-1);
  933. tmpreg:=getintregister(list,OS_32);
  934. a_load_ref_reg(list,OS_8,OS_32,href,tmpreg);
  935. tmpreg2:=getintregister(list,OS_32);
  936. for i:=1 to tcgsize2size[fromsize]-1 do
  937. begin
  938. if target_info.endian=endian_big then
  939. dec(href.offset)
  940. else
  941. inc(href.offset);
  942. a_load_ref_reg(list,OS_8,OS_32,href,tmpreg2);
  943. list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*8,8));
  944. end;
  945. a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
  946. end;
  947. end;
  948. else
  949. inherited;
  950. end;
  951. end;
{ Register-to-register move with the size conversions required by the
  32/64 bit register model: emits uxt/sxt for truncations and sign
  extensions, ubfiz for an explicit 64->32 truncation, and a plain mov
  (registered with the allocator for possible elimination) otherwise. }
  952. procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
  953. var
  954. instr: taicpu;
  955. begin
  956. { we use both 32 and 64 bit registers -> insert conversion when
  957. we have to truncate/sign extend inside the (32 or 64 bit) register
  958. holding the value, and when we sign extend from a 32 to a 64 bit
  959. register }
  960. if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
  961. ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
  962. (fromsize<>tosize) and
  963. not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
  964. ((fromsize in [OS_S8,OS_S16,OS_S32]) and
  965. (tosize in [OS_64,OS_S64])) or
  966. { needs to mask out the sign in the top 16 bits }
  967. ((fromsize=OS_S8) and
  968. (tosize=OS_16)) then
  969. begin
  970. case tosize of
  971. OS_8:
  972. list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
  973. OS_16:
  974. list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
  975. OS_S8:
  976. list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
  977. OS_S16:
  978. list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
  979. { while "mov wN, wM" automatically inserts a zero-extension and
  980. hence we could encode a 64->32 bit move like that, the problem
  981. is that we then can't distinguish 64->32 from 32->32 moves, and
  982. the 64->32 truncation could be removed altogether... So use a
  983. different instruction }
  984. OS_32,
  985. OS_S32:
  986. { in theory, reg1 should be 64 bit here (since fromsize>tosize),
  987. but because of the way location_force_register() tries to
  988. avoid superfluous zero/sign extensions, it's not always the
  989. case -> also force reg1 to 64 bit }
  990. list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
  991. OS_64,
  992. OS_S64:
  993. list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
  994. else
  995. internalerror(2002090901);
  996. end;
  997. end
  998. else
  999. begin
  1000. { 32 -> 32 bit move implies zero extension (sign extensions have
  1001. been handled above) -> also use for 32 <-> 64 bit moves }
  1002. if not(fromsize in [OS_64,OS_S64]) or
  1003. not(tosize in [OS_64,OS_S64]) then
  1004. instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
  1005. else
  1006. instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
  1007. list.Concat(instr);
  1008. { Notify the register allocator that we have written a move instruction so
  1009. it can try to eliminate it. }
  1010. add_move_instruction(instr);
  1011. end;
  1012. end;
{ Loads the address described by ref into r: simplifies the reference
  as for a 64 bit load, then either emits adr for a local symbol, an
  add (possibly shifted) for base+index, an add-immediate for
  base+offset, or a plain register move for a bare base. }
  1013. procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
  1014. var
  1015. href: treference;
  1016. so: tshifterop;
  1017. op: tasmop;
  1018. begin
  1019. op:=A_LDR;
  1020. href:=ref;
  1021. { simplify as if we're going to perform a regular 64 bit load, using
  1022. "r" as the new base register if possible/necessary }
  1023. make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
  1024. { load literal? }
  1025. if assigned(href.symbol) then
  1026. begin
  1027. if (href.base<>NR_NO) or
  1028. (href.index<>NR_NO) or
  1029. not assigned(href.symboldata) then
  1030. internalerror(2014110912);
  1031. list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
  1032. end
  1033. else
  1034. begin
  1035. if href.index<>NR_NO then
  1036. begin
  1037. if href.shiftmode<>SM_None then
  1038. begin
  1039. { "add" supports a superset of the shift modes supported by
  1040. load/store instructions }
  1041. shifterop_reset(so);
  1042. so.shiftmode:=href.shiftmode;
  1043. so.shiftimm:=href.shiftimm;
  1044. list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
  1045. end
  1046. else
  1047. a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
  1048. end
  1049. else if href.offset<>0 then
  1050. a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
  1051. else
  1052. a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
  1053. end;
  1054. end;
{ Not used on AArch64: floating point values are handled through the mm
  registers (a_loadmm_* family), so calling the fpu variant is an error. }
  1055. procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
  1056. begin
  1057. internalerror(2014122107)
  1058. end;
{ Not used on AArch64: floating point loads go through a_loadmm_ref_reg,
  so calling the fpu variant is an error. }
  1059. procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  1060. begin
  1061. internalerror(2014122108)
  1062. end;
{ Not used on AArch64: floating point stores go through a_loadmm_reg_ref,
  so calling the fpu variant is an error. }
  1063. procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  1064. begin
  1065. internalerror(2014122109)
  1066. end;
{ Moves/converts between mm (floating point) registers: fmov for a
  same-size move (registered with the allocator for elimination), fcvt
  for a precision change (which may raise an fpu exception, hence the
  check).  Only scalar shuffles are supported. }
  1067. procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
  1068. var
  1069. instr: taicpu;
  1070. begin
  1071. if assigned(shuffle) and
  1072. not shufflescalar(shuffle) then
  1073. internalerror(2014122104);
  1074. if fromsize=tosize then
  1075. begin
  1076. instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
  1077. { Notify the register allocator that we have written a move
  1078. instruction so it can try to eliminate it. }
  1079. add_move_instruction(instr);
  1080. { FMOV cannot generate a floating point exception }
  1081. end
  1082. else
  1083. begin
  1084. if (reg_cgsize(reg1)<>fromsize) or
  1085. (reg_cgsize(reg2)<>tosize) then
  1086. internalerror(2014110913);
  1087. instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
  1088. maybe_check_for_fpu_exception(list);
  1089. end;
  1090. list.Concat(instr);
  1091. end;
  1092. procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  1093. var
  1094. tmpreg: tregister;
  1095. begin
  1096. if assigned(shuffle) and
  1097. not shufflescalar(shuffle) then
  1098. internalerror(2014122105);
  1099. tmpreg:=NR_NO;
  1100. if (fromsize<>tosize) then
  1101. begin
  1102. tmpreg:=reg;
  1103. reg:=getmmregister(list,fromsize);
  1104. end;
  1105. handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
  1106. if (fromsize<>tosize) then
  1107. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
  1108. end;
  1109. procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  1110. var
  1111. tmpreg: tregister;
  1112. begin
  1113. if assigned(shuffle) and
  1114. not shufflescalar(shuffle) then
  1115. internalerror(2014122106);
  1116. if (fromsize<>tosize) then
  1117. begin
  1118. tmpreg:=getmmregister(list,tosize);
  1119. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
  1120. reg:=tmpreg;
  1121. end;
  1122. handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
  1123. end;
  1124. procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  1125. begin
  1126. if not shufflescalar(shuffle) then
  1127. internalerror(2014122801);
  1128. if not(tcgsize2size[fromsize] in [4,8]) or
  1129. (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
  1130. internalerror(2014122803);
  1131. list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
  1132. end;
  1133. procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
  1134. var
  1135. r : tregister;
  1136. begin
  1137. if not shufflescalar(shuffle) then
  1138. internalerror(2014122802);
  1139. if not(tcgsize2size[fromsize] in [4,8]) or
  1140. (tcgsize2size[fromsize]>tcgsize2size[tosize]) then
  1141. internalerror(2014122804);
  1142. if tcgsize2size[fromsize]<tcgsize2size[tosize] then
  1143. r:=makeregsize(intreg,fromsize)
  1144. else
  1145. r:=intreg;
  1146. list.concat(taicpu.op_reg_reg(A_UMOV,r,mmreg));
  1147. end;
{ In-register mm operation; only the xor-with-self idiom used to zero
  global regvars is supported (emitted as movi #0), everything else is
  an internal error. }
  1148. procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
  1149. begin
  1150. case op of
  1151. { "xor Vx,Vx" is used to initialize global regvars to 0 }
  1152. OP_XOR:
  1153. begin
  1154. if (src<>dst) or
  1155. (reg_cgsize(src)<>size) or
  1156. assigned(shuffle) then
  1157. internalerror(2015011401);
  1158. case size of
  1159. OS_F32,
  1160. OS_F64:
  1161. list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
  1162. else
  1163. internalerror(2015011402);
  1164. end;
  1165. end
  1166. else
  1167. internalerror(2015011403);
  1168. end;
  1169. end;
{ Bit scan (find first/last set bit): forward scans use rbit+clz,
  reverse scans use clz with the result xor'ed against bitsize-1.  When
  the source is zero, dst is set to 255 via csinv (-1) followed by a
  byte-sized zero extension. }
  1170. procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
  1171. var
  1172. bitsize: longint;
  1173. begin
  1174. if srcsize in [OS_64,OS_S64] then
  1175. begin
  1176. bitsize:=64;
  1177. end
  1178. else
  1179. begin
  1180. bitsize:=32;
  1181. end;
  1182. { source is 0 -> dst will have to become 255 }
  1183. list.concat(taicpu.op_reg_const(A_CMP,src,0));
  1184. if reverse then
  1185. begin
  1186. list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
  1187. { xor 31/63 is the same as setting the lower 5/6 bits to
  1188. "31/63-(lower 5/6 bits of dst)" }
  1189. list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
  1190. end
  1191. else
  1192. begin
  1193. list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
  1194. list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
  1195. end;
  1196. { set dst to -1 if src was 0 }
  1197. list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
  1198. { mask the -1 to 255 if src was 0 (anyone find a two-instruction
  1199. branch-free version? All of mine are 3...) }
  1200. list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,makeregsize(dst,OS_32),makeregsize(dst,OS_32)),PF_B));
  1201. end;
{ Stores to a reference whose alignment is smaller than the access
  size.  64 bit stores are split into two 32 bit stores (or one stp when
  the reference allows it); pieces are swapped on big-endian targets.
  All other sizes fall back to the generic implementation. }
  1202. procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
  1203. var
  1204. href: treference;
  1205. hreg1, hreg2, tmpreg: tregister;
  1206. begin
  1207. if fromsize in [OS_64,OS_S64] then
  1208. begin
  1209. { split into two 32 bit stores }
  1210. hreg1:=getintregister(list,OS_32);
  1211. hreg2:=getintregister(list,OS_32);
{ hreg1 = low 32 bits, hreg2 = high 32 bits of the source register }
  1212. a_load_reg_reg(list,OS_32,OS_32,makeregsize(register,OS_32),hreg1);
  1213. a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
  1214. if target_info.endian=endian_big then
  1215. begin
  1216. tmpreg:=hreg1;
  1217. hreg1:=hreg2;
  1218. hreg2:=tmpreg;
  1219. end;
  1220. { can we use STP? }
  1221. if (ref.alignment=4) and
  1222. (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
  1223. list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
  1224. else
  1225. begin
  1226. a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
  1227. href:=ref;
  1228. inc(href.offset,4);
  1229. a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
  1230. end;
  1231. end
  1232. else
  1233. inherited;
  1234. end;
  1235. procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
  1236. const
  1237. overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
  1238. begin
  1239. if (op in overflowops) and
  1240. (size in [OS_8,OS_S8,OS_16,OS_S16]) then
  1241. a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
  1242. end;
  1243. procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
  1244. begin
  1245. optimize_op_const(size,op,a);
  1246. case op of
  1247. OP_NONE:
  1248. exit;
  1249. OP_MOVE:
  1250. a_load_const_reg(list,size,a,reg);
  1251. OP_NEG,OP_NOT:
  1252. internalerror(200306011);
  1253. else
  1254. a_op_const_reg_reg(list,op,size,a,reg,reg);
  1255. end;
  1256. end;
  1257. procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
  1258. begin
  1259. Case op of
  1260. OP_NEG,
  1261. OP_NOT:
  1262. begin
  1263. list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
  1264. maybeadjustresult(list,op,size,dst);
  1265. end
  1266. else
  1267. a_op_reg_reg_reg(list,op,size,src,dst,dst);
  1268. end;
  1269. end;
  1270. procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
  1271. var
  1272. l: tlocation;
  1273. begin
  1274. a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
  1275. end;
{ Three-operand operation "dst := src2 op src1".  rol has no register
  variant on AArch64 and is rewritten as "ror by (bitsize - count)";
  ror itself only exists for 32/64 bit sizes.  Everything else maps
  directly through TOpCG2AsmOpReg. }
  1276. procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
  1277. var
  1278. hreg: tregister;
  1279. begin
  1280. { no ROLV opcode... }
  1281. if op=OP_ROL then
  1282. begin
  1283. case size of
  1284. OS_32,OS_S32,
  1285. OS_64,OS_S64:
  1286. begin
{ hreg := bitsize - src1, then rotate right by hreg }
  1287. hreg:=getintregister(list,size);
  1288. a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
  1289. a_op_reg_reg(list,OP_SUB,size,src1,hreg);
  1290. a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
  1291. exit;
  1292. end;
  1293. else
  1294. internalerror(2014111005);
  1295. end;
  1296. end
  1297. else if (op=OP_ROR) and
  1298. not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
  1299. internalerror(2014111006);
  1300. if TOpCG2AsmOpReg[op]=A_NONE then
  1301. internalerror(2014111007);
  1302. list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
  1303. maybeadjustresult(list,op,size,dst);
  1304. end;
{ "dst := src op a" with optional overflow flag generation (setflags).
  ovloc receives where the overflow condition can be tested (LOC_VOID
  when none is generated).  Negative add/sub constants are flipped to
  the opposite operation; the constant is then folded via
  optimize_op_const and dispatched per operation class: imm12 encoding
  for add/sub, logical-immediate for or/and/xor, immediate shifts,
  ror-rewriting for rotates, and a register-materialised constant for
  mul/div. }
  1305. procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
  1306. var
  1307. shiftcountmask: longint;
  1308. constreg: tregister;
  1309. begin
  1310. { add/sub instructions have only positive immediate operands }
  1311. if (op in [OP_ADD,OP_SUB]) and
  1312. (a<0) then
  1313. begin
  1314. if op=OP_ADD then
  1315. op:=op_SUB
  1316. else
  1317. op:=OP_ADD;
  1318. { avoid range/overflow error in case a = low(tcgint) }
  1319. {$push}{$r-}{$q-}
  1320. a:=-a;
  1321. {$pop}
  1322. end;
  1323. ovloc.loc:=LOC_VOID;
  1324. optimize_op_const(size,op,a);
{ folding may have reduced the operation to a move or a no-op }
  1325. case op of
  1326. OP_NONE:
  1327. begin
  1328. a_load_reg_reg(list,size,size,src,dst);
  1329. exit;
  1330. end;
  1331. OP_MOVE:
  1332. begin
  1333. a_load_const_reg(list,size,a,dst);
  1334. exit;
  1335. end;
  1336. else
  1337. ;
  1338. end;
  1339. case op of
  1340. OP_ADD,
  1341. OP_SUB:
  1342. begin
  1343. handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
  1344. { on a 64 bit target, overflows with smaller data types
  1345. are handled via range errors }
  1346. if setflags and
  1347. (size in [OS_64,OS_S64]) then
  1348. begin
  1349. location_reset(ovloc,LOC_FLAGS,OS_8);
{ unsigned: carry set (add) / carry clear (sub); signed: overflow flag }
  1350. if size=OS_64 then
  1351. if op=OP_ADD then
  1352. ovloc.resflags:=F_CS
  1353. else
  1354. ovloc.resflags:=F_CC
  1355. else
  1356. ovloc.resflags:=F_VS;
  1357. end;
  1358. end;
  1359. OP_OR,
  1360. OP_AND,
  1361. OP_XOR:
  1362. begin
  1363. if not(size in [OS_64,OS_S64]) then
  1364. a:=cardinal(a);
  1365. if is_shifter_const(a,size) then
  1366. list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
  1367. else
  1368. begin
  1369. constreg:=getintregister(list,size);
  1370. a_load_const_reg(list,size,a,constreg);
  1371. a_op_reg_reg_reg(list,op,size,constreg,src,dst);
  1372. end;
  1373. end;
  1374. OP_SHL,
  1375. OP_SHR,
  1376. OP_SAR:
  1377. begin
  1378. if size in [OS_64,OS_S64] then
  1379. shiftcountmask:=63
  1380. else
  1381. shiftcountmask:=31;
  1382. if (a and shiftcountmask)<>0 Then
  1383. list.concat(taicpu.op_reg_reg_const(
  1384. TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
  1385. else
  1386. a_load_reg_reg(list,size,size,src,dst);
  1387. if (a and not(tcgint(shiftcountmask)))<>0 then
  1388. internalError(2014112101);
  1389. end;
  1390. OP_ROL,
  1391. OP_ROR:
  1392. begin
  1393. case size of
  1394. OS_32,OS_S32:
  1395. if (a and not(tcgint(31)))<>0 then
  1396. internalError(2014112102);
  1397. OS_64,OS_S64:
  1398. if (a and not(tcgint(63)))<>0 then
  1399. internalError(2014112103);
  1400. else
  1401. internalError(2014112104);
  1402. end;
  1403. { there's only a ror opcode }
  1404. if op=OP_ROL then
  1405. a:=(tcgsize2size[size]*8)-a;
  1406. list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
  1407. end;
  1408. OP_MUL,
  1409. OP_IMUL,
  1410. OP_DIV,
  1411. OP_IDIV:
  1412. begin
{ no immediate forms exist -> materialise the constant in a register }
  1413. constreg:=getintregister(list,size);
  1414. a_load_const_reg(list,size,a,constreg);
  1415. a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
  1416. end;
  1417. else
  1418. internalerror(2014111403);
  1419. end;
  1420. maybeadjustresult(list,op,size,dst);
  1421. end;
procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
  { Perform dst:=src2 op src1 and, when setflags is requested, also set up
    ovloc so that g_overflowcheck_loc can test for overflow afterwards. }
  var
    tmpreg1, tmpreg2: tregister;
  begin
    ovloc.loc:=LOC_VOID;
    { overflow can only occur with 64 bit calculations on 64 bit cpus }
    if setflags and
       (size in [OS_64,OS_S64]) then
      begin
        case op of
          OP_ADD,
          OP_SUB:
            begin
              { use the flag-setting form of the instruction (PF_S -> adds/subs) }
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
              ovloc.loc:=LOC_FLAGS;
              { unsigned (OS_64): overflow is signalled by the carry flag
                (set for add, clear for sub); signed: by the V flag }
              if size=OS_64 then
                if op=OP_ADD then
                  ovloc.resflags:=F_CS
                else
                  ovloc.resflags:=F_CC
              else
                ovloc.resflags:=F_VS;
              { finished }
              exit;
            end;
          OP_MUL:
            begin
              { check whether the upper 64 bit of the 128 bit product is 0 }
              tmpreg1:=getintregister(list,OS_64);
              list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
              list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
              ovloc.loc:=LOC_FLAGS;
              ovloc.resflags:=F_NE;
              { still have to perform the actual multiplication
                (falls through to the a_op_reg_reg_reg call below) }
            end;
          OP_IMUL:
            begin
              { check whether the upper 64 bits of the 128 bit multiplication
                result have the same value as the replicated sign bit of the
                lower 64 bits }
              tmpreg1:=getintregister(list,OS_64);
              list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
              { calculate lower 64 bits (afterwards, because dst may be
                equal to src1 or src2) }
              a_op_reg_reg_reg(list,op,size,src1,src2,dst);
              { replicate sign bit }
              tmpreg2:=getintregister(list,OS_64);
              a_op_const_reg_reg(list,OP_SAR,OS_S64,63,dst,tmpreg2);
              list.concat(taicpu.op_reg_reg(A_CMP,tmpreg1,tmpreg2));
              ovloc.loc:=LOC_FLAGS;
              ovloc.resflags:=F_NE;
              { finished }
              exit;
            end;
          OP_IDIV,
          OP_DIV:
            begin
              { not handled here, needs div-by-zero check (dividing by zero
                just gives a 0 result on aarch64), and low(int64) div -1
                check for overflow) }
              internalerror(2014122101);
            end;
          else
            internalerror(2019050936);
        end;
      end;
    { the actual operation (reached for OP_MUL above and for all
      non-flag-setting cases) }
    a_op_reg_reg_reg(list,op,size,src1,src2,dst);
  end;
{*************** compare instructions ****************}
  1491. procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
  1492. var
  1493. op: tasmop;
  1494. begin
  1495. if a>=0 then
  1496. op:=A_CMP
  1497. else
  1498. op:=A_CMN;
  1499. { avoid range/overflow error in case a=low(tcgint) }
  1500. {$push}{$r-}{$q-}
  1501. handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
  1502. {$pop}
  1503. a_jmp_cond(list,cmp_op,l);
  1504. end;
procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
  { Compare two registers and branch to l when cmp_op holds.
    Note the operand order: the emitted instruction is "cmp reg2,reg1". }
  begin
    list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
    a_jmp_cond(list,cmp_op,l);
  end;
  1510. procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
  1511. var
  1512. ai: taicpu;
  1513. begin
  1514. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name,AT_FUNCTION));
  1515. ai.is_jmp:=true;
  1516. list.Concat(ai);
  1517. end;
  1518. procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
  1519. var
  1520. ai: taicpu;
  1521. begin
  1522. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
  1523. ai.is_jmp:=true;
  1524. list.Concat(ai);
  1525. end;
  1526. procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
  1527. var
  1528. ai: taicpu;
  1529. begin
  1530. ai:=TAiCpu.op_sym(A_B,l);
  1531. ai.is_jmp:=true;
  1532. ai.SetCondition(TOpCmp2AsmCond[cond]);
  1533. list.Concat(ai);
  1534. end;
  1535. procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
  1536. var
  1537. ai : taicpu;
  1538. begin
  1539. ai:=Taicpu.op_sym(A_B,l);
  1540. ai.is_jmp:=true;
  1541. ai.SetCondition(flags_to_cond(f));
  1542. list.Concat(ai);
  1543. end;
procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
  { Materialise the flag condition f as 0/1 in reg via "cset".
    The size parameter is not used by this implementation. }
  begin
    list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
  end;
procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
  { Deliberately unsupported on aarch64: callers must use
    g_overflowcheck_loc with an explicit overflow location instead. }
  begin
    { we need an explicit overflow location, because there are many
      possibilities (not just the overflow flag, which is only used for
      signed add/sub) }
    internalerror(2014112303);
  end;
procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
  { Emit an overflow check for ovloc (as prepared by
    a_op_reg_reg_reg_checkoverflow): call FPC_OVERFLOW unless the
    no-overflow condition holds. }
  var
    hl : tasmlabel;
    hflags : tresflags;
  begin
    if not(cs_check_overflow in current_settings.localswitches) then
      exit;
    current_asmdata.getjumplabel(hl);
    case ovloc.loc of
      LOC_FLAGS:
        begin
          { jump over the FPC_OVERFLOW call when no overflow occurred,
            i.e. on the inverse of the overflow condition }
          hflags:=ovloc.resflags;
          inverse_flags(hflags);
          cg.a_jmp_flags(list,hflags,hl);
        end;
      else
        internalerror(2014112304);
    end;
    a_call_name(list,'FPC_OVERFLOW',false);
    a_label(list,hl);
  end;
  1576. { *********** entry/exit code and address loading ************ }
function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
  { Store all registers of type rt in [lowsr..highsr] that are used in the
    current procedure, two at a time via "stp r1,r2,[sp,#-16]!".
    Returns the number of bytes pushed onto the stack. }
  var
    ref: treference;
    sr: tsuperregister;
    pairreg: tregister;
  begin
    result:=0;
    { pre-indexed store: sp is decremented by 16 before every stp }
    reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
    ref.addressmode:=AM_PREINDEXED;
    pairreg:=NR_NO;
    { store all used registers pairwise; pairreg holds the first register
      of a pair until its partner is found }
    for sr:=lowsr to highsr do
      if sr in rg[rt].used_in_proc then
        if pairreg=NR_NO then
          pairreg:=newreg(rt,sr,sub)
        else
          begin
            inc(result,16);
            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
            pairreg:=NR_NO
          end;
    { one left -> store twice (stack must be 16 bytes aligned) }
    if pairreg<>NR_NO then
      begin
        list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
        inc(result,16);
      end;
  end;
  1605. procedure FixupOffsets(p:TObject;arg:pointer);
  1606. var
  1607. sym: tabstractnormalvarsym absolute p;
  1608. begin
  1609. if (tsym(p).typ in [paravarsym,localvarsym]) and
  1610. (sym.localloc.loc=LOC_REFERENCE) and
  1611. (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
  1612. begin
  1613. sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
  1614. dec(sym.localloc.reference.offset,PLongint(arg)^);
  1615. end;
  1616. end;
procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
  { Generate the procedure prologue: save fp/lr, establish the frame
    pointer, save callee-saved integer and vector registers, and allocate
    the local stack space. }
  var
    ref: treference;
    totalstackframesize: longint;
  begin
    if nostackframe then
      exit;
    { stack pointer has to be aligned to 16 bytes at all times }
    localsize:=align(localsize,16);
    { save stack pointer and return address: stp fp,lr,[sp,#-16]! }
    reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
    ref.addressmode:=AM_PREINDEXED;
    list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
    { initialise frame pointer }
    a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
    totalstackframesize:=localsize;
    { save modified integer registers }
    inc(totalstackframesize,
      save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
    { only the lower 64 bits of the modified vector registers need to be
      saved; if the caller needs the upper 64 bits, it has to save them
      itself }
    inc(totalstackframesize,
      save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
    { allocate stack space }
    if localsize<>0 then
      begin
        localsize:=align(localsize,16);
        current_procinfo.final_localsize:=localsize;
        handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
      end;
    { By default, we use the frame pointer to access parameters passed via
      the stack and the stack pointer to address local variables and temps
      because
       a) we can use bigger positive than negative offsets (so accessing
          locals via negative offsets from the frame pointer would be less
          efficient)
       b) we don't know the local size while generating the code, so
          accessing the parameters via the stack pointer is not possible
          without copying them
      The problem with this is the get_frame() intrinsic:
       a) it must return the same value as what we pass as parentfp
          parameter, since that's how it's used in the TP-style objects unit
       b) its return value must usable to access all local data from a
          routine (locals and parameters), since it's all the nested
          routines have access to
       c) its return value must be usable to construct a backtrace, as it's
          also used by the exception handling routines
      The solution we use here, based on something similar that's done in
      the MIPS port, is to generate all accesses to locals in the routine
      itself SP-relative, and then after the code is generated and the local
      size is known (namely, here), we change all SP-relative variables/
      parameters into FP-relative ones. This means that they'll be accessed
      less efficiently from nested routines, but those accesses are indirect
      anyway and at least this way they can be accessed at all
    }
    if current_procinfo.has_nestedprocs then
      begin
        current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
        current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
      end;
  end;
procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
  { GOT pointer setup hook; intentionally empty on this target. }
  begin
    { nothing to do on Darwin or Linux }
  end;
procedure tcgaarch64.g_restore_registers(list:TAsmList);
  { Intentionally empty: register restoring is emitted by g_proc_exit. }
  begin
    { done in g_proc_exit }
  end;
procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
  { Reload the registers stored by save_regs, in reverse order, via
    "ldp r1,r2,[sp],#16" (and a single ldr for an odd count). }
  var
    ref: treference;
    sr, highestsetsr: tsuperregister;
    pairreg: tregister;
    regcount: longint;
  begin
    { post-indexed load: sp is incremented by 16 after every load }
    reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
    ref.addressmode:=AM_POSTINDEXED;
    { highest reg stored twice? (save_regs duplicates the last register
      when an odd number of registers was saved) }
    regcount:=0;
    highestsetsr:=RS_NO;
    for sr:=lowsr to highsr do
      if sr in rg[rt].used_in_proc then
        begin
          inc(regcount);
          highestsetsr:=sr;
        end;
    if odd(regcount) then
      begin
        { reload the duplicated register with a single ldr and exclude it
          from the pairwise loop below }
        list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
        highestsetsr:=pred(highestsetsr);
      end;
    { load all (other) used registers pairwise, in the reverse order of
      how save_regs stored them }
    pairreg:=NR_NO;
    for sr:=highestsetsr downto lowsr do
      if sr in rg[rt].used_in_proc then
        if pairreg=NR_NO then
          pairreg:=newreg(rt,sr,sub)
        else
          begin
            list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
            pairreg:=NR_NO
          end;
    { There can't be any register left }
    if pairreg<>NR_NO then
      internalerror(2014112602);
  end;
procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
  { Generate the procedure epilogue: restore callee-saved registers and
    the stack pointer, reload fp/lr and return. }
  var
    ref: treference;
    regsstored: boolean;
    sr: tsuperregister;
  begin
    if not(nostackframe) and
      { we do not need an exit stack frame when we never return
        * the final ret is left so the peephole optimizer can easily do call/ret -> jmp or call conversions
        * the entry stack frame must be normally generated because the subroutine could be still left by
          an exception and then the unwinding code might need to restore the registers stored by the entry code
      }
      not(po_noreturn in current_procinfo.procdef.procoptions) then
      begin
        { if no registers have been stored, we don't have to subtract the
          allocated temp space from the stack pointer }
        regsstored:=false;
        for sr:=RS_X19 to RS_X28 do
          if sr in rg[R_INTREGISTER].used_in_proc then
            begin
              regsstored:=true;
              break;
            end;
        if not regsstored then
          for sr:=RS_D8 to RS_D15 do
            if sr in rg[R_MMREGISTER].used_in_proc then
              begin
                regsstored:=true;
                break;
              end;
        { restore registers (and stack pointer) }
        if regsstored then
          begin
            { release the local area first so sp points at the saved
              registers, then reload them in reverse order of saving }
            if current_procinfo.final_localsize<>0 then
              handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
            load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
            load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
          end
        else if current_procinfo.final_localsize<>0 then
          { restore stack pointer (fp still points at the entry sp) }
          a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
        { restore framepointer and return address: ldp fp,lr,[sp],#16 }
        reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
        ref.addressmode:=AM_POSTINDEXED;
        list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
      end;
    { return }
    list.concat(taicpu.op_none(A_RET));
  end;
procedure tcgaarch64.g_save_registers(list : TAsmList);
  { Intentionally empty: register saving is emitted by g_proc_entry. }
  begin
    { done in g_proc_entry }
  end;
  1778. { ************* concatcopy ************ }
procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  { Copy len bytes from source to dest by calling the RTL FPC_MOVE helper
    (used when an inline copy would be too long). }
  var
    paraloc1,paraloc2,paraloc3 : TCGPara;
    pd : tprocdef;
  begin
    pd:=search_system_proc('MOVE');
    paraloc1.init;
    paraloc2.init;
    paraloc3.init;
    paramanager.getcgtempparaloc(list,pd,1,paraloc1);
    paramanager.getcgtempparaloc(list,pd,2,paraloc2);
    paramanager.getcgtempparaloc(list,pd,3,paraloc3);
    { parameters are loaded in reverse order (3,2,1) }
    a_load_const_cgpara(list,OS_SINT,len,paraloc3);
    a_loadaddr_ref_cgpara(list,dest,paraloc2);
    a_loadaddr_ref_cgpara(list,source,paraloc1);
    paramanager.freecgpara(list,paraloc3);
    paramanager.freecgpara(list,paraloc2);
    paramanager.freecgpara(list,paraloc1);
    { the call clobbers all volatile registers }
    alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
    alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
    a_call_name(list,'FPC_MOVE',false);
    dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
    dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
    paraloc3.done;
    paraloc2.done;
    paraloc1.done;
  end;
  1806. procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
  1807. var
  1808. sourcebasereplaced, destbasereplaced: boolean;
  1809. { get optimal memory operation to use for loading/storing data
  1810. in an unrolled loop }
  1811. procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
  1812. begin
  1813. if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
  1814. (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
  1815. begin
  1816. memop:=unscaledop;
  1817. needsimplify:=true;
  1818. end
  1819. else if (unscaledop<>A_NONE) and
  1820. (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
  1821. (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
  1822. begin
  1823. memop:=unscaledop;
  1824. needsimplify:=false;
  1825. end
  1826. else
  1827. begin
  1828. memop:=scaledop;
  1829. needsimplify:=true;
  1830. end;
  1831. end;
  { adjust the offset and/or addressing mode after a load/store so it's
    correct for the next one of the same size }
  procedure updaterefafterloadstore(var ref: treference; oplen: longint);
    begin
      case ref.addressmode of
        AM_OFFSET:
          inc(ref.offset,oplen);
        AM_POSTINDEXED:
          { base register updated by instruction, next offset can remain
            the same }
          ;
        AM_PREINDEXED:
          begin
            { base register updated by instruction -> next instruction can
              use post-indexing with offset = sizeof(operation) }
            ref.offset:=0;
            ref.addressmode:=AM_OFFSET;
          end;
      end;
    end;
  { generate a load/store and adjust the reference offset to the next
    memory location if necessary }
  procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
    begin
      list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
      updaterefafterloadstore(ref,tcgsize2size[opsize]);
    end;
  { generate a dual load/store (ldp/stp) and adjust the reference offset to
    the next memory location if necessary (a pair transfers 2*opsize bytes) }
  procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
    begin
      list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
      updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
    end;
  { turn a reference into a pre- or post-indexed reference for use in a
    load/store of a particular size; may replace ref.base with a fresh
    register (tracked via basereplaced so it only happens once) }
  procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
    var
      tmpreg: tregister;
      scaledoffset: longint;
      orgaddressmode: taddressmode;
    begin
      scaledoffset:=tcgsize2size[opsize];
      if scaledop in [A_LDP,A_STP] then
        scaledoffset:=scaledoffset*2;
      { can we use the reference as post-indexed without changes? }
      if forcepostindexing then
        begin
          orgaddressmode:=ref.addressmode;
          ref.addressmode:=AM_POSTINDEXED;
          if (orgaddressmode=AM_POSTINDEXED) or
             ((ref.offset=0) and
              (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
            begin
              { just change the post-indexed offset to the access size }
              ref.offset:=scaledoffset;
              { and replace the base register if that didn't happen yet
                (could be sp or a regvar) }
              if not basereplaced then
                begin
                  tmpreg:=getaddressregister(list);
                  a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
                  ref.base:=tmpreg;
                  basereplaced:=true;
                end;
              exit;
            end;
          ref.addressmode:=orgaddressmode;
        end;
{$ifdef dummy}
      This could in theory be useful in case you have a concatcopy from
      e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
      very unlikely. Disabled because it still needs fixes, as it
      also generates pre-indexed loads right now at the very end for the
      left-over gencopies

      { can we turn it into a pre-indexed reference for free? (after the
        first operation, it will be turned into an offset one) }
      if not forcepostindexing and
         (ref.offset<>0) then
        begin
          orgaddressmode:=ref.addressmode;
          ref.addressmode:=AM_PREINDEXED;
          tmpreg:=ref.base;
          if not basereplaced and
             (ref.base=tmpreg) then
            begin
              tmpreg:=getaddressregister(list);
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
              ref.base:=tmpreg;
              basereplaced:=true;
            end;
          if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
            make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
          exit;
        end;
{$endif dummy}
      if not forcepostindexing then
        begin
          ref.addressmode:=AM_OFFSET;
          make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
          { this may still cause problems if the final offset is no longer
            a simple ref; it's a bit complicated to pass all information
            through at all places and check that here, so play safe: we
            currently never generate unrolled copies for more than 64
            bytes (32 with non-double-register copies) }
          if ref.index=NR_NO then
            begin
              if ((scaledop in [A_LDP,A_STP]) and
                  (ref.offset<((64-8)*tcgsize2size[opsize]))) or
                 ((scaledop in [A_LDUR,A_STUR]) and
                  (ref.offset<(255-8*tcgsize2size[opsize]))) or
                 ((scaledop in [A_LDR,A_STR]) and
                  (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
                exit;
            end;
        end;
      { fall-back: load the address into a fresh base register }
      tmpreg:=getaddressregister(list);
      a_loadaddr_ref_reg(list,ref,tmpreg);
      basereplaced:=true;
      if forcepostindexing then
        begin
          reference_reset_base(ref,tmpreg,scaledoffset,ref.temppos,ref.alignment,ref.volatility);
          ref.addressmode:=AM_POSTINDEXED;
        end
      else
        begin
          reference_reset_base(ref,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
          ref.addressmode:=AM_OFFSET;
        end
    end;
  { prepare a reference for use by gencopy. This is done both after the
    unrolled and regular copy loop -> get rid of post-indexing mode, make
    sure ref is valid }
  procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
    var
      simplify: boolean;
    begin
      if ref.addressmode=AM_POSTINDEXED then
        ref.offset:=tcgsize2size[opsize];
      getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
      if simplify then
        begin
          makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
          op:=scaledop;
        end;
    end;
  { generate a copy from source to dest of size opsize/postfix
    (one load into a temp register followed by one store) }
  procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
    var
      reg: tregister;
      loadop, storeop: tasmop;
    begin
      preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
      preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
      reg:=getintregister(list,opsize);
      genloadstore(list,loadop,reg,source,postfix,opsize);
      genloadstore(list,storeop,reg,dest,postfix,opsize);
    end;
  { copy the leftovers after an unrolled or regular copy loop
    (at most 8+4+2+1 = 15 bytes, in decreasing chunk sizes) }
  procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
    begin
      { stop post-indexing if we did so in the loop, since in that case all
        offsets definitely can be represented now }
      if source.addressmode=AM_POSTINDEXED then
        begin
          source.addressmode:=AM_OFFSET;
          source.offset:=0;
        end;
      if dest.addressmode=AM_POSTINDEXED then
        begin
          dest.addressmode:=AM_OFFSET;
          dest.offset:=0;
        end;
      { transfer the leftovers }
      if len>=8 then
        begin
          dec(len,8);
          gencopy(list,source,dest,PF_NONE,OS_64);
        end;
      if len>=4 then
        begin
          dec(len,4);
          gencopy(list,source,dest,PF_NONE,OS_32);
        end;
      if len>=2 then
        begin
          dec(len,2);
          gencopy(list,source,dest,PF_H,OS_16);
        end;
      if len>=1 then
        begin
          dec(len);
          gencopy(list,source,dest,PF_B,OS_8);
        end;
    end;
  const
    { load_length + loop dec + cbnz }
    loopoverhead=12;
    { loop overhead + load + store }
    totallooplen=loopoverhead + 8;
  var
    totalalign: longint;
    maxlenunrolled: tcgint;
    loadop, storeop: tasmop;
    opsize: tcgsize;
    postfix: toppostfix;
    tmpsource, tmpdest: treference;
    scaledstoreop, unscaledstoreop,
    scaledloadop, unscaledloadop: tasmop;
    regs: array[1..8] of tregister;
    countreg: tregister;
    i, regcount: longint;
    hl: tasmlabel;
    simplifysource, simplifydest: boolean;
  { Copy len bytes from source to dest, choosing between a single
    load/store, an unrolled sequence, a count-register loop, or a call to
    FPC_MOVE depending on length and alignment. }
  begin
    if len=0 then
      exit;
    sourcebasereplaced:=false;
    destbasereplaced:=false;
    { maximum common alignment }
    totalalign:=max(1,newalignment(source.alignment,dest.alignment));
    { use a simple load/store? }
    if (len in [1,2,4,8]) and
       ((totalalign>=(len div 2)) or
        (source.alignment=len) or
        (dest.alignment=len)) then
      begin
        opsize:=int_cgsize(len);
        a_load_ref_ref(list,opsize,opsize,source,dest);
        exit;
      end;
    { alignment > length is not useful, and would break some checks below }
    while totalalign>len do
      totalalign:=totalalign div 2;
    { operation sizes to use based on common alignment }
    case totalalign of
      1:
        begin
          postfix:=PF_B;
          opsize:=OS_8;
        end;
      2:
        begin
          postfix:=PF_H;
          opsize:=OS_16;
        end;
      4:
        begin
          postfix:=PF_None;
          opsize:=OS_32;
        end
      else
        begin
          totalalign:=8;
          postfix:=PF_None;
          opsize:=OS_64;
        end;
    end;
    { maximum length to handled with an unrolled loop (4 loads + 4 stores) }
    maxlenunrolled:=min(totalalign,8)*4;
    { ldp/stp -> 2 registers per instruction }
    if (totalalign>=4) and
       (len>=totalalign*2) then
      begin
        maxlenunrolled:=maxlenunrolled*2;
        scaledstoreop:=A_STP;
        scaledloadop:=A_LDP;
        unscaledstoreop:=A_NONE;
        unscaledloadop:=A_NONE;
      end
    else
      begin
        scaledstoreop:=A_STR;
        scaledloadop:=A_LDR;
        unscaledstoreop:=A_STUR;
        unscaledloadop:=A_LDUR;
      end;
    { we only need 4 instructions extra to call FPC_MOVE }
    if cs_opt_size in current_settings.optimizerswitches then
      maxlenunrolled:=maxlenunrolled div 2;
    if (len>maxlenunrolled) and
       (len>totalalign*8) then
      begin
        g_concatcopy_move(list,source,dest,len);
        exit;
      end;
    simplifysource:=true;
    simplifydest:=true;
    tmpsource:=source;
    tmpdest:=dest;
    { can we directly encode all offsets in an unrolled loop? }
    if len<=maxlenunrolled then
      begin
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
{$endif extdebug}
        { the leftovers will be handled separately -> -(len mod opsize) }
        inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
        { additionally, the last regular load/store will be at
          offset+len-opsize (if len mod opsize = 0) }
        if tmpsource.offset>source.offset then
          dec(tmpsource.offset,tcgsize2size[opsize]);
        getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
        inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
        if tmpdest.offset>dest.offset then
          dec(tmpdest.offset,tcgsize2size[opsize]);
        getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
        tmpsource:=source;
        tmpdest:=dest;
        { if we can't directly encode all offsets, simplify }
        if simplifysource then
          begin
            loadop:=scaledloadop;
            makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
          end;
        if simplifydest then
          begin
            storeop:=scaledstoreop;
            makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
          end;
        regcount:=len div tcgsize2size[opsize];
        { in case we transfer two registers at a time, we copy an even
          number of registers }
        if loadop=A_LDP then
          regcount:=regcount and not(1);
        { initialise for dfa }
        regs[low(regs)]:=NR_NO;
        { max 4 loads/stores -> max 8 registers (in case of ldp/stp) }
        for i:=1 to regcount do
          regs[i]:=getintregister(list,opsize);
        if loadop=A_LDP then
          begin
            { load registers }
            for i:=1 to (regcount div 2) do
              gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
            { store registers }
            for i:=1 to (regcount div 2) do
              gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
          end
        else
          begin
            for i:=1 to regcount do
              genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
            for i:=1 to regcount do
              genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
          end;
        { leftover }
        len:=len-regcount*tcgsize2size[opsize];
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
{$endif extdebug}
      end
    else
      begin
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
{$endif extdebug}
        { regular loop -> definitely use post-indexing }
        loadop:=scaledloadop;
        makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
        storeop:=scaledstoreop;
        makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
        current_asmdata.getjumplabel(hl);
        countreg:=getintregister(list,OS_32);
        { an ldp/stp iteration transfers 2*opsize bytes per loop pass, so
          the iteration count is len div (opsize*2); the previous
          unparenthesised "len div tcgsize2size[opsize]*2" computed
          (len div opsize)*2 and made the loop copy ~4x too many bytes }
        if loadop=A_LDP then
          a_load_const_reg(list,OS_32,len div (tcgsize2size[opsize]*2),countreg)
        else
          a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
        a_label(list,hl);
        a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
        if loadop=A_LDP then
          begin
            regs[1]:=getintregister(list,opsize);
            regs[2]:=getintregister(list,opsize);
            gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
            gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
          end
        else
          begin
            regs[1]:=getintregister(list,opsize);
            genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
            genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
          end;
        list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
        { the leftover must likewise be taken modulo the number of bytes
          transferred per iteration (2*opsize for ldp/stp) }
        if loadop=A_LDP then
          len:=len mod (tcgsize2size[opsize]*2)
        else
          len:=len mod tcgsize2size[opsize];
      end;
    gencopyleftovers(list,tmpsource,tmpdest,len);
  end;
procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  begin
    { This method is integrated into g_intf_wrapper and shouldn't be called separately }
    InternalError(2013020102);
  end;
procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
  { Read the FPSR and call FPC_THROWFPUEXCEPTION when any of the tested
    cumulative exception status bits is set. }
  var
    r : TRegister;
    ai: taicpu;
    l1,l2: TAsmLabel;
  begin
    { so far, we assume all flavours of AArch64 need explicit floating point exception checking }
    if ((cs_check_fpu_exceptions in current_settings.localswitches) and
        (force or current_procinfo.FPUExceptionCheckNeeded)) then
      begin
        r:=getintregister(list,OS_INT);
        list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
        { test the low 5 status bits ($1f: IOC/DZC/OFC/UFC/IXC per the
          ARM FPSR layout) }
        list.concat(taicpu.op_reg_const(A_TST,r,$1f));
        current_asmdata.getjumplabel(l1);
        current_asmdata.getjumplabel(l2);
        { any of them set -> throw }
        ai:=taicpu.op_sym(A_B,l1);
        ai.is_jmp:=true;
        ai.condition:=C_NE;
        list.concat(ai);
        { also test bit 7 ($80, presumably IDC/input denormal -- confirm
          against the ARM ARM); clear -> skip the throw }
        list.concat(taicpu.op_reg_const(A_TST,r,$80));
        ai:=taicpu.op_sym(A_B,l2);
        ai.is_jmp:=true;
        ai.condition:=C_EQ;
        list.concat(ai);
        a_label(list,l1);
        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        a_label(list,l2);
        if clear then
          current_procinfo.FPUExceptionCheckNeeded:=false;
      end;
  end;
  2258. procedure tcgaarch64.g_profilecode(list : TAsmList);
  2259. begin
  2260. if target_info.system = system_aarch64_linux then
  2261. begin
  2262. list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
  2263. a_call_name(list,'_mcount',false);
  2264. end
  2265. else
  2266. internalerror(2020021901);
  2267. end;
  2268. {$endif dummy}
  2269. {$warnings off}
  2270. procedure create_codegen;
  2271. begin
  2272. cg:=tcgcpu.Create;
  2273. cg64:=tcg64fxtensa.Create;
  2274. end;
  2275. end.