cgcpu.pas 108 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695
  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. This unit implements the code generator for AArch64
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,parabase,
  22. cgbase,cgutils,cgobj,
  23. aasmbase,aasmtai,aasmdata,aasmcpu,
  24. cpubase,cpuinfo,
  25. node,symconst,SymType,symdef,
  26. rgcpu;
  27. type
  28. { tcgaarch64 }
    { tcgaarch64: concrete code generator for the AArch64 (ARM64) target.
      Overrides the generic tcg operations and adds AArch64-specific helpers
      for reference legalization and imm12-style immediate handling. }
    tcgaarch64=class(tcg)
     protected
      { changes register size without adding register allocation info }
      function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
     public
      { simplifies "ref" so it can be used with "op". If "ref" can be used
        with a different load/Store operation that has the same meaning as the
        original one, "op" will be replaced with the alternative }
      procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
      { not supported on this target: all floating point goes through the
        vector (mm) registers, see getmmregister }
      function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
      { emits "op" with an immediate operand, encoding "a" as imm12 or
        imm12-shifted-left-12 when possible, otherwise via a temp register }
      procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
      procedure init_register_allocators;override;
      procedure done_register_allocators;override;
      function getmmregister(list:TAsmList;size:tcgsize):tregister;override;
      { legalizes "ref" via make_simple_ref and emits the load/store }
      function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
      procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
      procedure a_call_reg(list:TAsmList;Reg:tregister);override;
      { General purpose instructions }
      procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
      procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
      procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
      procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
      procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
      procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
      procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
      { move instructions }
      procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
      procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
      procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
      procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
      procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
      procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
      procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
      procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
      { fpu move instructions (not used, all floating point is vector unit-based) }
      procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
      procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
      procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
      { vector/mm register moves and operations }
      procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
      procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
      procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
      procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
      procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
      procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
      { comparison operations }
      procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
      procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
      procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
      procedure a_jmp_name(list: TAsmList; const s: string);override;
      procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
      procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
      procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
      procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
      procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
      { entry/exit code and register save/restore }
      procedure g_stackpointer_alloc(list: TAsmList; localsize: longint);override;
      procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
      procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
      procedure g_maybe_got_init(list: TAsmList); override;
      procedure g_restore_registers(list: TAsmList);override;
      procedure g_save_registers(list: TAsmList);override;
      procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
      procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
      procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
      procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
      procedure g_profilecode(list: TAsmList);override;
     private
      { helpers for g_save_registers/g_restore_registers: save/reload a
        contiguous superregister range as register pairs }
      function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
      procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
    end;
  procedure create_codegen;

  const
    { maps the generic topcg operations to the A64 opcode used when the
      second operand is a register; NOTE(review): assumes the topcg order
      OP_NONE,OP_MOVE,OP_ADD,OP_AND,OP_DIV,OP_IDIV,OP_IMUL,OP_MUL,OP_NEG,
      OP_NOT,OP_OR,OP_SAR,OP_SHL,OP_SHR,OP_SUB,OP_XOR,OP_ROL,OP_ROR from
      cgbase -- confirm when editing. The rotate-left slot has no direct
      instruction here (A_NONE). }
    TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
      A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
    );
    { same mapping for the immediate-operand forms (shifts use the
      immediate aliases A_ASR/A_LSL/A_LSR/A_ROR instead of the
      register-variable variants above) }
    TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
      A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
    );
    { maps generic comparison operators to A64 condition codes; the last
      four entries (C_LS,C_CC,C_CS,C_HI) are the unsigned comparisons --
      NOTE(review): relies on the topcmp declaration order in cgbase,
      confirm when editing }
    TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
      C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
    );
    { 4 KB page granularity; NOTE(review): presumably the probe distance
      for Windows-style stack growth (see winstackpagesize uses in the
      implementation part) -- confirm at the use site }
    winstackpagesize = 4096;
  111. implementation
  112. uses
  113. globals,verbose,systems,cutils,cclasses,
  114. paramgr,fmodule,
  115. symtable,symsym,
  116. tgobj,
  117. ncgutil,
  118. procinfo,cpupi;
{ Rewrites "ref" in place into a form directly encodable by the load/store
  instruction "op", appending any helper instructions (page loads, GOT
  loads, offset folding) to "list". "op" itself may be replaced by an
  equivalent opcode (LDR <-> LDUR, STR <-> STUR) when that makes the
  reference encodable. "preferred_newbasereg" suggests a scratch address
  register to reuse; pass NR_NO to let the routine allocate one. }
procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
  var
    href: treference;
    so: tshifterop;
    accesssize: longint;
  begin
    { normalise: an index without a base becomes the base; only legal when
      no shift is attached to the index }
    if (ref.base=NR_NO) then
      begin
        if ref.shiftmode<>SM_None then
          internalerror(2014110701);
        ref.base:=ref.index;
        ref.index:=NR_NO;
      end;
    { no arbitrary scale factor support (the generic code doesn't set it,
      AArch-specific code shouldn't either) }
    if not(ref.scalefactor in [0,1]) then
      internalerror(2014111002);
    case simple_ref_type(op,size,oppostfix,ref) of
      sr_simple:
        { already encodable as-is }
        exit;
      sr_internal_illegal:
        internalerror(2014121702);
      sr_complex:
        { continue } ;
    end;
    { step 1: resolve the symbol into a register (page + offset / GOT) }
    if assigned(ref.symbol) then
      begin
        { internal "load symbol" instructions should already be valid }
        if assigned(ref.symboldata) or
           (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
          internalerror(2014110802);
        { no relative symbol support (needed) yet }
        if assigned(ref.relsymbol) then
          internalerror(2014111001);
        { loading a symbol address (whether it's in the GOT or not) consists
          of two parts: first load the page on which it is located, then
          either the offset in the page or load the value at that offset in
          the page. This final GOT-load can be relaxed by the linker in case
          the variable itself can be stored directly in the GOT }
        if (preferred_newbasereg=NR_NO) or
           (ref.base=preferred_newbasereg) or
           (ref.index=preferred_newbasereg) then
          preferred_newbasereg:=getaddressregister(list);
        { load the (GOT) page }
        reference_reset_symbol(href,ref.symbol,0,8,[]);
        if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL,AT_DATA]) and
            (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
           ((ref.symbol.typ=AT_DATA) and
            (ref.symbol.bind=AB_LOCAL)) or
           (target_info.system=system_aarch64_win64) then
          href.refaddr:=addr_page
        else
          href.refaddr:=addr_gotpage;
        list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
        { load the GOT entry (= address of the variable) }
        reference_reset_base(href,preferred_newbasereg,0,ctempposinvalid,sizeof(pint),[]);
        href.symbol:=ref.symbol;
        { code symbols defined in the current compilation unit do not
          have to be accessed via the GOT }
        if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL,AT_DATA]) and
            (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
           ((ref.symbol.typ=AT_DATA) and
            (ref.symbol.bind=AB_LOCAL)) or
           (target_info.system=system_aarch64_win64) then
          begin
            href.base:=NR_NO;
            href.refaddr:=addr_pageoffset;
            list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
          end
        else
          begin
            href.refaddr:=addr_gotpageoffset;
            { use a_load_ref_reg() rather than directly encoding the LDR,
              so that we'll check the validity of the reference }
            a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
          end;
        { set as new base register }
        if ref.base=NR_NO then
          ref.base:=preferred_newbasereg
        else if ref.index=NR_NO then
          ref.index:=preferred_newbasereg
        else
          begin
            { make sure it's valid in case ref.base is SP -> make it
              the second operand}
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
            ref.base:=preferred_newbasereg
          end;
        ref.symbol:=nil;
      end;
    { step 2: base & index -> reduce to base only where the instruction
      cannot encode register+register addressing }
    if (ref.base<>NR_NO) and
       (ref.index<>NR_NO) then
      begin
        case op of
          A_LDR, A_STR:
            begin
              if (ref.shiftmode=SM_None) and
                 (ref.shiftimm<>0) then
                internalerror(2014110805);
              { wrong shift? (possible in case of something like
                array_of_2byte_rec[x].bytefield -> shift will be set 1, but
                the final load is a 1 byte -> can't use shift after all }
              if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
                 ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
                  (ref.offset<>0)) then
                begin
                  if preferred_newbasereg=NR_NO then
                    preferred_newbasereg:=getaddressregister(list);
                  { "add" supports a superset of the shift modes supported by
                    load/store instructions }
                  shifterop_reset(so);
                  so.shiftmode:=ref.shiftmode;
                  so.shiftimm:=ref.shiftimm;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                  { possibly still an invalid offset -> fall through }
                end
              else if ref.offset<>0 then
                begin
                  if (preferred_newbasereg=NR_NO) or
                     { we keep ref.index, so it must not be overwritten }
                     (ref.index=preferred_newbasereg) then
                    preferred_newbasereg:=getaddressregister(list);
                  { add to the base and not to the index, because the index
                    may be scaled; this works even if the base is SP }
                  a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                  ref.offset:=0;
                  ref.base:=preferred_newbasereg;
                  { finished }
                  exit;
                end
              else
                { valid -> exit }
                exit;
            end;
          { todo }
          A_LD1,A_LD2,A_LD3,A_LD4,
          A_ST1,A_ST2,A_ST3,A_ST4:
            internalerror(2014110702);
          { these don't support base+index }
          A_LDUR,A_STUR,
          A_LDP,A_STP:
            begin
              { these either don't support pre-/post-indexing, or don't
                support it with base+index }
              if ref.addressmode<>AM_OFFSET then
                internalerror(2014110911);
              if preferred_newbasereg=NR_NO then
                preferred_newbasereg:=getaddressregister(list);
              if ref.shiftmode<>SM_None then
                begin
                  { "add" supports a superset of the shift modes supported by
                    load/store instructions }
                  shifterop_reset(so);
                  so.shiftmode:=ref.shiftmode;
                  so.shiftimm:=ref.shiftimm;
                  list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
                end
              else
                a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
              reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
              { fall through to the handling of base + offset, since the
                offset may still be too big }
            end;
          else
            internalerror(2014110903);
        end;
      end;
    { step 3: base + offset -> pick the opcode variant whose immediate
      range fits, otherwise fold the offset into the base register }
    if ref.base<>NR_NO then
      begin
        { valid offset for LDUR/STUR -> use that }
        if (ref.addressmode=AM_OFFSET) and
           (op in [A_LDR,A_STR]) and
           (ref.offset>=-256) and
           (ref.offset<=255) then
          begin
            if op=A_LDR then
              op:=A_LDUR
            else
              op:=A_STUR
          end
        { if it's not a valid LDUR/STUR, use LDR/STR }
        else if (op in [A_LDUR,A_STUR]) and
                ((ref.offset<-256) or
                 (ref.offset>255) or
                 (ref.addressmode<>AM_OFFSET)) then
          begin
            if op=A_LDUR then
              op:=A_LDR
            else
              op:=A_STR
          end;
        case op of
          A_LDR,A_STR:
            begin
              case ref.addressmode of
                AM_PREINDEXED:
                  begin
                    { since the loaded/stored register cannot be the same
                      as the base register, we can safely add the
                      offset to the base if it doesn't fit}
                    if (ref.offset<-256) or
                       (ref.offset>255) then
                      begin
                        a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
                        ref.offset:=0;
                      end;
                  end;
                AM_POSTINDEXED:
                  begin
                    { cannot emulate post-indexing if we have to fold the
                      offset into the base register }
                    if (ref.offset<-256) or
                       (ref.offset>255) then
                      internalerror(2014110909);
                    { ok }
                  end;
                AM_OFFSET:
                  begin
                    { unsupported offset -> fold into base register }
                    accesssize:=1 shl tcgsizep2size[size];
                    if (ref.offset<0) or
                       (ref.offset>(((1 shl 12)-1)*accesssize)) or
                       ((ref.offset mod accesssize)<>0) then
                      begin
                        if preferred_newbasereg=NR_NO then
                          preferred_newbasereg:=getaddressregister(list);
                        { can we split the offset between an
                          "add/sub (imm12 shl 12)" and the load (also an
                          imm12)?
                          -- the offset from the load will always be added,
                          that's why the lower bound has a smaller range
                          than the upper bound; it must also be a multiple
                          of the access size }
                        if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
                           (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
                           ((ref.offset mod accesssize)=0) then
                          begin
                            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
                            ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
                          end
                        else
                          begin
                            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                            ref.offset:=0;
                          end;
                        reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                      end;
                  end
              end;
            end;
          A_LDP,A_STP:
            begin
              { unsupported offset -> fold into base register (these
                instructions support all addressmodes) }
              if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
                 (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
                begin
                  case ref.addressmode of
                    AM_POSTINDEXED:
                      { don't emulate post-indexing if we have to fold the
                        offset into the base register }
                      internalerror(2014110910);
                    AM_PREINDEXED:
                      { this means the offset must be added to the current
                        base register }
                      preferred_newbasereg:=ref.base;
                    AM_OFFSET:
                      if preferred_newbasereg=NR_NO then
                        preferred_newbasereg:=getaddressregister(list);
                  end;
                  a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                  reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,ref.alignment,ref.volatility);
                end
            end;
          A_LDUR,A_STUR:
            begin
              { valid, checked above }
            end;
          { todo }
          A_LD1,A_LD2,A_LD3,A_LD4,
          A_ST1,A_ST2,A_ST3,A_ST4:
            internalerror(2014110908);
          else
            internalerror(2014110708);
        end;
        { done }
        exit;
      end;
    { only an offset -> change to base (+ offset 0) }
    if preferred_newbasereg=NR_NO then
      preferred_newbasereg:=getaddressregister(list);
    a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
    reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,newalignment(8,ref.offset),ref.volatility);
  end;
  416. function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
  417. var
  418. subreg:Tsubregister;
  419. begin
  420. subreg:=cgsize2subreg(getregtype(reg),size);
  421. result:=reg;
  422. setsubreg(result,subreg);
  423. end;
  424. function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
  425. begin
  426. internalerror(2014122110);
  427. { squash warning }
  428. result:=NR_NO;
  429. end;
  430. function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
  431. begin
  432. make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
  433. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  434. result:=ref;
  435. end;
{ Emits "op" with immediate "a", choosing the cheapest encoding:
  plain imm12, imm12 shifted left by 12, or -- when neither fits -- a
  temporary register loaded with the constant. "usedest" selects the
  three-operand (dst,src,imm) vs two-operand (src,imm) form; "setflags"
  appends the S postfix. "tmpreg" may pre-select the scratch register
  (NR_NO lets the routine allocate one). }
procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
  var
    instr: taicpu;
    so: tshifterop;
    { whether the caller supplied tmpreg (then it is a cpu register that
      must be explicitly acquired/released) }
    hadtmpreg: boolean;
  begin
    { imm12 }
    if (a>=0) and
       (a<=((1 shl 12)-1)) then
      if usedest then
        instr:=taicpu.op_reg_reg_const(op,dst,src,a)
      else
        instr:=taicpu.op_reg_const(op,src,a)
    { imm12 lsl 12 }
    else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
      begin
        so.shiftmode:=SM_LSL;
        so.shiftimm:=12;
        if usedest then
          instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
        else
          instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
      end
    else
      begin
        { todo: other possible optimizations (e.g. load 16 bit constant in
          register and then add/sub/cmp/cmn shifted the rest) }
        if tmpreg=NR_NO then
          begin
            hadtmpreg:=false;
            tmpreg:=getintregister(list,size);
          end
        else
          begin
            hadtmpreg:=true;
            getcpuregister(list,tmpreg);
          end;
        a_load_const_reg(list,size,a,tmpreg);
        if usedest then
          instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
        else
          instr:=taicpu.op_reg_reg(op,src,tmpreg);
        { release the caller-supplied cpu register again }
        if hadtmpreg then
          ungetcpuregister(list,tmpreg);
      end;
    if setflags then
      setoppostfix(instr,PF_S);
    list.concat(instr);
  end;
  485. {****************************************************************************
  486. Assembler code
  487. ****************************************************************************}
{ Creates the register allocators for the integer and vector register
  files. The integer list omits X18, X29 and X30 -- X29 per the inline
  comment is the frame pointer; NOTE(review): X18/X30 are presumably the
  platform-reserved and link registers, confirm against cpubase. }
procedure tcgaarch64.init_register_allocators;
  begin
    inherited init_register_allocators;
    rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
        [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
         RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
         RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
         { maybe we can enable this in the future for leaf functions (it's
           the frame pointer)
         ,RS_X29 }],
        first_int_imreg,[]);
    rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
        [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
         RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
         RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
         RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
        first_mm_imreg,[]);
  end;
{ Frees the register allocators created in init_register_allocators. }
procedure tcgaarch64.done_register_allocators;
  begin
    rg[R_INTREGISTER].free;
    { NOTE(review): init_register_allocators above does not create an
      R_FPUREGISTER allocator; calling free on a nil instance is harmless
      in Pascal, but confirm it is not created elsewhere }
    rg[R_FPUREGISTER].free;
    rg[R_MMREGISTER].free;
    inherited done_register_allocators;
  end;
  513. function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
  514. begin
  515. case size of
  516. OS_F32:
  517. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
  518. OS_F64:
  519. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
  520. else
  521. internalerror(2014102701);
  522. end;
  523. end;
  524. procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
  525. begin
  526. if not weak then
  527. list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s,AT_FUNCTION)))
  528. else
  529. list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)));
  530. end;
{ Emits an indirect call: branch-with-link to the address held in reg. }
procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
  begin
    list.concat(taicpu.op_reg(A_BLR,reg));
  end;
  535. {********************** load instructions ********************}
{ Loads the constant a into reg using as few instructions as possible:
  single MOVZ/MOVN for 16-bit-representable values, a single ORR for
  bitmask ("shifter") constants, special ORR+MOVK patterns for values whose
  16 bit words repeat, and otherwise a MOVZ/MOVN followed by MOVKs for each
  remaining non-zero 16 bit word (inverting the value first when that needs
  fewer instructions). }
procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
  var
    opc: tasmop;
    shift: byte;
    so: tshifterop;
    reginited,doinverted,extendedsize: boolean;
    manipulated_a: tcgint;
    leftover_a: word;
  begin
{$ifdef extdebug}
    list.concat(tai_comment.Create(strpnew('Generating constant ' + tostr(a) + ' / $' + hexstr(a, 16))));
{$endif extdebug}
    extendedsize := (size in [OS_64,OS_S64]);
    case a of
      { Small positive number }
      $0..$FFFF:
        begin
          list.concat(taicpu.op_reg_const(A_MOVZ, reg, a));
          Exit;
        end;
      { Small negative number }
      -65536..-1:
        begin
          list.concat(taicpu.op_reg_const(A_MOVN, reg, Word(not a)));
          Exit;
        end;
      { Can be represented as a negative number more compactly }
      $FFFF0000..$FFFFFFFF:
        begin
          { if we load a value into a 32 bit register, it is automatically
            zero-extended to 64 bit }
          list.concat(taicpu.op_reg_const(A_MOVN, makeregsize(reg,OS_32), Word(not a)));
          Exit;
        end;
      else
        begin
          if not extendedsize then
            { Mostly so programmers don't get confused when they view the disassembly and
              'a' is sign-extended to 64-bit, say, but also avoids potential problems with
              third-party assemblers if the number is out of bounds for a given size }
            a := Cardinal(a);
          { Check to see if a is a valid shifter constant that can be encoded in ORR as is }
          if is_shifter_const(a,size) then
            begin
              { Use synthetic "MOV" instruction instead of "ORR reg,wzr,#a" (an alias),
                since AArch64 conventions prefer this, and it's clearer in the
                disassembly }
              list.concat(taicpu.op_reg_const(A_MOV,reg,a));
              Exit;
            end;
          { If the value of a fits into 32 bits, it's fastest to use movz/movk regardless }
          if extendedsize and ((a shr 32) <> 0) then
            begin
              { This determines whether this write can be performed with an ORR followed by MOVK
                by copying the 3nd word to the 1st word for the ORR constant, then overwriting
                the 1st word. The alternative would require 4 instructions. This sequence is
                common when division reciprocals are calculated (e.g. 3 produces AAAAAAAAAAAAAAAB). }
              leftover_a := word(a and $FFFF);
              manipulated_a := (a and $FFFFFFFFFFFF0000) or ((a shr 32) and $FFFF);
              { if manipulated_a = a, don't check, because is_shifter_const was already
                called for a and it returned False. Reduces processing time. [Kit] }
              if (manipulated_a <> a) and is_shifter_const(manipulated_a, OS_64) then
                begin
                  { Encode value as:
                    orr  reg,xzr,manipulated_a
                    movk reg,#(leftover_a)
                    Use "orr" instead of "mov" here for the assembly dump so it better
                    implies that something special is happening with the number arrangement.
                  }
                  list.concat(taicpu.op_reg_reg_const(A_ORR, reg, NR_XZR, manipulated_a));
                  list.concat(taicpu.op_reg_const(A_MOVK, reg, leftover_a));
                  Exit;
                end;
              { This determines whether this write can be performed with an ORR followed by MOVK
                by copying the 2nd word to the 4th word for the ORR constant, then overwriting
                the 4th word. The alternative would require 3 instructions }
              leftover_a := word(a shr 48);
              manipulated_a := (a and $0000FFFFFFFFFFFF);
              if manipulated_a = $0000FFFFFFFFFFFF then
                begin
                  { This is even better, as we can just use a single MOVN on the last word }
                  shifterop_reset(so);
                  so.shiftmode := SM_LSL;
                  so.shiftimm := 48;
                  list.concat(taicpu.op_reg_const_shifterop(A_MOVN, reg, word(not leftover_a), so));
                  Exit;
                end;
              manipulated_a := manipulated_a or (((a shr 16) and $FFFF) shl 48);
              { if manipulated_a = a, don't check, because is_shifter_const was already
                called for a and it returned False. Reduces processing time. [Kit] }
              if (manipulated_a <> a) and is_shifter_const(manipulated_a, OS_64) then
                begin
                  { Encode value as:
                    orr  reg,xzr,manipulated_a
                    movk reg,#(leftover_a),lsl #48
                    Use "orr" instead of "mov" here for the assembly dump so it better
                    implies that something special is happening with the number arrangement.
                  }
                  list.concat(taicpu.op_reg_reg_const(A_ORR, reg, NR_XZR, manipulated_a));
                  shifterop_reset(so);
                  so.shiftmode := SM_LSL;
                  so.shiftimm := 48;
                  list.concat(taicpu.op_reg_const_shifterop(A_MOVK, reg, leftover_a, so));
                  Exit;
                end;
              case a of
                { If a is in the given negative range, it can be stored
                  more efficiently if it is inverted. }
                TCgInt($FFFF000000000000)..-65537:
                  begin
                    { NOTE: This excluded range can be more efficiently
                      stored as the first 16 bits followed by a shifter constant }
                    case a of
                      TCgInt($FFFF0000FFFF0000)..TCgInt($FFFF0000FFFFFFFF):
                        doinverted := False;
                      else
                        begin
                          doinverted := True;
                          a := not a;
                        end;
                    end;
                  end;
                else
                  doinverted := False;
              end;
            end
          else
            doinverted:=False;
        end;
    end;
    { general case: emit one instruction per non-zero 16 bit word of a
      (of "not a" when doinverted) }
    reginited:=false;
    shift:=0;
    if doinverted then
      opc:=A_MOVN
    else
      opc:=A_MOVZ;
    repeat
      { leftover is shifterconst? (don't check if we can represent it just
        as effectively with movz/movk, as this check is expensive) }
      if (word(a)<>0) then
        begin
          if not doinverted and
             ((shift<tcgsize2size[size]*(8 div 2)) and
              ((a shr 16)<>0)) and
             is_shifter_const(a shl shift,size) then
            begin
              if reginited then
                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
              else
                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
              exit;
            end;
          { set all 16 bit parts <> 0 }
          if shift=0 then
            begin
              list.concat(taicpu.op_reg_const(opc,reg,word(a)));
              reginited:=true;
            end
          else
            begin
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=shift;
              if not reginited then
                begin
                  list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
                  reginited:=true;
                end
              else
                begin
                  { MOVK always inserts the original (non-inverted) bits }
                  if doinverted then
                    list.concat(taicpu.op_reg_const_shifterop(A_MOVK,reg,word(not a),so))
                  else
                    list.concat(taicpu.op_reg_const_shifterop(A_MOVK,reg,word(a),so));
                end;
            end;
        end;
      a:=a shr 16;
      inc(shift,16);
    until a = 0;
    if not reginited then
      internalerror(2014102702);
  end;
{ Stores the constant a to memory. Zero is stored directly from the zero
  register (byte by byte for unaligned destinations); everything else falls
  back to the generic load-into-register-then-store implementation. }
procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
  var
    reg: tregister;
    href: treference;
    i: Integer;
  begin
    { use the zero register if possible }
    if a=0 then
      begin
        href:=ref;
        inc(href.offset,tcgsize2size[size]-1);
        { unaligned destination: store zero one byte at a time, provided
          both the first and the last byte are directly addressable }
        if (tcgsize2size[size]>1) and (ref.alignment=1) and (simple_ref_type(A_STUR,OS_8,PF_None,ref)=sr_simple) and
          (simple_ref_type(A_STUR,OS_8,PF_None,href)=sr_simple) then
          begin
            href:=ref;
            for i:=0 to tcgsize2size[size]-1 do
              begin
                a_load_const_ref(list,OS_8,0,href);
                inc(href.offset);
              end;
          end
        else
          begin
            if size in [OS_64,OS_S64] then
              reg:=NR_XZR
            else
              reg:=NR_WZR;
            a_load_reg_ref(list,size,size,reg,ref);
          end;
      end
    else
      inherited;
  end;
{ Stores reg (of fromsize) to memory as tosize, truncating or extending the
  register as needed and dispatching unaligned stores to the unaligned
  helper. }
procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  var
    oppostfix:toppostfix;
    hreg: tregister;
  begin
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      begin
        { truncating store: only the low tosize bits are written anyway }
        fromsize:=tosize;
        reg:=makeregsize(list,reg,fromsize);
      end
    { have a 32 bit register but need a 64 bit one? }
    else if tosize in [OS_64,OS_S64] then
      begin
        { sign extend if necessary }
        if fromsize in [OS_S8,OS_S16,OS_S32] then
          begin
            { can't overwrite reg, may be a constant reg }
            hreg:=getintregister(list,tosize);
            a_load_reg_reg(list,fromsize,tosize,reg,hreg);
            reg:=hreg;
          end
        else
          { top 32 bit are zero by default }
          reg:=makeregsize(reg,OS_64);
        fromsize:=tosize;
      end;
    if not(target_info.system=system_aarch64_darwin) and (ref.alignment<>0) and
      (ref.alignment<tcgsize2size[tosize]) then
      begin
        a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
      end
    else
      begin
        { STR stores the low 8/16/32/64 bits depending on the postfix }
        case tosize of
          { signed integer registers }
          OS_8,
          OS_S8:
            oppostfix:=PF_B;
          OS_16,
          OS_S16:
            oppostfix:=PF_H;
          OS_32,
          OS_S32,
          OS_64,
          OS_S64:
            oppostfix:=PF_None;
          else
            InternalError(200308299);
        end;
        handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
      end;
  end;
{ Loads a value of fromsize from memory into reg (of tosize), choosing the
  LDR variant that performs the required zero/sign extension and dispatching
  unaligned loads to the unaligned helper. }
procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  var
    oppostfix:toppostfix;
  begin
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      fromsize:=tosize;
    { ensure that all bits of the 32/64 register are always correctly set:
      * default behaviour is always to zero-extend to the entire (64 bit)
        register -> unsigned 8/16/32 bit loads only exist with a 32 bit
        target register, as the upper 32 bit will be zeroed implicitly
        -> always make target register 32 bit
      * signed loads exist both with 32 and 64 bit target registers,
        depending on whether the value should be sign extended to 32 or
        to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
        corresponding 64 bit register are again zeroed) -> no need to
        change anything (we only have 32 and 64 bit registers), except that
        when loading an OS_S32 to a 32 bit register, we don't need/can't
        use sign extension
    }
    if fromsize in [OS_8,OS_16,OS_32] then
      reg:=makeregsize(reg,OS_32);
    if not(target_info.system=system_aarch64_darwin) and (ref.alignment<>0) and
      (ref.alignment<tcgsize2size[fromsize]) then
      begin
        a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
        exit;
      end;
    case fromsize of
      { signed integer registers }
      OS_8:
        oppostfix:=PF_B;
      OS_S8:
        oppostfix:=PF_SB;
      OS_16:
        oppostfix:=PF_H;
      OS_S16:
        oppostfix:=PF_SH;
      OS_S32:
        if getsubreg(reg)=R_SUBD then
          oppostfix:=PF_NONE
        else
          oppostfix:=PF_SW;
      OS_32,
      OS_64,
      OS_S64:
        oppostfix:=PF_None;
      else
        InternalError(200308297);
    end;
    handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
    { clear upper 16 bits if the value was negative }
    if (fromsize=OS_S8) and (tosize=OS_16) then
      a_load_reg_reg(list,fromsize,tosize,reg,reg);
  end;
{ Loads an unaligned value from memory by composing it from smaller,
  suitably aligned pieces: 64 bit values from two 32 bit loads (or an LDP
  when possible), 16/32 bit values from 16 bit or 8 bit pieces merged with
  BFI. Honours big-endian targets by walking the pieces in reverse order. }
procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
  var
    href: treference;
    hreg1, hreg2, tmpreg,tmpreg2: tregister;
    i : Integer;
  begin
    case fromsize of
      OS_64,OS_S64:
        begin
          { split into two 32 bit loads }
          hreg1:=getintregister(list,OS_32);
          hreg2:=getintregister(list,OS_32);
          if target_info.endian=endian_big then
            begin
              { high and low halves are swapped in memory on big endian }
              tmpreg:=hreg1;
              hreg1:=hreg2;
              hreg2:=tmpreg;
            end;
          { can we use LDP? }
          if (ref.alignment=4) and
            (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
            list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
          else
            begin
              a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
              href:=ref;
              inc(href.offset,4);
              a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
            end;
          { combine: low half via zero-extending move, high half via BFI }
          a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
          list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
        end;
      OS_16,OS_S16,
      OS_32,OS_S32:
        begin
          if ref.alignment=2 then
            begin
              { compose from 16 bit pieces }
              href:=ref;
              if target_info.endian=endian_big then
                inc(href.offset,tcgsize2size[fromsize]-2);
              tmpreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_16,OS_32,href,tmpreg);
              tmpreg2:=getintregister(list,OS_32);
              for i:=1 to (tcgsize2size[fromsize]-1) div 2 do
                begin
                  if target_info.endian=endian_big then
                    dec(href.offset,2)
                  else
                    inc(href.offset,2);
                  a_load_ref_reg(list,OS_16,OS_32,href,tmpreg2);
                  list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*16,16));
                end;
              a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
            end
          else
            begin
              { compose from individual bytes }
              href:=ref;
              if target_info.endian=endian_big then
                inc(href.offset,tcgsize2size[fromsize]-1);
              tmpreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_8,OS_32,href,tmpreg);
              tmpreg2:=getintregister(list,OS_32);
              for i:=1 to tcgsize2size[fromsize]-1 do
                begin
                  if target_info.endian=endian_big then
                    dec(href.offset)
                  else
                    inc(href.offset);
                  a_load_ref_reg(list,OS_8,OS_32,href,tmpreg2);
                  list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*8,8));
                end;
              if (tosize in [OS_S8,OS_S16]) then
                list.concat(taicpu.op_reg_reg(A_SXTH,tmpreg,tmpreg));
              a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
            end;
        end;
      else
        inherited;
    end;
  end;
{ Register-to-register move with the zero/sign extension or truncation
  implied by the from/to sizes. Plain same-representation moves are emitted
  as MOV and reported to the register allocator for coalescing. }
procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
  var
    instr: taicpu;
  begin
    { we use both 32 and 64 bit registers -> insert conversion when
      we have to truncate/sign extend inside the (32 or 64 bit) register
      holding the value, and when we sign extend from a 32 to a 64 bit
      register }
    if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
       ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
        (fromsize<>tosize) and
        not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
       ((fromsize in [OS_S8,OS_S16,OS_S32]) and
        (tosize in [OS_64,OS_S64])) or
       { needs to mask out the sign in the top 16 bits }
       ((fromsize=OS_S8) and
        (tosize=OS_16)) then
      begin
        case tosize of
          OS_8:
            list.concat(taicpu.op_reg_reg(A_UXTB,reg2,makeregsize(reg1,OS_32)));
          OS_16:
            list.concat(taicpu.op_reg_reg(A_UXTH,reg2,makeregsize(reg1,OS_32)));
          OS_S8:
            list.concat(taicpu.op_reg_reg(A_SXTB,reg2,makeregsize(reg1,OS_32)));
          OS_S16:
            list.concat(taicpu.op_reg_reg(A_SXTH,reg2,makeregsize(reg1,OS_32)));
          { while "mov wN, wM" automatically inserts a zero-extension and
            hence we could encode a 64->32 bit move like that, the problem
            is that we then can't distinguish 64->32 from 32->32 moves, and
            the 64->32 truncation could be removed altogether... So use a
            different instruction }
          OS_32,
          OS_S32:
            { in theory, reg1 should be 64 bit here (since fromsize>tosize),
              but because of the way location_force_register() tries to
              avoid superfluous zero/sign extensions, it's not always the
              case -> also force reg1 to 64 bit }
            list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
          OS_64,
          OS_S64:
            list.concat(taicpu.op_reg_reg(A_SXTW,reg2,makeregsize(reg1,OS_32)));
          else
            internalerror(2002090901);
        end;
      end
    else
      begin
        { 32 -> 32 bit move implies zero extension (sign extensions have
          been handled above) -> also use for 32 <-> 64 bit moves }
        if not(fromsize in [OS_64,OS_S64]) or
           not(tosize in [OS_64,OS_S64]) then
          instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
        else
          instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
        list.Concat(instr);
        { Notify the register allocator that we have written a move instruction so
          it can try to eliminate it. }
        add_move_instruction(instr);
      end;
  end;
{ Computes the effective address of ref into register r, simplifying the
  reference first and then emitting ADR (for literals) or an ADD/MOV for
  base+index/offset forms. }
procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
  var
    href: treference;
    so: tshifterop;
    op: tasmop;
  begin
    op:=A_LDR;
    href:=ref;
    { simplify as if we're going to perform a regular 64 bit load, using
      "r" as the new base register if possible/necessary }
    make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
    { load literal? }
    if assigned(href.symbol) then
      begin
        if (href.base<>NR_NO) or
           (href.index<>NR_NO) or
           not assigned(href.symboldata) then
          internalerror(2014110912);
        list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
      end
    else
      begin
        if href.index<>NR_NO then
          begin
            if href.shiftmode<>SM_None then
              begin
                { "add" supports a superset of the shift modes supported by
                  load/store instructions }
                shifterop_reset(so);
                so.shiftmode:=href.shiftmode;
                so.shiftimm:=href.shiftimm;
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
              end
            else
              a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
          end
        else if href.offset<>0 then
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
        else
          a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
      end;
  end;
{ Not used on AArch64: scalar floating point values are handled through the
  mm register methods (a_loadmm_*); reaching this is a compiler bug. }
procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
  begin
    internalerror(2014122107)
  end;
{ Not used on AArch64: scalar floating point values are handled through the
  mm register methods (a_loadmm_*); reaching this is a compiler bug. }
procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  begin
    internalerror(2014122108)
  end;
{ Not used on AArch64: scalar floating point values are handled through the
  mm register methods (a_loadmm_*); reaching this is a compiler bug. }
procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  begin
    internalerror(2014122109)
  end;
{ Scalar FP register-to-register move: FMOV for same-size moves (reported to
  the register allocator for coalescing), FCVT for precision conversions. }
procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
  var
    instr: taicpu;
  begin
    { only scalar shuffles are supported here }
    if assigned(shuffle) and
       not shufflescalar(shuffle) then
      internalerror(2014122104);
    if fromsize=tosize then
      begin
        instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
        { Notify the register allocator that we have written a move
          instruction so it can try to eliminate it. }
        add_move_instruction(instr);
        { FMOV cannot generate a floating point exception }
      end
    else
      begin
        if (reg_cgsize(reg1)<>fromsize) or
          (reg_cgsize(reg2)<>tosize) then
          internalerror(2014110913);
        instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
        { NOTE(review): the FPU exception check is appended to the list here,
          i.e. before the FCVT itself is concatenated below — confirm this
          ordering is intended }
        maybe_check_for_fpu_exception(list);
      end;
    list.Concat(instr);
  end;
{ Loads a scalar FP value from memory. On a size mismatch, loads at the
  source precision into a scratch register and converts into the requested
  destination afterwards. }
procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  var
    tmpreg: tregister;
  begin
    if assigned(shuffle) and
       not shufflescalar(shuffle) then
      internalerror(2014122105);
    tmpreg:=NR_NO;
    if (fromsize<>tosize) then
      begin
        { remember the real destination; load into a fromsize scratch first }
        tmpreg:=reg;
        reg:=getmmregister(list,fromsize);
      end;
    handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
    if (fromsize<>tosize) then
      a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
  end;
{ Stores a scalar FP value to memory. On a size mismatch, converts into a
  scratch register of the target precision before storing. }
procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  var
    tmpreg: tregister;
  begin
    if assigned(shuffle) and
       not shufflescalar(shuffle) then
      internalerror(2014122106);
    if (fromsize<>tosize) then
      begin
        tmpreg:=getmmregister(list,tosize);
        a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
        reg:=tmpreg;
      end;
    handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
  end;
  1110. procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  1111. begin
  1112. if not shufflescalar(shuffle) then
  1113. internalerror(2014122801);
  1114. if tcgsize2size[fromsize]<>tcgsize2size[tosize] then
  1115. internalerror(2014122803);
  1116. case tcgsize2size[tosize] of
  1117. 4:
  1118. setsubreg(mmreg,R_SUBMMS);
  1119. 8:
  1120. setsubreg(mmreg,R_SUBMMD);
  1121. else
  1122. internalerror(2020101310);
  1123. end;
  1124. list.concat(taicpu.op_indexedreg_reg(A_INS,mmreg,0,intreg));
  1125. end;
{ Copies a scalar SIMD/FP register into an integer register via FMOV.
  Widening is allowed (upper bits are zeroed implicitly by using the
  smaller integer register view); narrowing is an internal error. }
procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
  var
    r : tregister;
  begin
    if not shufflescalar(shuffle) then
      internalerror(2014122802);
    if tcgsize2size[fromsize]>tcgsize2size[tosize] then
      internalerror(2014122804);
    case tcgsize2size[fromsize] of
      4:
        setsubreg(mmreg,R_SUBMMS);
      8:
        setsubreg(mmreg,R_SUBMMD);
      else
        internalerror(2020101311);
    end;
    { match the integer register view to the source size when widening }
    if tcgsize2size[fromsize]<tcgsize2size[tosize] then
      r:=makeregsize(intreg,fromsize)
    else
      r:=intreg;
    list.concat(taicpu.op_reg_reg(A_FMOV,r,mmreg));
  end;
{ Vector/scalar mm operation; currently only OP_XOR is supported, which is
  used to zero-initialise mm registers. }
procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
  begin
    case op of
      { "xor Vx,Vx" is used to initialize global regvars to 0 }
      OP_XOR:
        begin
          if shuffle=nil then
            begin
              { operate on the full 128 bit vector view of the registers }
              dst:=newreg(R_MMREGISTER,getsupreg(dst),R_SUBMM16B);
              src:=newreg(R_MMREGISTER,getsupreg(src),R_SUBMM16B);
              list.concat(taicpu.op_reg_reg_reg(A_EOR,dst,dst,src))
            end
          else if (src<>dst) or
            (reg_cgsize(src)<>size) or
            assigned(shuffle) then
            { NOTE(review): shuffle<>nil in this branch, so assigned(shuffle)
              is always true here and the MOVI path below is unreachable —
              verify whether this condition is intended }
            internalerror(2015011401)
          else
            case size of
              OS_F32,
              OS_F64:
                list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
              else
                internalerror(2015011402);
            end;
        end
      else
        internalerror(2015011403);
    end;
  end;
{ Bit scan forward/reverse: puts the index of the lowest (reverse=false) or
  highest (reverse=true) set bit of src into dst, or 255 when src is 0.
  Implemented branch-free with CLZ/RBIT, CSINV and a final UXTB. }
procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
  var
    bitsize: longint;
  begin
    if srcsize in [OS_64,OS_S64] then
      begin
        bitsize:=64;
      end
    else
      begin
        bitsize:=32;
      end;
    { source is 0 -> dst will have to become 255 }
    list.concat(taicpu.op_reg_const(A_CMP,src,0));
    if reverse then
      begin
        { highest set bit = (bitsize-1) - clz(src) }
        list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
        { xor 31/63 is the same as setting the lower 5/6 bits to
          "31/63-(lower 5/6 bits of dst)" }
        list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
      end
    else
      begin
        { lowest set bit = clz(bit-reverse(src)) }
        list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
        list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
      end;
    { set dst to -1 if src was 0 }
    list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
    { mask the -1 to 255 if src was 0 (anyone find a two-instruction
      branch-free version? All of mine are 3...) }
    list.Concat(taicpu.op_reg_reg(A_UXTB,makeregsize(dst,OS_32),makeregsize(dst,OS_32)));
  end;
{ Stores a 64 bit register to an unaligned destination as two 32 bit halves
  (using STP when the destination is 4-byte aligned and simply addressable),
  honouring big-endian half ordering. Other sizes fall back to the generic
  implementation. }
procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
  var
    href: treference;
    hreg1, hreg2, tmpreg: tregister;
  begin
    if fromsize in [OS_64,OS_S64] then
      begin
        { split into two 32 bit stores }
        hreg1:=getintregister(list,OS_32);
        hreg2:=getintregister(list,OS_32);
        a_load_reg_reg(list,OS_32,OS_32,makeregsize(register,OS_32),hreg1);
        a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
        if target_info.endian=endian_big then
          begin
            { high and low halves are swapped in memory on big endian }
            tmpreg:=hreg1;
            hreg1:=hreg2;
            hreg2:=tmpreg;
          end;
        { can we use STP? }
        if (ref.alignment=4) and
          (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
          list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
        else
          begin
            a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
            href:=ref;
            inc(href.offset,4);
            a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
          end;
      end
    else
      inherited;
  end;
{ 8/16 bit operations are carried out in 32 bit registers; if op may have
  produced bits above the nominal result size, re-extend dst back to that
  size. }
procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
  const
    { operations whose 32 bit result can have bits set beyond an 8/16 bit
      result size }
    overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
  begin
    if (op in overflowops) and
       (size in [OS_8,OS_S8,OS_16,OS_S16]) then
      a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
  end;
{ In-place "reg := reg op a", after letting optimize_op_const canonicalise
  or fold the operation. }
procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
  begin
    optimize_op_const(size,op,a);
    case op of
      OP_NONE:
        exit;
      OP_MOVE:
        a_load_const_reg(list,size,a,reg);
      OP_NEG,OP_NOT:
        { unary operations take no constant operand }
        internalerror(200306011);
      else
        a_op_const_reg_reg(list,op,size,a,reg,reg);
    end;
  end;
{ "dst := dst op src" for unary ops (NEG/NOT emitted directly), everything
  else is delegated to the three-register form. }
procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
  begin
    Case op of
      OP_NEG,
      OP_NOT:
        begin
          { 8 bit NOT: flipping only the low 8 bits via EOR avoids having
            to re-extend the result afterwards }
          if (op=OP_NOT) and (size in [OS_8,OS_S8]) then
            list.concat(taicpu.op_reg_reg_const(A_EOR,dst,src,255))
          else
            begin
              list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
              maybeadjustresult(list,op,size,dst);
            end;
        end
      else
        a_op_reg_reg_reg(list,op,size,src,dst,dst);
    end;
  end;
{ Convenience wrapper: "dst := src op a" without overflow checking. }
procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
  var
    l: tlocation;
  begin
    a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
  end;
{ "dst := src2 op src1" using the register form of the instruction. OP_ROL
  is rewritten as a rotate right by (bitsize - count) since AArch64 has no
  ROLV instruction. }
procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
  var
    hreg: tregister;
  begin
    { no ROLV opcode... }
    if op=OP_ROL then
      begin
        case size of
          OS_32,OS_S32,
          OS_64,OS_S64:
            begin
              { rotate left by N == rotate right by (bitsize - N) }
              hreg:=getintregister(list,size);
              a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
              a_op_reg_reg(list,OP_SUB,size,src1,hreg);
              a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
              exit;
            end;
          else
            internalerror(2014111005);
        end;
      end
    else if (op=OP_ROR) and
       not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
      internalerror(2014111006);
    if TOpCG2AsmOpReg[op]=A_NONE then
      internalerror(2014111007);
    list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
    maybeadjustresult(list,op,size,dst);
  end;
{ "dst := src op a" with optional flag setting for overflow checking.
  Chooses immediate encodings where possible (imm12 for add/sub, bitmask
  immediates for logical ops, immediate shifts/rotates) and falls back to
  materialising the constant in a register otherwise. On success with
  setflags, ovloc describes which condition flags signal overflow. }
procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
  var
    shiftcountmask: longint;
    constreg: tregister;
  begin
    { add/sub instructions have only positive immediate operands }
    if (op in [OP_ADD,OP_SUB]) and
       (a<0) then
      begin
        if op=OP_ADD then
          op:=op_SUB
        else
          op:=OP_ADD;
        { avoid range/overflow error in case a = low(tcgint) }
{$push}{$r-}{$q-}
        a:=-a;
{$pop}
      end;
    ovloc.loc:=LOC_VOID;
    optimize_op_const(size,op,a);
    case op of
      OP_NONE:
        begin
          a_load_reg_reg(list,size,size,src,dst);
          exit;
        end;
      OP_MOVE:
        begin
          a_load_const_reg(list,size,a,dst);
          exit;
        end;
      else
        ;
    end;
    case op of
      OP_ADD,
      OP_SUB:
        begin
          handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
          { on a 64 bit target, overflows with smaller data types
            are handled via range errors }
          if setflags and
             (size in [OS_64,OS_S64]) then
            begin
              location_reset(ovloc,LOC_FLAGS,OS_8);
              if size=OS_64 then
                if op=OP_ADD then
                  ovloc.resflags:=F_CS
                else
                  ovloc.resflags:=F_CC
              else
                ovloc.resflags:=F_VS;
            end;
        end;
      OP_OR,
      OP_AND,
      OP_XOR:
        begin
          { logical immediates must be encodable as bitmask constants }
          if not(size in [OS_64,OS_S64]) then
            a:=cardinal(a);
          if is_shifter_const(a,size) then
            list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
          else
            begin
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg(list,op,size,constreg,src,dst);
            end;
        end;
      OP_SHL,
      OP_SHR,
      OP_SAR:
        begin
          if size in [OS_64,OS_S64] then
            shiftcountmask:=63
          else
            shiftcountmask:=31;
          if (a and shiftcountmask)<>0 Then
            list.concat(taicpu.op_reg_reg_const(
              TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
          else
            a_load_reg_reg(list,size,size,src,dst);
          if (a and not(tcgint(shiftcountmask)))<>0 then
            internalError(2014112101);
        end;
      OP_ROL,
      OP_ROR:
        begin
          case size of
            OS_32,OS_S32:
              if (a and not(tcgint(31)))<>0 then
                internalError(2014112102);
            OS_64,OS_S64:
              if (a and not(tcgint(63)))<>0 then
                internalError(2014112103);
            else
              internalError(2014112104);
          end;
          { there's only a ror opcode }
          if op=OP_ROL then
            a:=(tcgsize2size[size]*8)-a;
          list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
        end;
      OP_MUL,
      OP_IMUL,
      OP_DIV,
      OP_IDIV:
        begin
          { no immediate forms -> load the constant into a register }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
        end;
      else
        internalerror(2014111403);
    end;
    maybeadjustresult(list,op,size,dst);
  end;
{ Emits 'dst := src2 <op> src1' and, when setflags is true and the
  operation is a full 64 bit one, the extra code needed to detect
  overflow; ovloc tells the caller where the overflow condition lives
  (LOC_VOID if no check was generated). Overflow checking of smaller
  types is done via range errors instead. }
procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
  var
    tmpreg1, tmpreg2: tregister;
  begin
    ovloc.loc:=LOC_VOID;
    { overflow can only occur with 64 bit calculations on 64 bit cpus }
    if setflags and
       (size in [OS_64,OS_S64]) then
      begin
        case op of
          OP_ADD,
          OP_SUB:
            begin
              { the flag-setting (PF_S) form of add/sub does everything }
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
              ovloc.loc:=LOC_FLAGS;
              if size=OS_64 then
                { unsigned: carry set (add) resp. carry clear (sub) }
                if op=OP_ADD then
                  ovloc.resflags:=F_CS
                else
                  ovloc.resflags:=F_CC
              else
                { signed: overflow flag }
                ovloc.resflags:=F_VS;
              { finished }
              exit;
            end;
          OP_MUL:
            begin
              { check whether the upper 64 bit of the 128 bit product is 0 }
              tmpreg1:=getintregister(list,OS_64);
              list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
              list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
              ovloc.loc:=LOC_FLAGS;
              ovloc.resflags:=F_NE;
              { still have to perform the actual multiplication }
            end;
          OP_IMUL:
            begin
              { check whether the upper 64 bits of the 128 bit multiplication
                result have the same value as the replicated sign bit of the
                lower 64 bits }
              tmpreg1:=getintregister(list,OS_64);
              list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
              { calculate lower 64 bits (afterwards, because dst may be
                equal to src1 or src2) }
              a_op_reg_reg_reg(list,op,size,src1,src2,dst);
              { replicate sign bit }
              tmpreg2:=getintregister(list,OS_64);
              a_op_const_reg_reg(list,OP_SAR,OS_S64,63,dst,tmpreg2);
              list.concat(taicpu.op_reg_reg(A_CMP,tmpreg1,tmpreg2));
              ovloc.loc:=LOC_FLAGS;
              ovloc.resflags:=F_NE;
              { finished }
              exit;
            end;
          OP_IDIV,
          OP_DIV:
            begin
              { not handled here, needs div-by-zero check (dividing by zero
                just gives a 0 result on aarch64), and low(int64) div -1
                check for overflow) }
              internalerror(2014122101);
            end;
          else
            internalerror(2019050936);
        end;
      end;
    { emit the operation itself (reached for OP_MUL above and for all
      cases that do not need flag handling) }
    a_op_reg_reg_reg(list,op,size,src1,src2,dst);
  end;
{*************** compare instructions ****************}
{ Compares reg against the constant a and jumps to l if cmp_op holds.
  Negative constants are compared via CMN (compare negated) so that the
  immediate passed on is always positive. }
procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
  var
    op: tasmop;
  begin
    if a>=0 then
      op:=A_CMP
    else
      op:=A_CMN;
    { avoid range/overflow error in case a=low(tcgint) }
{$push}{$r-}{$q-}
    handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
{$pop}
    a_jmp_cond(list,cmp_op,l);
  end;
{ Compares reg2 with reg1 and jumps to l if cmp_op holds. }
procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
  begin
    list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
    a_jmp_cond(list,cmp_op,l);
  end;
  1522. procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
  1523. var
  1524. ai: taicpu;
  1525. begin
  1526. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name,AT_FUNCTION));
  1527. ai.is_jmp:=true;
  1528. list.Concat(ai);
  1529. end;
  1530. procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
  1531. var
  1532. ai: taicpu;
  1533. begin
  1534. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
  1535. ai.is_jmp:=true;
  1536. list.Concat(ai);
  1537. end;
  1538. procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
  1539. var
  1540. ai: taicpu;
  1541. begin
  1542. ai:=TAiCpu.op_sym(A_B,l);
  1543. ai.is_jmp:=true;
  1544. ai.SetCondition(TOpCmp2AsmCond[cond]);
  1545. list.Concat(ai);
  1546. end;
  1547. procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
  1548. var
  1549. ai : taicpu;
  1550. begin
  1551. ai:=Taicpu.op_sym(A_B,l);
  1552. ai.is_jmp:=true;
  1553. ai.SetCondition(flags_to_cond(f));
  1554. list.Concat(ai);
  1555. end;
{ Materialises the condition described by f into reg via CSET: reg
  becomes 1 when the condition holds, 0 otherwise. }
procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
  begin
    list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
  end;
{ Not supported on aarch64: overflow checking must go through
  g_overflowcheck_loc, which receives an explicit overflow location. }
procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
  begin
    { we need an explicit overflow location, because there are many
      possibilities (not just the overflow flag, which is only used for
      signed add/sub) }
    internalerror(2014112303);
  end;
  1567. procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
  1568. var
  1569. hl : tasmlabel;
  1570. hflags : tresflags;
  1571. begin
  1572. if not(cs_check_overflow in current_settings.localswitches) then
  1573. exit;
  1574. current_asmdata.getjumplabel(hl);
  1575. case ovloc.loc of
  1576. LOC_FLAGS:
  1577. begin
  1578. hflags:=ovloc.resflags;
  1579. inverse_flags(hflags);
  1580. cg.a_jmp_flags(list,hflags,hl);
  1581. end;
  1582. else
  1583. internalerror(2014112304);
  1584. end;
  1585. a_call_name(list,'FPC_OVERFLOW',false);
  1586. a_label(list,hl);
  1587. end;
  1588. { *********** entry/exit code and address loading ************ }
{ Pushes all callee-saved registers of type rt in the range lowsr..highsr
  that are actually used by the current procedure, 16 bytes at a time so
  SP stays 16-byte aligned, and returns the total number of bytes pushed.
  On Win64, matching SEH save directives are emitted alongside. }
function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
  var
    ref: treference;
    sr: tsuperregister;
    pairreg: tregister;
    sehreg,sehregp : TAsmSehDirective;
  begin
    result:=0;
    { pre-indexed store of 16 bytes below the stack pointer (= push) }
    reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
    ref.addressmode:=AM_PREINDEXED;
    pairreg:=NR_NO;
    { for SEH on Win64 we can only store consecutive register pairs, others
      need to be stored with STR }
    if target_info.system=system_aarch64_win64 then
      begin
        { pick the single/pair SEH directives for the register class }
        if rt=R_INTREGISTER then
          begin
            sehreg:=ash_savereg_x;
            sehregp:=ash_saveregp_x;
          end
        else if rt=R_MMREGISTER then
          begin
            sehreg:=ash_savefreg_x;
            sehregp:=ash_savefregp_x;
          end
        else
          internalerror(2020041304);
        for sr:=lowsr to highsr do
          if sr in rg[rt].used_in_proc then
            if pairreg=NR_NO then
              { remember the first register of a potential pair }
              pairreg:=newreg(rt,sr,sub)
            else
              begin
                inc(result,16);
                if getsupreg(pairreg)=sr-1 then
                  begin
                    { consecutive pair -> STP plus pair-save directive }
                    list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
                    list.concat(cai_seh_directive.create_reg_offset(sehregp,pairreg,16));
                    pairreg:=NR_NO;
                  end
                else
                  begin
                    { gap in the sequence -> store the pending register
                      alone and start a new pair with the current one }
                    list.concat(taicpu.op_reg_ref(A_STR,pairreg,ref));
                    list.concat(cai_seh_directive.create_reg_offset(sehreg,pairreg,16));
                    pairreg:=newreg(rt,sr,sub);
                  end;
              end;
        { an unpaired register may still be pending }
        if pairreg<>NR_NO then
          begin
            inc(result,16);
            list.concat(taicpu.op_reg_ref(A_STR,pairreg,ref));
            list.concat(cai_seh_directive.create_reg_offset(sehreg,pairreg,16));
          end;
      end
    else
      begin
        { store all used registers pairwise }
        for sr:=lowsr to highsr do
          if sr in rg[rt].used_in_proc then
            if pairreg=NR_NO then
              pairreg:=newreg(rt,sr,sub)
            else
              begin
                inc(result,16);
                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
                pairreg:=NR_NO
              end;
        { one left -> store twice (stack must be 16 bytes aligned) }
        if pairreg<>NR_NO then
          begin
            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
            inc(result,16);
          end;
      end;
  end;
  1664. procedure FixupOffsets(p:TObject;arg:pointer);
  1665. var
  1666. sym: tabstractnormalvarsym absolute p;
  1667. begin
  1668. if (tsym(p).typ in [paravarsym,localvarsym]) and
  1669. (sym.localloc.loc=LOC_REFERENCE) and
  1670. (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
  1671. begin
  1672. sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
  1673. dec(sym.localloc.reference.offset,PLongint(arg)^);
  1674. end;
  1675. end;
{ Allocates localsize bytes on the stack by lowering SP. On Win64, only a
  few pages beyond SP are guard pages, so allocations of a page or more
  must touch every allocated page to trigger guard-page growth. }
procedure tcgaarch64.g_stackpointer_alloc(list : TAsmList;localsize : longint);
  var
    href : treference;
    i : integer;
    again : tasmlabel;
  begin
    if localsize>0 then
      begin
        { windows guards only a few pages for stack growing,
          so we have to access every page first }
        if (target_info.system=system_aarch64_win64) and
           (localsize>=winstackpagesize) then
          begin
            if localsize div winstackpagesize<=4 then
              begin
                { few pages: allocate in one step, then touch each page }
                handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
                for i:=1 to localsize div winstackpagesize do
                  begin
                    reference_reset_base(href,NR_SP,localsize-i*winstackpagesize+4,ctempposinvalid,4,[]);
                    list.concat(Taicpu.op_reg_ref(A_STR,NR_WZR,href));
                  end;
                reference_reset_base(href,NR_SP,0,ctempposinvalid,4,[]);
                list.concat(Taicpu.op_reg_ref(A_STR,NR_WZR,href));
              end
            else
              begin
                { many pages: loop lowering SP one page at a time and
                  touching it; NR_IP0 holds the remaining page count }
                current_asmdata.getjumplabel(again);
                getcpuregister(list,NR_IP0);
                a_load_const_reg(list,OS_ADDR,localsize div winstackpagesize,NR_IP0);
                a_label(list,again);
                handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,winstackpagesize,NR_SP,NR_IP1,false,true);
                reference_reset_base(href,NR_SP,0,ctempposinvalid,4,[]);
                list.concat(Taicpu.op_reg_ref(A_STR,NR_WZR,href));
                list.concat(setoppostfix(Taicpu.op_reg_reg_const(A_SUB,NR_IP0,NR_IP0,1),PF_S));
                a_jmp_cond(list,OC_NE,again);
                { allocate the remaining sub-page part }
                handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize mod winstackpagesize,NR_SP,NR_IP1,false,true);
                ungetcpuregister(list,NR_IP0);
                { NOTE(review): unlike the simple path below, these probing
                  paths emit no .seh_stackalloc directive -- confirm the
                  unwind info for them is produced elsewhere }
              end
          end
        else
          begin
            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
            if target_info.system=system_aarch64_win64 then
              list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
          end;
      end;
  end;
{ Generates the procedure prologue: saves FP/LR, sets up the frame
  pointer, saves the used callee-saved integer and vector registers,
  allocates the local stack frame, and on Win64 emits the matching SEH
  unwind directives. }
procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
  var
    hitem: tlinkedlistitem;
    seh_proc: tai_seh_directive;
    templist: TAsmList;
    suppress_endprologue: boolean;
    ref: treference;
    totalstackframesize: longint;
  begin
    { on aarch64, we need to store the link register and the generate a frame pointer if the subroutine either
      - receives parameters on the stack
      - is not a leaf procedure
      - has nested procedures
      - helpers retrieve the stack pointer
    }
    { remember the insertion point for the .seh_proc directive emitted at
      the end of this routine }
    hitem:=list.last;
    { pi_has_unwind_info may already be set at this point if there are
      SEH directives in assembler body. In this case, .seh_endprologue
      is expected to be one of those directives, and not generated here. }
    suppress_endprologue:=(pi_has_unwind_info in current_procinfo.flags);
    if not nostackframe then
      begin
        { stack pointer has to be aligned to 16 bytes at all times }
        localsize:=align(localsize,16);
        if target_info.system=system_aarch64_win64 then
          include(current_procinfo.flags,pi_has_unwind_info);
        if not(pi_no_framepointer_needed in current_procinfo.flags) then
          begin
            { save stack pointer and return address }
            reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
            ref.addressmode:=AM_PREINDEXED;
            list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
            current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
            current_asmdata.asmcfi.cfa_offset(list,NR_FP,-16);
            current_asmdata.asmcfi.cfa_offset(list,NR_LR,-8);
            if target_info.system=system_aarch64_win64 then
              list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
            { initialise frame pointer }
            if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
              begin
                a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
                current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FP);
                if target_info.system=system_aarch64_win64 then
                  list.concat(cai_seh_directive.create(ash_setfp));
              end
            else
              begin
                { exception filters get their frame from the parent routine }
                gen_load_frame_for_exceptfilter(list);
                localsize:=current_procinfo.maxpushedparasize;
              end;
          end;
        totalstackframesize:=localsize;
        { save modified integer registers }
        inc(totalstackframesize,
          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
        { only the lower 64 bits of the modified vector registers need to be
          saved; if the caller needs the upper 64 bits, it has to save them
          itself }
        inc(totalstackframesize,
          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
        { allocate stack space }
        if localsize<>0 then
          begin
            localsize:=align(localsize,16);
            current_procinfo.final_localsize:=localsize;
            g_stackpointer_alloc(list,localsize);
          end;
        { By default, we use the frame pointer to access parameters passed via
          the stack and the stack pointer to address local variables and temps
          because
          a) we can use bigger positive than negative offsets (so accessing
             locals via negative offsets from the frame pointer would be less
             efficient)
          b) we don't know the local size while generating the code, so
             accessing the parameters via the stack pointer is not possible
             without copying them
          The problem with this is the get_frame() intrinsic:
          a) it must return the same value as what we pass as parentfp
             parameter, since that's how it's used in the TP-style objects unit
          b) its return value must usable to access all local data from a
             routine (locals and parameters), since it's all the nested
             routines have access to
          c) its return value must be usable to construct a backtrace, as it's
             also used by the exception handling routines
          The solution we use here, based on something similar that's done in
          the MIPS port, is to generate all accesses to locals in the routine
          itself SP-relative, and then after the code is generated and the local
          size is known (namely, here), we change all SP-relative variables/
          parameters into FP-relative ones. This means that they'll be accessed
          less efficiently from nested routines, but those accesses are indirect
          anyway and at least this way they can be accessed at all
        }
        if current_procinfo.has_nestedprocs or
           (
             (target_info.system=system_aarch64_win64) and
             (current_procinfo.flags*[pi_has_implicit_finally,pi_needs_implicit_finally,pi_uses_exceptions]<>[])
           ) then
          begin
            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
          end;
      end;
    if not (pi_has_unwind_info in current_procinfo.flags) then
      exit;
    { Generate unwind data for aarch64-win64 }
    seh_proc:=cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname);
    if assigned(hitem) then
      list.insertafter(seh_proc,hitem)
    else
      list.insert(seh_proc);
    { the directive creates another section }
    inc(list.section_count);
    templist:=TAsmList.Create;
    if not suppress_endprologue then
      begin
        templist.concat(cai_seh_directive.create(ash_endprologue));
      end;
    { place .seh_endprologue after any inline-assembler prologue marker }
    if assigned(current_procinfo.endprologue_ai) then
      current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,templist)
    else
      list.concatlist(templist);
    templist.free;
  end;
{ No GOT pointer initialisation is needed on the supported targets. }
procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
  begin
    { nothing to do on Darwin or Linux }
  end;
{ Intentionally empty: register restoration is emitted by g_proc_exit. }
procedure tcgaarch64.g_restore_registers(list:TAsmList);
  begin
    { done in g_proc_exit }
  end;
{ Counterpart of save_regs: reloads the callee-saved registers of type rt
  in the range lowsr..highsr that were stored by the prologue, popping 16
  bytes per LDP/LDR via post-indexed addressing. }
procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
  var
    ref: treference;
    sr, highestsetsr: tsuperregister;
    pairreg: tregister;
    i,
    regcount: longint;
    aiarr : array of tai;
  begin
    { post-indexed load of 16 bytes (= pop) }
    reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
    ref.addressmode:=AM_POSTINDEXED;
    regcount:=0;
    { due to SEH on Win64 we can only load consecutive registers and single
      ones are done using LDR, so we need to handle this differently there }
    if target_info.system=system_aarch64_win64 then
      begin
        { collect the loads in save order first, then emit them reversed so
          they mirror the pushes performed by save_regs }
        setlength(aiarr,highsr-lowsr+1);
        pairreg:=NR_NO;
        for sr:=lowsr to highsr do
          if sr in rg[rt].used_in_proc then
            begin
              if pairreg=NR_NO then
                { remember the first register of a potential pair }
                pairreg:=newreg(rt,sr,sub)
              else
                begin
                  if getsupreg(pairreg)=sr-1 then
                    begin
                      { consecutive pair -> single LDP }
                      aiarr[regcount]:=taicpu.op_reg_reg_ref(A_LDP,pairreg,newreg(rt,sr,sub),ref);
                      inc(regcount);
                      pairreg:=NR_NO;
                    end
                  else
                    begin
                      { gap -> load pending register alone, start new pair }
                      aiarr[regcount]:=taicpu.op_reg_ref(A_LDR,pairreg,ref);
                      inc(regcount);
                      pairreg:=newreg(rt,sr,sub);
                    end;
                end;
            end;
        { an unpaired register may still be pending }
        if pairreg<>NR_NO then
          begin
            aiarr[regcount]:=taicpu.op_reg_ref(A_LDR,pairreg,ref);
            inc(regcount);
            pairreg:=NR_NO;
          end;
        for i:=regcount-1 downto 0 do
          list.concat(aiarr[i]);
      end
    else
      begin
        { highest reg stored twice? }
        highestsetsr:=RS_NO;
        for sr:=lowsr to highsr do
          if sr in rg[rt].used_in_proc then
            begin
              inc(regcount);
              highestsetsr:=sr;
            end;
        if odd(regcount) then
          begin
            { odd count: the prologue stored the highest register twice to
              keep SP aligned -> reload it alone, pair up the rest }
            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
            highestsetsr:=pred(highestsetsr);
          end;
        { load all (other) used registers pairwise }
        pairreg:=NR_NO;
        for sr:=highestsetsr downto lowsr do
          if sr in rg[rt].used_in_proc then
            if pairreg=NR_NO then
              pairreg:=newreg(rt,sr,sub)
            else
              begin
                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
                pairreg:=NR_NO
              end;
      end;
    { There can't be any register left }
    if pairreg<>NR_NO then
      internalerror(2014112602);
  end;
{ Generates the procedure epilogue: restores callee-saved registers and
  the stack pointer, reloads FP/LR (unless the routine never returns)
  and returns; on Win64 the SEH end-of-procedure data is emitted too. }
procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
  var
    ref: treference;
    regsstored: boolean;
    sr: tsuperregister;
  begin
    if not(nostackframe) and
       { we do not need an exit stack frame when we never return
         * the final ret is left so the peephole optimizer can easily do call/ret -> jmp or call conversions
         * the entry stack frame must be normally generated because the subroutine could be still left by
           an exception and then the unwinding code might need to restore the registers stored by the entry code
       }
       not(po_noreturn in current_procinfo.procdef.procoptions) then
      begin
        { if no registers have been stored, we don't have to subtract the
          allocated temp space from the stack pointer }
        regsstored:=false;
        for sr:=RS_X19 to RS_X28 do
          if sr in rg[R_INTREGISTER].used_in_proc then
            begin
              regsstored:=true;
              break;
            end;
        if not regsstored then
          for sr:=RS_D8 to RS_D15 do
            if sr in rg[R_MMREGISTER].used_in_proc then
              begin
                regsstored:=true;
                break;
              end;
        { restore registers (and stack pointer) }
        if regsstored then
          begin
            { free the local area first so the saved registers end up at SP }
            if current_procinfo.final_localsize<>0 then
              handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
            { restore in reverse order of the saves in g_proc_entry }
            load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
            load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
          end
        else if current_procinfo.final_localsize<>0 then
          begin
            { restore stack pointer }
            if pi_no_framepointer_needed in current_procinfo.flags then
              handle_reg_imm12_reg(list,A_ADD,OS_ADDR,current_procinfo.framepointer,current_procinfo.final_localsize,
                current_procinfo.framepointer,NR_IP0,false,true)
            else
              { FP still points at the frame base -> just copy it back }
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
          end;
        if not(pi_no_framepointer_needed in current_procinfo.flags) then
          begin
            { restore framepointer and return address }
            reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
            ref.addressmode:=AM_POSTINDEXED;
            list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
          end;
      end;
    { return }
    list.concat(taicpu.op_none(A_RET));
    if (pi_has_unwind_info in current_procinfo.flags) then
      begin
        tcpuprocinfo(current_procinfo).dump_scopes(list);
        list.concat(cai_seh_directive.create(ash_endproc));
      end;
  end;
{ Intentionally empty: register saving is emitted by g_proc_entry. }
procedure tcgaarch64.g_save_registers(list : TAsmList);
  begin
    { done in g_proc_entry }
  end;
  2000. { ************* concatcopy ************ }
{ Copies len bytes from source to dest by calling the FPC_MOVE runtime
  helper; used when an inline copy is not worthwhile. }
procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  var
    paraloc1,paraloc2,paraloc3 : TCGPara;
    pd : tprocdef;
  begin
    pd:=search_system_proc('MOVE');
    paraloc1.init;
    paraloc2.init;
    paraloc3.init;
    paramanager.getcgtempparaloc(list,pd,1,paraloc1);
    paramanager.getcgtempparaloc(list,pd,2,paraloc2);
    paramanager.getcgtempparaloc(list,pd,3,paraloc3);
    { load the parameters in reverse order: len, dest, source }
    a_load_const_cgpara(list,OS_SINT,len,paraloc3);
    a_loadaddr_ref_cgpara(list,dest,paraloc2);
    a_loadaddr_ref_cgpara(list,source,paraloc1);
    paramanager.freecgpara(list,paraloc3);
    paramanager.freecgpara(list,paraloc2);
    paramanager.freecgpara(list,paraloc1);
    { the helper call clobbers the volatile registers }
    alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
    alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
    a_call_name(list,'FPC_MOVE',false);
    dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
    dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
    paraloc3.done;
    paraloc2.done;
    paraloc1.done;
  end;
  2028. procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
  2029. var
  2030. sourcebasereplaced, destbasereplaced: boolean;
    { get optimal memory operation to use for loading/storing data
      in an unrolled loop }
    procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
      begin
        if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
           (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
          begin
            { NOTE(review): this branch tests the scaled opcode but returns
              the unscaled one with needsimplify=true; preparecopy later
              overwrites memop with scaledop whenever needsimplify is set,
              so the net effect is only extra simplification work -- confirm
              this is intentional and not meant to be
              memop:=scaledop / needsimplify:=false }
            memop:=unscaledop;
            needsimplify:=true;
          end
        else if (unscaledop<>A_NONE) and
           (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
           (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
          begin
            { the unscaled form handles both references directly }
            memop:=unscaledop;
            needsimplify:=false;
          end
        else
          begin
            { neither form works directly -> use the scaled form after
              simplifying the reference }
            memop:=scaledop;
            needsimplify:=true;
          end;
      end;
    { adjust the offset and/or addressing mode after a load/store so it's
      correct for the next one of the same size }
    procedure updaterefafterloadstore(var ref: treference; oplen: longint);
      begin
        case ref.addressmode of
          AM_OFFSET:
            inc(ref.offset,oplen);
          AM_POSTINDEXED:
            { base register updated by instruction, next offset can remain
              the same }
            ;
          AM_PREINDEXED:
            begin
              { base register updated by instruction -> next instruction can
                use post-indexing with offset = sizeof(operation) }
              ref.offset:=0;
              ref.addressmode:=AM_OFFSET;
            end;
        end;
      end;
    { generate a load/store and adjust the reference offset to the next
      memory location if necessary }
    procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
      begin
        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
        updaterefafterloadstore(ref,tcgsize2size[opsize]);
      end;
    { generate a dual load/store (ldp/stp) and adjust the reference offset to
      the next memory location if necessary (the pair transfers twice the
      single-operation size) }
    procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
      begin
        list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
        updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
      end;
    { turn a reference into a pre- or post-indexed reference for use in a
      load/store of a particular size }
    procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
      var
        tmpreg: tregister;
        scaledoffset: longint;
        orgaddressmode: taddressmode;
      begin
        { addressing step: one element, or two for paired load/store }
        scaledoffset:=tcgsize2size[opsize];
        if scaledop in [A_LDP,A_STP] then
          scaledoffset:=scaledoffset*2;
        { can we use the reference as post-indexed without changes? }
        if forcepostindexing then
          begin
            orgaddressmode:=ref.addressmode;
            ref.addressmode:=AM_POSTINDEXED;
            if (orgaddressmode=AM_POSTINDEXED) or
               ((ref.offset=0) and
                (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
              begin
                { just change the post-indexed offset to the access size }
                ref.offset:=scaledoffset;
                { and replace the base register if that didn't happen yet
                  (could be sp or a regvar) }
                if not basereplaced then
                  begin
                    tmpreg:=getaddressregister(list);
                    a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
                    ref.base:=tmpreg;
                    basereplaced:=true;
                  end;
                exit;
              end;
            { not usable as-is -> restore and fall through }
            ref.addressmode:=orgaddressmode;
          end;
{$ifdef dummy}
        This could in theory be useful in case you have a concatcopy from
        e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
        very unlikely. Disabled because it still needs fixes, as it
        also generates pre-indexed loads right now at the very end for the
        left-over gencopies
        { can we turn it into a pre-indexed reference for free? (after the
          first operation, it will be turned into an offset one) }
        if not forcepostindexing and
           (ref.offset<>0) then
          begin
            orgaddressmode:=ref.addressmode;
            ref.addressmode:=AM_PREINDEXED;
            tmpreg:=ref.base;
            if not basereplaced and
               (ref.base=tmpreg) then
              begin
                tmpreg:=getaddressregister(list);
                a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
                ref.base:=tmpreg;
                basereplaced:=true;
              end;
            if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
              make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
            exit;
          end;
{$endif dummy}
        if not forcepostindexing then
          begin
            ref.addressmode:=AM_OFFSET;
            make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
            { this may still cause problems if the final offset is no longer
              a simple ref; it's a bit complicated to pass all information
              through at all places and check that here, so play safe: we
              currently never generate unrolled copies for more than 64
              bytes (32 with non-double-register copies) }
            if ref.index=NR_NO then
              begin
                if ((scaledop in [A_LDP,A_STP]) and
                    (ref.offset<((64-8)*tcgsize2size[opsize]))) or
                   ((scaledop in [A_LDUR,A_STUR]) and
                    (ref.offset<(255-8*tcgsize2size[opsize]))) or
                   ((scaledop in [A_LDR,A_STR]) and
                    (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
                  exit;
              end;
          end;
        { fall-back: compute the address into a fresh register and use it
          as the new base }
        tmpreg:=getaddressregister(list);
        a_loadaddr_ref_reg(list,ref,tmpreg);
        basereplaced:=true;
        if forcepostindexing then
          begin
            reference_reset_base(ref,tmpreg,scaledoffset,ref.temppos,ref.alignment,ref.volatility);
            ref.addressmode:=AM_POSTINDEXED;
          end
        else
          begin
            reference_reset_base(ref,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
            ref.addressmode:=AM_OFFSET;
          end
      end;
    { prepare a reference for use by gencopy. This is done both after the
      unrolled and regular copy loop -> get rid of post-indexing mode, make
      sure ref is valid }
    procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
      var
        simplify: boolean;
      begin
        if ref.addressmode=AM_POSTINDEXED then
          ref.offset:=tcgsize2size[opsize];
        { NOTE(review): scaledop is passed for both the scaled and the
          unscaled operation, so this routine's unscaledop parameter is
          effectively unused -- confirm whether that is intentional }
        getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
        if simplify then
          begin
            makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
            op:=scaledop;
          end;
      end;
    { generate a copy from source to dest of size opsize/postfix }
    procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
      var
        reg: tregister;
        loadop, storeop: tasmop;
      begin
        preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
        preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
        { load into a temporary register, then store }
        reg:=getintregister(list,opsize);
        genloadstore(list,loadop,reg,source,postfix,opsize);
        genloadstore(list,storeop,reg,dest,postfix,opsize);
      end;
  2212. { copy the leftovers after an unrolled or regular copy loop }
  2213. procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
  2214. begin
  2215. { stop post-indexing if we did so in the loop, since in that case all
  2216. offsets definitely can be represented now }
  2217. if source.addressmode=AM_POSTINDEXED then
  2218. begin
  2219. source.addressmode:=AM_OFFSET;
  2220. source.offset:=0;
  2221. end;
  2222. if dest.addressmode=AM_POSTINDEXED then
  2223. begin
  2224. dest.addressmode:=AM_OFFSET;
  2225. dest.offset:=0;
  2226. end;
  2227. { transfer the leftovers }
  2228. if len>=8 then
  2229. begin
  2230. dec(len,8);
  2231. gencopy(list,source,dest,PF_NONE,OS_64);
  2232. end;
  2233. if len>=4 then
  2234. begin
  2235. dec(len,4);
  2236. gencopy(list,source,dest,PF_NONE,OS_32);
  2237. end;
  2238. if len>=2 then
  2239. begin
  2240. dec(len,2);
  2241. gencopy(list,source,dest,PF_H,OS_16);
  2242. end;
  2243. if len>=1 then
  2244. begin
  2245. dec(len);
  2246. gencopy(list,source,dest,PF_B,OS_8);
  2247. end;
  2248. end;
const
  { load_length + loop dec + cbnz }
  loopoverhead=12;
  { loop overhead + load + store }
  totallooplen=loopoverhead + 8;
  { NOTE(review): neither loopoverhead nor totallooplen is referenced in
    the body below -- possibly leftovers from an earlier size heuristic;
    confirm before removing }
var
  totalalign: longint;
  maxlenunrolled: tcgint;
  loadop, storeop: tasmop;
  opsize: tcgsize;
  postfix: toppostfix;
  tmpsource, tmpdest: treference;
  scaledstoreop, unscaledstoreop,
  scaledloadop, unscaledloadop: tasmop;
  { scratch registers for the unrolled copy (max 4 ldp/stp pairs) }
  regs: array[1..8] of tregister;
  countreg: tregister;
  i, regcount: longint;
  hl: tasmlabel;
  simplifysource, simplifydest: boolean;
begin
  { nothing to copy }
  if len=0 then
    exit;
  sourcebasereplaced:=false;
  destbasereplaced:=false;
  { maximum common alignment }
  totalalign:=max(1,newalignment(source.alignment,dest.alignment));
  { use a simple load/store? }
  if (len in [1,2,4,8]) and
     ((totalalign>=(len div 2)) or
      (source.alignment=len) or
      (dest.alignment=len)) then
    begin
      opsize:=int_cgsize(len);
      a_load_ref_ref(list,opsize,opsize,source,dest);
      exit;
    end;
  { alignment > length is not useful, and would break some checks below }
  while totalalign>len do
    totalalign:=totalalign div 2;
  { operation sizes to use based on common alignment }
  case totalalign of
    1:
      begin
        postfix:=PF_B;
        opsize:=OS_8;
      end;
    2:
      begin
        postfix:=PF_H;
        opsize:=OS_16;
      end;
    4:
      begin
        postfix:=PF_None;
        opsize:=OS_32;
      end
    else
      begin
        totalalign:=8;
        postfix:=PF_None;
        opsize:=OS_64;
      end;
  end;
  { maximum length to handled with an unrolled loop (4 loads + 4 stores) }
  maxlenunrolled:=min(totalalign,8)*4;
  { ldp/stp -> 2 registers per instruction }
  if (totalalign>=4) and
     (len>=totalalign*2) then
    begin
      maxlenunrolled:=maxlenunrolled*2;
      scaledstoreop:=A_STP;
      scaledloadop:=A_LDP;
      unscaledstoreop:=A_NONE;
      unscaledloadop:=A_NONE;
    end
  else
    begin
      scaledstoreop:=A_STR;
      scaledloadop:=A_LDR;
      unscaledstoreop:=A_STUR;
      unscaledloadop:=A_LDUR;
    end;
  { we only need 4 instructions extra to call FPC_MOVE }
  if cs_opt_size in current_settings.optimizerswitches then
    maxlenunrolled:=maxlenunrolled div 2;
  { long copy in a procedure that calls anyway -> delegate to the RTL move }
  if (len>maxlenunrolled) and
     (len>totalalign*8) and
     (pi_do_call in current_procinfo.flags) then
    begin
      g_concatcopy_move(list,source,dest,len);
      exit;
    end;
  simplifysource:=true;
  simplifydest:=true;
  tmpsource:=source;
  tmpdest:=dest;
  { can we directly encode all offsets in an unrolled loop? }
  if len<=maxlenunrolled then
    begin
{$ifdef extdebug}
      list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
{$endif extdebug}
      { the leftovers will be handled separately -> -(len mod opsize) }
      inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
      { additionally, the last regular load/store will be at
        offset+len-opsize (if len-(len mod opsize)>0) }
      if tmpsource.offset>source.offset then
        dec(tmpsource.offset,tcgsize2size[opsize]);
      { probe whether the highest offset is encodable }
      getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
      inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
      if tmpdest.offset>dest.offset then
        dec(tmpdest.offset,tcgsize2size[opsize]);
      getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
      tmpsource:=source;
      tmpdest:=dest;
      { if we can't directly encode all offsets, simplify }
      if simplifysource then
        begin
          loadop:=scaledloadop;
          makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
        end;
      if simplifydest then
        begin
          storeop:=scaledstoreop;
          makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
        end;
      regcount:=len div tcgsize2size[opsize];
      { in case we transfer two registers at a time, we copy an even
        number of registers }
      if loadop=A_LDP then
        regcount:=regcount and not(1);
      { initialise for dfa }
      regs[low(regs)]:=NR_NO;
      { max 4 loads/stores -> max 8 registers (in case of ldp/stdp) }
      for i:=1 to regcount do
        regs[i]:=getintregister(list,opsize);
      if loadop=A_LDP then
        begin
          { load registers }
          for i:=1 to (regcount div 2) do
            gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
          { store registers }
          for i:=1 to (regcount div 2) do
            gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
        end
      else
        begin
          { all loads before all stores; NOTE(review): presumably so
            overlapping source/dest still copies the original bytes --
            confirm }
          for i:=1 to regcount do
            genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
          for i:=1 to regcount do
            genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
        end;
      { leftover }
      len:=len-regcount*tcgsize2size[opsize];
{$ifdef extdebug}
      list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
{$endif extdebug}
    end
  else
    begin
{$ifdef extdebug}
      list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
{$endif extdebug}
      { regular loop -> definitely use post-indexing }
      loadop:=scaledloadop;
      makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
      storeop:=scaledstoreop;
      makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
      current_asmdata.getjumplabel(hl);
      countreg:=getintregister(list,OS_32);
      { iteration count: an ldp/stp pair moves twice opsize per iteration }
      if loadop=A_LDP then
        a_load_const_reg(list,OS_32,len div (tcgsize2size[opsize]*2),countreg)
      else
        a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
      a_label(list,hl);
      a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
      if loadop=A_LDP then
        begin
          regs[1]:=getintregister(list,opsize);
          regs[2]:=getintregister(list,opsize);
          gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
          gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
        end
      else
        begin
          regs[1]:=getintregister(list,opsize);
          genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
          genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
        end;
      { loop back while the counter is non-zero }
      list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
      { bytes not covered by a whole loop iteration }
      if loadop=A_LDP then
        len:=len mod (tcgsize2size[opsize]*2)
      else
        len:=len mod tcgsize2size[opsize];
    end;
  { copy the remaining sub-transfer-size bytes }
  gencopyleftovers(list,tmpsource,tmpdest,len);
end;
{ Self-pointer adjustment for interface wrappers. On this target the
  adjustment is emitted inline by g_intf_wrapper, so reaching this method
  indicates a compiler bug. }
procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  begin
    { This method is integrated into g_intf_wrapper and shouldn't be called separately }
    InternalError(2013020102);
  end;
  2451. procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
  2452. var
  2453. r, tmpreg: TRegister;
  2454. ai: taicpu;
  2455. l1,l2: TAsmLabel;
  2456. begin
  2457. { so far, we assume all flavours of AArch64 need explicit floating point exception checking }
  2458. if ((cs_check_fpu_exceptions in current_settings.localswitches) and
  2459. (force or current_procinfo.FPUExceptionCheckNeeded)) then
  2460. begin
  2461. r:=getintregister(list,OS_INT);
  2462. tmpreg:=getintregister(list,OS_INT);
  2463. list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
  2464. list.concat(taicpu.op_reg_reg_const(A_AND,tmpreg,r,$1f));
  2465. current_asmdata.getjumplabel(l1);
  2466. current_asmdata.getjumplabel(l2);
  2467. ai:=taicpu.op_reg_sym_ofs(A_CBNZ,tmpreg,l1,0);
  2468. ai.is_jmp:=true;
  2469. list.concat(ai);
  2470. list.concat(taicpu.op_reg_reg_const(A_AND,tmpreg,r,$80));
  2471. ai:=taicpu.op_reg_sym_ofs(A_CBZ,tmpreg,l2,0);
  2472. ai.is_jmp:=true;
  2473. list.concat(ai);
  2474. a_label(list,l1);
  2475. alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2476. cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
  2477. dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
  2478. a_label(list,l2);
  2479. if clear then
  2480. current_procinfo.FPUExceptionCheckNeeded:=false;
  2481. end;
  2482. end;
  2483. procedure tcgaarch64.g_profilecode(list : TAsmList);
  2484. begin
  2485. if target_info.system = system_aarch64_linux then
  2486. begin
  2487. list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
  2488. a_call_name(list,'_mcount',false);
  2489. end
  2490. else
  2491. internalerror(2020021901);
  2492. end;
  2493. procedure create_codegen;
  2494. begin
  2495. cg:=tcgaarch64.Create;
  2496. cg128:=tcg128.Create;
  2497. end;
  2498. end.