  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. This unit implements the code generator for Xtensa
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,parabase,
  22. cgbase,cgutils,cgobj,
  23. aasmbase,aasmtai,aasmdata,aasmcpu,
  24. cpubase,cpuinfo,
  25. node,symconst,SymType,symdef,
  26. rgcpu;
  type

    { Code generator class for this CPU target.
      NOTE(review): the entire body of this class is disabled via the "dummy"
      define below; the declarations (and the implementations further down)
      were copied from the AArch64 code generator as a template — presumably
      to be replaced by real Xtensa implementations. Confirm before enabling. }
    tcgcpu=class(tcg)
{$ifdef dummy}
     protected
      { changes register size without adding register allocation info }
      function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
     public
      { simplifies "ref" so it can be used with "op". If "ref" can be used
        with a different load/Store operation that has the same meaning as the
        original one, "op" will be replaced with the alternative }
      procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
      function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
      procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
      procedure init_register_allocators;override;
      procedure done_register_allocators;override;
      function getmmregister(list:TAsmList;size:tcgsize):tregister;override;
      function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
      procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
      procedure a_call_reg(list:TAsmList;Reg:tregister);override;
      { General purpose instructions }
      procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
      procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
      procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
      procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
      procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
      procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
      procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
      { move instructions }
      procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
      procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
      procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
      procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
      procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
      procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
      procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
      procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
      { fpu move instructions (not used, all floating point is vector unit-based) }
      procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
      procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
      procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
      procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
      procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
      procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
      procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
      procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
      procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
      { comparison operations }
      procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
      procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
      procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
      procedure a_jmp_name(list: TAsmList; const s: string);override;
      procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
      procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
      procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
      procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
      procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
      procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
      procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
      procedure g_maybe_got_init(list: TAsmList); override;
      procedure g_restore_registers(list: TAsmList);override;
      procedure g_save_registers(list: TAsmList);override;
      procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
      procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
      procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
      procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
      procedure g_profilecode(list: TAsmList);override;
     private
      function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
      procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
{$endif dummy}
    end;
  99. procedure create_codegen;
  100. {
  101. const
  102. TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
  103. A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
  104. );
  105. TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
  106. A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
  107. );
  108. TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
  109. C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
  110. );
  111. }
  112. implementation
  113. uses
  114. globals,verbose,systems,cutils,
  115. paramgr,fmodule,
  116. symtable,symsym,
  117. tgobj,
  118. procinfo,cpupi,
  119. cg64f32;
  120. {$ifdef dummy}
    { Rewrites "ref" (and possibly "op") into an equivalent addressing form
      that the chosen load/store instruction can actually encode, emitting any
      auxiliary instructions (ADRP/GOT loads, address additions) to "list".
      If "preferred_newbasereg" is a real register it is used as scratch base
      where one is needed; otherwise a fresh address register is allocated.
      On exit, "ref" is guaranteed to be encodable by "op". }
    procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
      var
        href: treference;
        so: tshifterop;
        accesssize: longint;
      begin
        if (ref.base=NR_NO) then
          begin
            if ref.shiftmode<>SM_None then
              internalerror(2014110701);
            { index-only reference: promote the index to base }
            ref.base:=ref.index;
            ref.index:=NR_NO;
          end;
        { no abitrary scale factor support (the generic code doesn't set it,
          AArch-specific code shouldn't either) }
        if not(ref.scalefactor in [0,1]) then
          internalerror(2014111002);
        case simple_ref_type(op,size,oppostfix,ref) of
          sr_simple:
            { already directly encodable -> nothing to do }
            exit;
          sr_internal_illegal:
            internalerror(2014121702);
          sr_complex:
            { continue } ;
        end;
        if assigned(ref.symbol) then
          begin
            { internal "load symbol" instructions should already be valid }
            if assigned(ref.symboldata) or
               (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
              internalerror(2014110802);
            { no relative symbol support (needed) yet }
            if assigned(ref.relsymbol) then
              internalerror(2014111001);
            { loading a symbol address (whether it's in the GOT or not) consists
              of two parts: first load the page on which it is located, then
              either the offset in the page or load the value at that offset in
              the page. This final GOT-load can be relaxed by the linker in case
              the variable itself can be stored directly in the GOT }
            if (preferred_newbasereg=NR_NO) or
               (ref.base=preferred_newbasereg) or
               (ref.index=preferred_newbasereg) then
              preferred_newbasereg:=getaddressregister(list);
            { load the (GOT) page }
            reference_reset_symbol(href,ref.symbol,0,8,[]);
            if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
                (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
               ((ref.symbol.typ=AT_DATA) and
                (ref.symbol.bind=AB_LOCAL)) then
              href.refaddr:=addr_page
            else
              href.refaddr:=addr_gotpage;
            list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
            { load the GOT entry (= address of the variable) }
            reference_reset_base(href,preferred_newbasereg,0,ctempposinvalid,sizeof(pint),[]);
            href.symbol:=ref.symbol;
            { code symbols defined in the current compilation unit do not
              have to be accessed via the GOT }
            if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
                (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
               ((ref.symbol.typ=AT_DATA) and
                (ref.symbol.bind=AB_LOCAL)) then
              begin
                href.base:=NR_NO;
                href.refaddr:=addr_pageoffset;
                list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
              end
            else
              begin
                href.refaddr:=addr_gotpageoffset;
                { use a_load_ref_reg() rather than directly encoding the LDR,
                  so that we'll check the validity of the reference }
                a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
              end;
            { set as new base register }
            if ref.base=NR_NO then
              ref.base:=preferred_newbasereg
            else if ref.index=NR_NO then
              ref.index:=preferred_newbasereg
            else
              begin
                { make sure it's valid in case ref.base is SP -> make it
                  the second operand}
                a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
                ref.base:=preferred_newbasereg
              end;
            ref.symbol:=nil;
          end;
        { base & index }
        if (ref.base<>NR_NO) and
           (ref.index<>NR_NO) then
          begin
            case op of
              A_LDR, A_STR:
                begin
                  if (ref.shiftmode=SM_None) and
                     (ref.shiftimm<>0) then
                    internalerror(2014110805);
                  { wrong shift? (possible in case of something like
                    array_of_2byte_rec[x].bytefield -> shift will be set 1, but
                    the final load is a 1 byte -> can't use shift after all }
                  if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
                     ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
                      (ref.offset<>0)) then
                    begin
                      if preferred_newbasereg=NR_NO then
                        preferred_newbasereg:=getaddressregister(list);
                      { "add" supports a superset of the shift modes supported by
                        load/store instructions }
                      shifterop_reset(so);
                      so.shiftmode:=ref.shiftmode;
                      so.shiftimm:=ref.shiftimm;
                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                      { possibly still an invalid offset -> fall through }
                    end
                  else if ref.offset<>0 then
                    begin
                      if (preferred_newbasereg=NR_NO) or
                         { we keep ref.index, so it must not be overwritten }
                         (ref.index=preferred_newbasereg) then
                        preferred_newbasereg:=getaddressregister(list);
                      { add to the base and not to the index, because the index
                        may be scaled; this works even if the base is SP }
                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                      ref.offset:=0;
                      ref.base:=preferred_newbasereg;
                      { finished }
                      exit;
                    end
                  else
                    { valid -> exit }
                    exit;
                end;
              { todo }
              A_LD1,A_LD2,A_LD3,A_LD4,
              A_ST1,A_ST2,A_ST3,A_ST4:
                internalerror(2014110704);
              { these don't support base+index }
              A_LDUR,A_STUR,
              A_LDP,A_STP:
                begin
                  { these either don't support pre-/post-indexing, or don't
                    support it with base+index }
                  if ref.addressmode<>AM_OFFSET then
                    internalerror(2014110911);
                  if preferred_newbasereg=NR_NO then
                    preferred_newbasereg:=getaddressregister(list);
                  if ref.shiftmode<>SM_None then
                    begin
                      { "add" supports a superset of the shift modes supported by
                        load/store instructions }
                      shifterop_reset(so);
                      so.shiftmode:=ref.shiftmode;
                      so.shiftimm:=ref.shiftimm;
                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
                    end
                  else
                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                  { fall through to the handling of base + offset, since the
                    offset may still be too big }
                end;
              else
                internalerror(2014110901);
            end;
          end;
        { base + offset }
        if ref.base<>NR_NO then
          begin
            { valid offset for LDUR/STUR -> use that }
            if (ref.addressmode=AM_OFFSET) and
               (op in [A_LDR,A_STR]) and
               (ref.offset>=-256) and
               (ref.offset<=255) then
              begin
                if op=A_LDR then
                  op:=A_LDUR
                else
                  op:=A_STUR
              end
            { if it's not a valid LDUR/STUR, use LDR/STR }
            else if (op in [A_LDUR,A_STUR]) and
               ((ref.offset<-256) or
                (ref.offset>255) or
                (ref.addressmode<>AM_OFFSET)) then
              begin
                if op=A_LDUR then
                  op:=A_LDR
                else
                  op:=A_STR
              end;
            case op of
              A_LDR,A_STR:
                begin
                  case ref.addressmode of
                    AM_PREINDEXED:
                      begin
                        { since the loaded/stored register cannot be the same
                          as the base register, we can safely add the
                          offset to the base if it doesn't fit}
                        if (ref.offset<-256) or
                           (ref.offset>255) then
                          begin
                            a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
                            ref.offset:=0;
                          end;
                      end;
                    AM_POSTINDEXED:
                      begin
                        { cannot emulate post-indexing if we have to fold the
                          offset into the base register }
                        if (ref.offset<-256) or
                           (ref.offset>255) then
                          internalerror(2014110909);
                        { ok }
                      end;
                    AM_OFFSET:
                      begin
                        { unsupported offset -> fold into base register }
                        accesssize:=1 shl tcgsizep2size[size];
                        if (ref.offset<0) or
                           (ref.offset>(((1 shl 12)-1)*accesssize)) or
                           ((ref.offset mod accesssize)<>0) then
                          begin
                            if preferred_newbasereg=NR_NO then
                              preferred_newbasereg:=getaddressregister(list);
                            { can we split the offset beween an
                              "add/sub (imm12 shl 12)" and the load (also an
                              imm12)?
                              -- the offset from the load will always be added,
                              that's why the lower bound has a smaller range
                              than the upper bound; it must also be a multiple
                              of the access size }
                            if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
                               (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
                               ((ref.offset mod accesssize)=0) then
                              begin
                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
                                ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
                              end
                            else
                              begin
                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                                ref.offset:=0;
                              end;
                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                          end;
                      end
                  end;
                end;
              A_LDP,A_STP:
                begin
                  { unsupported offset -> fold into base register (these
                    instructions support all addressmodes) }
                  if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
                     (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
                    begin
                      case ref.addressmode of
                        AM_POSTINDEXED:
                          { don't emulate post-indexing if we have to fold the
                            offset into the base register }
                          internalerror(2014110910);
                        AM_PREINDEXED:
                          { this means the offset must be added to the current
                            base register }
                          preferred_newbasereg:=ref.base;
                        AM_OFFSET:
                          if preferred_newbasereg=NR_NO then
                            preferred_newbasereg:=getaddressregister(list);
                      end;
                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                      reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,ref.alignment,ref.volatility);
                    end
                end;
              A_LDUR,A_STUR:
                begin
                  { valid, checked above }
                end;
              { todo }
              A_LD1,A_LD2,A_LD3,A_LD4,
              A_ST1,A_ST2,A_ST3,A_ST4:
                internalerror(2014110908);
              else
                internalerror(2014110708);
            end;
            { done }
            exit;
          end;
        { only an offset -> change to base (+ offset 0) }
        if preferred_newbasereg=NR_NO then
          preferred_newbasereg:=getaddressregister(list);
        a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
        reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,newalignment(8,ref.offset),ref.volatility);
      end;
  416. function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
  417. var
  418. subreg:Tsubregister;
  419. begin
  420. subreg:=cgsize2subreg(getregtype(reg),size);
  421. result:=reg;
  422. setsubreg(result,subreg);
  423. end;
    { Requesting a dedicated FPU register is an internal error on this target:
      per the class declaration, all floating point goes through the vector
      ("mm") registers instead. }
    function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
      begin
        internalerror(2014122110);
        { squash warning }
        result:=NR_NO;
      end;
  430. function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
  431. begin
  432. make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
  433. list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
  434. result:=ref;
  435. end;
    { Emits "op" combining "src" with the constant "a": directly when "a" fits
      in an imm12 or an imm12-shifted-left-by-12 encoding, otherwise by first
      materializing "a" in a temporary register.  If "usedest" is false the
      instruction form without destination is used (src and constant only).
      "tmpreg" may be NR_NO, in which case a fresh register is allocated when
      needed; "setflags" appends the S postfix to the instruction. }
    procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
      var
        instr: taicpu;
        so: tshifterop;
        hadtmpreg: boolean;
      begin
        { imm12 }
        if (a>=0) and
           (a<=((1 shl 12)-1)) then
          if usedest then
            instr:=taicpu.op_reg_reg_const(op,dst,src,a)
          else
            instr:=taicpu.op_reg_const(op,src,a)
        { imm12 lsl 12 }
        else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
          begin
            { NOTE(review): "so" is not cleared with shifterop_reset here,
              unlike elsewhere in this unit -- confirm all fields are set }
            so.shiftmode:=SM_LSL;
            so.shiftimm:=12;
            if usedest then
              instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
            else
              instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
          end
        else
          begin
            { todo: other possible optimizations (e.g. load 16 bit constant in
              register and then add/sub/cmp/cmn shifted the rest) }
            if tmpreg=NR_NO then
              begin
                hadtmpreg:=false;
                tmpreg:=getintregister(list,size);
              end
            else
              begin
                hadtmpreg:=true;
                getcpuregister(list,tmpreg);
              end;
            a_load_const_reg(list,size,a,tmpreg);
            if usedest then
              instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
            else
              instr:=taicpu.op_reg_reg(op,src,tmpreg);
            { only release the register if it was a caller-supplied cpu register }
            if hadtmpreg then
              ungetcpuregister(list,tmpreg);
          end;
        if setflags then
          setoppostfix(instr,PF_S);
        list.concat(instr);
      end;
  485. {****************************************************************************
  486. Assembler code
  487. ****************************************************************************}
    { Creates the register allocators for the integer and vector ("mm")
      register files.  No allocator is created for R_FPUREGISTER: as noted in
      the class declaration, all floating point is vector unit-based. }
    procedure tcgaarch64.init_register_allocators;
      begin
        inherited init_register_allocators;
        { NOTE(review): RS_X18 is absent from the allocatable list --
          presumably because it is reserved as the platform register by
          several AArch64 ABIs; confirm before adding it }
        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
            [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
             RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
             RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
             { maybe we can enable this in the future for leaf functions (it's
               the frame pointer)
              ,RS_X29 }],
            first_int_imreg,[]);
        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
            [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
             RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
             RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
             RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
            first_mm_imreg,[]);
      end;
  506. procedure tcgaarch64.done_register_allocators;
  507. begin
  508. rg[R_INTREGISTER].free;
  509. rg[R_FPUREGISTER].free;
  510. rg[R_MMREGISTER].free;
  511. inherited done_register_allocators;
  512. end;
  513. function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
  514. begin
  515. case size of
  516. OS_F32:
  517. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
  518. OS_F64:
  519. result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
  520. else
  521. internalerror(2014102701);
  522. end;
  523. end;
  524. procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
  525. begin
  526. if not weak then
  527. list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s,AT_FUNCTION)))
  528. else
  529. list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)));
  530. end;
    { Emits an indirect call through register "reg" (BLR). }
    procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
      begin
        list.concat(taicpu.op_reg(A_BLR,reg));
      end;
  535. {********************** load instructions ********************}
procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
  var
    preva: tcgint;
    opc: tasmop;
    shift,maxshift: byte;
    so: tshifterop;
    reginited: boolean;
    mask: tcgint;
  begin
    { Load the constant "a" into "reg", picking among MOVN, ORR with a
      logical immediate, and MOVZ/MOVK sequences. }
    { if we load a value into a 32 bit register, it is automatically
      zero-extended to 64 bit }
    if (hi(a)=0) and
       (size in [OS_64,OS_S64]) then
      begin
        size:=OS_32;
        reg:=makeregsize(reg,size);
      end;
    { values <= 32 bit are stored in a 32 bit register }
    if not(size in [OS_64,OS_S64]) then
      a:=cardinal(a);
    if size in [OS_64,OS_S64] then
      begin
        mask:=-1;
        maxshift:=64;
      end
    else
      begin
        mask:=$ffffffff;
        maxshift:=32;
      end;
    { single movn enough? (to be extended) }
    shift:=16;
    preva:=a;
    repeat
      { all bits from "shift" upwards are 1 -> a single (shifted) MOVN
        of the inverted low 16 bits materialises the value }
      if (a shr shift)=(mask shr shift) then
        begin
          if shift=16 then
            list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
          else
            begin
              shifterop_reset(so);
              so.shiftmode:=SM_LSL;
              so.shiftimm:=shift-16;
              list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
            end;
          exit;
        end;
      { only try the next 16 bits if the current one is all 1 bits, since
        the movn will set all lower bits to 1 }
      if word(a shr (shift-16))<>$ffff then
        break;
      inc(shift,16);
    until shift=maxshift;
    reginited:=false;
    shift:=0;
    { can be optimized later to use more movn }
    repeat
      { leftover is shifterconst? (don't check if we can represent it just
        as effectively with movz/movk, as this check is expensive) }
      if ((shift<tcgsize2size[size]*(8 div 2)) and
          (word(a)<>0) and
          ((a shr 16)<>0)) and
         is_shifter_const(a shl shift,size) then
        begin
          if reginited then
            list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
          else
            { ORR with the zero register = plain load of the immediate }
            list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
          exit;
        end;
      { set all 16 bit parts <> 0 }
      if (word(a)<>0) or
         ((shift=0) and
          (a=0)) then
        if shift=0 then
          begin
            list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
            reginited:=true;
          end
        else
          begin
            shifterop_reset(so);
            so.shiftmode:=SM_LSL;
            so.shiftimm:=shift;
            { the first emitted move must be MOVZ (clears the other bits),
              subsequent ones MOVK (keep the other bits) }
            if not reginited then
              begin
                opc:=A_MOVZ;
                reginited:=true;
              end
            else
              opc:=A_MOVK;
            list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
          end;
      preva:=a;
      a:=a shr 16;
      inc(shift,16);
    until word(preva)=preva;
    if not reginited then
      internalerror(2014102702);
  end;
  636. procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
  637. var
  638. reg: tregister;
  639. begin
  640. { use the zero register if possible }
  641. if a=0 then
  642. begin
  643. if size in [OS_64,OS_S64] then
  644. reg:=NR_XZR
  645. else
  646. reg:=NR_WZR;
  647. a_load_reg_ref(list,size,size,reg,ref);
  648. end
  649. else
  650. inherited;
  651. end;
procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  var
    oppostfix:toppostfix;
    hreg: tregister;
  begin
    { Store (the low tosize bytes of) reg to memory at ref. }
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      begin
        { the store writes only tosize bytes anyway -> treat the source
          as already having the destination size }
        fromsize:=tosize;
        reg:=makeregsize(list,reg,fromsize);
      end
    { have a 32 bit register but need a 64 bit one? }
    else if tosize in [OS_64,OS_S64] then
      begin
        { sign extend if necessary }
        if fromsize in [OS_S8,OS_S16,OS_S32] then
          begin
            { can't overwrite reg, may be a constant reg }
            hreg:=getintregister(list,tosize);
            a_load_reg_reg(list,fromsize,tosize,reg,hreg);
            reg:=hreg;
          end
        else
          { top 32 bit are zero by default }
          reg:=makeregsize(reg,OS_64);
        fromsize:=tosize;
      end;
    { a declared alignment smaller than the store size forces a
      piecewise store }
    if (ref.alignment<>0) and
       (ref.alignment<tcgsize2size[tosize]) then
      begin
        a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
      end
    else
      begin
        { postfix selects the store variant: strb/strh/str }
        case tosize of
          OS_8,
          OS_S8:
            oppostfix:=PF_B;
          OS_16,
          OS_S16:
            oppostfix:=PF_H;
          OS_32,
          OS_S32,
          OS_64,
          OS_S64:
            oppostfix:=PF_None;
          else
            InternalError(200308299);
        end;
        handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
      end;
  end;
procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  var
    oppostfix:toppostfix;
  begin
    { Load a value of size fromsize at ref into reg (extending it to
      tosize). }
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      fromsize:=tosize;
    { ensure that all bits of the 32/64 register are always correctly set:
      * default behaviour is always to zero-extend to the entire (64 bit)
        register -> unsigned 8/16/32 bit loads only exist with a 32 bit
        target register, as the upper 32 bit will be zeroed implicitly
        -> always make target register 32 bit
      * signed loads exist both with 32 and 64 bit target registers,
        depending on whether the value should be sign extended to 32 or
        to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
        corresponding 64 bit register are again zeroed) -> no need to
        change anything (we only have 32 and 64 bit registers), except that
        when loading an OS_S32 to a 32 bit register, we don't need/can't
        use sign extension
    }
    if fromsize in [OS_8,OS_16,OS_32] then
      reg:=makeregsize(reg,OS_32);
    { a declared alignment smaller than the load size forces a
      piecewise load }
    if (ref.alignment<>0) and
       (ref.alignment<tcgsize2size[fromsize]) then
      begin
        a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
        exit;
      end;
    { postfix selects the load variant: ldrb/ldrsb/ldrh/ldrsh/ldrsw/ldr }
    case fromsize of
      OS_8:
        oppostfix:=PF_B;
      OS_S8:
        oppostfix:=PF_SB;
      OS_16:
        oppostfix:=PF_H;
      OS_S16:
        oppostfix:=PF_SH;
      OS_S32:
        { sign-extending 32 bit load (ldrsw) only needed when the target
          is a 64 bit register }
        if getsubreg(reg)=R_SUBD then
          oppostfix:=PF_NONE
        else
          oppostfix:=PF_SW;
      OS_32,
      OS_64,
      OS_S64:
        oppostfix:=PF_None;
      else
        InternalError(200308297);
    end;
    handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
    { clear upper 16 bits if the value was negative }
    if (fromsize=OS_S8) and (tosize=OS_16) then
      a_load_reg_reg(list,fromsize,tosize,reg,reg);
  end;
procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
  var
    href: treference;
    hreg1, hreg2, tmpreg,tmpreg2: tregister;
    i : Integer;
  begin
    { Load from a reference whose alignment is smaller than the access
      size, by assembling the value from several smaller loads. }
    case fromsize of
      OS_64,OS_S64:
        begin
          { split into two 32 bit loads }
          hreg1:=getintregister(list,OS_32);
          hreg2:=getintregister(list,OS_32);
          if target_info.endian=endian_big then
            begin
              { on big endian the high word is stored first -> swap }
              tmpreg:=hreg1;
              hreg1:=hreg2;
              hreg2:=tmpreg;
            end;
          { can we use LDP? }
          if (ref.alignment=4) and
             (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
            list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
          else
            begin
              a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
              href:=ref;
              inc(href.offset,4);
              a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
            end;
          { combine: move the low half, then insert the high half }
          a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
          list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
        end;
      OS_16,OS_S16,
      OS_32,OS_S32:
        begin
          if ref.alignment=2 then
            begin
              { assemble the value from 16 bit loads, inserting each
                subsequent halfword with bfi }
              href:=ref;
              if target_info.endian=endian_big then
                inc(href.offset,tcgsize2size[fromsize]-2);
              tmpreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_16,OS_32,href,tmpreg);
              tmpreg2:=getintregister(list,OS_32);
              for i:=1 to (tcgsize2size[fromsize]-1) div 2 do
                begin
                  if target_info.endian=endian_big then
                    dec(href.offset,2)
                  else
                    inc(href.offset,2);
                  a_load_ref_reg(list,OS_16,OS_32,href,tmpreg2);
                  list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*16,16));
                end;
              { extend/convert the assembled value to the target size }
              a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
            end
          else
            begin
              { assemble the value from byte loads }
              href:=ref;
              if target_info.endian=endian_big then
                inc(href.offset,tcgsize2size[fromsize]-1);
              tmpreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_8,OS_32,href,tmpreg);
              tmpreg2:=getintregister(list,OS_32);
              for i:=1 to tcgsize2size[fromsize]-1 do
                begin
                  if target_info.endian=endian_big then
                    dec(href.offset)
                  else
                    inc(href.offset);
                  a_load_ref_reg(list,OS_8,OS_32,href,tmpreg2);
                  list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*8,8));
                end;
              a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
            end;
        end;
      else
        inherited;
    end;
  end;
procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
  var
    instr: taicpu;
  begin
    { we use both 32 and 64 bit registers -> insert conversion when
      we have to truncate/sign extend inside the (32 or 64 bit) register
      holding the value, and when we sign extend from a 32 to a 64 bit
      register }
    if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
       ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
        (fromsize<>tosize) and
        not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
       ((fromsize in [OS_S8,OS_S16,OS_S32]) and
        (tosize in [OS_64,OS_S64])) or
       { needs to mask out the sign in the top 16 bits }
       ((fromsize=OS_S8) and
        (tosize=OS_16)) then
      begin
        { explicit zero/sign extension to the target size }
        case tosize of
          OS_8:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
          OS_16:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
          OS_S8:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
          OS_S16:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
          { while "mov wN, wM" automatically inserts a zero-extension and
            hence we could encode a 64->32 bit move like that, the problem
            is that we then can't distinguish 64->32 from 32->32 moves, and
            the 64->32 truncation could be removed altogether... So use a
            different instruction }
          OS_32,
          OS_S32:
            { in theory, reg1 should be 64 bit here (since fromsize>tosize),
              but because of the way location_force_register() tries to
              avoid superfluous zero/sign extensions, it's not always the
              case -> also force reg1 to 64 bit }
            list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
          OS_64,
          OS_S64:
            list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
          else
            internalerror(2002090901);
        end;
      end
    else
      begin
        { 32 -> 32 bit move implies zero extension (sign extensions have
          been handled above) -> also use for 32 <-> 64 bit moves }
        if not(fromsize in [OS_64,OS_S64]) or
           not(tosize in [OS_64,OS_S64]) then
          instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
        else
          instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
        list.Concat(instr);
        { Notify the register allocator that we have written a move instruction so
          it can try to eliminate it. }
        add_move_instruction(instr);
      end;
  end;
procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
  var
    href: treference;
    so: tshifterop;
    op: tasmop;
  begin
    { Compute the effective address of ref into register r. }
    op:=A_LDR;
    href:=ref;
    { simplify as if we're going to perform a regular 64 bit load, using
      "r" as the new base register if possible/necessary }
    make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
    { load literal? }
    if assigned(href.symbol) then
      begin
        if (href.base<>NR_NO) or
           (href.index<>NR_NO) or
           not assigned(href.symboldata) then
          internalerror(2014110912);
        { pc-relative address of a local symbol }
        list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
      end
    else
      begin
        if href.index<>NR_NO then
          begin
            if href.shiftmode<>SM_None then
              begin
                { "add" supports a superset of the shift modes supported by
                  load/store instructions }
                shifterop_reset(so);
                so.shiftmode:=href.shiftmode;
                so.shiftimm:=href.shiftimm;
                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
              end
            else
              a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
          end
        else if href.offset<>0 then
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
        else
          { plain base register -> simple move }
          a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
      end;
  end;
procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
  begin
    { floating point values are handled via the mm register set on
      aarch64 (see a_loadmm_*) -> this entry point must never be reached }
    internalerror(2014122107)
  end;
procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  begin
    { floating point values are handled via the mm register set on
      aarch64 (see a_loadmm_*) -> this entry point must never be reached }
    internalerror(2014122108)
  end;
procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  begin
    { floating point values are handled via the mm register set on
      aarch64 (see a_loadmm_*) -> this entry point must never be reached }
    internalerror(2014122109)
  end;
procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
  var
    instr: taicpu;
  begin
    { Scalar fp register-to-register move, converting the precision if
      fromsize<>tosize; vector shuffles are not supported. }
    if assigned(shuffle) and
       not shufflescalar(shuffle) then
      internalerror(2014122104);
    if fromsize=tosize then
      begin
        instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
        { Notify the register allocator that we have written a move
          instruction so it can try to eliminate it. }
        add_move_instruction(instr);
        { FMOV cannot generate a floating point exception }
      end
    else
      begin
        if (reg_cgsize(reg1)<>fromsize) or
           (reg_cgsize(reg2)<>tosize) then
          internalerror(2014110913);
        instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
        { FCVT can raise fp exceptions.
          NOTE(review): the check is requested before the FCVT itself is
          appended to the list -- confirm this ordering is intended. }
        maybe_check_for_fpu_exception(list);
      end;
    list.Concat(instr);
  end;
  976. procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  977. var
  978. tmpreg: tregister;
  979. begin
  980. if assigned(shuffle) and
  981. not shufflescalar(shuffle) then
  982. internalerror(2014122105);
  983. tmpreg:=NR_NO;
  984. if (fromsize<>tosize) then
  985. begin
  986. tmpreg:=reg;
  987. reg:=getmmregister(list,fromsize);
  988. end;
  989. handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
  990. if (fromsize<>tosize) then
  991. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
  992. end;
  993. procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  994. var
  995. tmpreg: tregister;
  996. begin
  997. if assigned(shuffle) and
  998. not shufflescalar(shuffle) then
  999. internalerror(2014122106);
  1000. if (fromsize<>tosize) then
  1001. begin
  1002. tmpreg:=getmmregister(list,tosize);
  1003. a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
  1004. reg:=tmpreg;
  1005. end;
  1006. handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
  1007. end;
  1008. procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  1009. begin
  1010. if not shufflescalar(shuffle) then
  1011. internalerror(2014122801);
  1012. if not(tcgsize2size[fromsize] in [4,8]) or
  1013. (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
  1014. internalerror(2014122803);
  1015. list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
  1016. end;
  1017. procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
  1018. var
  1019. r : tregister;
  1020. begin
  1021. if not shufflescalar(shuffle) then
  1022. internalerror(2014122802);
  1023. if not(tcgsize2size[fromsize] in [4,8]) or
  1024. (tcgsize2size[fromsize]>tcgsize2size[tosize]) then
  1025. internalerror(2014122804);
  1026. if tcgsize2size[fromsize]<tcgsize2size[tosize] then
  1027. r:=makeregsize(intreg,fromsize)
  1028. else
  1029. r:=intreg;
  1030. list.concat(taicpu.op_reg_reg(A_UMOV,r,mmreg));
  1031. end;
procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
  begin
    case op of
      { "xor Vx,Vx" is used to initialize global regvars to 0 }
      OP_XOR:
        begin
          { only supported as self-xor (i.e. zeroing) without a shuffle }
          if (src<>dst) or
             (reg_cgsize(src)<>size) or
             assigned(shuffle) then
            internalerror(2015011401);
          case size of
            OS_F32,
            OS_F64:
              { zero the register with "movi dN,#0" }
              list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
            else
              internalerror(2015011402);
          end;
        end
      else
        internalerror(2015011403);
    end;
  end;
procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
  var
    bitsize: longint;
  begin
    { Find the index of the lowest (reverse=false) resp. highest
      (reverse=true) set bit of src; dst becomes 255 if src is 0. }
    if srcsize in [OS_64,OS_S64] then
      begin
        bitsize:=64;
      end
    else
      begin
        bitsize:=32;
      end;
    { source is 0 -> dst will have to become 255 }
    list.concat(taicpu.op_reg_const(A_CMP,src,0));
    if reverse then
      begin
        list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
        { xor 31/63 is the same as setting the lower 5/6 bits to
          "31/63-(lower 5/6 bits of dst)" }
        list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
      end
    else
      begin
        { clz of the bit-reversed value = index of the lowest set bit }
        list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
        list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
      end;
    { set dst to -1 if src was 0 }
    list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
    { mask the -1 to 255 if src was 0 (anyone find a two-instruction
      branch-free version? All of mine are 3...) }
    list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,makeregsize(dst,OS_32),makeregsize(dst,OS_32)),PF_B));
  end;
procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
  var
    href: treference;
    hreg1, hreg2, tmpreg: tregister;
  begin
    { Store to a reference whose alignment is smaller than the access
      size; only 64 bit stores are special-cased here, everything else
      is deferred to the generic implementation. }
    if fromsize in [OS_64,OS_S64] then
      begin
        { split into two 32 bit stores }
        hreg1:=getintregister(list,OS_32);
        hreg2:=getintregister(list,OS_32);
        { hreg1 := low 32 bits, hreg2 := high 32 bits }
        a_load_reg_reg(list,OS_32,OS_32,makeregsize(register,OS_32),hreg1);
        a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
        if target_info.endian=endian_big then
          begin
            { on big endian the high word is stored first -> swap }
            tmpreg:=hreg1;
            hreg1:=hreg2;
            hreg2:=tmpreg;
          end;
        { can we use STP? }
        if (ref.alignment=4) and
           (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
          list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
        else
          begin
            a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
            href:=ref;
            inc(href.offset,4);
            a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
          end;
      end
    else
      inherited;
  end;
  1119. procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
  1120. const
  1121. overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
  1122. begin
  1123. if (op in overflowops) and
  1124. (size in [OS_8,OS_S8,OS_16,OS_S16]) then
  1125. a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
  1126. end;
  1127. procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
  1128. begin
  1129. optimize_op_const(size,op,a);
  1130. case op of
  1131. OP_NONE:
  1132. exit;
  1133. OP_MOVE:
  1134. a_load_const_reg(list,size,a,reg);
  1135. OP_NEG,OP_NOT:
  1136. internalerror(200306011);
  1137. else
  1138. a_op_const_reg_reg(list,op,size,a,reg,reg);
  1139. end;
  1140. end;
  1141. procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
  1142. begin
  1143. Case op of
  1144. OP_NEG,
  1145. OP_NOT:
  1146. begin
  1147. list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
  1148. maybeadjustresult(list,op,size,dst);
  1149. end
  1150. else
  1151. a_op_reg_reg_reg(list,op,size,src,dst,dst);
  1152. end;
  1153. end;
procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
  var
    l: tlocation;
  begin
    { same as the checkoverflow variant below, with the overflow
      location discarded }
    a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
  end;
procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
  var
    hreg: tregister;
  begin
    { Emit "dst := src2 op src1" (note the operand order in the final
      concat below). }
    { no ROLV opcode... }
    if op=OP_ROL then
      begin
        case size of
          OS_32,OS_S32,
          OS_64,OS_S64:
            begin
              { rotate left by n = rotate right by (bitsize - n) }
              hreg:=getintregister(list,size);
              a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
              a_op_reg_reg(list,OP_SUB,size,src1,hreg);
              a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
              exit;
            end;
          else
            internalerror(2014111005);
        end;
      end
    else if (op=OP_ROR) and
       not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
      internalerror(2014111006);
    if TOpCG2AsmOpReg[op]=A_NONE then
      internalerror(2014111007);
    list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
    maybeadjustresult(list,op,size,dst);
  end;
procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
  var
    shiftcountmask: longint;
    constreg: tregister;
  begin
    { Emit "dst := src op a"; when setflags is true, ovloc receives the
      flags condition that signals overflow (for 64 bit sizes). }
    { add/sub instructions have only positive immediate operands }
    if (op in [OP_ADD,OP_SUB]) and
       (a<0) then
      begin
        if op=OP_ADD then
          op:=op_SUB
        else
          op:=OP_ADD;
        { avoid range/overflow error in case a = low(tcgint) }
        {$push}{$r-}{$q-}
        a:=-a;
        {$pop}
      end;
    ovloc.loc:=LOC_VOID;
    optimize_op_const(size,op,a);
    case op of
      OP_NONE:
        begin
          { folded to a plain register copy }
          a_load_reg_reg(list,size,size,src,dst);
          exit;
        end;
      OP_MOVE:
        begin
          { folded to a constant load }
          a_load_const_reg(list,size,a,dst);
          exit;
        end;
      else
        ;
    end;
    case op of
      OP_ADD,
      OP_SUB:
        begin
          handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
          { on a 64 bit target, overflows with smaller data types
            are handled via range errors }
          if setflags and
             (size in [OS_64,OS_S64]) then
            begin
              location_reset(ovloc,LOC_FLAGS,OS_8);
              { unsigned: carry set (add) / carry clear (sub);
                signed: overflow flag }
              if size=OS_64 then
                if op=OP_ADD then
                  ovloc.resflags:=F_CS
                else
                  ovloc.resflags:=F_CC
              else
                ovloc.resflags:=F_VS;
            end;
        end;
      OP_OR,
      OP_AND,
      OP_XOR:
        begin
          { normalise the immediate to an unsigned 32 bit pattern }
          if not(size in [OS_64,OS_S64]) then
            a:=cardinal(a);
          if is_shifter_const(a,size) then
            list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
          else
            begin
              { not encodable as a logical immediate -> load into a
                register first }
              constreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,constreg);
              a_op_reg_reg_reg(list,op,size,constreg,src,dst);
            end;
        end;
      OP_SHL,
      OP_SHR,
      OP_SAR:
        begin
          if size in [OS_64,OS_S64] then
            shiftcountmask:=63
          else
            shiftcountmask:=31;
          if (a and shiftcountmask)<>0 Then
            list.concat(taicpu.op_reg_reg_const(
              TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
          else
            a_load_reg_reg(list,size,size,src,dst);
          { shift counts beyond the register width are invalid }
          if (a and not(tcgint(shiftcountmask)))<>0 then
            internalError(2014112101);
        end;
      OP_ROL,
      OP_ROR:
        begin
          case size of
            OS_32,OS_S32:
              if (a and not(tcgint(31)))<>0 then
                internalError(2014112102);
            OS_64,OS_S64:
              if (a and not(tcgint(63)))<>0 then
                internalError(2014112103);
            else
              internalError(2014112104);
          end;
          { there's only a ror opcode }
          if op=OP_ROL then
            a:=(tcgsize2size[size]*8)-a;
          list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
        end;
      OP_MUL,
      OP_IMUL,
      OP_DIV,
      OP_IDIV:
        begin
          { no immediate forms exist -> materialise the constant }
          constreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,constreg);
          a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
        end;
      else
        internalerror(2014111403);
    end;
    maybeadjustresult(list,op,size,dst);
  end;
procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
  var
    tmpreg1, tmpreg2: tregister;
  begin
    { Emit "dst := src2 op src1"; when setflags is true, ovloc receives
      the flags condition that signals overflow. }
    ovloc.loc:=LOC_VOID;
    { overflow can only occur with 64 bit calculations on 64 bit cpus }
    if setflags and
       (size in [OS_64,OS_S64]) then
      begin
        case op of
          OP_ADD,
          OP_SUB:
            begin
              { adds/subs set the carry/overflow flags directly }
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
              ovloc.loc:=LOC_FLAGS;
              if size=OS_64 then
                if op=OP_ADD then
                  ovloc.resflags:=F_CS
                else
                  ovloc.resflags:=F_CC
              else
                ovloc.resflags:=F_VS;
              { finished }
              exit;
            end;
          OP_MUL:
            begin
              { check whether the upper 64 bit of the 128 bit product is 0 }
              tmpreg1:=getintregister(list,OS_64);
              list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
              list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
              ovloc.loc:=LOC_FLAGS;
              ovloc.resflags:=F_NE;
              { still have to perform the actual multiplication }
            end;
          OP_IMUL:
            begin
              { check whether the upper 64 bits of the 128 bit multiplication
                result have the same value as the replicated sign bit of the
                lower 64 bits }
              tmpreg1:=getintregister(list,OS_64);
              list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
              { calculate lower 64 bits (afterwards, because dst may be
                equal to src1 or src2) }
              a_op_reg_reg_reg(list,op,size,src1,src2,dst);
              { replicate sign bit }
              tmpreg2:=getintregister(list,OS_64);
              a_op_const_reg_reg(list,OP_SAR,OS_S64,63,dst,tmpreg2);
              list.concat(taicpu.op_reg_reg(A_CMP,tmpreg1,tmpreg2));
              ovloc.loc:=LOC_FLAGS;
              ovloc.resflags:=F_NE;
              { finished }
              exit;
            end;
          OP_IDIV,
          OP_DIV:
            begin
              { not handled here, needs div-by-zero check (dividing by zero
                just gives a 0 result on aarch64), and low(int64) div -1
                check for overflow) }
              internalerror(2014122101);
            end;
          else
            internalerror(2019050936);
        end;
      end;
    a_op_reg_reg_reg(list,op,size,src1,src2,dst);
  end;
{*************** compare instructions ****************}
procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
  var
    op: tasmop;
  begin
    { negative immediates are compared using cmn (compare negative)
      with the absolute value }
    if a>=0 then
      op:=A_CMP
    else
      op:=A_CMN;
    { avoid range/overflow error in case a=low(tcgint) }
    {$push}{$r-}{$q-}
    handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
    {$pop}
    a_jmp_cond(list,cmp_op,l);
  end;
procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
  begin
    { operand order: the comparison evaluates "reg2 cmp_op reg1" }
    list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
    a_jmp_cond(list,cmp_op,l);
  end;
  1394. procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
  1395. var
  1396. ai: taicpu;
  1397. begin
  1398. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name,AT_FUNCTION));
  1399. ai.is_jmp:=true;
  1400. list.Concat(ai);
  1401. end;
  1402. procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
  1403. var
  1404. ai: taicpu;
  1405. begin
  1406. ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
  1407. ai.is_jmp:=true;
  1408. list.Concat(ai);
  1409. end;
  1410. procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
  1411. var
  1412. ai: taicpu;
  1413. begin
  1414. ai:=TAiCpu.op_sym(A_B,l);
  1415. ai.is_jmp:=true;
  1416. ai.SetCondition(TOpCmp2AsmCond[cond]);
  1417. list.Concat(ai);
  1418. end;
  1419. procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
  1420. var
  1421. ai : taicpu;
  1422. begin
  1423. ai:=Taicpu.op_sym(A_B,l);
  1424. ai.is_jmp:=true;
  1425. ai.SetCondition(flags_to_cond(f));
  1426. list.Concat(ai);
  1427. end;
procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
  begin
    { cset sets reg to 1 if the condition holds and to 0 otherwise }
    list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
  end;
procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
  begin
    { we need an explicit overflow location, because there are many
      possibilities (not just the overflow flag, which is only used for
      signed add/sub) -> callers must use g_overflowcheck_loc instead }
    internalerror(2014112303);
  end;
procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
  var
    hl : tasmlabel;
    hflags : tresflags;
  begin
    { Emit an overflow check based on ovloc (as produced by the
      *_checkoverflow methods): skip the FPC_OVERFLOW call when the
      inverted overflow condition holds. }
    if not(cs_check_overflow in current_settings.localswitches) then
      exit;
    current_asmdata.getjumplabel(hl);
    case ovloc.loc of
      LOC_FLAGS:
        begin
          hflags:=ovloc.resflags;
          inverse_flags(hflags);
          cg.a_jmp_flags(list,hflags,hl);
        end;
      else
        internalerror(2014112304);
    end;
    a_call_name(list,'FPC_OVERFLOW',false);
    a_label(list,hl);
  end;
  1460. { *********** entry/exit code and address loading ************ }
function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
  var
    ref: treference;
    sr: tsuperregister;
    pairreg: tregister;
  begin
    { Push all registers of type rt in [lowsr..highsr] that are used in
      the current procedure, two at a time via
      "stp reg1,reg2,[sp,#-16]!".  Returns the number of bytes pushed
      (a multiple of 16). }
    result:=0;
    reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
    ref.addressmode:=AM_PREINDEXED;
    pairreg:=NR_NO;
    { store all used registers pairwise }
    for sr:=lowsr to highsr do
      if sr in rg[rt].used_in_proc then
        if pairreg=NR_NO then
          { remember the first register of the next pair }
          pairreg:=newreg(rt,sr,sub)
        else
          begin
            inc(result,16);
            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
            pairreg:=NR_NO
          end;
    { one left -> store twice (stack must be 16 bytes aligned) }
    if pairreg<>NR_NO then
      begin
        list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
        inc(result,16);
      end;
  end;
  1489. procedure FixupOffsets(p:TObject;arg:pointer);
  1490. var
  1491. sym: tabstractnormalvarsym absolute p;
  1492. begin
  1493. if (tsym(p).typ in [paravarsym,localvarsym]) and
  1494. (sym.localloc.loc=LOC_REFERENCE) and
  1495. (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
  1496. begin
  1497. sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
  1498. dec(sym.localloc.reference.offset,PLongint(arg)^);
  1499. end;
  1500. end;
{ Emits the procedure prologue: pushes FP/LR, sets up the frame pointer,
  saves the callee-saved integer and vector registers, allocates the
  local stack space, and (for routines with nested procedures) rewrites
  SP-relative variable locations to FP-relative ones — see the long
  comment below for why. }
procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
  var
    ref: treference;
    totalstackframesize: longint;
  begin
    if nostackframe then
      exit;
    { stack pointer has to be aligned to 16 bytes at all times }
    localsize:=align(localsize,16);
    { save stack pointer and return address: stp fp,lr,[sp,#-16]! }
    reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
    ref.addressmode:=AM_PREINDEXED;
    list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
    { initialise frame pointer }
    a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
    { total frame size = locals + saved registers; needed by
      FixupOffsets below }
    totalstackframesize:=localsize;
    { save modified integer registers }
    inc(totalstackframesize,
      save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
    { only the lower 64 bits of the modified vector registers need to be
      saved; if the caller needs the upper 64 bits, it has to save them
      itself }
    inc(totalstackframesize,
      save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
    { allocate stack space }
    if localsize<>0 then
      begin
        localsize:=align(localsize,16);
        current_procinfo.final_localsize:=localsize;
        { NR_IP0 may be used as scratch if the size doesn't fit in an
          immediate }
        handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
      end;
    { By default, we use the frame pointer to access parameters passed via
      the stack and the stack pointer to address local variables and temps
      because
       a) we can use bigger positive than negative offsets (so accessing
          locals via negative offsets from the frame pointer would be less
          efficient)
       b) we don't know the local size while generating the code, so
          accessing the parameters via the stack pointer is not possible
          without copying them
      The problem with this is the get_frame() intrinsic:
       a) it must return the same value as what we pass as parentfp
          parameter, since that's how it's used in the TP-style objects unit
       b) its return value must usable to access all local data from a
          routine (locals and parameters), since it's all the nested
          routines have access to
       c) its return value must be usable to construct a backtrace, as it's
          also used by the exception handling routines
      The solution we use here, based on something similar that's done in
      the MIPS port, is to generate all accesses to locals in the routine
      itself SP-relative, and then after the code is generated and the local
      size is known (namely, here), we change all SP-relative variables/
      parameters into FP-relative ones. This means that they'll be accessed
      less efficiently from nested routines, but those accesses are indirect
      anyway and at least this way they can be accessed at all }
    if current_procinfo.has_nestedprocs then
      begin
        current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
        current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
      end;
  end;
{ GOT pointer initialisation hook: intentionally empty on AArch64. }
procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
  begin
    { nothing to do on Darwin or Linux }
  end;
{ Register restoring is folded into the epilogue; this hook is a no-op. }
procedure tcgaarch64.g_restore_registers(list:TAsmList);
  begin
    { done in g_proc_exit }
  end;
{ Counterpart of save_regs: reloads the callee-saved registers of type rt
  in [lowsr..highsr] from the stack with post-indexed "[sp],#16"
  accesses. Must mirror save_regs exactly: an odd register count means
  the highest register was stored twice, and the pairs are reloaded in
  the reverse order they were pushed. }
procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
  var
    ref: treference;
    sr, highestsetsr: tsuperregister;
    pairreg: tregister;
    regcount: longint;
  begin
    { post-indexed [sp],#16 -> each load pops one 16-byte slot }
    reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
    ref.addressmode:=AM_POSTINDEXED;
    { highest reg stored twice? }
    regcount:=0;
    highestsetsr:=RS_NO;
    for sr:=lowsr to highsr do
      if sr in rg[rt].used_in_proc then
        begin
          inc(regcount);
          highestsetsr:=sr;
        end;
    if odd(regcount) then
      begin
        { the duplicated topmost register only needs a single LDR }
        list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
        highestsetsr:=pred(highestsetsr);
      end;
    { load all (other) used registers pairwise }
    pairreg:=NR_NO;
    for sr:=highestsetsr downto lowsr do
      if sr in rg[rt].used_in_proc then
        if pairreg=NR_NO then
          pairreg:=newreg(rt,sr,sub)
        else
          begin
            list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
            pairreg:=NR_NO
          end;
    { There can't be any register left }
    if pairreg<>NR_NO then
      internalerror(2014112602);
  end;
{ Emits the procedure epilogue: deallocates local stack space, restores
  callee-saved registers (mirroring g_proc_entry), pops FP/LR and
  returns. }
procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
  var
    ref: treference;
    regsstored: boolean;
    sr: tsuperregister;
  begin
    if not(nostackframe) and
      { we do not need an exit stack frame when we never return
        * the final ret is left so the peephole optimizer can easily do call/ret -> jmp or call conversions
        * the entry stack frame must be normally generated because the subroutine could be still left by
          an exception and then the unwinding code might need to restore the registers stored by the entry code }
      not(po_noreturn in current_procinfo.procdef.procoptions) then
      begin
        { if no registers have been stored, we don't have to subtract the
          allocated temp space from the stack pointer }
        regsstored:=false;
        for sr:=RS_X19 to RS_X28 do
          if sr in rg[R_INTREGISTER].used_in_proc then
            begin
              regsstored:=true;
              break;
            end;
        if not regsstored then
          for sr:=RS_D8 to RS_D15 do
            if sr in rg[R_MMREGISTER].used_in_proc then
              begin
                regsstored:=true;
                break;
              end;
        { restore registers (and stack pointer) }
        if regsstored then
          begin
            { undo the SUB from the prologue so SP points at the saved
              registers, then pop them in reverse order of saving }
            if current_procinfo.final_localsize<>0 then
              handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
            load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
            load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
          end
        else if current_procinfo.final_localsize<>0 then
          { restore stack pointer }
          { no registers saved -> FP still holds the value SP had right
            after the FP/LR push, so copying it back frees the locals }
          a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
        { restore framepointer and return address: ldp fp,lr,[sp],#16 }
        reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
        ref.addressmode:=AM_POSTINDEXED;
        list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
      end;
    { return }
    list.concat(taicpu.op_none(A_RET));
  end;
{ Register saving is folded into the prologue; this hook is a no-op. }
procedure tcgaarch64.g_save_registers(list : TAsmList);
  begin
    { done in g_proc_entry }
  end;
  1662. { ************* concatcopy ************ }
{ Copies len bytes from source to dest by calling the RTL helper
  FPC_MOVE(source, dest, len); used when an inlined copy would be too
  large (see g_concatcopy). }
procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
  var
    paraloc1,paraloc2,paraloc3 : TCGPara;
    pd : tprocdef;
  begin
    pd:=search_system_proc('MOVE');
    paraloc1.init;
    paraloc2.init;
    paraloc3.init;
    paramanager.getcgtempparaloc(list,pd,1,paraloc1);
    paramanager.getcgtempparaloc(list,pd,2,paraloc2);
    paramanager.getcgtempparaloc(list,pd,3,paraloc3);
    { parameters are loaded in reverse order (len, dest, source) }
    a_load_const_cgpara(list,OS_SINT,len,paraloc3);
    a_loadaddr_ref_cgpara(list,dest,paraloc2);
    a_loadaddr_ref_cgpara(list,source,paraloc1);
    paramanager.freecgpara(list,paraloc3);
    paramanager.freecgpara(list,paraloc2);
    paramanager.freecgpara(list,paraloc1);
    { FPC_MOVE clobbers the default-call volatile registers }
    alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
    alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
    a_call_name(list,'FPC_MOVE',false);
    dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
    dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
    paraloc3.done;
    paraloc2.done;
    paraloc1.done;
  end;
  1690. procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
  1691. var
  1692. sourcebasereplaced, destbasereplaced: boolean;
  1693. { get optimal memory operation to use for loading/storing data
  1694. in an unrolled loop }
  1695. procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
  1696. begin
  1697. if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
  1698. (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
  1699. begin
  1700. memop:=unscaledop;
  1701. needsimplify:=true;
  1702. end
  1703. else if (unscaledop<>A_NONE) and
  1704. (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
  1705. (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
  1706. begin
  1707. memop:=unscaledop;
  1708. needsimplify:=false;
  1709. end
  1710. else
  1711. begin
  1712. memop:=scaledop;
  1713. needsimplify:=true;
  1714. end;
  1715. end;
  1716. { adjust the offset and/or addressing mode after a load/store so it's
  1717. correct for the next one of the same size }
  1718. procedure updaterefafterloadstore(var ref: treference; oplen: longint);
  1719. begin
  1720. case ref.addressmode of
  1721. AM_OFFSET:
  1722. inc(ref.offset,oplen);
  1723. AM_POSTINDEXED:
  1724. { base register updated by instruction, next offset can remain
  1725. the same }
  1726. ;
  1727. AM_PREINDEXED:
  1728. begin
  1729. { base register updated by instruction -> next instruction can
  1730. use post-indexing with offset = sizeof(operation) }
  1731. ref.offset:=0;
  1732. ref.addressmode:=AM_OFFSET;
  1733. end;
  1734. end;
  1735. end;
    { generate a load/store and adjust the reference offset to the next
      memory location if necessary }
    procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
      begin
        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
        updaterefafterloadstore(ref,tcgsize2size[opsize]);
      end;
    { generate a dual load/store (ldp/stp) and adjust the reference offset to
      the next memory location if necessary (two registers -> 2*opsize) }
    procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
      begin
        list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
        updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
      end;
    { turn a reference into a pre- or post-indexed reference for use in a
      load/store of a particular size; may replace the base register with a
      fresh one (tracked via basereplaced so it only happens once per
      source/dest) }
    procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
      var
        tmpreg: tregister;
        scaledoffset: longint;
        orgaddressmode: taddressmode;
      begin
        { bytes transferred per instruction (ldp/stp move two registers) }
        scaledoffset:=tcgsize2size[opsize];
        if scaledop in [A_LDP,A_STP] then
          scaledoffset:=scaledoffset*2;
        { can we use the reference as post-indexed without changes? }
        if forcepostindexing then
          begin
            orgaddressmode:=ref.addressmode;
            ref.addressmode:=AM_POSTINDEXED;
            if (orgaddressmode=AM_POSTINDEXED) or
               ((ref.offset=0) and
                (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
              begin
                { just change the post-indexed offset to the access size }
                ref.offset:=scaledoffset;
                { and replace the base register if that didn't happen yet
                  (could be sp or a regvar) }
                if not basereplaced then
                  begin
                    tmpreg:=getaddressregister(list);
                    a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
                    ref.base:=tmpreg;
                    basereplaced:=true;
                  end;
                exit;
              end;
            ref.addressmode:=orgaddressmode;
          end;
{$ifdef dummy}
        This could in theory be useful in case you have a concatcopy from
        e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
        very unlikely. Disabled because it still needs fixes, as it
        also generates pre-indexed loads right now at the very end for the
        left-over gencopies
        { can we turn it into a pre-indexed reference for free? (after the
          first operation, it will be turned into an offset one) }
        if not forcepostindexing and
           (ref.offset<>0) then
          begin
            orgaddressmode:=ref.addressmode;
            ref.addressmode:=AM_PREINDEXED;
            tmpreg:=ref.base;
            if not basereplaced and
               (ref.base=tmpreg) then
              begin
                tmpreg:=getaddressregister(list);
                a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
                ref.base:=tmpreg;
                basereplaced:=true;
              end;
            if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
              make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
            exit;
          end;
{$endif dummy}
        if not forcepostindexing then
          begin
            ref.addressmode:=AM_OFFSET;
            make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
            { this may still cause problems if the final offset is no longer
              a simple ref; it's a bit complicated to pass all information
              through at all places and check that here, so play safe: we
              currently never generate unrolled copies for more than 64
              bytes (32 with non-double-register copies) }
            if ref.index=NR_NO then
              begin
                if ((scaledop in [A_LDP,A_STP]) and
                    (ref.offset<((64-8)*tcgsize2size[opsize]))) or
                   ((scaledop in [A_LDUR,A_STUR]) and
                    (ref.offset<(255-8*tcgsize2size[opsize]))) or
                   ((scaledop in [A_LDR,A_STR]) and
                    (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
                  exit;
              end;
          end;
        { fall-back: compute the address into a fresh register and restart
          from offset 0 (post-indexed) or a plain offset reference }
        tmpreg:=getaddressregister(list);
        a_loadaddr_ref_reg(list,ref,tmpreg);
        basereplaced:=true;
        if forcepostindexing then
          begin
            reference_reset_base(ref,tmpreg,scaledoffset,ref.temppos,ref.alignment,ref.volatility);
            ref.addressmode:=AM_POSTINDEXED;
          end
        else
          begin
            reference_reset_base(ref,tmpreg,0,ref.temppos,ref.alignment,ref.volatility);
            ref.addressmode:=AM_OFFSET;
          end
      end;
  1846. { prepare a reference for use by gencopy. This is done both after the
  1847. unrolled and regular copy loop -> get rid of post-indexing mode, make
  1848. sure ref is valid }
  1849. procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
  1850. var
  1851. simplify: boolean;
  1852. begin
  1853. if ref.addressmode=AM_POSTINDEXED then
  1854. ref.offset:=tcgsize2size[opsize];
  1855. getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
  1856. if simplify then
  1857. begin
  1858. makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
  1859. op:=scaledop;
  1860. end;
  1861. end;
    { generate a copy from source to dest of size opsize/postfix, via a
      temporary register; source/dest are adjusted to the next location }
    procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
      var
        reg: tregister;
        loadop, storeop: tasmop;
      begin
        preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
        preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
        reg:=getintregister(list,opsize);
        genloadstore(list,loadop,reg,source,postfix,opsize);
        genloadstore(list,storeop,reg,dest,postfix,opsize);
      end;
    { copy the leftovers after an unrolled or regular copy loop, using at
      most one access each of 8/4/2/1 bytes (len < 16 at this point) }
    procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
      begin
        { stop post-indexing if we did so in the loop, since in that case all
          offsets definitely can be represented now }
        if source.addressmode=AM_POSTINDEXED then
          begin
            source.addressmode:=AM_OFFSET;
            source.offset:=0;
          end;
        if dest.addressmode=AM_POSTINDEXED then
          begin
            dest.addressmode:=AM_OFFSET;
            dest.offset:=0;
          end;
        { transfer the leftovers }
        if len>=8 then
          begin
            dec(len,8);
            gencopy(list,source,dest,PF_NONE,OS_64);
          end;
        if len>=4 then
          begin
            dec(len,4);
            gencopy(list,source,dest,PF_NONE,OS_32);
          end;
        if len>=2 then
          begin
            dec(len,2);
            gencopy(list,source,dest,PF_H,OS_16);
          end;
        if len>=1 then
          begin
            dec(len);
            gencopy(list,source,dest,PF_B,OS_8);
          end;
      end;
  const
    { approximate code sizes, used when deciding between an inline loop
      and a call to FPC_MOVE }
    { load_length + loop dec + cbnz }
    loopoverhead=12;
    { loop overhead + load + store }
    totallooplen=loopoverhead + 8;
  var
    totalalign: longint;
    maxlenunrolled: tcgint;
    loadop, storeop: tasmop;
    opsize: tcgsize;
    postfix: toppostfix;
    tmpsource, tmpdest: treference;
    scaledstoreop, unscaledstoreop,
    scaledloadop, unscaledloadop: tasmop;
    regs: array[1..8] of tregister;
    countreg: tregister;
    i, regcount: longint;
    hl: tasmlabel;
    simplifysource, simplifydest: boolean;
  begin
    if len=0 then
      exit;
    sourcebasereplaced:=false;
    destbasereplaced:=false;
    { maximum common alignment }
    totalalign:=max(1,newalignment(source.alignment,dest.alignment));
    { use a simple load/store? }
    if (len in [1,2,4,8]) and
       ((totalalign>=(len div 2)) or
        (source.alignment=len) or
        (dest.alignment=len)) then
      begin
        opsize:=int_cgsize(len);
        a_load_ref_ref(list,opsize,opsize,source,dest);
        exit;
      end;
    { alignment > length is not useful, and would break some checks below }
    while totalalign>len do
      totalalign:=totalalign div 2;
    { operation sizes to use based on common alignment }
    case totalalign of
      1:
        begin
          postfix:=PF_B;
          opsize:=OS_8;
        end;
      2:
        begin
          postfix:=PF_H;
          opsize:=OS_16;
        end;
      4:
        begin
          postfix:=PF_None;
          opsize:=OS_32;
        end
      else
        begin
          totalalign:=8;
          postfix:=PF_None;
          opsize:=OS_64;
        end;
    end;
    { maximum length to handled with an unrolled loop (4 loads + 4 stores) }
    maxlenunrolled:=min(totalalign,8)*4;
    { ldp/stp -> 2 registers per instruction }
    if (totalalign>=4) and
       (len>=totalalign*2) then
      begin
        maxlenunrolled:=maxlenunrolled*2;
        scaledstoreop:=A_STP;
        scaledloadop:=A_LDP;
        unscaledstoreop:=A_NONE;
        unscaledloadop:=A_NONE;
      end
    else
      begin
        scaledstoreop:=A_STR;
        scaledloadop:=A_LDR;
        unscaledstoreop:=A_STUR;
        unscaledloadop:=A_LDUR;
      end;
    { we only need 4 instructions extra to call FPC_MOVE }
    if cs_opt_size in current_settings.optimizerswitches then
      maxlenunrolled:=maxlenunrolled div 2;
    if (len>maxlenunrolled) and
       (len>totalalign*8) then
      begin
        g_concatcopy_move(list,source,dest,len);
        exit;
      end;
    simplifysource:=true;
    simplifydest:=true;
    tmpsource:=source;
    tmpdest:=dest;
    { can we directly encode all offsets in an unrolled loop? }
    if len<=maxlenunrolled then
      begin
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
{$endif extdebug}
        { the leftovers will be handled separately -> -(len mod opsize) }
        inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
        { additionally, the last regular load/store will be at
          offset+len-opsize (if len-(len mod opsize)>len) }
        if tmpsource.offset>source.offset then
          dec(tmpsource.offset,tcgsize2size[opsize]);
        getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
        inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
        if tmpdest.offset>dest.offset then
          dec(tmpdest.offset,tcgsize2size[opsize]);
        getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
        tmpsource:=source;
        tmpdest:=dest;
        { if we can't directly encode all offsets, simplify }
        if simplifysource then
          begin
            loadop:=scaledloadop;
            makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
          end;
        if simplifydest then
          begin
            storeop:=scaledstoreop;
            makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
          end;
        regcount:=len div tcgsize2size[opsize];
        { in case we transfer two registers at a time, we copy an even
          number of registers }
        if loadop=A_LDP then
          regcount:=regcount and not(1);
        { initialise for dfa }
        regs[low(regs)]:=NR_NO;
        { max 4 loads/stores -> max 8 registers (in case of ldp/stdp) }
        for i:=1 to regcount do
          regs[i]:=getintregister(list,opsize);
        if loadop=A_LDP then
          begin
            { load registers }
            for i:=1 to (regcount div 2) do
              gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
            { store registers }
            for i:=1 to (regcount div 2) do
              gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
          end
        else
          begin
            for i:=1 to regcount do
              genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
            for i:=1 to regcount do
              genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
          end;
        { leftover }
        len:=len-regcount*tcgsize2size[opsize];
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
{$endif extdebug}
      end
    else
      begin
{$ifdef extdebug}
        list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
{$endif extdebug}
        { regular loop -> definitely use post-indexing }
        loadop:=scaledloadop;
        makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
        storeop:=scaledstoreop;
        makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
        current_asmdata.getjumplabel(hl);
        countreg:=getintregister(list,OS_32);
        { fix: each ldp/stp iteration transfers 2*opsize bytes, so the
          iteration count is len div (2*opsize); without the parentheses
          "div" and "*" associate left-to-right and yield 4x too many
          iterations }
        if loadop=A_LDP then
          a_load_const_reg(list,OS_32,len div (tcgsize2size[opsize]*2),countreg)
        else
          a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
        a_label(list,hl);
        a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
        if loadop=A_LDP then
          begin
            regs[1]:=getintregister(list,opsize);
            regs[2]:=getintregister(list,opsize);
            gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
            gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
          end
        else
          begin
            regs[1]:=getintregister(list,opsize);
            genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
            genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
          end;
        list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
        { fix: the leftover must likewise be computed modulo the
          per-iteration transfer size (2*opsize for ldp/stp), otherwise
          up to opsize bytes would be silently dropped }
        if loadop=A_LDP then
          len:=len mod (tcgsize2size[opsize]*2)
        else
          len:=len mod tcgsize2size[opsize];
      end;
    gencopyleftovers(list,tmpsource,tmpdest,len);
  end;
{ Self-pointer adjustment is handled inline by g_intf_wrapper on this
  target, so a separate call is an internal error. }
procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  begin
    { This method is integrated into g_intf_wrapper and shouldn't be called separately }
    InternalError(2013020102);
  end;
{ Emits a check of the FPSR cumulative exception flags and a call to
  FPC_THROWFPUEXCEPTION when any relevant flag is set. When clear is
  true, marks the pending-check flag as handled. }
procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
  var
    r : TRegister;
    ai: taicpu;
    l1,l2: TAsmLabel;
  begin
    { so far, we assume all flavours of AArch64 need explicit floating point exception checking }
    if ((cs_check_fpu_exceptions in current_settings.localswitches) and
        (force or current_procinfo.FPUExceptionCheckNeeded)) then
      begin
        r:=getintregister(list,OS_INT);
        list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
        { test the low five cumulative exception bits
          (presumably IOC/DZC/OFC/UFC/IXC — confirm against the Arm ARM
          FPSR layout) }
        list.concat(taicpu.op_reg_const(A_TST,r,$1f));
        current_asmdata.getjumplabel(l1);
        current_asmdata.getjumplabel(l2);
        { any of them set -> throw (branch to l1) }
        ai:=taicpu.op_sym(A_B,l1);
        ai.is_jmp:=true;
        ai.condition:=C_NE;
        list.concat(ai);
        { otherwise also test bit 7 (presumably IDC); clear -> skip the
          throw entirely (branch to l2), set -> fall through to l1 }
        list.concat(taicpu.op_reg_const(A_TST,r,$80));
        ai:=taicpu.op_sym(A_B,l2);
        ai.is_jmp:=true;
        ai.condition:=C_EQ;
        list.concat(ai);
        a_label(list,l1);
        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
        a_label(list,l2);
        if clear then
          current_procinfo.FPUExceptionCheckNeeded:=false;
      end;
  end;
  2142. procedure tcgaarch64.g_profilecode(list : TAsmList);
  2143. begin
  2144. if target_info.system = system_aarch64_linux then
  2145. begin
  2146. list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
  2147. a_call_name(list,'_mcount',false);
  2148. end
  2149. else
  2150. internalerror(2020021901);
  2151. end;
  2152. {$endif dummy}
{ Factory hook that normally instantiates the global code generator
  objects; instantiation is disabled in this variant of the unit. }
procedure create_codegen;
  begin
    //!!! cg:=tcgcpu.Create;
    //!!! cg64:=tcg64.Create;
  end;
  2158. end.