cgcpu.pas 79 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. This unit implements the code generator for the PowerPC
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$I fpcdefs.inc}
  19. interface
  20. uses
  21. globtype, symtype, symdef, symsym,
  22. cgbase, cgobj,cgppc,
  23. aasmbase, aasmcpu, aasmtai,aasmdata,
  24. cpubase, cpuinfo, cgutils, rgcpu,
  25. parabase;
  26. type
  27. tcgppc = class(tcgppcgen)
  28. procedure init_register_allocators; override;
  29. procedure done_register_allocators; override;
  30. { passing parameters, per default the parameter is pushed }
  31. { nr gives the number of the parameter (enumerated from }
  32. { left to right), this allows to move the parameter to }
  33. { register, if the cpu supports register calling }
  34. { conventions }
  35. procedure a_load_ref_cgpara(list: TAsmList; size: tcgsize; const r: treference;
  36. const paraloc: tcgpara); override;
  37. procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
  38. procedure a_call_reg(list: TAsmList; reg: tregister); override;
  39. procedure a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize; a:
  40. aint; reg: TRegister); override;
  41. procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,
  42. dst: TRegister); override;
  43. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  44. size: tcgsize; a: aint; src, dst: tregister); override;
  45. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  46. size: tcgsize; src1, src2, dst: tregister); override;
  47. { move instructions }
  48. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: aint; reg:
  49. tregister); override;
  50. { loads the memory pointed to by ref into register reg }
  51. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const
  52. Ref: treference; reg: tregister); override;
  53. procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1,
  54. reg2: tregister); override;
  55. procedure a_load_subsetreg_reg(list : TAsmList; subsetsize, tosize: tcgsize; const sreg: tsubsetregister; destreg: tregister); override;
  56. procedure a_load_const_subsetreg(list: TAsmlist; subsetsize: tcgsize; a: aint; const sreg: tsubsetregister); override;
  57. { comparison operations }
  58. procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op:
  59. topcmp; a: aint; reg: tregister;
  60. l: tasmlabel); override;
  61. procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op:
  62. topcmp; reg1, reg2: tregister; l: tasmlabel); override;
  63. procedure a_jmp_name(list: TAsmList; const s: string); override;
  64. procedure a_jmp_always(list: TAsmList; l: tasmlabel); override;
  65. procedure a_jmp_flags(list: TAsmList; const f: TResFlags; l: tasmlabel);
  66. override;
  67. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags;
  68. reg: TRegister); override;
  69. { need to override this for ppc64 to avoid calling CG methods which allocate
  70. registers during creation of the interface wrappers to subtract ioffset from
  71. the self pointer. But register allocation does not take place for them (which
  72. would probably be the generic fix) so we need to have a specialized method
  73. that uses the R11 scratch register in these cases.
  74. At the same time this allows > 32 bit offsets as well.
  75. }
  76. procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: aint);override;
  77. procedure g_profilecode(list: TAsmList); override;
  78. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe:
  79. boolean); override;
  80. procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe:
  81. boolean); override;
  82. procedure g_save_registers(list: TAsmList); override;
  83. procedure g_restore_registers(list: TAsmList); override;
  84. procedure a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r:
  85. tregister); override;
  86. procedure g_concatcopy(list: TAsmList; const source, dest: treference;
  87. len: aint); override;
  88. procedure g_external_wrapper(list: TAsmList; pd: TProcDef; const externalname: string); override;
  89. private
  90. procedure a_load_regconst_subsetreg_intern(list : TAsmList; fromsize, subsetsize: tcgsize; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); override;
  91. procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  92. { returns whether a reference can be used immediately in a powerpc }
  93. { instruction }
  94. function issimpleref(const ref: treference): boolean;
  95. { contains the common code of a_load_reg_ref and a_load_ref_reg }
  96. procedure a_load_store(list: TAsmList; op: tasmop; reg: tregister;
  97. ref: treference); override;
  98. { returns the lowest numbered FP register in use, and the number of used FP registers
  99. for the current procedure }
  100. procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
  101. { returns the lowest numbered GP register in use, and the number of used GP registers
  102. for the current procedure }
  103. procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
  104. { generates code to call a method with the given string name. The boolean options
  105. control code generation. If prependDot is true, a single dot character is prepended to
  106. the string, if addNOP is true a single NOP instruction is added after the call, and
  107. if includeCall is true, the method is marked as having a call, not if false. This
  108. option is particularly useful to prevent generation of a larger stack frame for the
  109. register save and restore helper functions. }
  110. procedure a_call_name_direct(list: TAsmList; s: string; weak: boolean; prependDot : boolean;
  111. addNOP : boolean; includeCall : boolean = true);
  112. procedure a_jmp_name_direct(list : TAsmList; s : string; prependDot : boolean);
  113. { emits code to store the given value a into the TOC (if not already in there), and load it from there
  114. as well }
  115. procedure loadConstantPIC(list : TAsmList; size : TCGSize; a : aint; reg : TRegister);
  116. procedure profilecode_savepara(para : tparavarsym; list : TAsmList);
  117. procedure profilecode_restorepara(para : tparavarsym; list : TAsmList);
  118. end;
  119. procedure create_codegen;
  120. const
  121. TShiftOpCG2AsmOpConst : array[boolean, OP_SAR..OP_SHR] of TAsmOp = (
  122. (A_SRAWI, A_SLWI, A_SRWI), (A_SRADI, A_SLDI, A_SRDI)
  123. );
  124. implementation
  125. uses
  126. sysutils, cclasses,
  127. globals, verbose, systems, cutils,
  128. symconst, fmodule,
  129. rgobj, tgobj, cpupi, procinfo, paramgr, cpupara;
  130. function is_signed_cgsize(const size : TCgSize) : Boolean;
  131. begin
  132. case size of
  133. OS_S8,OS_S16,OS_S32,OS_S64 : result := true;
  134. OS_8,OS_16,OS_32,OS_64 : result := false;
  135. else
  136. internalerror(2006050701);
  137. end;
  138. end;
  139. {$push}
  140. {$r-}
  141. {$q-}
  142. { helper function which calculate "magic" values for replacement of unsigned
  143. division by constant operation by multiplication. See the PowerPC compiler
  144. developer manual for more information }
  145. procedure getmagic_unsignedN(const N : byte; const d : aWord;
  146. out magic_m : aWord; out magic_add : boolean; out magic_shift : byte);
  147. var
  148. p : aInt;
  149. nc, delta, q1, r1, q2, r2, two_N_minus_1 : aWord;
  150. begin
  151. assert(d > 0);
  152. two_N_minus_1 := aWord(1) shl (N-1);
  153. magic_add := false;
  154. nc := - 1 - (-d) mod d;
  155. p := N-1; { initialize p }
  156. q1 := two_N_minus_1 div nc; { initialize q1 = 2p/nc }
  157. r1 := two_N_minus_1 - q1*nc; { initialize r1 = rem(2p,nc) }
  158. q2 := (two_N_minus_1-1) div d; { initialize q2 = (2p-1)/d }
  159. r2 := (two_N_minus_1-1) - q2*d; { initialize r2 = rem((2p-1),d) }
  160. repeat
  161. inc(p);
  162. if (r1 >= (nc - r1)) then begin
  163. q1 := 2 * q1 + 1; { update q1 }
  164. r1 := 2*r1 - nc; { update r1 }
  165. end else begin
  166. q1 := 2*q1; { update q1 }
  167. r1 := 2*r1; { update r1 }
  168. end;
  169. if ((r2 + 1) >= (d - r2)) then begin
  170. if (q2 >= (two_N_minus_1-1)) then
  171. magic_add := true;
  172. q2 := 2*q2 + 1; { update q2 }
  173. r2 := 2*r2 + 1 - d; { update r2 }
  174. end else begin
  175. if (q2 >= two_N_minus_1) then
  176. magic_add := true;
  177. q2 := 2*q2; { update q2 }
  178. r2 := 2*r2 + 1; { update r2 }
  179. end;
  180. delta := d - 1 - r2;
  181. until not ((p < (2*N)) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
  182. magic_m := q2 + 1; { resulting magic number }
  183. magic_shift := p - N; { resulting shift }
  184. end;
  185. { helper function which calculate "magic" values for replacement of signed
  186. division by constant operation by multiplication. See the PowerPC compiler
  187. developer manual for more information }
  188. procedure getmagic_signedN(const N : byte; const d : aInt;
  189. out magic_m : aInt; out magic_s : aInt);
  190. var
  191. p : aInt;
  192. ad, anc, delta, q1, r1, q2, r2, t : aWord;
  193. two_N_minus_1 : aWord;
  194. begin
  195. assert((d < -1) or (d > 1));
  196. two_N_minus_1 := aWord(1) shl (N-1);
  197. ad := abs(d);
  198. t := two_N_minus_1 + (aWord(d) shr (N-1));
  199. anc := t - 1 - t mod ad; { absolute value of nc }
  200. p := (N-1); { initialize p }
  201. q1 := two_N_minus_1 div anc; { initialize q1 = 2p/abs(nc) }
  202. r1 := two_N_minus_1 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
  203. q2 := two_N_minus_1 div ad; { initialize q2 = 2p/abs(d) }
  204. r2 := two_N_minus_1 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
  205. repeat
  206. inc(p);
  207. q1 := 2*q1; { update q1 = 2p/abs(nc) }
  208. r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
  209. if (r1 >= anc) then begin { must be unsigned comparison }
  210. inc(q1);
  211. dec(r1, anc);
  212. end;
  213. q2 := 2*q2; { update q2 = 2p/abs(d) }
  214. r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
  215. if (r2 >= ad) then begin { must be unsigned comparison }
  216. inc(q2);
  217. dec(r2, ad);
  218. end;
  219. delta := ad - r2;
  220. until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
  221. magic_m := q2 + 1;
  222. if (d < 0) then begin
  223. magic_m := -magic_m; { resulting magic number }
  224. end;
  225. magic_s := p - N; { resulting shift }
  226. end;
  227. {$pop}
  228. { finds positive and negative powers of two of the given value, returning the
  229. power and whether it's a negative power or not in addition to the actual result
  230. of the function }
  231. function ispowerof2(value : aInt; out power : byte; out neg : boolean) : boolean;
  232. var
  233. i : longint;
  234. hl : aInt;
  235. begin
  236. neg := false;
  237. { also try to find negative power of two's by negating if the
  238. value is negative. low(aInt) is special because it can not be
  239. negated. Simply return the appropriate values for it }
  240. if (value < 0) then begin
  241. neg := true;
  242. if (value = low(aInt)) then begin
  243. power := sizeof(aInt)*8-1;
  244. result := true;
  245. exit;
  246. end;
  247. value := -value;
  248. end;
  249. if ((value and (value-1)) <> 0) then begin
  250. result := false;
  251. exit;
  252. end;
  253. hl := 1;
  254. for i := 0 to (sizeof(aInt)*8-1) do begin
  255. if (hl = value) then begin
  256. result := true;
  257. power := i;
  258. exit;
  259. end;
  260. hl := hl shl 1;
  261. end;
  262. end;
  263. { returns the number of instruction required to load the given integer into a register.
  264. This is basically a stripped down version of a_load_const_reg, increasing a counter
  265. instead of emitting instructions. }
  266. function getInstructionLength(a : aint) : longint;
  267. function get32bitlength(a : longint; var length : longint) : boolean; inline;
  268. var
  269. is_half_signed : byte;
  270. begin
  271. { if the lower 16 bits are zero, do a single LIS }
  272. if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
  273. inc(length);
  274. get32bitlength := longint(a) < 0;
  275. end else begin
  276. is_half_signed := ord(smallint(lo(a)) < 0);
  277. inc(length);
  278. if smallint(hi(a) + is_half_signed) <> 0 then
  279. inc(length);
  280. get32bitlength := (smallint(a) < 0) or (a < 0);
  281. end;
  282. end;
  283. var
  284. extendssign : boolean;
  285. begin
  286. result := 0;
  287. if (lo(a) = 0) and (hi(a) <> 0) then begin
  288. get32bitlength(hi(a), result);
  289. inc(result);
  290. end else begin
  291. extendssign := get32bitlength(lo(a), result);
  292. if (extendssign) and (hi(a) = 0) then
  293. inc(result)
  294. else if (not
  295. ((extendssign and (longint(hi(a)) = -1)) or
  296. ((not extendssign) and (hi(a)=0)))
  297. ) then begin
  298. get32bitlength(hi(a), result);
  299. inc(result);
  300. end;
  301. end;
  302. end;
  303. procedure tcgppc.init_register_allocators;
  304. begin
  305. inherited init_register_allocators;
  306. if (target_info.system <> system_powerpc64_darwin) then
  307. // r13 is tls, do not use, r2 is not available
  308. rg[R_INTREGISTER] := trgintcpu.create(R_INTREGISTER, R_SUBWHOLE,
  309. [{$ifdef user0} RS_R0, {$endif} RS_R3, RS_R4, RS_R5, RS_R6, RS_R7, RS_R8,
  310. RS_R9, RS_R10, RS_R11, RS_R12, RS_R31, RS_R30, RS_R29,
  311. RS_R28, RS_R27, RS_R26, RS_R25, RS_R24, RS_R23, RS_R22,
  312. RS_R21, RS_R20, RS_R19, RS_R18, RS_R17, RS_R16, RS_R15,
  313. RS_R14], first_int_imreg, [])
  314. else
  315. { special for darwin/ppc64: r2 available volatile, r13 = tls }
  316. rg[R_INTREGISTER] := trgintcpu.create(R_INTREGISTER, R_SUBWHOLE,
  317. [{$ifdef user0} RS_R0, {$endif} RS_R2, RS_R3, RS_R4, RS_R5, RS_R6, RS_R7, RS_R8,
  318. RS_R9, RS_R10, RS_R11, RS_R12, RS_R31, RS_R30, RS_R29,
  319. RS_R28, RS_R27, RS_R26, RS_R25, RS_R24, RS_R23, RS_R22,
  320. RS_R21, RS_R20, RS_R19, RS_R18, RS_R17, RS_R16, RS_R15,
  321. RS_R14], first_int_imreg, []);
  322. rg[R_FPUREGISTER] := trgcpu.create(R_FPUREGISTER, R_SUBNONE,
  323. [RS_F0, RS_F1, RS_F2, RS_F3, RS_F4, RS_F5, RS_F6, RS_F7, RS_F8, RS_F9,
  324. RS_F10, RS_F11, RS_F12, RS_F13, RS_F31, RS_F30, RS_F29, RS_F28, RS_F27,
  325. RS_F26, RS_F25, RS_F24, RS_F23, RS_F22, RS_F21, RS_F20, RS_F19, RS_F18,
  326. RS_F17, RS_F16, RS_F15, RS_F14], first_fpu_imreg, []);
  327. { TODO: FIX ME}
  328. rg[R_MMREGISTER] := trgcpu.create(R_MMREGISTER, R_SUBNONE,
  329. [RS_M0, RS_M1, RS_M2], first_mm_imreg, []);
  330. end;
  331. procedure tcgppc.done_register_allocators;
  332. begin
  333. rg[R_INTREGISTER].free;
  334. rg[R_FPUREGISTER].free;
  335. rg[R_MMREGISTER].free;
  336. inherited done_register_allocators;
  337. end;
  338. procedure tcgppc.a_load_ref_cgpara(list: TAsmList; size: tcgsize; const r:
  339. treference; const paraloc: tcgpara);
  340. var
  341. tmpref, ref: treference;
  342. location: pcgparalocation;
  343. sizeleft: aint;
  344. adjusttail : boolean;
  345. begin
  346. location := paraloc.location;
  347. tmpref := r;
  348. sizeleft := paraloc.intsize;
  349. adjusttail := false;
  350. while assigned(location) do begin
  351. paramanager.allocparaloc(list,location);
  352. case location^.loc of
  353. LOC_REGISTER, LOC_CREGISTER:
  354. begin
  355. if not(size in [OS_NO,OS_128,OS_S128]) then
  356. a_load_ref_reg(list, size, location^.size, tmpref,
  357. location^.register)
  358. else begin
  359. { load non-integral sized memory location into register. This
  360. memory location be 1-sizeleft byte sized.
  361. Always assume that this memory area is properly aligned, eg. start
  362. loading the larger quantities for "odd" quantities first }
  363. case sizeleft of
  364. 1,2,4,8 :
  365. a_load_ref_reg(list, int_cgsize(sizeleft), location^.size, tmpref,
  366. location^.register);
  367. 3 : begin
  368. a_reg_alloc(list, NR_R12);
  369. a_load_ref_reg(list, OS_16, location^.size, tmpref,
  370. NR_R12);
  371. inc(tmpref.offset, tcgsize2size[OS_16]);
  372. a_load_ref_reg(list, OS_8, location^.size, tmpref,
  373. location^.register);
  374. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 40));
  375. a_reg_dealloc(list, NR_R12);
  376. end;
  377. 5 : begin
  378. a_reg_alloc(list, NR_R12);
  379. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  380. inc(tmpref.offset, tcgsize2size[OS_32]);
  381. a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
  382. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 24));
  383. a_reg_dealloc(list, NR_R12);
  384. end;
  385. 6 : begin
  386. a_reg_alloc(list, NR_R12);
  387. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  388. inc(tmpref.offset, tcgsize2size[OS_32]);
  389. a_load_ref_reg(list, OS_16, location^.size, tmpref, location^.register);
  390. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 16, 16));
  391. a_reg_dealloc(list, NR_R12);
  392. end;
  393. 7 : begin
  394. a_reg_alloc(list, NR_R12);
  395. a_reg_alloc(list, NR_R0);
  396. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  397. inc(tmpref.offset, tcgsize2size[OS_32]);
  398. a_load_ref_reg(list, OS_16, location^.size, tmpref, NR_R0);
  399. inc(tmpref.offset, tcgsize2size[OS_16]);
  400. a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
  401. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, NR_R0, NR_R12, 16, 16));
  402. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R0, 8, 8));
  403. a_reg_dealloc(list, NR_R0);
  404. a_reg_dealloc(list, NR_R12);
  405. end;
  406. else begin
  407. { still > 8 bytes to load, so load data single register now }
  408. a_load_ref_reg(list, location^.size, location^.size, tmpref,
  409. location^.register);
  410. { the block is > 8 bytes, so we have to store any bytes not
  411. a multiple of the register size beginning with the MSB }
  412. adjusttail := true;
  413. end;
  414. end;
  415. if (adjusttail) and (sizeleft < sizeof(pint)) then
  416. a_op_const_reg(list, OP_SHL, OS_INT,
  417. (sizeof(pint) - sizeleft) * sizeof(pint),
  418. location^.register);
  419. end;
  420. end;
  421. LOC_REFERENCE:
  422. begin
  423. reference_reset_base(ref, location^.reference.index,
  424. location^.reference.offset,paraloc.alignment);
  425. g_concatcopy(list, tmpref, ref, sizeleft);
  426. if assigned(location^.next) then
  427. internalerror(2005010710);
  428. end;
  429. LOC_FPUREGISTER, LOC_CFPUREGISTER:
  430. case location^.size of
  431. OS_F32, OS_F64:
  432. a_loadfpu_ref_reg(list, location^.size, location^.size, tmpref, location^.register);
  433. else
  434. internalerror(2002072801);
  435. end;
  436. LOC_VOID:
  437. { nothing to do }
  438. ;
  439. else
  440. internalerror(2002081103);
  441. end;
  442. inc(tmpref.offset, tcgsize2size[location^.size]);
  443. dec(sizeleft, tcgsize2size[location^.size]);
  444. location := location^.next;
  445. end;
  446. end;
  447. { calling a procedure by name }
  448. procedure tcgppc.a_call_name(list: TAsmList; const s: string; weak: boolean);
  449. begin
  450. if (target_info.system <> system_powerpc64_darwin) then
  451. a_call_name_direct(list, s, weak, false, true)
  452. else
  453. begin
  454. list.concat(taicpu.op_sym(A_BL,get_darwin_call_stub(s,weak)));
  455. include(current_procinfo.flags,pi_do_call);
  456. end;
  457. end;
  458. procedure tcgppc.a_call_name_direct(list: TAsmList; s: string; weak: boolean; prependDot : boolean; addNOP : boolean; includeCall : boolean);
  459. begin
  460. if (prependDot) then
  461. s := '.' + s;
  462. if not(weak) then
  463. list.concat(taicpu.op_sym(A_BL, current_asmdata.RefAsmSymbol(s)))
  464. else
  465. list.concat(taicpu.op_sym(A_BL, current_asmdata.WeakRefAsmSymbol(s)));
  466. if (addNOP) then
  467. list.concat(taicpu.op_none(A_NOP));
  468. if (includeCall) then
  469. include(current_procinfo.flags, pi_do_call);
  470. end;
  471. { calling a procedure by address }
  472. procedure tcgppc.a_call_reg(list: TAsmList; reg: tregister);
  473. var
  474. tmpref: treference;
  475. tempreg : TRegister;
  476. begin
  477. if (target_info.system = system_powerpc64_darwin) then
  478. inherited a_call_reg(list,reg)
  479. else if (not (cs_opt_size in current_settings.optimizerswitches)) then begin
  480. tempreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
  481. { load actual function entry (reg contains the reference to the function descriptor)
  482. into tempreg }
  483. reference_reset_base(tmpref, reg, 0, sizeof(pint));
  484. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, tempreg);
  485. { save TOC pointer in stackframe }
  486. reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF, 8);
  487. a_load_reg_ref(list, OS_ADDR, OS_ADDR, NR_RTOC, tmpref);
  488. { move actual function pointer to CTR register }
  489. list.concat(taicpu.op_reg(A_MTCTR, tempreg));
  490. { load new TOC pointer from function descriptor into RTOC register }
  491. reference_reset_base(tmpref, reg, tcgsize2size[OS_ADDR], 8);
  492. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
  493. { load new environment pointer from function descriptor into R11 register }
  494. reference_reset_base(tmpref, reg, 2*tcgsize2size[OS_ADDR], 8);
  495. a_reg_alloc(list, NR_R11);
  496. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R11);
  497. { call function }
  498. list.concat(taicpu.op_none(A_BCTRL));
  499. a_reg_dealloc(list, NR_R11);
  500. end else begin
  501. { call ptrgl helper routine which expects the pointer to the function descriptor
  502. in R11 }
  503. a_reg_alloc(list, NR_R11);
  504. a_load_reg_reg(list, OS_ADDR, OS_ADDR, reg, NR_R11);
  505. a_call_name_direct(list, '.ptrgl', false, false, false);
  506. a_reg_dealloc(list, NR_R11);
  507. end;
  508. { we need to load the old RTOC from stackframe because we changed it}
  509. reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF, 8);
  510. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
  511. include(current_procinfo.flags, pi_do_call);
  512. end;
  513. {********************** load instructions ********************}
  514. procedure tcgppc.a_load_const_reg(list: TAsmList; size: TCGSize; a: aint;
  515. reg: TRegister);
  516. { loads a 32 bit constant into the given register, using an optimal instruction sequence.
  517. This is either LIS, LI or LI+ADDIS.
  518. Returns true if during these operations the upper 32 bits were filled with 1 bits (e.g.
  519. sign extension was performed) }
  520. function load32bitconstant(list : TAsmList; size : TCGSize; a : longint;
  521. reg : TRegister) : boolean;
  522. var
  523. is_half_signed : byte;
  524. begin
  525. { if the lower 16 bits are zero, do a single LIS }
  526. if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
  527. list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(hi(a))));
  528. load32bitconstant := longint(a) < 0;
  529. end else begin
  530. is_half_signed := ord(smallint(lo(a)) < 0);
  531. list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a and $ffff)));
  532. if smallint(hi(a) + is_half_signed) <> 0 then begin
  533. list.concat(taicpu.op_reg_reg_const(A_ADDIS, reg, reg, smallint(hi(a) + is_half_signed)));
  534. end;
  535. load32bitconstant := (smallint(a) < 0) or (a < 0);
  536. end;
  537. end;
  538. { loads a 32 bit constant into R0, using an optimal instruction sequence.
  539. This is either LIS, LI or LI+ORIS.
  540. Returns true if during these operations the upper 32 bits were filled with 1 bits (e.g.
  541. sign extension was performed) }
  542. function load32bitconstantR0(list : TAsmList; size : TCGSize; a : longint) : boolean;
  543. begin
  544. { if it's a value we can load with a single LI, do it }
  545. if (a >= low(smallint)) and (a <= high(smallint)) then begin
  546. list.concat(taicpu.op_reg_const(A_LI, NR_R0, smallint(a)));
  547. end else begin
  548. { if the lower 16 bits are zero, do a single LIS }
  549. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, smallint(a shr 16)));
  550. if (smallint(a) <> 0) then begin
  551. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(a)));
  552. end;
  553. end;
  554. load32bitconstantR0 := a < 0;
  555. end;
  556. { emits the code to load a constant by emitting various instructions into the output
  557. code}
  558. procedure loadConstantNormal(list: TAsmList; size : TCgSize; a: aint; reg: TRegister);
  559. var
  560. extendssign : boolean;
  561. instr : taicpu;
  562. begin
  563. if (lo(a) = 0) and (hi(a) <> 0) then begin
  564. { load only upper 32 bits, and shift }
  565. load32bitconstant(list, size, longint(hi(a)), reg);
  566. list.concat(taicpu.op_reg_reg_const(A_SLDI, reg, reg, 32));
  567. end else begin
  568. { load lower 32 bits }
  569. extendssign := load32bitconstant(list, size, longint(lo(a)), reg);
  570. if (extendssign) and (hi(a) = 0) then
  571. { if upper 32 bits are zero, but loading the lower 32 bit resulted in automatic
  572. sign extension, clear those bits }
  573. list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, reg, reg, 0, 32))
  574. else if (not
  575. ((extendssign and (longint(hi(a)) = -1)) or
  576. ((not extendssign) and (hi(a)=0)))
  577. ) then begin
  578. { only load the upper 32 bits, if the automatic sign extension is not okay,
  579. that is, _not_ if
  580. - loading the lower 32 bits resulted in -1 in the upper 32 bits, and the upper
  581. 32 bits should contain -1
  582. - loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
  583. 32 bits should contain 0 }
  584. a_reg_alloc(list, NR_R0);
  585. load32bitconstantR0(list, size, longint(hi(a)));
  586. { combine both registers }
  587. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
  588. a_reg_dealloc(list, NR_R0);
  589. end;
  590. end;
  591. end;
  592. {$IFDEF EXTDEBUG}
  593. var
  594. astring : string;
  595. {$ENDIF EXTDEBUG}
  596. begin
  597. {$IFDEF EXTDEBUG}
  598. astring := 'a_load_const_reg ' + inttostr(hi(a)) + ' ' + inttostr(lo(a)) + ' ' + inttostr(ord(size)) + ' ' + inttostr(tcgsize2size[size]) + ' ' + hexstr(a, 16);
  599. list.concat(tai_comment.create(strpnew(astring)));
  600. {$ENDIF EXTDEBUG}
  601. if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
  602. internalerror(2002090902);
  603. { if PIC or basic optimizations are enabled, and the number of instructions which would be
  604. required to load the value is greater than 2, store (and later load) the value from there }
  605. // if (((cs_opt_peephole in current_settings.optimizerswitches) or (cs_create_pic in current_settings.moduleswitches)) and
  606. // (getInstructionLength(a) > 2)) then
  607. // loadConstantPIC(list, size, a, reg)
  608. // else
  609. loadConstantNormal(list, size, a, reg);
  610. end;
  611. procedure tcgppc.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize;
  612. const ref: treference; reg: tregister);
  613. const
  614. LoadInstr: array[OS_8..OS_S64, boolean, boolean] of TAsmOp =
  615. { indexed? updating? }
  616. (((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
  617. ((A_LHZ, A_LHZU), (A_LHZX, A_LHZUX)),
  618. ((A_LWZ, A_LWZU), (A_LWZX, A_LWZUX)),
  619. ((A_LD, A_LDU), (A_LDX, A_LDUX)),
  620. { 128bit stuff too }
  621. ((A_NONE, A_NONE), (A_NONE, A_NONE)),
  622. { there's no load-byte-with-sign-extend :( }
  623. ((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
  624. ((A_LHA, A_LHAU), (A_LHAX, A_LHAUX)),
  625. { there's no load-word-arithmetic-indexed with update, simulate it in code :( }
  626. ((A_LWA, A_NOP), (A_LWAX, A_LWAUX)),
  627. ((A_LD, A_LDU), (A_LDX, A_LDUX))
  628. );
  629. var
  630. op: tasmop;
  631. ref2: treference;
  632. tmpreg: tregister;
  633. begin
  634. {$IFDEF EXTDEBUG}
  635. list.concat(tai_comment.create(strpnew('a_load_ref_reg ' + ref2string(ref))));
  636. {$ENDIF EXTDEBUG}
  637. if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
  638. internalerror(2002090904);
  639. { the caller is expected to have adjusted the reference already
  640. in this case }
  641. if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
  642. fromsize := tosize;
  643. ref2 := ref;
  644. fixref(list, ref2);
  645. op := loadinstr[fromsize, ref2.index <> NR_NO, false];
  646. { there is no LWAU instruction, simulate using ADDI and LWA }
  647. if (op = A_NOP) then begin
  648. list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
  649. ref2.offset := 0;
  650. op := A_LWA;
  651. end;
  652. a_load_store(list, op, reg, ref2);
  653. { sign extend shortint if necessary (because there is
  654. no load instruction to sign extend an 8 bit value automatically)
  655. and mask out extra sign bits when loading from a smaller
  656. signed to a larger unsigned type (where it matters) }
  657. if (fromsize = OS_S8) then begin
  658. a_load_reg_reg(list, OS_8, OS_S8, reg, reg);
  659. a_load_reg_reg(list, OS_S8, tosize, reg, reg);
  660. end else if (fromsize = OS_S16) and (tosize = OS_32) then
  661. a_load_reg_reg(list, fromsize, tosize, reg, reg);
  662. end;
  663. procedure tcgppc.a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize;
  664. reg1, reg2: tregister);
  665. var
  666. instr: TAiCpu;
  667. bytesize : byte;
  668. begin
  669. {$ifdef extdebug}
  670. list.concat(tai_comment.create(strpnew('a_load_reg_reg from : ' + cgsize2string(fromsize) + ' to ' + cgsize2string(tosize))));
  671. {$endif}
  672. if (tcgsize2size[fromsize] > tcgsize2size[tosize]) or
  673. ((tcgsize2size[fromsize] = tcgsize2size[tosize]) and (fromsize <> tosize)) or
  674. { do we need to mask out the sign when loading from smaller signed to larger unsigned type? }
  675. ( is_signed_cgsize(fromsize) and (not is_signed_cgsize(tosize)) and
  676. (tcgsize2size[fromsize] < tcgsize2size[tosize]) and (tcgsize2size[tosize] <> sizeof(pint)) ) then begin
  677. case tosize of
  678. OS_S8:
  679. instr := taicpu.op_reg_reg(A_EXTSB,reg2,reg1);
  680. OS_S16:
  681. instr := taicpu.op_reg_reg(A_EXTSH,reg2,reg1);
  682. OS_S32:
  683. instr := taicpu.op_reg_reg(A_EXTSW,reg2,reg1);
  684. OS_8, OS_16, OS_32:
  685. instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[tosize])*8);
  686. OS_S64, OS_64:
  687. instr := taicpu.op_reg_reg(A_MR, reg2, reg1);
  688. end;
  689. end else
  690. instr := taicpu.op_reg_reg(A_MR, reg2, reg1);
  691. list.concat(instr);
  692. rg[R_INTREGISTER].add_move_instruction(instr);
  693. end;
  694. procedure tcgppc.a_load_subsetreg_reg(list : TAsmList; subsetsize, tosize: tcgsize; const sreg: tsubsetregister; destreg: tregister);
  695. begin
  696. {$ifdef extdebug}
  697. list.concat(tai_comment.create(strpnew('a_load_subsetreg_reg subsetregsize = ' + cgsize2string(sreg.subsetregsize) + ' subsetsize = ' + cgsize2string(subsetsize) + ' startbit = ' + intToStr(sreg.startbit) + ' tosize = ' + cgsize2string(tosize))));
  698. {$endif}
  699. { do the extraction if required and then extend the sign correctly. (The latter is actually required only for signed subsets
  700. and if that subset is not >= the tosize). }
  701. if (sreg.startbit <> 0) or
  702. (sreg.bitlen <> tcgsize2size[subsetsize]*8) then begin
  703. list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, destreg, sreg.subsetreg, (64 - sreg.startbit) and 63, 64 - sreg.bitlen));
  704. if (subsetsize in [OS_S8..OS_S128]) then
  705. if ((sreg.bitlen mod 8) = 0) then begin
  706. a_load_reg_reg(list, tcgsize2unsigned[subsetsize], subsetsize, destreg, destreg);
  707. a_load_reg_reg(list, subsetsize, tosize, destreg, destreg);
  708. end else begin
  709. a_op_const_reg(list,OP_SHL,OS_INT,64-sreg.bitlen,destreg);
  710. a_op_const_reg(list,OP_SAR,OS_INT,64-sreg.bitlen,destreg);
  711. end;
  712. end else begin
  713. a_load_reg_reg(list, tcgsize2unsigned[sreg.subsetregsize], subsetsize, sreg.subsetreg, destreg);
  714. a_load_reg_reg(list, subsetsize, tosize, destreg, destreg);
  715. end;
  716. end;
  717. procedure tcgppc.a_load_regconst_subsetreg_intern(list : TAsmList; fromsize, subsetsize: tcgsize; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt);
  718. begin
  719. {$ifdef extdebug}
  720. list.concat(tai_comment.create(strpnew('a_load_reg_subsetreg fromsize = ' + cgsize2string(fromsize) + ' subsetregsize = ' + cgsize2string(sreg.subsetregsize) + ' subsetsize = ' + cgsize2string(subsetsize) + ' startbit = ' + IntToStr(sreg.startbit))));
  721. {$endif}
  722. if (slopt in [SL_SETZERO,SL_SETMAX]) then
  723. inherited a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,sreg,slopt)
  724. else if (sreg.bitlen <> sizeof(aint)*8) then
  725. { simply use the INSRDI instruction }
  726. list.concat(taicpu.op_reg_reg_const_const(A_INSRDI, sreg.subsetreg, fromreg, sreg.bitlen, (64 - (sreg.startbit + sreg.bitlen)) and 63))
  727. else
  728. a_load_reg_reg(list, fromsize, subsetsize, fromreg, sreg.subsetreg);
  729. end;
  730. procedure tcgppc.a_load_const_subsetreg(list: TAsmlist; subsetsize: tcgsize;
  731. a: aint; const sreg: tsubsetregister);
  732. var
  733. tmpreg : TRegister;
  734. begin
  735. {$ifdef extdebug}
  736. list.concat(tai_comment.create(strpnew('a_load_const_subsetreg subsetregsize = ' + cgsize2string(sreg.subsetregsize) + ' subsetsize = ' + cgsize2string(subsetsize) + ' startbit = ' + intToStr(sreg.startbit) + ' a = ' + intToStr(a))));
  737. {$endif}
  738. { loading the constant into the lowest bits of a temp register and then inserting is
  739. better than loading some usually large constants and do some masking and shifting on ppc64 }
  740. tmpreg := getintregister(list,subsetsize);
  741. a_load_const_reg(list,subsetsize,a,tmpreg);
  742. a_load_reg_subsetreg(list, subsetsize, subsetsize, tmpreg, sreg);
  743. end;
  744. procedure tcgppc.a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize; a:
  745. aint; reg: TRegister);
  746. begin
  747. a_op_const_reg_reg(list, op, size, a, reg, reg);
  748. end;
  749. procedure tcgppc.a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,
  750. dst: TRegister);
  751. begin
  752. a_op_reg_reg_reg(list, op, size, src, dst, dst);
  753. end;
  754. procedure tcgppc.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  755. size: tcgsize; a: aint; src, dst: tregister);
  756. var
  757. useReg : boolean;
  758. procedure do_lo_hi(loOp, hiOp : TAsmOp);
  759. begin
  760. { Optimization for logical ops (excluding AND), trying to do this as efficiently
  761. as possible by only generating code for the affected halfwords. Note that all
  762. the instructions handled here must have "X op 0 = X" for every halfword. }
  763. usereg := false;
  764. if (aword(a) > high(dword)) then begin
  765. usereg := true;
  766. end else begin
  767. if (word(a) <> 0) then begin
  768. list.concat(taicpu.op_reg_reg_const(loOp, dst, src, word(a)));
  769. if (word(a shr 16) <> 0) then
  770. list.concat(taicpu.op_reg_reg_const(hiOp, dst, dst, word(a shr 16)));
  771. end else if (word(a shr 16) <> 0) then
  772. list.concat(taicpu.op_reg_reg_const(hiOp, dst, src, word(a shr 16)));
  773. end;
  774. end;
  775. procedure do_lo_hi_and;
  776. begin
  777. { optimization logical and with immediate: only use "andi." for 16 bit
  778. ands, otherwise use register method. Doing this for 32 bit constants
  779. would not give any advantage to the register method (via useReg := true),
  780. requiring a scratch register and three instructions. }
  781. usereg := false;
  782. if (aword(a) > high(word)) then
  783. usereg := true
  784. else
  785. list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
  786. end;
  787. procedure do_constant_div(list : TAsmList; size : TCgSize; a : aint; src, dst : TRegister;
  788. signed : boolean);
  789. const
  790. negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
  791. var
  792. magic, shift : int64;
  793. u_magic : qword;
  794. u_shift : byte;
  795. u_add : boolean;
  796. power : byte;
  797. isNegPower : boolean;
  798. divreg : tregister;
  799. begin
  800. if (a = 0) then begin
  801. internalerror(2005061701);
  802. end else if (a = 1) then begin
  803. cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, src, dst);
  804. end else if (a = -1) and (signed) then begin
  805. { note: only in the signed case possible..., may overflow }
  806. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(negops[cs_check_overflow in current_settings.localswitches], dst, src));
  807. end else if (ispowerof2(a, power, isNegPower)) then begin
  808. if (signed) then begin
  809. { From "The PowerPC Compiler Writer's Guide", pg. 52ff }
  810. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, power,
  811. src, dst);
  812. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
  813. if (isNegPower) then
  814. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
  815. end else begin
  816. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, power, src, dst)
  817. end;
  818. end else begin
  819. { replace division by multiplication, both implementations }
  820. { from "The PowerPC Compiler Writer's Guide" pg. 53ff }
  821. divreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
  822. if (signed) then begin
  823. getmagic_signedN(sizeof(aInt)*8, a, magic, shift);
  824. { load magic value }
  825. cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, magic, divreg);
  826. { multiply }
  827. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULHD, dst, src, divreg));
  828. { add/subtract numerator }
  829. if (a > 0) and (magic < 0) then begin
  830. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, src, dst, dst);
  831. end else if (a < 0) and (magic > 0) then begin
  832. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_SUB, OS_INT, src, dst, dst);
  833. end;
  834. { shift shift places to the right (arithmetic) }
  835. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, shift, dst, dst);
  836. { extract and add sign bit }
  837. if (a >= 0) then begin
  838. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, 63, src, divreg);
  839. end else begin
  840. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, 63, dst, divreg);
  841. end;
  842. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, dst, divreg, dst);
  843. end else begin
  844. getmagic_unsignedN(sizeof(aWord)*8, a, u_magic, u_add, u_shift);
  845. { load magic in divreg }
  846. cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, aint(u_magic), divreg);
  847. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULHDU, dst, src, divreg));
  848. if (u_add) then begin
  849. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_SUB, OS_INT, dst, src, divreg);
  850. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, 1, divreg, divreg);
  851. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, divreg, dst, divreg);
  852. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, u_shift-1, divreg, dst);
  853. end else begin
  854. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, u_shift, dst, dst);
  855. end;
  856. end;
  857. end;
  858. end;
  859. var
  860. scratchreg: tregister;
  861. shift : byte;
  862. shiftmask : longint;
  863. isneg : boolean;
  864. begin
  865. { subtraction is the same as addition with negative constant }
  866. if op = OP_SUB then begin
  867. a_op_const_reg_reg(list, OP_ADD, size, -a, src, dst);
  868. exit;
  869. end;
  870. {$IFDEF EXTDEBUG}
  871. list.concat(tai_comment.create(strpnew('a_op_const_reg_reg ' + cgop2string(op))));
  872. {$ENDIF EXTDEBUG}
  873. { This case includes some peephole optimizations for the various operations,
  874. (e.g. AND, OR, XOR, ..) - can't this be done at some higher level,
  875. independent of architecture? }
  876. { assume that we do not need a scratch register for the operation }
  877. useReg := false;
  878. case (op) of
  879. OP_DIV, OP_IDIV:
  880. if (cs_opt_level1 in current_settings.optimizerswitches) then
  881. do_constant_div(list, size, a, src, dst, op = OP_IDIV)
  882. else
  883. usereg := true;
  884. OP_IMUL, OP_MUL:
  885. { idea: factorize constant multiplicands and use adds/shifts with few factors;
  886. however, even a 64 bit multiply is already quite fast on PPC64 }
  887. if (a = 0) then
  888. a_load_const_reg(list, size, 0, dst)
  889. else if (a = -1) then
  890. list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
  891. else if (a = 1) then
  892. a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
  893. else if ispowerof2(a, shift, isneg) then begin
  894. list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift));
  895. if (isneg) then
  896. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
  897. end else if (a >= low(smallint)) and (a <= high(smallint)) then
  898. list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
  899. smallint(a)))
  900. else
  901. usereg := true;
  902. OP_ADD:
  903. if (a = 0) then
  904. a_load_reg_reg(list, size, size, src, dst)
  905. else if (a >= low(smallint)) and (a <= high(smallint)) then
  906. list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a)))
  907. else
  908. useReg := true;
  909. OP_OR:
  910. if (a = 0) then
  911. a_load_reg_reg(list, size, size, src, dst)
  912. else if (a = -1) then
  913. a_load_const_reg(list, size, -1, dst)
  914. else
  915. do_lo_hi(A_ORI, A_ORIS);
  916. OP_AND:
  917. if (a = 0) then
  918. a_load_const_reg(list, size, 0, dst)
  919. else if (a = -1) then
  920. a_load_reg_reg(list, size, size, src, dst)
  921. else
  922. do_lo_hi_and;
  923. OP_XOR:
  924. if (a = 0) then
  925. a_load_reg_reg(list, size, size, src, dst)
  926. else if (a = -1) then
  927. list.concat(taicpu.op_reg_reg(A_NOT, dst, src))
  928. else
  929. do_lo_hi(A_XORI, A_XORIS);
  930. OP_ROL:
  931. begin
  932. if (size in [OS_64, OS_S64]) then begin
  933. list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, dst, src, a and 63, 0));
  934. end else if (size in [OS_32, OS_S32]) then begin
  935. list.concat(taicpu.op_reg_reg_const_const_const(A_RLWINM, dst, src, a and 31, 0, 31));
  936. end else begin
  937. internalerror(2008091303);
  938. end;
  939. end;
  940. OP_ROR:
  941. begin
  942. if (size in [OS_64, OS_S64]) then begin
  943. list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, dst, src, ((64 - a) and 63), 0));
  944. end else if (size in [OS_32, OS_S32]) then begin
  945. list.concat(taicpu.op_reg_reg_const_const_const(A_RLWINM, dst, src, (32 - a) and 31, 0, 31));
  946. end else begin
  947. internalerror(2008091304);
  948. end;
  949. end;
  950. OP_SHL, OP_SHR, OP_SAR:
  951. begin
  952. if (size in [OS_64, OS_S64]) then
  953. shift := 6
  954. else
  955. shift := 5;
  956. shiftmask := (1 shl shift)-1;
  957. if (a and shiftmask) <> 0 then begin
  958. list.concat(taicpu.op_reg_reg_const(
  959. TShiftOpCG2AsmOpConst[size in [OS_64, OS_S64], op], dst, src, a and shiftmask));
  960. end else
  961. a_load_reg_reg(list, size, size, src, dst);
  962. if ((a shr shift) <> 0) then
  963. internalError(68991);
  964. end
  965. else
  966. internalerror(200109091);
  967. end;
  968. { if all else failed, load the constant in a register and then
  969. perform the operation }
  970. if (useReg) then begin
  971. scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  972. a_load_const_reg(list, size, a, scratchreg);
  973. a_op_reg_reg_reg(list, op, size, scratchreg, src, dst);
  974. end else
  975. maybeadjustresult(list, op, size, dst);
  976. end;
  977. procedure tcgppc.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  978. size: tcgsize; src1, src2, dst: tregister);
  979. const
  980. op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
  981. (A_NONE, A_MR, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
  982. A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR, A_NONE, A_NONE);
  983. op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
  984. (A_NONE, A_MR, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
  985. A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR, A_NONE, A_NONE);
  986. var
  987. tmpreg : TRegister;
  988. begin
  989. case op of
  990. OP_NEG, OP_NOT:
  991. begin
  992. list.concat(taicpu.op_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src1));
  993. if (op = OP_NOT) and not (size in [OS_64, OS_S64]) then
  994. { zero/sign extend result again, fromsize is not important here }
  995. a_load_reg_reg(list, OS_S64, size, dst, dst)
  996. end;
  997. OP_ROL:
  998. begin
  999. if (size in [OS_64, OS_S64]) then begin
  1000. list.concat(taicpu.op_reg_reg_reg_const(A_RLDCL, dst, src2, src1, 0));
  1001. end else if (size in [OS_32, OS_S32]) then begin
  1002. list.concat(taicpu.op_reg_reg_reg_const_const(A_RLWNM, dst, src2, src1, 0, 31));
  1003. end else begin
  1004. internalerror(2008091301);
  1005. end;
  1006. end;
  1007. OP_ROR:
  1008. begin
  1009. tmpreg := getintregister(current_asmdata.CurrAsmList, OS_INT);
  1010. list.concat(taicpu.op_reg_reg(A_NEG, tmpreg, src1));
  1011. if (size in [OS_64, OS_S64]) then begin
  1012. list.concat(taicpu.op_reg_reg_reg_const(A_RLDCL, dst, src2, tmpreg, 0));
  1013. end else if (size in [OS_32, OS_S32]) then begin
  1014. list.concat(taicpu.op_reg_reg_reg_const_const(A_RLWNM, dst, src2, tmpreg, 0, 31));
  1015. end else begin
  1016. internalerror(2008091302);
  1017. end;
  1018. end;
  1019. else
  1020. if (size in [OS_64, OS_S64]) then begin
  1021. list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2,
  1022. src1));
  1023. end else begin
  1024. list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2,
  1025. src1));
  1026. maybeadjustresult(list, op, size, dst);
  1027. end;
  1028. end;
  1029. end;
  1030. {*************** compare instructructions ****************}
  1031. procedure tcgppc.a_cmp_const_reg_label(list: TAsmList; size: tcgsize;
  1032. cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel);
  1033. const
  1034. { unsigned useconst 32bit-op }
  1035. cmpop_table : array[boolean, boolean, boolean] of TAsmOp = (
  1036. ((A_CMPD, A_CMPW), (A_CMPDI, A_CMPWI)),
  1037. ((A_CMPLD, A_CMPLW), (A_CMPLDI, A_CMPLWI))
  1038. );
  1039. var
  1040. tmpreg : TRegister;
  1041. signed, useconst : boolean;
  1042. opsize : TCgSize;
  1043. op : TAsmOp;
  1044. begin
  1045. {$IFDEF EXTDEBUG}
  1046. list.concat(tai_comment.create(strpnew('a_cmp_const_reg_label ' + cgsize2string(size) + ' ' + booltostr(cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE]) + ' ' + inttostr(a) )));
  1047. {$ENDIF EXTDEBUG}
  1048. signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE];
  1049. { in the following case, we generate more efficient code when
  1050. signed is true }
  1051. if (cmp_op in [OC_EQ, OC_NE]) and
  1052. (aword(a) > $FFFF) then
  1053. signed := true;
  1054. opsize := size;
  1055. { do we need to change the operand size because ppc64 only supports 32 and
  1056. 64 bit compares? }
  1057. if (not (size in [OS_32, OS_S32, OS_64, OS_S64])) then begin
  1058. if (signed) then
  1059. opsize := OS_S32
  1060. else
  1061. opsize := OS_32;
  1062. a_load_reg_reg(current_asmdata.CurrAsmList, size, opsize, reg, reg);
  1063. end;
  1064. { can we use immediate compares? }
  1065. useconst := (signed and ( (a >= low(smallint)) and (a <= high(smallint)))) or
  1066. ((not signed) and (aword(a) <= $FFFF));
  1067. op := cmpop_table[not signed, useconst, opsize in [OS_32, OS_S32]];
  1068. if (useconst) then begin
  1069. list.concat(taicpu.op_reg_reg_const(op, NR_CR0, reg, a));
  1070. end else begin
  1071. tmpreg := getintregister(current_asmdata.CurrAsmList, OS_INT);
  1072. a_load_const_reg(current_asmdata.CurrAsmList, opsize, a, tmpreg);
  1073. list.concat(taicpu.op_reg_reg_reg(op, NR_CR0, reg, tmpreg));
  1074. end;
  1075. a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
  1076. end;
  1077. procedure tcgppc.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize;
  1078. cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
  1079. var
  1080. op: tasmop;
  1081. begin
  1082. {$IFDEF extdebug}
  1083. list.concat(tai_comment.create(strpnew('a_cmp_reg_reg_label, size ' + cgsize2string(size) + ' op ' + inttostr(ord(cmp_op)))));
  1084. {$ENDIF extdebug}
  1085. {$note Commented out below check because of compiler weirdness}
  1086. {
  1087. if (not (size in [OS_32, OS_S32, OS_64, OS_S64])) then
  1088. internalerror(200606041);
  1089. }
  1090. if cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE] then
  1091. if (size in [OS_64, OS_S64]) then
  1092. op := A_CMPD
  1093. else
  1094. op := A_CMPW
  1095. else
  1096. if (size in [OS_64, OS_S64]) then
  1097. op := A_CMPLD
  1098. else
  1099. op := A_CMPLW;
  1100. list.concat(taicpu.op_reg_reg_reg(op, NR_CR0, reg2, reg1));
  1101. a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
  1102. end;
  1103. procedure tcgppc.a_jmp_name_direct(list : TAsmList; s : string; prependDot : boolean);
  1104. var
  1105. p: taicpu;
  1106. begin
  1107. if (prependDot) then
  1108. s := '.' + s;
  1109. p := taicpu.op_sym(A_B, current_asmdata.RefAsmSymbol(s));
  1110. p.is_jmp := true;
  1111. list.concat(p)
  1112. end;
  1113. procedure tcgppc.a_jmp_name(list: TAsmList; const s: string);
  1114. var
  1115. p: taicpu;
  1116. begin
  1117. if (target_info.system = system_powerpc64_darwin) then
  1118. begin
  1119. p := taicpu.op_sym(A_B,get_darwin_call_stub(s,false));
  1120. p.is_jmp := true;
  1121. list.concat(p)
  1122. end
  1123. else
  1124. a_jmp_name_direct(list, s, true);
  1125. end;
  1126. procedure tcgppc.a_jmp_always(list: TAsmList; l: tasmlabel);
  1127. begin
  1128. a_jmp(list, A_B, C_None, 0, l);
  1129. end;
  1130. procedure tcgppc.a_jmp_flags(list: TAsmList; const f: TResFlags; l:
  1131. tasmlabel);
  1132. var
  1133. c: tasmcond;
  1134. begin
  1135. c := flags_to_cond(f);
  1136. a_jmp(list, A_BC, c.cond, c.cr - RS_CR0, l);
  1137. end;
  1138. procedure tcgppc.g_flags2reg(list: TAsmList; size: TCgSize; const f:
  1139. TResFlags; reg: TRegister);
  1140. var
  1141. testbit: byte;
  1142. bitvalue: boolean;
  1143. begin
  1144. { get the bit to extract from the conditional register + its requested value (0 or 1) }
  1145. testbit := ((f.cr - RS_CR0) * 4);
  1146. case f.flag of
  1147. F_EQ, F_NE:
  1148. begin
  1149. inc(testbit, 2);
  1150. bitvalue := f.flag = F_EQ;
  1151. end;
  1152. F_LT, F_GE:
  1153. begin
  1154. bitvalue := f.flag = F_LT;
  1155. end;
  1156. F_GT, F_LE:
  1157. begin
  1158. inc(testbit);
  1159. bitvalue := f.flag = F_GT;
  1160. end;
  1161. else
  1162. internalerror(200112261);
  1163. end;
  1164. { load the conditional register in the destination reg }
  1165. list.concat(taicpu.op_reg(A_MFCR, reg));
  1166. { we will move the bit that has to be tested to bit 0 by rotating left }
  1167. testbit := (testbit + 1) and 31;
  1168. { extract bit }
  1169. list.concat(taicpu.op_reg_reg_const_const_const(
  1170. A_RLWINM,reg,reg,testbit,31,31));
  1171. { if we need the inverse, xor with 1 }
  1172. if not bitvalue then
  1173. list.concat(taicpu.op_reg_reg_const(A_XORI, reg, reg, 1));
  1174. end;
  1175. { *********** entry/exit code and address loading ************ }
  1176. procedure tcgppc.g_save_registers(list: TAsmList);
  1177. begin
  1178. { this work is done in g_proc_entry; additionally it is not safe
  1179. to use it because it is called at some weird time }
  1180. end;
  1181. procedure tcgppc.g_restore_registers(list: TAsmList);
  1182. begin
  1183. { this work is done in g_proc_exit; mainly because it is not safe to
  1184. put the register restore code here because it is called at some weird time }
  1185. end;
  1186. procedure tcgppc.calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
  1187. var
  1188. reg : TSuperRegister;
  1189. begin
  1190. fprcount := 0;
  1191. firstfpr := RS_F31;
  1192. if not (po_assembler in current_procinfo.procdef.procoptions) then
  1193. for reg := RS_F14 to RS_F31 do
  1194. if reg in rg[R_FPUREGISTER].used_in_proc then begin
  1195. fprcount := ord(RS_F31)-ord(reg)+1;
  1196. firstfpr := reg;
  1197. break;
  1198. end;
  1199. end;
  1200. procedure tcgppc.calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
  1201. var
  1202. reg : TSuperRegister;
  1203. begin
  1204. gprcount := 0;
  1205. firstgpr := RS_R31;
  1206. if not (po_assembler in current_procinfo.procdef.procoptions) then
  1207. for reg := RS_R14 to RS_R31 do
  1208. if reg in rg[R_INTREGISTER].used_in_proc then begin
  1209. gprcount := ord(RS_R31)-ord(reg)+1;
  1210. firstgpr := reg;
  1211. break;
  1212. end;
  1213. end;
  1214. procedure tcgppc.profilecode_savepara(para : tparavarsym; list : TAsmList);
  1215. begin
  1216. case (para.paraloc[calleeside].location^.loc) of
  1217. LOC_REGISTER, LOC_CREGISTER:
  1218. a_load_reg_ref(list, OS_INT, para.paraloc[calleeside].Location^.size,
  1219. para.paraloc[calleeside].Location^.register, para.localloc.reference);
  1220. LOC_FPUREGISTER, LOC_CFPUREGISTER:
  1221. a_loadfpu_reg_ref(list, para.paraloc[calleeside].Location^.size,
  1222. para.paraloc[calleeside].Location^.size,
  1223. para.paraloc[calleeside].Location^.register, para.localloc.reference);
  1224. LOC_MMREGISTER, LOC_CMMREGISTER:
  1225. { not supported }
  1226. internalerror(2006041801);
  1227. end;
  1228. end;
  1229. procedure tcgppc.profilecode_restorepara(para : tparavarsym; list : TAsmList);
  1230. begin
  1231. case (para.paraloc[calleeside].Location^.loc) of
  1232. LOC_REGISTER, LOC_CREGISTER:
  1233. a_load_ref_reg(list, para.paraloc[calleeside].Location^.size, OS_INT,
  1234. para.localloc.reference, para.paraloc[calleeside].Location^.register);
  1235. LOC_FPUREGISTER, LOC_CFPUREGISTER:
  1236. a_loadfpu_ref_reg(list, para.paraloc[calleeside].Location^.size,
  1237. para.paraloc[calleeside].Location^.size,
  1238. para.localloc.reference, para.paraloc[calleeside].Location^.register);
  1239. LOC_MMREGISTER, LOC_CMMREGISTER:
  1240. { not supported }
  1241. internalerror(2006041802);
  1242. end;
  1243. end;
  1244. procedure tcgppc.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: aint);
  1245. var
  1246. hsym : tsym;
  1247. href : treference;
  1248. paraloc : Pcgparalocation;
  1249. begin
  1250. if ((ioffset >= low(smallint)) and (ioffset < high(smallint))) then begin
  1251. { the original method can handle this }
  1252. inherited g_adjust_self_value(list, procdef, ioffset);
  1253. exit;
  1254. end;
  1255. { calculate the parameter info for the procdef }
  1256. procdef.init_paraloc_info(callerside);
  1257. hsym:=tsym(procdef.parast.Find('self'));
  1258. if not(assigned(hsym) and
  1259. (hsym.typ=paravarsym)) then
  1260. internalerror(2010103101);
  1261. paraloc:=tparavarsym(hsym).paraloc[callerside].location;
  1262. while paraloc<>nil do
  1263. with paraloc^ do begin
  1264. case loc of
  1265. LOC_REGISTER:
  1266. begin
  1267. a_load_const_reg(list, size, ioffset, NR_R11);
  1268. a_op_reg_reg(list, OP_SUB, size, NR_R11, register);
  1269. end else
  1270. internalerror(2010103102);
  1271. end;
  1272. paraloc:=next;
  1273. end;
  1274. end;
  1275. procedure tcgppc.g_profilecode(list: TAsmList);
  1276. begin
  1277. current_procinfo.procdef.paras.ForEachCall(TObjectListCallback(@profilecode_savepara), list);
  1278. a_call_name_direct(list, '_mcount', false, false, true);
  1279. current_procinfo.procdef.paras.ForEachCall(TObjectListCallback(@profilecode_restorepara), list);
  1280. end;
  1281. { Generates the entry code of a procedure/function.
  1282. This procedure may be called before, as well as after g_return_from_proc
  1283. is called. localsize is the sum of the size necessary for local variables
  1284. and the maximum possible combined size of ALL the parameters of a procedure
  1285. called by the current one
  1286. IMPORTANT: registers are not to be allocated through the register
  1287. allocator here, because the register colouring has already occured !!
  1288. }
  1289. procedure tcgppc.g_proc_entry(list: TAsmList; localsize: longint;
  1290. nostackframe: boolean);
  1291. var
  1292. firstregfpu, firstreggpr: TSuperRegister;
  1293. needslinkreg: boolean;
  1294. fprcount, gprcount : aint;
  1295. { Save standard registers, both FPR and GPR; does not support VMX/Altivec }
  1296. procedure save_standard_registers;
  1297. var
  1298. regcount : TSuperRegister;
  1299. href : TReference;
  1300. mayNeedLRStore : boolean;
  1301. begin
  1302. { there are two ways to do this: manually, by generating a few "std" instructions,
  1303. or via the restore helper functions. The latter are selected by the -Og switch,
  1304. i.e. "optimize for size" }
  1305. if (cs_opt_size in current_settings.optimizerswitches) and
  1306. (target_info.system <> system_powerpc64_darwin) then begin
  1307. mayNeedLRStore := false;
  1308. if ((fprcount > 0) and (gprcount > 0)) then begin
  1309. a_op_const_reg_reg(list, OP_SUB, OS_INT, 8 * fprcount, NR_R1, NR_R12);
  1310. a_call_name_direct(list, '_savegpr1_' + intToStr(32-gprcount), false, false, false, false);
  1311. a_call_name_direct(list, '_savefpr_' + intToStr(32-fprcount), false, false, false, false);
  1312. end else if (gprcount > 0) then
  1313. a_call_name_direct(list, '_savegpr0_' + intToStr(32-gprcount), false, false, false, false)
  1314. else if (fprcount > 0) then
  1315. a_call_name_direct(list, '_savefpr_' + intToStr(32-fprcount), false, false, false, false)
  1316. else
  1317. mayNeedLRStore := true;
  1318. end else begin
  1319. { save registers, FPU first, then GPR }
  1320. reference_reset_base(href, NR_STACK_POINTER_REG, -8, 8);
  1321. if (fprcount > 0) then
  1322. for regcount := RS_F31 downto firstregfpu do begin
  1323. a_loadfpu_reg_ref(list, OS_FLOAT, OS_FLOAT, newreg(R_FPUREGISTER,
  1324. regcount, R_SUBNONE), href);
  1325. dec(href.offset, tcgsize2size[OS_FLOAT]);
  1326. end;
  1327. if (gprcount > 0) then
  1328. for regcount := RS_R31 downto firstreggpr do begin
  1329. a_load_reg_ref(list, OS_INT, OS_INT, newreg(R_INTREGISTER, regcount,
  1330. R_SUBNONE), href);
  1331. dec(href.offset, sizeof(pint));
  1332. end;
  1333. { VMX registers not supported by FPC atm }
  1334. { in this branch we always need to store LR ourselves}
  1335. mayNeedLRStore := true;
  1336. end;
  1337. { we may need to store R0 (=LR) ourselves }
  1338. if ((cs_profile in init_settings.moduleswitches) or (mayNeedLRStore)) and (needslinkreg) then begin
  1339. reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF, 8);
  1340. list.concat(taicpu.op_reg_ref(A_STD, NR_R0, href));
  1341. end;
  1342. end;
  1343. var
  1344. href: treference;
  1345. begin
  1346. calcFirstUsedFPR(firstregfpu, fprcount);
  1347. calcFirstUsedGPR(firstreggpr, gprcount);
  1348. { calculate real stack frame size }
  1349. localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
  1350. gprcount, fprcount);
  1351. { determine whether we need to save the link register }
  1352. needslinkreg :=
  1353. not(nostackframe) and
  1354. (save_lr_in_prologue or
  1355. ((cs_opt_size in current_settings.optimizerswitches) and
  1356. ((fprcount > 0) or
  1357. (gprcount > 0))));
  1358. a_reg_alloc(list, NR_STACK_POINTER_REG);
  1359. a_reg_alloc(list, NR_R0);
  1360. { move link register to r0 }
  1361. if (needslinkreg) then
  1362. list.concat(taicpu.op_reg(A_MFLR, NR_R0));
  1363. save_standard_registers;
  1364. { save old stack frame pointer }
  1365. if (tppcprocinfo(current_procinfo).needs_frame_pointer) then begin
  1366. a_reg_alloc(list, NR_OLD_STACK_POINTER_REG);
  1367. list.concat(taicpu.op_reg_reg(A_MR, NR_OLD_STACK_POINTER_REG, NR_STACK_POINTER_REG));
  1368. end;
  1369. { create stack frame }
  1370. if (not nostackframe) and (localsize > 0) and
  1371. tppcprocinfo(current_procinfo).needstackframe then begin
  1372. if (localsize <= high(smallint)) then begin
  1373. reference_reset_base(href, NR_STACK_POINTER_REG, -localsize, 8);
  1374. a_load_store(list, A_STDU, NR_STACK_POINTER_REG, href);
  1375. end else begin
  1376. reference_reset_base(href, NR_NO, -localsize, 8);
  1377. { Use R0 for loading the constant (which is definitely > 32k when entering
  1378. this branch).
  1379. Inlined at this position because it must not use temp registers because
  1380. register allocations have already been done }
  1381. { Code template:
  1382. lis r0,ofs@highest
  1383. ori r0,r0,ofs@higher
  1384. sldi r0,r0,32
  1385. oris r0,r0,ofs@h
  1386. ori r0,r0,ofs@l
  1387. }
  1388. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
  1389. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
  1390. list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
  1391. list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
  1392. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));
  1393. list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
  1394. end;
  1395. end;
  1396. { CR register not used by FPC atm }
  1397. { keep R1 allocated??? }
  1398. a_reg_dealloc(list, NR_R0);
  1399. end;
  1400. { Generates the exit code for a method.
  1401. This procedure may be called before, as well as after g_stackframe_entry
  1402. is called.
  1403. IMPORTANT: registers are not to be allocated through the register
  1404. allocator here, because the register colouring has already occured !!
  1405. }
  1406. procedure tcgppc.g_proc_exit(list: TAsmList; parasize: longint; nostackframe:
  1407. boolean);
  1408. var
  1409. firstregfpu, firstreggpr: TSuperRegister;
  1410. needslinkreg : boolean;
  1411. fprcount, gprcount: aint;
  1412. { Restore standard registers, both FPR and GPR; does not support VMX/Altivec }
  1413. procedure restore_standard_registers;
  1414. var
  1415. { flag indicating whether we need to manually add the exit code (e.g. blr instruction)
  1416. or not }
  1417. needsExitCode : Boolean;
  1418. href : treference;
  1419. regcount : TSuperRegister;
  1420. begin
  1421. { there are two ways to do this: manually, by generating a few "ld" instructions,
  1422. or via the restore helper functions. The latter are selected by the -Og switch,
  1423. i.e. "optimize for size" }
  1424. if (cs_opt_size in current_settings.optimizerswitches) then begin
  1425. needsExitCode := false;
  1426. if ((fprcount > 0) and (gprcount > 0)) then begin
  1427. a_op_const_reg_reg(list, OP_SUB, OS_INT, 8 * fprcount, NR_R1, NR_R12);
  1428. a_call_name_direct(list, '_restgpr1_' + intToStr(32-gprcount), false, false, false, false);
  1429. a_jmp_name_direct(list, '_restfpr_' + intToStr(32-fprcount), false);
  1430. end else if (gprcount > 0) then
  1431. a_jmp_name_direct(list, '_restgpr0_' + intToStr(32-gprcount), false)
  1432. else if (fprcount > 0) then
  1433. a_jmp_name_direct(list, '_restfpr_' + intToStr(32-fprcount), false)
  1434. else
  1435. needsExitCode := true;
  1436. end else begin
  1437. needsExitCode := true;
  1438. { restore registers, FPU first, GPR next }
  1439. reference_reset_base(href, NR_STACK_POINTER_REG, -tcgsize2size[OS_FLOAT], 8);
  1440. if (fprcount > 0) then
  1441. for regcount := RS_F31 downto firstregfpu do begin
  1442. a_loadfpu_ref_reg(list, OS_FLOAT, OS_FLOAT, href, newreg(R_FPUREGISTER, regcount,
  1443. R_SUBNONE));
  1444. dec(href.offset, tcgsize2size[OS_FLOAT]);
  1445. end;
  1446. if (gprcount > 0) then
  1447. for regcount := RS_R31 downto firstreggpr do begin
  1448. a_load_ref_reg(list, OS_INT, OS_INT, href, newreg(R_INTREGISTER, regcount,
  1449. R_SUBNONE));
  1450. dec(href.offset, sizeof(pint));
  1451. end;
  1452. { VMX not supported by FPC atm }
  1453. end;
  1454. if (needsExitCode) then begin
  1455. { restore LR (if needed) }
  1456. if (needslinkreg) then begin
  1457. reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF, 8);
  1458. list.concat(taicpu.op_reg_ref(A_LD, NR_R0, href));
  1459. list.concat(taicpu.op_reg(A_MTLR, NR_R0));
  1460. end;
  1461. { generate return instruction }
  1462. list.concat(taicpu.op_none(A_BLR));
  1463. end;
  1464. end;
  1465. var
  1466. href: treference;
  1467. localsize : aint;
  1468. begin
  1469. calcFirstUsedFPR(firstregfpu, fprcount);
  1470. calcFirstUsedGPR(firstreggpr, gprcount);
  1471. { determine whether we need to restore the link register }
  1472. needslinkreg :=
  1473. not(nostackframe) and
  1474. (((not (po_assembler in current_procinfo.procdef.procoptions)) and
  1475. ((pi_do_call in current_procinfo.flags) or (cs_profile in init_settings.moduleswitches))) or
  1476. ((cs_opt_size in current_settings.optimizerswitches) and ((fprcount > 0) or (gprcount > 0))) or
  1477. ([cs_lineinfo, cs_debuginfo] * current_settings.moduleswitches <> []));
  1478. { calculate stack frame }
  1479. localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
  1480. gprcount, fprcount);
  1481. { CR register not supported }
  1482. { restore stack pointer }
  1483. if (not nostackframe) and (localsize > 0) and
  1484. tppcprocinfo(current_procinfo).needstackframe then begin
  1485. if (localsize <= high(smallint)) then begin
  1486. list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, localsize));
  1487. end else begin
  1488. reference_reset_base(href, NR_NO, localsize, 8);
  1489. { use R0 for loading the constant (which is definitely > 32k when entering
  1490. this branch)
  1491. Inlined because it must not use temp registers because register allocations
  1492. have already been done
  1493. }
  1494. { Code template:
  1495. lis r0,ofs@highest
  1496. ori r0,ofs@higher
  1497. sldi r0,r0,32
  1498. oris r0,r0,ofs@h
  1499. ori r0,r0,ofs@l
  1500. }
  1501. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
  1502. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
  1503. list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
  1504. list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
  1505. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));
  1506. list.concat(taicpu.op_reg_reg_reg(A_ADD, NR_R1, NR_R1, NR_R0));
  1507. end;
  1508. end;
  1509. restore_standard_registers;
  1510. end;
  1511. procedure tcgppc.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r:
  1512. tregister);
  1513. var
  1514. ref2, tmpref: treference;
  1515. { register used to construct address }
  1516. tempreg : TRegister;
  1517. begin
  1518. if (target_info.system = system_powerpc64_darwin) then
  1519. begin
  1520. inherited a_loadaddr_ref_reg(list,ref,r);
  1521. exit;
  1522. end;
  1523. ref2 := ref;
  1524. fixref(list, ref2);
  1525. { load a symbol }
  1526. if (assigned(ref2.symbol) or (hasLargeOffset(ref2))) then begin
  1527. { add the symbol's value to the base of the reference, and if the }
  1528. { reference doesn't have a base, create one }
  1529. reference_reset(tmpref, ref2.alignment);
  1530. tmpref.offset := ref2.offset;
  1531. tmpref.symbol := ref2.symbol;
  1532. tmpref.relsymbol := ref2.relsymbol;
  1533. { load 64 bit reference into r. If the reference already has a base register,
  1534. first load the 64 bit value into a temp register, then add it to the result
  1535. register rD }
  1536. if (ref2.base <> NR_NO) then begin
  1537. { already have a base register, so allocate a new one }
  1538. tempreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1539. end else begin
  1540. tempreg := r;
  1541. end;
  1542. { code for loading a reference from a symbol into a register rD }
  1543. (*
  1544. lis rX,SYM@highest
  1545. ori rX,SYM@higher
  1546. sldi rX,rX,32
  1547. oris rX,rX,SYM@h
  1548. ori rX,rX,SYM@l
  1549. *)
  1550. {$IFDEF EXTDEBUG}
  1551. list.concat(tai_comment.create(strpnew('loadaddr_ref_reg ')));
  1552. {$ENDIF EXTDEBUG}
  1553. if (assigned(tmpref.symbol)) then begin
  1554. tmpref.refaddr := addr_highest;
  1555. list.concat(taicpu.op_reg_ref(A_LIS, tempreg, tmpref));
  1556. tmpref.refaddr := addr_higher;
  1557. list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
  1558. list.concat(taicpu.op_reg_reg_const(A_SLDI, tempreg, tempreg, 32));
  1559. tmpref.refaddr := addr_high;
  1560. list.concat(taicpu.op_reg_reg_ref(A_ORIS, tempreg, tempreg, tmpref));
  1561. tmpref.refaddr := addr_low;
  1562. list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
  1563. end else
  1564. a_load_const_reg(list, OS_ADDR, tmpref.offset, tempreg);
  1565. { if there's already a base register, add the temp register contents to
  1566. the base register }
  1567. if (ref2.base <> NR_NO) then begin
  1568. list.concat(taicpu.op_reg_reg_reg(A_ADD, r, tempreg, ref2.base));
  1569. end;
  1570. end else if (ref2.offset <> 0) then begin
  1571. { no symbol, but offset <> 0 }
  1572. if (ref2.base <> NR_NO) then begin
  1573. a_op_const_reg_reg(list, OP_ADD, OS_64, ref2.offset, ref2.base, r)
  1574. { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never
  1575. occurs, so now only ref.offset has to be loaded }
  1576. end else begin
  1577. a_load_const_reg(list, OS_64, ref2.offset, r);
  1578. end;
  1579. end else if (ref2.index <> NR_NO) then begin
  1580. list.concat(taicpu.op_reg_reg_reg(A_ADD, r, ref2.base, ref2.index))
  1581. end else if (ref2.base <> NR_NO) and
  1582. (r <> ref2.base) then begin
  1583. a_load_reg_reg(list, OS_ADDR, OS_ADDR, ref2.base, r)
  1584. end else begin
  1585. list.concat(taicpu.op_reg_const(A_LI, r, 0));
  1586. end;
  1587. end;
  1588. { ************* concatcopy ************ }
  1589. procedure tcgppc.g_concatcopy(list: TAsmList; const source, dest: treference;
  1590. len: aint);
  1591. var
  1592. countreg, tempreg:TRegister;
  1593. src, dst: TReference;
  1594. lab: tasmlabel;
  1595. count, count2, step: longint;
  1596. size: tcgsize;
  1597. begin
  1598. {$IFDEF extdebug}
  1599. if len > high(aint) then
  1600. internalerror(2002072704);
  1601. list.concat(tai_comment.create(strpnew('g_concatcopy1 ' + inttostr(len) + ' bytes left ')));
  1602. {$ENDIF extdebug}
  1603. { if the references are equal, exit, there is no need to copy anything }
  1604. if references_equal(source, dest) or
  1605. (len=0) then
  1606. exit;
  1607. { make sure short loads are handled as optimally as possible;
  1608. note that the data here never overlaps, so we can do a forward
  1609. copy at all times.
  1610. NOTE: maybe use some scratch registers to pair load/store instructions
  1611. }
  1612. if (len <= 8) then begin
  1613. src := source; dst := dest;
  1614. {$IFDEF extdebug}
  1615. list.concat(tai_comment.create(strpnew('g_concatcopy3 ' + inttostr(src.offset) + ' ' + inttostr(dst.offset))));
  1616. {$ENDIF extdebug}
  1617. while (len <> 0) do begin
  1618. if (len = 8) then begin
  1619. a_load_ref_ref(list, OS_64, OS_64, src, dst);
  1620. dec(len, 8);
  1621. end else if (len >= 4) then begin
  1622. a_load_ref_ref(list, OS_32, OS_32, src, dst);
  1623. inc(src.offset, 4); inc(dst.offset, 4);
  1624. dec(len, 4);
  1625. end else if (len >= 2) then begin
  1626. a_load_ref_ref(list, OS_16, OS_16, src, dst);
  1627. inc(src.offset, 2); inc(dst.offset, 2);
  1628. dec(len, 2);
  1629. end else begin
  1630. a_load_ref_ref(list, OS_8, OS_8, src, dst);
  1631. inc(src.offset, 1); inc(dst.offset, 1);
  1632. dec(len, 1);
  1633. end;
  1634. end;
  1635. exit;
  1636. end;
  1637. {$IFDEF extdebug}
  1638. list.concat(tai_comment.create(strpnew('g_concatcopy2 ' + inttostr(len) + ' bytes left ')));
  1639. {$ENDIF extdebug}
  1640. if not(source.alignment in [1,2]) and
  1641. not(dest.alignment in [1,2]) then
  1642. begin
  1643. count:=len div 8;
  1644. step:=8;
  1645. size:=OS_64;
  1646. end
  1647. else
  1648. begin
  1649. count:=len div 4;
  1650. step:=4;
  1651. size:=OS_32;
  1652. end;
  1653. tempreg:=getintregister(list,size);
  1654. reference_reset(src,source.alignment);
  1655. reference_reset(dst,dest.alignment);
  1656. { load the address of source into src.base }
  1657. if (count > 4) or
  1658. not issimpleref(source) or
  1659. ((source.index <> NR_NO) and
  1660. ((source.offset + len) > high(smallint))) then begin
  1661. src.base := getaddressregister(list);
  1662. a_loadaddr_ref_reg(list, source, src.base);
  1663. end else begin
  1664. src := source;
  1665. end;
  1666. { load the address of dest into dst.base }
  1667. if (count > 4) or
  1668. not issimpleref(dest) or
  1669. ((dest.index <> NR_NO) and
  1670. ((dest.offset + len) > high(smallint))) then begin
  1671. dst.base := getaddressregister(list);
  1672. a_loadaddr_ref_reg(list, dest, dst.base);
  1673. end else begin
  1674. dst := dest;
  1675. end;
  1676. { generate a loop }
  1677. if count > 4 then begin
  1678. { the offsets are zero after the a_loadaddress_ref_reg and just
  1679. have to be set to step. I put an Inc there so debugging may be
  1680. easier (should offset be different from zero here, it will be
  1681. easy to notice in the generated assembler }
  1682. inc(dst.offset, step);
  1683. inc(src.offset, step);
  1684. list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, step));
  1685. list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, step));
  1686. countreg := getintregister(list, OS_INT);
  1687. a_load_const_reg(list, OS_INT, count, countreg);
  1688. current_asmdata.getjumplabel(lab);
  1689. a_label(list, lab);
  1690. list.concat(taicpu.op_reg_reg_const(A_SUBIC_, countreg, countreg, 1));
  1691. if (size=OS_64) then
  1692. begin
  1693. list.concat(taicpu.op_reg_ref(A_LDU, tempreg, src));
  1694. list.concat(taicpu.op_reg_ref(A_STDU, tempreg, dst));
  1695. end
  1696. else
  1697. begin
  1698. list.concat(taicpu.op_reg_ref(A_LWZU, tempreg, src));
  1699. list.concat(taicpu.op_reg_ref(A_STWU, tempreg, dst));
  1700. end;
  1701. a_jmp(list, A_BC, C_NE, 0, lab);
  1702. a_reg_sync(list,src.base);
  1703. a_reg_sync(list,dst.base);
  1704. a_reg_sync(list,countreg);
  1705. len := len mod step;
  1706. count := 0;
  1707. end;
  1708. { unrolled loop }
  1709. if count > 0 then begin
  1710. for count2 := 1 to count do begin
  1711. a_load_ref_reg(list, size, size, src, tempreg);
  1712. a_load_reg_ref(list, size, size, tempreg, dst);
  1713. inc(src.offset, step);
  1714. inc(dst.offset, step);
  1715. end;
  1716. len := len mod step;
  1717. end;
  1718. if (len and 4) <> 0 then begin
  1719. a_load_ref_reg(list, OS_32, OS_32, src, tempreg);
  1720. a_load_reg_ref(list, OS_32, OS_32, tempreg, dst);
  1721. inc(src.offset, 4);
  1722. inc(dst.offset, 4);
  1723. end;
  1724. { copy the leftovers }
  1725. if (len and 2) <> 0 then begin
  1726. a_load_ref_reg(list, OS_16, OS_16, src, tempreg);
  1727. a_load_reg_ref(list, OS_16, OS_16, tempreg, dst);
  1728. inc(src.offset, 2);
  1729. inc(dst.offset, 2);
  1730. end;
  1731. if (len and 1) <> 0 then begin
  1732. a_load_ref_reg(list, OS_8, OS_8, src, tempreg);
  1733. a_load_reg_ref(list, OS_8, OS_8, tempreg, dst);
  1734. end;
  1735. end;
  1736. procedure tcgppc.g_external_wrapper(list: TAsmList; pd: TProcDef; const externalname: string);
  1737. var
  1738. href : treference;
  1739. begin
  1740. if (target_info.system <> system_powerpc64_linux) then begin
  1741. inherited;
  1742. exit;
  1743. end;
  1744. { for ppc64/linux emit correct code which sets up a stack frame and then calls the
  1745. external method normally to ensure that the GOT/TOC will be loaded correctly if
  1746. required.
  1747. It's not really advantageous to use cg methods here because they are too specialized.
  1748. I.e. the resulting code sequence looks as follows:
  1749. mflr r0
  1750. std r0, 16(r1)
  1751. stdu r1, -112(r1)
  1752. bl <external_method>
  1753. nop
  1754. addi r1, r1, 112
  1755. ld r0, 16(r1)
  1756. mtlr r0
  1757. blr
  1758. }
  1759. list.concat(taicpu.op_reg(A_MFLR, NR_R0));
  1760. reference_reset_base(href, NR_STACK_POINTER_REG, 16, 8);
  1761. list.concat(taicpu.op_reg_ref(A_STD, NR_R0, href));
  1762. reference_reset_base(href, NR_STACK_POINTER_REG, -MINIMUM_STACKFRAME_SIZE, 8);
  1763. list.concat(taicpu.op_reg_ref(A_STDU, NR_STACK_POINTER_REG, href));
  1764. list.concat(taicpu.op_sym(A_BL, current_asmdata.RefAsmSymbol(externalname)));
  1765. list.concat(taicpu.op_none(A_NOP));
  1766. list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, MINIMUM_STACKFRAME_SIZE));
  1767. reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF, 8);
  1768. list.concat(taicpu.op_reg_ref(A_LD, NR_R0, href));
  1769. list.concat(taicpu.op_reg(A_MTLR, NR_R0));
  1770. list.concat(taicpu.op_none(A_BLR));
  1771. end;
  1772. {***************** This is private property, keep out! :) *****************}
  1773. procedure tcgppc.maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
  1774. const
  1775. overflowops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
  1776. begin
  1777. {$IFDEF EXTDEBUG}
  1778. list.concat(tai_comment.create(strpnew('maybeadjustresult op = ' + cgop2string(op) + ' size = ' + cgsize2string(size))));
  1779. {$ENDIF EXTDEBUG}
  1780. if (op in overflowops) and (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32]) then
  1781. a_load_reg_reg(list, OS_64, size, dst, dst);
  1782. end;
  1783. function tcgppc.issimpleref(const ref: treference): boolean;
  1784. begin
  1785. if (ref.base = NR_NO) and
  1786. (ref.index <> NR_NO) then
  1787. internalerror(200208101);
  1788. result :=
  1789. not (assigned(ref.symbol)) and
  1790. (((ref.index = NR_NO) and
  1791. (ref.offset >= low(smallint)) and
  1792. (ref.offset <= high(smallint))) or
  1793. ((ref.index <> NR_NO) and
  1794. (ref.offset = 0)));
  1795. end;
  1796. procedure tcgppc.a_load_store(list: TAsmList; op: tasmop; reg: tregister;
  1797. ref: treference);
  1798. procedure maybefixup64bitoffset;
  1799. var
  1800. tmpreg: tregister;
  1801. begin
  1802. { for some instructions we need to check that the offset is divisible by at
  1803. least four. If not, add the bytes which are "off" to the base register and
  1804. adjust the offset accordingly }
  1805. case op of
  1806. A_LD, A_LDU, A_STD, A_STDU, A_LWA :
  1807. if ((ref.offset mod 4) <> 0) then begin
  1808. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1809. if (ref.base <> NR_NO) then begin
  1810. a_op_const_reg_reg(list, OP_ADD, OS_ADDR, ref.offset mod 4, ref.base, tmpreg);
  1811. ref.base := tmpreg;
  1812. end else begin
  1813. list.concat(taicpu.op_reg_const(A_LI, tmpreg, ref.offset mod 4));
  1814. ref.base := tmpreg;
  1815. end;
  1816. ref.offset := (ref.offset div 4) * 4;
  1817. end;
  1818. end;
  1819. end;
  1820. var
  1821. tmpreg, tmpreg2: tregister;
  1822. tmpref: treference;
  1823. largeOffset: Boolean;
  1824. begin
  1825. if (target_info.system = system_powerpc64_darwin) then
  1826. begin
  1827. { darwin/ppc64 works with 32 bit relocatable symbol addresses }
  1828. maybefixup64bitoffset;
  1829. inherited a_load_store(list,op,reg,ref);
  1830. exit
  1831. end;
  1832. { at this point there must not be a combination of values in the ref treference
  1833. which is not possible to directly map to instructions of the PowerPC architecture }
  1834. if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
  1835. internalerror(200310131);
  1836. { if this is a PIC'ed address, handle it and exit }
  1837. if (ref.refaddr = addr_pic) then begin
  1838. if (ref.offset <> 0) then
  1839. internalerror(2006010501);
  1840. if (ref.index <> NR_NO) then
  1841. internalerror(2006010502);
  1842. if (not assigned(ref.symbol)) then
  1843. internalerror(200601050);
  1844. list.concat(taicpu.op_reg_ref(op, reg, ref));
  1845. exit;
  1846. end;
  1847. maybefixup64bitoffset;
  1848. {$IFDEF EXTDEBUG}
  1849. list.concat(tai_comment.create(strpnew('a_load_store1 ' + BoolToStr(ref.refaddr = addr_pic))));
  1850. {$ENDIF EXTDEBUG}
  1851. { if we have to load/store from a symbol or large addresses, use a temporary register
  1852. containing the address }
  1853. if (assigned(ref.symbol) or (hasLargeOffset(ref))) then begin
  1854. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1855. if (hasLargeOffset(ref) and (ref.base = NR_NO)) then begin
  1856. ref.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1857. a_load_const_reg(list, OS_ADDR, ref.offset, ref.base);
  1858. ref.offset := 0;
  1859. end;
  1860. reference_reset(tmpref, ref.alignment);
  1861. tmpref.symbol := ref.symbol;
  1862. tmpref.relsymbol := ref.relsymbol;
  1863. tmpref.offset := ref.offset;
  1864. if (ref.base <> NR_NO) then begin
  1865. { As long as the TOC isn't working we try to achieve highest speed (in this
  1866. case by allowing instructions execute in parallel) as possible at the cost
  1867. of using another temporary register. So the code template when there is
  1868. a base register and an offset is the following:
  1869. lis rT1, SYM+offs@highest
  1870. ori rT1, rT1, SYM+offs@higher
  1871. lis rT2, SYM+offs@hi
  1872. ori rT2, SYM+offs@lo
  1873. rldimi rT2, rT1, 32
  1874. <op>X reg, base, rT2
  1875. }
  1876. tmpreg2 := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1877. if (assigned(tmpref.symbol)) then begin
  1878. tmpref.refaddr := addr_highest;
  1879. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
  1880. tmpref.refaddr := addr_higher;
  1881. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
  1882. tmpref.refaddr := addr_high;
  1883. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg2, tmpref));
  1884. tmpref.refaddr := addr_low;
  1885. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg2, tmpreg2, tmpref));
  1886. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, tmpreg2, tmpreg, 32, 0));
  1887. end else
  1888. a_load_const_reg(list, OS_ADDR, tmpref.offset, tmpreg2);
  1889. reference_reset(tmpref, ref.alignment);
  1890. tmpref.base := ref.base;
  1891. tmpref.index := tmpreg2;
  1892. case op of
  1893. { the code generator doesn't generate update instructions anyway, so
  1894. error out on those instructions }
  1895. A_LBZ : op := A_LBZX;
  1896. A_LHZ : op := A_LHZX;
  1897. A_LWZ : op := A_LWZX;
  1898. A_LD : op := A_LDX;
  1899. A_LHA : op := A_LHAX;
  1900. A_LWA : op := A_LWAX;
  1901. A_LFS : op := A_LFSX;
  1902. A_LFD : op := A_LFDX;
  1903. A_STB : op := A_STBX;
  1904. A_STH : op := A_STHX;
  1905. A_STW : op := A_STWX;
  1906. A_STD : op := A_STDX;
  1907. A_STFS : op := A_STFSX;
  1908. A_STFD : op := A_STFDX;
  1909. else
  1910. { unknown load/store opcode }
  1911. internalerror(2005101302);
  1912. end;
  1913. list.concat(taicpu.op_reg_ref(op, reg, tmpref));
  1914. end else begin
  1915. { when accessing value from a reference without a base register, use the
  1916. following code template:
  1917. lis rT,SYM+offs@highesta
  1918. ori rT,SYM+offs@highera
  1919. sldi rT,rT,32
  1920. oris rT,rT,SYM+offs@ha
  1921. ld rD,SYM+offs@l(rT)
  1922. }
  1923. tmpref.refaddr := addr_highesta;
  1924. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
  1925. tmpref.refaddr := addr_highera;
  1926. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
  1927. list.concat(taicpu.op_reg_reg_const(A_SLDI, tmpreg, tmpreg, 32));
  1928. tmpref.refaddr := addr_higha;
  1929. list.concat(taicpu.op_reg_reg_ref(A_ORIS, tmpreg, tmpreg, tmpref));
  1930. tmpref.base := tmpreg;
  1931. tmpref.refaddr := addr_low;
  1932. list.concat(taicpu.op_reg_ref(op, reg, tmpref));
  1933. end;
  1934. end else begin
  1935. list.concat(taicpu.op_reg_ref(op, reg, ref));
  1936. end;
  1937. end;
  1938. procedure tcgppc.loadConstantPIC(list : TAsmList; size : TCGSize; a : aint; reg : TRegister);
  1939. var
  1940. l: tasmsymbol;
  1941. ref: treference;
  1942. symname : string;
  1943. begin
  1944. maybe_new_object_file(current_asmdata.asmlists[al_picdata]);
  1945. symname := '_$' + current_asmdata.name + '$toc$' + hexstr(a, sizeof(a)*2);
  1946. l:=current_asmdata.getasmsymbol(symname);
  1947. if not(assigned(l)) then begin
  1948. l:=current_asmdata.DefineAsmSymbol(symname,AB_GLOBAL, AT_DATA);
  1949. new_section(current_asmdata.asmlists[al_picdata],sec_toc, '.toc', 8);
  1950. current_asmdata.asmlists[al_picdata].concat(tai_symbol.create_global(l,0));
  1951. current_asmdata.asmlists[al_picdata].concat(tai_directive.create(asd_toc_entry, symname + '[TC], ' + inttostr(a)));
  1952. end;
  1953. reference_reset_symbol(ref,l,0, 8);
  1954. ref.base := NR_R2;
  1955. ref.refaddr := addr_no;
  1956. {$IFDEF EXTDEBUG}
  1957. list.concat(tai_comment.create(strpnew('loading value from TOC reference for ' + symname)));
  1958. {$ENDIF EXTDEBUG}
  1959. cg.a_load_ref_reg(list, OS_INT, OS_INT, ref, reg);
  1960. end;
  1961. procedure create_codegen;
  1962. begin
  1963. cg := tcgppc.create;
  1964. end;
  1965. end.