cgcpu.pas 77 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. This unit implements the code generator for the PowerPC
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$I fpcdefs.inc}
  19. interface
  20. uses
  21. globtype, symtype, symdef,
  22. cgbase, cgobj,
  23. aasmbase, aasmcpu, aasmtai,aasmdata,
  24. cpubase, cpuinfo, cgutils, rgcpu,
  25. parabase;
  26. type
  27. tcgppc = class(tcg)
  28. procedure init_register_allocators; override;
  29. procedure done_register_allocators; override;
  30. { passing parameters, per default the parameter is pushed }
  31. { nr gives the number of the parameter (enumerated from }
  32. { left to right), this allows to move the parameter to }
  33. { register, if the cpu supports register calling }
  34. { conventions }
  35. procedure a_param_const(list: TAsmList; size: tcgsize; a: aint; const
  36. paraloc: tcgpara); override;
  37. procedure a_param_ref(list: TAsmList; size: tcgsize; const r: treference;
  38. const paraloc: tcgpara); override;
  39. procedure a_paramaddr_ref(list: TAsmList; const r: treference; const
  40. paraloc: tcgpara); override;
  41. procedure a_call_name(list: TAsmList; const s: string); override;
  42. procedure a_call_reg(list: TAsmList; reg: tregister); override;
  43. procedure a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize; a:
  44. aint; reg: TRegister); override;
  45. procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,
  46. dst: TRegister); override;
  47. procedure a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  48. size: tcgsize; a: aint; src, dst: tregister); override;
  49. procedure a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  50. size: tcgsize; src1, src2, dst: tregister); override;
  51. { move instructions }
  52. procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: aint; reg:
  53. tregister); override;
  54. { stores the contents of register reg to the memory location described by
  55. ref }
  56. procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg:
  57. tregister; const ref: treference); override;
  58. { loads the memory pointed to by ref into register reg }
  59. procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const
  60. Ref: treference; reg: tregister); override;
  61. procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1,
  62. reg2: tregister); override;
  63. { fpu move instructions }
  64. procedure a_loadfpu_reg_reg(list: TAsmList; size: tcgsize; reg1, reg2:
  65. tregister); override;
  66. procedure a_loadfpu_ref_reg(list: TAsmList; size: tcgsize; const ref:
  67. treference; reg: tregister); override;
  68. procedure a_loadfpu_reg_ref(list: TAsmList; size: tcgsize; reg:
  69. tregister; const ref: treference); override;
  70. { comparison operations }
  71. procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op:
  72. topcmp; a: aint; reg: tregister;
  73. l: tasmlabel); override;
  74. procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op:
  75. topcmp; reg1, reg2: tregister; l: tasmlabel); override;
  76. procedure a_jmp_name(list: TAsmList; const s: string); override;
  77. procedure a_jmp_always(list: TAsmList; l: tasmlabel); override;
  78. procedure a_jmp_flags(list: TAsmList; const f: TResFlags; l: tasmlabel);
  79. override;
  80. procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags;
  81. reg: TRegister); override;
  82. procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe:
  83. boolean); override;
  84. procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe:
  85. boolean); override;
  86. procedure g_save_standard_registers(list: TAsmList); override;
  87. procedure g_restore_standard_registers(list: TAsmList); override;
  88. procedure a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r:
  89. tregister); override;
  90. procedure g_concatcopy(list: TAsmList; const source, dest: treference;
  91. len: aint); override;
  92. procedure g_overflowcheck(list: TAsmList; const l: tlocation; def: tdef);
  93. override;
  94. procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);
  95. procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const
  96. labelname: string; ioffset: longint); override;
  97. private
  98. { Make sure ref is a valid reference for the PowerPC and sets the }
  99. { base to the value of the index if (base = R_NO). }
  100. { Returns true if the reference contained a base, index and an }
  101. { offset or symbol, in which case the base will have been changed }
  102. { to a tempreg (which has to be freed by the caller) containing }
  103. { the sum of part of the original reference }
  104. function fixref(list: TAsmList; var ref: treference; const size : TCgsize): boolean;
  105. function load_got_symbol(list : TAsmList; symbol : string) : tregister;
  106. { returns whether a reference can be used immediately in a powerpc }
  107. { instruction }
  108. function issimpleref(const ref: treference): boolean;
  109. { contains the common code of a_load_reg_ref and a_load_ref_reg }
  110. procedure a_load_store(list: TAsmList; op: tasmop; reg: tregister;
  111. ref: treference);
  112. { creates the correct branch instruction for a given combination }
  113. { of asmcondflags and destination addressing mode }
  114. procedure a_jmp(list: TAsmList; op: tasmop;
  115. c: tasmcondflag; crval: longint; l: tasmlabel);
  116. { returns the lowest numbered FP register in use, and the number of used FP registers
  117. for the current procedure }
  118. procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
  119. { returns the lowest numbered GP register in use, and the number of used GP registers
  120. for the current procedure }
  121. procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
  122. { returns true if the offset of the given reference can not be represented by a 16 bit
  123. immediate as required by some PowerPC instructions }
  124. function hasLargeOffset(const ref : TReference) : Boolean; inline;
  125. { generates code to call a method with the given string name. The boolean options
  126. control code generation. If prependDot is true, a single dot character is prepended to
  127. the string, if addNOP is true a single NOP instruction is added after the call, and
  128. if includeCall is true, the method is marked as having a call, not if false. This
  129. option is particularly useful to prevent generation of a larger stack frame for the
  130. register save and restore helper functions. }
  131. procedure a_call_name_direct(list: TAsmList; s: string; prependDot : boolean;
  132. addNOP : boolean; includeCall : boolean = true);
  133. { emits code to store the given value a into the TOC (if not already in there), and load it from there
  134. as well }
  135. procedure loadConstantPIC(list : TAsmList; size : TCGSize; a : aint; reg : TRegister);
  136. end;
  137. const
  138. TShiftOpCG2AsmOpConst : array[boolean, OP_SAR..OP_SHR] of TAsmOp = (
  139. (A_SRAWI, A_SLWI, A_SRWI), (A_SRADI, A_SLDI, A_SRDI)
  140. );
  141. TOpCmp2AsmCond: array[topcmp] of TAsmCondFlag = (C_NONE, C_EQ, C_GT,
  142. C_LT, C_GE, C_LE, C_NE, C_LE, C_LT, C_GE, C_GT);
  143. implementation
  144. uses
  145. sysutils,
  146. globals, verbose, systems, cutils,
  147. symconst, symsym, fmodule,
  148. rgobj, tgobj, cpupi, procinfo, paramgr;
  149. function ref2string(const ref : treference) : string;
  150. begin
  151. result := 'base : ' + inttostr(ord(ref.base)) + ' index : ' + inttostr(ord(ref.index)) + ' refaddr : ' + inttostr(ord(ref.refaddr)) + ' offset : ' + inttostr(ref.offset) + ' symbol : ';
  152. if (assigned(ref.symbol)) then
  153. result := result + ref.symbol.name;
  154. end;
  155. { helper function which calculate "magic" values for replacement of unsigned
  156. division by constant operation by multiplication. See the PowerPC compiler
  157. developer manual for more information }
  158. procedure getmagic_unsignedN(const N : byte; const d : aWord;
  159. out magic_m : aWord; out magic_add : boolean; out magic_shift : byte);
  160. var
  161. p : aInt;
  162. nc, delta, q1, r1, q2, r2, two_N_minus_1 : aWord;
  163. begin
  164. assert(d > 0);
  165. two_N_minus_1 := aWord(1) shl (N-1);
  166. magic_add := false;
  167. nc := - 1 - (-d) mod d;
  168. p := N-1; { initialize p }
  169. q1 := two_N_minus_1 div nc; { initialize q1 = 2p/nc }
  170. r1 := two_N_minus_1 - q1*nc; { initialize r1 = rem(2p,nc) }
  171. q2 := (two_N_minus_1-1) div d; { initialize q2 = (2p-1)/d }
  172. r2 := (two_N_minus_1-1) - q2*d; { initialize r2 = rem((2p-1),d) }
  173. repeat
  174. inc(p);
  175. if (r1 >= (nc - r1)) then begin
  176. q1 := 2 * q1 + 1; { update q1 }
  177. r1 := 2*r1 - nc; { update r1 }
  178. end else begin
  179. q1 := 2*q1; { update q1 }
  180. r1 := 2*r1; { update r1 }
  181. end;
  182. if ((r2 + 1) >= (d - r2)) then begin
  183. if (q2 >= (two_N_minus_1-1)) then
  184. magic_add := true;
  185. q2 := 2*q2 + 1; { update q2 }
  186. r2 := 2*r2 + 1 - d; { update r2 }
  187. end else begin
  188. if (q2 >= two_N_minus_1) then
  189. magic_add := true;
  190. q2 := 2*q2; { update q2 }
  191. r2 := 2*r2 + 1; { update r2 }
  192. end;
  193. delta := d - 1 - r2;
  194. until not ((p < (2*N)) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
  195. magic_m := q2 + 1; { resulting magic number }
  196. magic_shift := p - N; { resulting shift }
  197. end;
  198. { helper function which calculate "magic" values for replacement of signed
  199. division by constant operation by multiplication. See the PowerPC compiler
  200. developer manual for more information }
  201. procedure getmagic_signedN(const N : byte; const d : aInt;
  202. out magic_m : aInt; out magic_s : aInt);
  203. var
  204. p : aInt;
  205. ad, anc, delta, q1, r1, q2, r2, t : aWord;
  206. two_N_minus_1 : aWord;
  207. begin
  208. assert((d < -1) or (d > 1));
  209. two_N_minus_1 := aWord(1) shl (N-1);
  210. ad := abs(d);
  211. t := two_N_minus_1 + (aWord(d) shr (N-1));
  212. anc := t - 1 - t mod ad; { absolute value of nc }
  213. p := (N-1); { initialize p }
  214. q1 := two_N_minus_1 div anc; { initialize q1 = 2p/abs(nc) }
  215. r1 := two_N_minus_1 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
  216. q2 := two_N_minus_1 div ad; { initialize q2 = 2p/abs(d) }
  217. r2 := two_N_minus_1 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
  218. repeat
  219. inc(p);
  220. q1 := 2*q1; { update q1 = 2p/abs(nc) }
  221. r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
  222. if (r1 >= anc) then begin { must be unsigned comparison }
  223. inc(q1);
  224. dec(r1, anc);
  225. end;
  226. q2 := 2*q2; { update q2 = 2p/abs(d) }
  227. r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
  228. if (r2 >= ad) then begin { must be unsigned comparison }
  229. inc(q2);
  230. dec(r2, ad);
  231. end;
  232. delta := ad - r2;
  233. until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
  234. magic_m := q2 + 1;
  235. if (d < 0) then begin
  236. magic_m := -magic_m; { resulting magic number }
  237. end;
  238. magic_s := p - N; { resulting shift }
  239. end;
  240. { finds positive and negative powers of two of the given value, returning the
  241. power and whether it's a negative power or not in addition to the actual result
  242. of the function }
  243. function ispowerof2(value : aInt; out power : byte; out neg : boolean) : boolean;
  244. var
  245. i : longint;
  246. hl : aInt;
  247. begin
  248. neg := false;
  249. { also try to find negative power of two's by negating if the
  250. value is negative. low(aInt) is special because it can not be
  251. negated. Simply return the appropriate values for it }
  252. if (value < 0) then begin
  253. neg := true;
  254. if (value = low(aInt)) then begin
  255. power := sizeof(aInt)*8-1;
  256. result := true;
  257. exit;
  258. end;
  259. value := -value;
  260. end;
  261. if ((value and (value-1)) <> 0) then begin
  262. result := false;
  263. exit;
  264. end;
  265. hl := 1;
  266. for i := 0 to (sizeof(aInt)*8-1) do begin
  267. if (hl = value) then begin
  268. result := true;
  269. power := i;
  270. exit;
  271. end;
  272. hl := hl shl 1;
  273. end;
  274. end;
  275. { returns the number of instruction required to load the given integer into a register.
  276. This is basically a stripped down version of a_load_const_reg, increasing a counter
  277. instead of emitting instructions. }
  278. function getInstructionLength(a : aint) : longint;
  279. function get32bitlength(a : longint; var length : longint) : boolean; inline;
  280. var
  281. is_half_signed : byte;
  282. begin
  283. { if the lower 16 bits are zero, do a single LIS }
  284. if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
  285. inc(length);
  286. get32bitlength := longint(a) < 0;
  287. end else begin
  288. is_half_signed := ord(smallint(lo(a)) < 0);
  289. inc(length);
  290. if smallint(hi(a) + is_half_signed) <> 0 then
  291. inc(length);
  292. get32bitlength := (smallint(a) < 0) or (a < 0);
  293. end;
  294. end;
  295. var
  296. extendssign : boolean;
  297. begin
  298. result := 0;
  299. if (lo(a) = 0) and (hi(a) <> 0) then begin
  300. get32bitlength(hi(a), result);
  301. inc(result);
  302. end else begin
  303. extendssign := get32bitlength(lo(a), result);
  304. if (extendssign) and (hi(a) = 0) then
  305. inc(result)
  306. else if (not
  307. ((extendssign and (longint(hi(a)) = -1)) or
  308. ((not extendssign) and (hi(a)=0)))
  309. ) then begin
  310. get32bitlength(hi(a), result);
  311. inc(result);
  312. end;
  313. end;
  314. end;
  315. procedure tcgppc.init_register_allocators;
  316. begin
  317. inherited init_register_allocators;
  318. rg[R_INTREGISTER] := trgcpu.create(R_INTREGISTER, R_SUBWHOLE,
  319. [RS_R3, RS_R4, RS_R5, RS_R6, RS_R7, RS_R8,
  320. RS_R9, RS_R10, RS_R11, RS_R12, RS_R31, RS_R30, RS_R29,
  321. RS_R28, RS_R27, RS_R26, RS_R25, RS_R24, RS_R23, RS_R22,
  322. RS_R21, RS_R20, RS_R19, RS_R18, RS_R17, RS_R16, RS_R15,
  323. RS_R14, RS_R13], first_int_imreg, []);
  324. rg[R_FPUREGISTER] := trgcpu.create(R_FPUREGISTER, R_SUBNONE,
  325. [RS_F0, RS_F1, RS_F2, RS_F3, RS_F4, RS_F5, RS_F6, RS_F7, RS_F8, RS_F9,
  326. RS_F10, RS_F11, RS_F12, RS_F13, RS_F31, RS_F30, RS_F29, RS_F28, RS_F27,
  327. RS_F26, RS_F25, RS_F24, RS_F23, RS_F22, RS_F21, RS_F20, RS_F19, RS_F18,
  328. RS_F17, RS_F16, RS_F15, RS_F14], first_fpu_imreg, []);
  329. {$WARNING FIX ME}
  330. rg[R_MMREGISTER] := trgcpu.create(R_MMREGISTER, R_SUBNONE,
  331. [RS_M0, RS_M1, RS_M2], first_mm_imreg, []);
  332. end;
  333. procedure tcgppc.done_register_allocators;
  334. begin
  335. rg[R_INTREGISTER].free;
  336. rg[R_FPUREGISTER].free;
  337. rg[R_MMREGISTER].free;
  338. inherited done_register_allocators;
  339. end;
  340. procedure tcgppc.a_param_const(list: TAsmList; size: tcgsize; a: aint; const
  341. paraloc: tcgpara);
  342. var
  343. ref: treference;
  344. begin
  345. paraloc.check_simple_location;
  346. case paraloc.location^.loc of
  347. LOC_REGISTER, LOC_CREGISTER:
  348. a_load_const_reg(list, size, a, paraloc.location^.register);
  349. LOC_REFERENCE:
  350. begin
  351. reference_reset(ref);
  352. ref.base := paraloc.location^.reference.index;
  353. ref.offset := paraloc.location^.reference.offset;
  354. a_load_const_ref(list, size, a, ref);
  355. end;
  356. else
  357. internalerror(2002081101);
  358. end;
  359. end;
  360. procedure tcgppc.a_param_ref(list: TAsmList; size: tcgsize; const r:
  361. treference; const paraloc: tcgpara);
  362. var
  363. tmpref, ref: treference;
  364. location: pcgparalocation;
  365. sizeleft: aint;
  366. adjusttail : boolean;
  367. begin
  368. location := paraloc.location;
  369. tmpref := r;
  370. sizeleft := paraloc.intsize;
  371. adjusttail := false;
  372. while assigned(location) do begin
  373. case location^.loc of
  374. LOC_REGISTER, LOC_CREGISTER:
  375. begin
  376. if (size <> OS_NO) then
  377. a_load_ref_reg(list, size, location^.size, tmpref,
  378. location^.register)
  379. else
  380. {$IFDEF extdebug}
  381. list.concat(tai_comment.create(strpnew('a_param_ref with OS_NO, sizeleft ' + inttostr(sizeleft))));
  382. {$ENDIF extdebug}
  383. { load non-integral sized memory location into register. This
  384. memory location be 1-sizeleft byte sized.
  385. Always assume that this memory area is properly aligned, eg. start
  386. loading the larger quantities for "odd" quantities first }
  387. case sizeleft of
  388. 1,2,4,8 :
  389. a_load_ref_reg(list, int_cgsize(sizeleft), location^.size, tmpref,
  390. location^.register);
  391. 3 : begin
  392. a_reg_alloc(list, NR_R12);
  393. a_load_ref_reg(list, OS_16, location^.size, tmpref,
  394. NR_R12);
  395. inc(tmpref.offset, tcgsize2size[OS_16]);
  396. a_load_ref_reg(list, OS_8, location^.size, tmpref,
  397. location^.register);
  398. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 40));
  399. a_reg_dealloc(list, NR_R12);
  400. end;
  401. 5 : begin
  402. a_reg_alloc(list, NR_R12);
  403. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  404. inc(tmpref.offset, tcgsize2size[OS_32]);
  405. a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
  406. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 24));
  407. a_reg_dealloc(list, NR_R12);
  408. end;
  409. 6 : begin
  410. a_reg_alloc(list, NR_R12);
  411. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  412. inc(tmpref.offset, tcgsize2size[OS_32]);
  413. a_load_ref_reg(list, OS_16, location^.size, tmpref, location^.register);
  414. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 16, 16));
  415. a_reg_dealloc(list, NR_R12);
  416. end;
  417. 7 : begin
  418. a_reg_alloc(list, NR_R12);
  419. a_reg_alloc(list, NR_R0);
  420. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  421. inc(tmpref.offset, tcgsize2size[OS_32]);
  422. a_load_ref_reg(list, OS_16, location^.size, tmpref, NR_R0);
  423. inc(tmpref.offset, tcgsize2size[OS_16]);
  424. a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
  425. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, NR_R0, NR_R12, 16, 16));
  426. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R0, 8, 8));
  427. a_reg_dealloc(list, NR_R0);
  428. a_reg_dealloc(list, NR_R12);
  429. end;
  430. else
  431. { still > 8 bytes to load, so load data single register now }
  432. a_load_ref_reg(list, location^.size, location^.size, tmpref,
  433. location^.register);
  434. { the block is > 8 bytes, so we have to store any bytes not
  435. a multiple of the register size beginning with the MSB }
  436. adjusttail := true;
  437. end;
  438. if (adjusttail) and (sizeleft < tcgsize2size[OS_INT]) then
  439. a_op_const_reg(list, OP_SHL, OS_INT,
  440. (tcgsize2size[OS_INT] - sizeleft) * tcgsize2size[OS_INT],
  441. location^.register);
  442. end;
  443. LOC_REFERENCE:
  444. begin
  445. reference_reset_base(ref, location^.reference.index,
  446. location^.reference.offset);
  447. g_concatcopy(list, tmpref, ref, sizeleft);
  448. if assigned(location^.next) then
  449. internalerror(2005010710);
  450. end;
  451. LOC_FPUREGISTER, LOC_CFPUREGISTER:
  452. case location^.size of
  453. OS_F32, OS_F64:
  454. a_loadfpu_ref_reg(list, location^.size, tmpref, location^.register);
  455. else
  456. internalerror(2002072801);
  457. end;
  458. LOC_VOID:
  459. { nothing to do }
  460. ;
  461. else
  462. internalerror(2002081103);
  463. end;
  464. inc(tmpref.offset, tcgsize2size[location^.size]);
  465. dec(sizeleft, tcgsize2size[location^.size]);
  466. location := location^.next;
  467. end;
  468. end;
  469. procedure tcgppc.a_paramaddr_ref(list: TAsmList; const r: treference; const
  470. paraloc: tcgpara);
  471. var
  472. ref: treference;
  473. tmpreg: tregister;
  474. begin
  475. paraloc.check_simple_location;
  476. case paraloc.location^.loc of
  477. LOC_REGISTER, LOC_CREGISTER:
  478. a_loadaddr_ref_reg(list, r, paraloc.location^.register);
  479. LOC_REFERENCE:
  480. begin
  481. reference_reset(ref);
  482. ref.base := paraloc.location^.reference.index;
  483. ref.offset := paraloc.location^.reference.offset;
  484. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  485. a_loadaddr_ref_reg(list, r, tmpreg);
  486. a_load_reg_ref(list, OS_ADDR, OS_ADDR, tmpreg, ref);
  487. end;
  488. else
  489. internalerror(2002080701);
  490. end;
  491. end;
  492. { calling a procedure by name }
  493. procedure tcgppc.a_call_name(list: TAsmList; const s: string);
  494. begin
  495. a_call_name_direct(list, s, true, true);
  496. end;
  497. procedure tcgppc.a_call_name_direct(list: TAsmList; s: string; prependDot : boolean; addNOP : boolean; includeCall : boolean);
  498. begin
  499. if (prependDot) then
  500. s := '.' + s;
  501. list.concat(taicpu.op_sym(A_BL, current_asmdata.newasmsymbol(s, AB_EXTERNAL,
  502. AT_FUNCTION)));
  503. if (addNOP) then
  504. list.concat(taicpu.op_none(A_NOP));
  505. if (includeCall) then
  506. include(current_procinfo.flags, pi_do_call);
  507. end;
  508. { calling a procedure by address }
  509. procedure tcgppc.a_call_reg(list: TAsmList; reg: tregister);
  510. var
  511. tmpref: treference;
  512. begin
  513. if (not (cs_opt_size in aktoptimizerswitches)) then begin
  514. { load actual function entry (reg contains the reference to the function descriptor)
  515. into R0 }
  516. reference_reset_base(tmpref, reg, 0);
  517. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R0);
  518. { save TOC pointer in stackframe }
  519. reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
  520. a_load_reg_ref(list, OS_ADDR, OS_ADDR, NR_RTOC, tmpref);
  521. { move actual function pointer to CTR register }
  522. list.concat(taicpu.op_reg(A_MTCTR, NR_R0));
  523. { load new TOC pointer from function descriptor into RTOC register }
  524. reference_reset_base(tmpref, reg, tcgsize2size[OS_ADDR]);
  525. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
  526. { load new environment pointer from function descriptor into R11 register }
  527. reference_reset_base(tmpref, reg, 2*tcgsize2size[OS_ADDR]);
  528. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R11);
  529. { call function }
  530. list.concat(taicpu.op_none(A_BCTRL));
  531. end else begin
  532. { call ptrgl helper routine which expects the pointer to the function descriptor
  533. in R11 }
  534. a_load_reg_reg(list, OS_ADDR, OS_ADDR, reg, NR_R11);
  535. a_call_name_direct(list, '.ptrgl', false, false);
  536. end;
  537. { we need to load the old RTOC from stackframe because we changed it}
  538. reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
  539. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
  540. include(current_procinfo.flags, pi_do_call);
  541. end;
  542. {********************** load instructions ********************}
  543. procedure tcgppc.a_load_const_reg(list: TAsmList; size: TCGSize; a: aint;
  544. reg: TRegister);
  545. { loads a 32 bit constant into the given register, using an optimal instruction sequence.
  546. This is either LIS, LI or LI+ADDIS.
  547. Returns true if during these operations the upper 32 bits were filled with 1 bits (e.g.
  548. sign extension was performed) }
  549. function load32bitconstant(list : TAsmList; size : TCGSize; a : longint;
  550. reg : TRegister) : boolean;
  551. var
  552. is_half_signed : byte;
  553. begin
  554. { if the lower 16 bits are zero, do a single LIS }
  555. if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
  556. list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(hi(a))));
  557. load32bitconstant := longint(a) < 0;
  558. end else begin
  559. is_half_signed := ord(smallint(lo(a)) < 0);
  560. list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a and $ffff)));
  561. if smallint(hi(a) + is_half_signed) <> 0 then begin
  562. list.concat(taicpu.op_reg_reg_const(A_ADDIS, reg, reg, smallint(hi(a) + is_half_signed)));
  563. end;
  564. load32bitconstant := (smallint(a) < 0) or (a < 0);
  565. end;
  566. end;
  567. { loads a 32 bit constant into R0, using an optimal instruction sequence.
  568. This is either LIS, LI or LI+ORIS.
  569. Returns true if during these operations the upper 32 bits were filled with 1 bits (e.g.
  570. sign extension was performed) }
  571. function load32bitconstantR0(list : TAsmList; size : TCGSize; a : longint) : boolean;
  572. begin
  573. { if it's a value we can load with a single LI, do it }
  574. if (a >= low(smallint)) and (a <= high(smallint)) then begin
  575. list.concat(taicpu.op_reg_const(A_LI, NR_R0, smallint(a)));
  576. end else begin
  577. { if the lower 16 bits are zero, do a single LIS }
  578. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, smallint(a shr 16)));
  579. if (smallint(a) <> 0) then begin
  580. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(a)));
  581. end;
  582. end;
  583. load32bitconstantR0 := a < 0;
  584. end;
  585. { emits the code to load a constant by emitting various instructions into the output
  586. code}
  587. procedure loadConstantNormal(list: TAsmList; size : TCgSize; a: aint; reg: TRegister);
  588. var
  589. extendssign : boolean;
  590. begin
  591. if (lo(a) = 0) and (hi(a) <> 0) then begin
  592. { load only upper 32 bits, and shift }
  593. load32bitconstant(list, size, hi(a), reg);
  594. list.concat(taicpu.op_reg_reg_const(A_SLDI, reg, reg, 32));
  595. end else begin
  596. { load lower 32 bits }
  597. extendssign := load32bitconstant(list, size, lo(a), reg);
  598. if (extendssign) and (hi(a) = 0) then
  599. { if upper 32 bits are zero, but loading the lower 32 bit resulted in automatic
  600. sign extension, clear those bits }
  601. a_load_reg_reg(list, OS_32, OS_64, reg, reg)
  602. else if (not
  603. ((extendssign and (longint(hi(a)) = -1)) or
  604. ((not extendssign) and (hi(a)=0)))
  605. ) then begin
  606. { only load the upper 32 bits, if the automatic sign extension is not okay,
  607. that is, _not_ if
  608. - loading the lower 32 bits resulted in -1 in the upper 32 bits, and the upper
  609. 32 bits should contain -1
  610. - loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
  611. 32 bits should contain 0 }
  612. load32bitconstantR0(list, size, hi(a));
  613. { combine both registers }
  614. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
  615. end;
  616. end;
  617. end;
  618. {$IFDEF EXTDEBUG}
  619. var
  620. astring : string;
  621. {$ENDIF EXTDEBUG}
  622. begin
  623. {$IFDEF EXTDEBUG}
  624. astring := 'a_load_const reg ' + inttostr(hi(a)) + ' ' + inttostr(lo(a)) + ' ' + inttostr(ord(size)) + ' ' + inttostr(tcgsize2size[size]);
  625. list.concat(tai_comment.create(strpnew(astring)));
  626. {$ENDIF EXTDEBUG}
  627. if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
  628. internalerror(2002090902);
  629. { if PIC or basic optimizations are enabled, and the number of instructions which would be
  630. required to load the value is greater than 2, store (and later load) the value from there }
  631. if (false) {(((cs_opt_peephole in aktoptimizerswitches in aktglobalswitches) or (cs_create_pic in aktmoduleswitches)) and
  632. (getInstructionLength(a) > 2))} then
  633. loadConstantPIC(list, size, a, reg)
  634. else
  635. loadConstantNormal(list, size, a, reg);
  636. end;
  637. procedure tcgppc.a_load_reg_ref(list: TAsmList; fromsize, tosize: TCGSize;
  638. reg: tregister; const ref: treference);
  639. const
  640. StoreInstr: array[OS_8..OS_64, boolean, boolean] of TAsmOp =
  641. { indexed? updating?}
  642. (((A_STB, A_STBU), (A_STBX, A_STBUX)),
  643. ((A_STH, A_STHU), (A_STHX, A_STHUX)),
  644. ((A_STW, A_STWU), (A_STWX, A_STWUX)),
  645. ((A_STD, A_STDU), (A_STDX, A_STDUX))
  646. );
  647. var
  648. op: TAsmOp;
  649. ref2: TReference;
  650. begin
  651. ref2 := ref;
  652. fixref(list, ref2, tosize);
  653. if tosize in [OS_S8..OS_S64] then
  654. { storing is the same for signed and unsigned values }
  655. tosize := tcgsize(ord(tosize) - (ord(OS_S8) - ord(OS_8)));
  656. op := storeinstr[tcgsize2unsigned[tosize], ref2.index <> NR_NO, false];
  657. a_load_store(list, op, reg, ref2);
  658. end;
  659. procedure tcgppc.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize;
  660. const ref: treference; reg: tregister);
  661. const
  662. LoadInstr: array[OS_8..OS_S64, boolean, boolean] of TAsmOp =
  663. { indexed? updating? }
  664. (((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
  665. ((A_LHZ, A_LHZU), (A_LHZX, A_LHZUX)),
  666. ((A_LWZ, A_LWZU), (A_LWZX, A_LWZUX)),
  667. ((A_LD, A_LDU), (A_LDX, A_LDUX)),
  668. { 128bit stuff too }
  669. ((A_NONE, A_NONE), (A_NONE, A_NONE)),
  670. { there's no load-byte-with-sign-extend :( }
  671. ((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
  672. ((A_LHA, A_LHAU), (A_LHAX, A_LHAUX)),
  673. { there's no load-word-arithmetic-indexed with update, simulate it in code :( }
  674. ((A_LWA, A_NOP), (A_LWAX, A_LWAUX)),
  675. ((A_LD, A_LDU), (A_LDX, A_LDUX))
  676. );
  677. var
  678. op: tasmop;
  679. ref2: treference;
  680. begin
  681. {$IFDEF EXTDEBUG}
  682. list.concat(tai_comment.create(strpnew('a_load_ref_reg ' + ref2string(ref))));
  683. {$ENDIF EXTDEBUG}
  684. if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
  685. internalerror(2002090902);
  686. ref2 := ref;
  687. fixref(list, ref2, tosize);
  688. { the caller is expected to have adjusted the reference already
  689. in this case }
  690. if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
  691. fromsize := tosize;
  692. op := loadinstr[fromsize, ref2.index <> NR_NO, false];
  693. { there is no LWAU instruction, simulate using ADDI and LWA }
  694. if (op = A_NOP) then begin
  695. list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
  696. ref2.offset := 0;
  697. op := A_LWA;
  698. end;
  699. a_load_store(list, op, reg, ref2);
  700. { sign extend shortint if necessary, since there is no
  701. load instruction that does that automatically (JM) }
  702. if fromsize = OS_S8 then
  703. list.concat(taicpu.op_reg_reg(A_EXTSB, reg, reg));
  704. end;
  705. procedure tcgppc.a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize;
  706. reg1, reg2: tregister);
  707. const
  708. movemap : array[OS_8..OS_S128, OS_8..OS_S128] of tasmop = (
  709. { to -> OS_8 OS_16 OS_32 OS_64 OS_128 OS_S8 OS_S16 OS_S32 OS_S64 OS_S128 }
  710. { from }
  711. { OS_8 } (A_MR, A_RLDICL, A_RLDICL, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  712. { OS_16 } (A_RLDICL, A_MR, A_RLDICL, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  713. { OS_32 } (A_RLDICL, A_RLDICL, A_MR, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  714. { OS_64 } (A_RLDICL, A_RLDICL, A_RLDICL, A_MR, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  715. { OS_128 } (A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NOP ),
  716. { OS_S8 } (A_EXTSB, A_EXTSB, A_EXTSB, A_EXTSB, A_NONE, A_MR, A_EXTSB, A_EXTSB, A_EXTSB, A_NOP ),
  717. { OS_S16 } (A_RLDICL, A_EXTSH, A_EXTSH, A_EXTSH, A_NONE, A_EXTSB, A_MR, A_EXTSH, A_EXTSH, A_NOP ),
  718. { OS_S32 } (A_RLDICL, A_RLDICL, A_EXTSW, A_EXTSW, A_NONE, A_EXTSB, A_EXTSH, A_MR, A_EXTSW, A_NOP ),
  719. { OS_S64 } (A_RLDICL, A_RLDICL, A_RLDICL, A_MR, A_NONE, A_EXTSB, A_EXTSH, A_EXTSW, A_MR, A_NOP ),
  720. { OS_S128 } (A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NOP )
  721. );
  722. var
  723. instr: taicpu;
  724. op : tasmop;
  725. begin
  726. op := movemap[fromsize, tosize];
  727. case op of
  728. A_MR, A_EXTSB, A_EXTSH, A_EXTSW : instr := taicpu.op_reg_reg(op, reg2, reg1);
  729. A_RLDICL : instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[fromsize])*8);
  730. else
  731. internalerror(2002090901);
  732. end;
  733. list.concat(instr);
  734. rg[R_INTREGISTER].add_move_instruction(instr);
  735. end;
  736. procedure tcgppc.a_loadfpu_reg_reg(list: TAsmList; size: tcgsize;
  737. reg1, reg2: tregister);
  738. var
  739. instr: taicpu;
  740. begin
  741. instr := taicpu.op_reg_reg(A_FMR, reg2, reg1);
  742. list.concat(instr);
  743. rg[R_FPUREGISTER].add_move_instruction(instr);
  744. end;
  745. procedure tcgppc.a_loadfpu_ref_reg(list: TAsmList; size: tcgsize;
  746. const ref: treference; reg: tregister);
  747. const
  748. FpuLoadInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
  749. { indexed? updating?}
  750. (((A_LFS, A_LFSU), (A_LFSX, A_LFSUX)),
  751. ((A_LFD, A_LFDU), (A_LFDX, A_LFDUX)));
  752. var
  753. op: tasmop;
  754. ref2: treference;
  755. begin
  756. { several functions call this procedure with OS_32 or OS_64
  757. so this makes life easier (FK) }
  758. case size of
  759. OS_32, OS_F32:
  760. size := OS_F32;
  761. OS_64, OS_F64, OS_C64:
  762. size := OS_F64;
  763. else
  764. internalerror(200201121);
  765. end;
  766. ref2 := ref;
  767. fixref(list, ref2, size);
  768. op := fpuloadinstr[size, ref2.index <> NR_NO, false];
  769. a_load_store(list, op, reg, ref2);
  770. end;
  771. procedure tcgppc.a_loadfpu_reg_ref(list: TAsmList; size: tcgsize; reg:
  772. tregister; const ref: treference);
  773. const
  774. FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
  775. { indexed? updating? }
  776. (((A_STFS, A_STFSU), (A_STFSX, A_STFSUX)),
  777. ((A_STFD, A_STFDU), (A_STFDX, A_STFDUX)));
  778. var
  779. op: tasmop;
  780. ref2: treference;
  781. begin
  782. if not (size in [OS_F32, OS_F64]) then
  783. internalerror(200201122);
  784. ref2 := ref;
  785. fixref(list, ref2, size);
  786. op := fpustoreinstr[size, ref2.index <> NR_NO, false];
  787. a_load_store(list, op, reg, ref2);
  788. end;
  789. procedure tcgppc.a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize; a:
  790. aint; reg: TRegister);
  791. begin
  792. a_op_const_reg_reg(list, op, size, a, reg, reg);
  793. end;
  794. procedure tcgppc.a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src,
  795. dst: TRegister);
  796. begin
  797. a_op_reg_reg_reg(list, op, size, src, dst, dst);
  798. end;
  799. procedure tcgppc.a_op_const_reg_reg(list: TAsmList; op: TOpCg;
  800. size: tcgsize; a: aint; src, dst: tregister);
  801. var
  802. useReg : boolean;
  803. procedure do_lo_hi(loOp, hiOp : TAsmOp);
  804. begin
  805. { Optimization for logical ops (excluding AND), trying to do this as efficiently
  806. as possible by only generating code for the affected halfwords. Note that all
  807. the instructions handled here must have "X op 0 = X" for every halfword. }
  808. usereg := false;
  809. if (aword(a) > high(dword)) then begin
  810. usereg := true;
  811. end else begin
  812. if (word(a) <> 0) then begin
  813. list.concat(taicpu.op_reg_reg_const(loOp, dst, src, word(a)));
  814. if (word(a shr 16) <> 0) then
  815. list.concat(taicpu.op_reg_reg_const(hiOp, dst, dst, word(a shr 16)));
  816. end else if (word(a shr 16) <> 0) then
  817. list.concat(taicpu.op_reg_reg_const(hiOp, dst, src, word(a shr 16)));
  818. end;
  819. end;
  820. procedure do_lo_hi_and;
  821. begin
  822. { optimization logical and with immediate: only use "andi." for 16 bit
  823. ands, otherwise use register method. Doing this for 32 bit constants
  824. would not give any advantage to the register method (via useReg := true),
  825. requiring a scratch register and three instructions. }
  826. usereg := false;
  827. if (aword(a) > high(word)) then
  828. usereg := true
  829. else
  830. list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
  831. end;
  832. procedure do_constant_div(list : TAsmList; size : TCgSize; a : aint; src, dst : TRegister;
  833. signed : boolean);
  834. const
  835. negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
  836. var
  837. magic, shift : int64;
  838. u_magic : qword;
  839. u_shift : byte;
  840. u_add : boolean;
  841. power : byte;
  842. isNegPower : boolean;
  843. divreg : tregister;
  844. begin
  845. if (a = 0) then begin
  846. internalerror(2005061701);
  847. end else if (a = 1) then begin
  848. cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, src, dst);
  849. end else if (a = -1) and (signed) then begin
  850. { note: only in the signed case possible..., may overflow }
  851. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], dst, src));
  852. end else if (ispowerof2(a, power, isNegPower)) then begin
  853. if (signed) then begin
  854. { From "The PowerPC Compiler Writer's Guide", pg. 52ff }
  855. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, power,
  856. src, dst);
  857. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
  858. if (isNegPower) then
  859. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
  860. end else begin
  861. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, power, src, dst)
  862. end;
  863. end else begin
  864. { replace division by multiplication, both implementations }
  865. { from "The PowerPC Compiler Writer's Guide" pg. 53ff }
  866. divreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
  867. if (signed) then begin
  868. getmagic_signedN(sizeof(aInt)*8, a, magic, shift);
  869. { load magic value }
  870. cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, magic, divreg);
  871. { multiply }
  872. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULHD, dst, src, divreg));
  873. { add/subtract numerator }
  874. if (a > 0) and (magic < 0) then begin
  875. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, src, dst, dst);
  876. end else if (a < 0) and (magic > 0) then begin
  877. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_SUB, OS_INT, src, dst, dst);
  878. end;
  879. { shift shift places to the right (arithmetic) }
  880. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, shift, dst, dst);
  881. { extract and add sign bit }
  882. if (a >= 0) then begin
  883. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, 63, src, divreg);
  884. end else begin
  885. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, 63, dst, divreg);
  886. end;
  887. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, dst, divreg, dst);
  888. end else begin
  889. getmagic_unsignedN(sizeof(aWord)*8, a, u_magic, u_add, u_shift);
  890. { load magic in divreg }
  891. cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, u_magic, divreg);
  892. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULHDU, dst, src, divreg));
  893. if (u_add) then begin
  894. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_SUB, OS_INT, dst, src, divreg);
  895. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, 1, divreg, divreg);
  896. cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, divreg, dst, divreg);
  897. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, u_shift-1, divreg, dst);
  898. end else begin
  899. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_INT, u_shift, dst, dst);
  900. end;
  901. end;
  902. end;
  903. end;
  904. var
  905. scratchreg: tregister;
  906. shift : byte;
  907. shiftmask : longint;
  908. isneg : boolean;
  909. begin
  910. { subtraction is the same as addition with negative constant }
  911. if op = OP_SUB then begin
  912. a_op_const_reg_reg(list, OP_ADD, size, -a, src, dst);
  913. exit;
  914. end;
  915. { This case includes some peephole optimizations for the various operations,
  916. (e.g. AND, OR, XOR, ..) - can't this be done at some higher level,
  917. independent of architecture? }
  918. { assume that we do not need a scratch register for the operation }
  919. useReg := false;
  920. case (op) of
  921. OP_DIV, OP_IDIV:
  922. if (cs_opt_level1 in aktoptimizerswitches) then
  923. do_constant_div(list, size, a, src, dst, op = OP_IDIV)
  924. else
  925. usereg := true;
  926. OP_IMUL, OP_MUL:
  927. { idea: factorize constant multiplicands and use adds/shifts with few factors;
  928. however, even a 64 bit multiply is already quite fast on PPC64 }
  929. if (a = 0) then
  930. a_load_const_reg(list, size, 0, dst)
  931. else if (a = -1) then
  932. list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
  933. else if (a = 1) then
  934. a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
  935. else if ispowerof2(a, shift, isneg) then begin
  936. list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift));
  937. if (isneg) then
  938. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
  939. end else if (a >= low(smallint)) and (a <= high(smallint)) then
  940. list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
  941. smallint(a)))
  942. else
  943. usereg := true;
  944. OP_ADD:
  945. if (a = 0) then
  946. a_load_reg_reg(list, size, size, src, dst)
  947. else if (a >= low(smallint)) and (a <= high(smallint)) then
  948. list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a)))
  949. else
  950. useReg := true;
  951. OP_OR:
  952. if (a = 0) then
  953. a_load_reg_reg(list, size, size, src, dst)
  954. else if (a = -1) then
  955. a_load_const_reg(list, size, -1, dst)
  956. else
  957. do_lo_hi(A_ORI, A_ORIS);
  958. OP_AND:
  959. if (a = 0) then
  960. a_load_const_reg(list, size, 0, dst)
  961. else if (a = -1) then
  962. a_load_reg_reg(list, size, size, src, dst)
  963. else
  964. do_lo_hi_and;
  965. OP_XOR:
  966. if (a = 0) then
  967. a_load_reg_reg(list, size, size, src, dst)
  968. else if (a = -1) then
  969. list.concat(taicpu.op_reg_reg(A_NOT, dst, src))
  970. else
  971. do_lo_hi(A_XORI, A_XORIS);
  972. OP_SHL, OP_SHR, OP_SAR:
  973. begin
  974. if (size in [OS_64, OS_S64]) then
  975. shift := 6
  976. else
  977. shift := 5;
  978. shiftmask := (1 shl shift)-1;
  979. if (a and shiftmask) <> 0 then
  980. list.concat(taicpu.op_reg_reg_const(
  981. TShiftOpCG2AsmOpConst[size in [OS_64, OS_S64], op], dst, src, a and shiftmask))
  982. else
  983. a_load_reg_reg(list, size, size, src, dst);
  984. if ((a shr shift) <> 0) then
  985. internalError(68991);
  986. end
  987. else
  988. internalerror(200109091);
  989. end;
  990. { if all else failed, load the constant in a register and then
  991. perform the operation }
  992. if (useReg) then begin
  993. scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  994. a_load_const_reg(list, size, a, scratchreg);
  995. a_op_reg_reg_reg(list, op, size, scratchreg, src, dst);
  996. end;
  997. end;
  998. procedure tcgppc.a_op_reg_reg_reg(list: TAsmList; op: TOpCg;
  999. size: tcgsize; src1, src2, dst: tregister);
  1000. const
  1001. op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
  1002. (A_NONE, A_MR, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
  1003. A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR);
  1004. op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
  1005. (A_NONE, A_MR, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
  1006. A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
  1007. begin
  1008. case op of
  1009. OP_NEG, OP_NOT:
  1010. begin
  1011. list.concat(taicpu.op_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src1));
  1012. if (op = OP_NOT) and not (size in [OS_64, OS_S64]) then
  1013. { zero/sign extend result again, fromsize is not important here }
  1014. a_load_reg_reg(list, OS_S64, size, dst, dst)
  1015. end;
  1016. else
  1017. if (size in [OS_64, OS_S64]) then begin
  1018. list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2,
  1019. src1));
  1020. end else begin
  1021. list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2,
  1022. src1));
  1023. end;
  1024. end;
  1025. end;
  1026. {*************** compare instructructions ****************}
  1027. procedure tcgppc.a_cmp_const_reg_label(list: TAsmList; size: tcgsize;
  1028. cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel);
  1029. var
  1030. scratch_register: TRegister;
  1031. signed: boolean;
  1032. begin
  1033. signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE];
  1034. { in the following case, we generate more efficient code when }
  1035. { signed is true }
  1036. if (cmp_op in [OC_EQ, OC_NE]) and
  1037. (aword(a) > $FFFF) then
  1038. signed := true;
  1039. if signed then
  1040. if (a >= low(smallint)) and (a <= high(smallint)) then
  1041. list.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR0, reg, a))
  1042. else begin
  1043. scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1044. a_load_const_reg(list, OS_64, a, scratch_register);
  1045. list.concat(taicpu.op_reg_reg_reg(A_CMPD, NR_CR0, reg, scratch_register));
  1046. end
  1047. else if (aword(a) <= $FFFF) then
  1048. list.concat(taicpu.op_reg_reg_const(A_CMPLDI, NR_CR0, reg, aword(a)))
  1049. else begin
  1050. scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1051. a_load_const_reg(list, OS_64, a, scratch_register);
  1052. list.concat(taicpu.op_reg_reg_reg(A_CMPLD, NR_CR0, reg,
  1053. scratch_register));
  1054. end;
  1055. a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
  1056. end;
  1057. procedure tcgppc.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize;
  1058. cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
  1059. var
  1060. op: tasmop;
  1061. begin
  1062. if cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE] then
  1063. if (size in [OS_64, OS_S64]) then
  1064. op := A_CMPD
  1065. else
  1066. op := A_CMPW
  1067. else
  1068. if (size in [OS_64, OS_S64]) then
  1069. op := A_CMPLD
  1070. else
  1071. op := A_CMPLW;
  1072. list.concat(taicpu.op_reg_reg_reg(op, NR_CR0, reg2, reg1));
  1073. a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
  1074. end;
  1075. procedure tcgppc.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);
  1076. begin
  1077. a_jmp(list, A_BC, TOpCmp2AsmCond[cond], 0, l);
  1078. end;
  1079. procedure tcgppc.a_jmp_name(list: TAsmList; const s: string);
  1080. var
  1081. p: taicpu;
  1082. begin
  1083. p := taicpu.op_sym(A_B, current_asmdata.newasmsymbol(s, AB_EXTERNAL,
  1084. AT_LABEL));
  1085. p.is_jmp := true;
  1086. list.concat(p)
  1087. end;
  1088. procedure tcgppc.a_jmp_always(list: TAsmList; l: tasmlabel);
  1089. begin
  1090. a_jmp(list, A_B, C_None, 0, l);
  1091. end;
  1092. procedure tcgppc.a_jmp_flags(list: TAsmList; const f: TResFlags; l:
  1093. tasmlabel);
  1094. var
  1095. c: tasmcond;
  1096. begin
  1097. c := flags_to_cond(f);
  1098. a_jmp(list, A_BC, c.cond, c.cr - RS_CR0, l);
  1099. end;
  1100. procedure tcgppc.g_flags2reg(list: TAsmList; size: TCgSize; const f:
  1101. TResFlags; reg: TRegister);
  1102. var
  1103. testbit: byte;
  1104. bitvalue: boolean;
  1105. begin
  1106. { get the bit to extract from the conditional register + its requested value (0 or 1) }
  1107. testbit := ((f.cr - RS_CR0) * 4);
  1108. case f.flag of
  1109. F_EQ, F_NE:
  1110. begin
  1111. inc(testbit, 2);
  1112. bitvalue := f.flag = F_EQ;
  1113. end;
  1114. F_LT, F_GE:
  1115. begin
  1116. bitvalue := f.flag = F_LT;
  1117. end;
  1118. F_GT, F_LE:
  1119. begin
  1120. inc(testbit);
  1121. bitvalue := f.flag = F_GT;
  1122. end;
  1123. else
  1124. internalerror(200112261);
  1125. end;
  1126. { load the conditional register in the destination reg }
  1127. list.concat(taicpu.op_reg(A_MFCR, reg));
  1128. { we will move the bit that has to be tested to bit 0 by rotating left }
  1129. testbit := (testbit + 1) and 31;
  1130. { extract bit }
  1131. list.concat(taicpu.op_reg_reg_const_const_const(
  1132. A_RLWINM,reg,reg,testbit,31,31));
  1133. { if we need the inverse, xor with 1 }
  1134. if not bitvalue then
  1135. list.concat(taicpu.op_reg_reg_const(A_XORI, reg, reg, 1));
  1136. end;
  1137. { *********** entry/exit code and address loading ************ }
  1138. procedure tcgppc.g_save_standard_registers(list: TAsmList);
  1139. begin
  1140. { this work is done in g_proc_entry; additionally it is not safe
  1141. to use it because it is called at some weird time }
  1142. end;
  1143. procedure tcgppc.g_restore_standard_registers(list: TAsmList);
  1144. begin
  1145. { this work is done in g_proc_exit; mainly because it is not safe to
  1146. put the register restore code here because it is called at some weird time }
  1147. end;
  1148. procedure tcgppc.calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
  1149. var
  1150. reg : TSuperRegister;
  1151. begin
  1152. fprcount := 0;
  1153. firstfpr := RS_F31;
  1154. if not (po_assembler in current_procinfo.procdef.procoptions) then
  1155. for reg := RS_F14 to RS_F31 do
  1156. if reg in rg[R_FPUREGISTER].used_in_proc then begin
  1157. fprcount := ord(RS_F31)-ord(reg)+1;
  1158. firstfpr := reg;
  1159. break;
  1160. end;
  1161. end;
  1162. procedure tcgppc.calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
  1163. var
  1164. reg : TSuperRegister;
  1165. begin
  1166. gprcount := 0;
  1167. firstgpr := RS_R31;
  1168. if not (po_assembler in current_procinfo.procdef.procoptions) then
  1169. for reg := RS_R14 to RS_R31 do
  1170. if reg in rg[R_INTREGISTER].used_in_proc then begin
  1171. gprcount := ord(RS_R31)-ord(reg)+1;
  1172. firstgpr := reg;
  1173. break;
  1174. end;
  1175. end;
  1176. { Generates the entry code of a procedure/function.
  1177. This procedure may be called before, as well as after g_return_from_proc
  1178. is called. localsize is the sum of the size necessary for local variables
  1179. and the maximum possible combined size of ALL the parameters of a procedure
  1180. called by the current one
  1181. IMPORTANT: registers are not to be allocated through the register
  1182. allocator here, because the register colouring has already occured !!
  1183. }
  1184. procedure tcgppc.g_proc_entry(list: TAsmList; localsize: longint;
  1185. nostackframe: boolean);
  1186. var
  1187. firstregfpu, firstreggpr: TSuperRegister;
  1188. needslinkreg: boolean;
  1189. fprcount, gprcount : aint;
  1190. { Save standard registers, both FPR and GPR; does not support VMX/Altivec }
  1191. procedure save_standard_registers;
  1192. var
  1193. regcount : TSuperRegister;
  1194. href : TReference;
  1195. mayNeedLRStore : boolean;
  1196. begin
  1197. { there are two ways to do this: manually, by generating a few "std" instructions,
  1198. or via the restore helper functions. The latter are selected by the -Og switch,
  1199. i.e. "optimize for size" }
  1200. if (cs_opt_size in aktoptimizerswitches) then begin
  1201. mayNeedLRStore := false;
  1202. if ((fprcount > 0) and (gprcount > 0)) then begin
  1203. a_op_const_reg_reg(list, OP_SUB, OS_INT, 8 * fprcount, NR_R1, NR_R12);
  1204. a_call_name_direct(list, '_savegpr1_' + intToStr(32-gprcount), false, false, false);
  1205. a_call_name_direct(list, '_savefpr_' + intToStr(32-fprcount), false, false, false);
  1206. end else if (gprcount > 0) then
  1207. a_call_name_direct(list, '_savegpr0_' + intToStr(32-gprcount), false, false, false)
  1208. else if (fprcount > 0) then
  1209. a_call_name_direct(list, '_savefpr_' + intToStr(32-fprcount), false, false, false)
  1210. else
  1211. mayNeedLRStore := true;
  1212. end else begin
  1213. { save registers, FPU first, then GPR }
  1214. reference_reset_base(href, NR_STACK_POINTER_REG, -8);
  1215. if (fprcount > 0) then
  1216. for regcount := RS_F31 downto firstregfpu do begin
  1217. a_loadfpu_reg_ref(list, OS_FLOAT, newreg(R_FPUREGISTER, regcount,
  1218. R_SUBNONE), href);
  1219. dec(href.offset, tcgsize2size[OS_FLOAT]);
  1220. end;
  1221. if (gprcount > 0) then
  1222. for regcount := RS_R31 downto firstreggpr do begin
  1223. a_load_reg_ref(list, OS_INT, OS_INT, newreg(R_INTREGISTER, regcount,
  1224. R_SUBNONE), href);
  1225. dec(href.offset, tcgsize2size[OS_INT]);
  1226. end;
  1227. { VMX registers not supported by FPC atm }
  1228. { in this branch we may always need to store LR ourselves}
  1229. mayNeedLRStore := true;
  1230. end;
  1231. { we may need to store R0 (=LR) ourselves }
  1232. if (mayNeedLRStore) and (needslinkreg) then begin
  1233. reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
  1234. list.concat(taicpu.op_reg_ref(A_STD, NR_R0, href));
  1235. end;
  1236. end;
  1237. var
  1238. href: treference;
  1239. begin
  1240. calcFirstUsedFPR(firstregfpu, fprcount);
  1241. calcFirstUsedGPR(firstreggpr, gprcount);
  1242. { calculate real stack frame size }
  1243. localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
  1244. gprcount, fprcount);
  1245. { determine whether we need to save the link register }
  1246. needslinkreg :=
  1247. ((not (po_assembler in current_procinfo.procdef.procoptions)) and (pi_do_call in current_procinfo.flags)) or
  1248. ((cs_opt_size in aktoptimizerswitches) and ((fprcount > 0) or (gprcount > 0))) or
  1249. ([cs_lineinfo, cs_debuginfo] * aktmoduleswitches <> []);
  1250. a_reg_alloc(list, NR_STACK_POINTER_REG);
  1251. a_reg_alloc(list, NR_R0);
  1252. { move link register to r0 }
  1253. if (needslinkreg) then
  1254. list.concat(taicpu.op_reg(A_MFLR, NR_R0));
  1255. save_standard_registers;
  1256. { save old stack frame pointer }
  1257. if (tppcprocinfo(current_procinfo).needs_frame_pointer) then begin
  1258. a_reg_alloc(list, NR_OLD_STACK_POINTER_REG);
  1259. list.concat(taicpu.op_reg_reg(A_MR, NR_OLD_STACK_POINTER_REG, NR_STACK_POINTER_REG));
  1260. end;
  1261. { create stack frame }
  1262. if (not nostackframe) and (localsize > 0) then begin
  1263. if (localsize <= high(smallint)) then begin
  1264. reference_reset_base(href, NR_STACK_POINTER_REG, -localsize);
  1265. a_load_store(list, A_STDU, NR_STACK_POINTER_REG, href);
  1266. end else begin
  1267. reference_reset_base(href, NR_NO, -localsize);
  1268. { Use R0 for loading the constant (which is definitely > 32k when entering
  1269. this branch).
  1270. Inlined at this position because it must not use temp registers because
  1271. register allocations have already been done }
  1272. { Code template:
  1273. lis r0,ofs@highest
  1274. ori r0,r0,ofs@higher
  1275. sldi r0,r0,32
  1276. oris r0,r0,ofs@h
  1277. ori r0,r0,ofs@l
  1278. }
  1279. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
  1280. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
  1281. list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
  1282. list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
  1283. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));
  1284. list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
  1285. end;
  1286. end;
  1287. { CR register not used by FPC atm }
  1288. { keep R1 allocated??? }
  1289. a_reg_dealloc(list, NR_R0);
  1290. end;
  1291. { Generates the exit code for a method.
  1292. This procedure may be called before, as well as after g_stackframe_entry
  1293. is called.
  1294. IMPORTANT: registers are not to be allocated through the register
  1295. allocator here, because the register colouring has already occured !!
  1296. }
  1297. procedure tcgppc.g_proc_exit(list: TAsmList; parasize: longint; nostackframe:
  1298. boolean);
  1299. var
  1300. firstregfpu, firstreggpr: TSuperRegister;
  1301. needslinkreg : boolean;
  1302. fprcount, gprcount: aint;
  1303. { Restore standard registers, both FPR and GPR; does not support VMX/Altivec }
  1304. procedure restore_standard_registers;
  1305. var
  1306. { flag indicating whether we need to manually add the exit code (e.g. blr instruction)
  1307. or not }
  1308. needsExitCode : Boolean;
  1309. href : treference;
  1310. regcount : TSuperRegister;
  1311. begin
  1312. { there are two ways to do this: manually, by generating a few "ld" instructions,
  1313. or via the restore helper functions. The latter are selected by the -Og switch,
  1314. i.e. "optimize for size" }
  1315. if (cs_opt_size in aktoptimizerswitches) then begin
  1316. needsExitCode := false;
  1317. if ((fprcount > 0) and (gprcount > 0)) then begin
  1318. a_op_const_reg_reg(list, OP_SUB, OS_INT, 8 * fprcount, NR_R1, NR_R12);
  1319. a_call_name_direct(list, '_restgpr1_' + intToStr(32-gprcount), false, false, false);
  1320. a_jmp_name(list, '_restfpr_' + intToStr(32-fprcount));
  1321. end else if (gprcount > 0) then
  1322. a_jmp_name(list, '_restgpr0_' + intToStr(32-gprcount))
  1323. else if (fprcount > 0) then
  1324. a_jmp_name(list, '_restfpr_' + intToStr(32-fprcount))
  1325. else
  1326. needsExitCode := true;
  1327. end else begin
  1328. needsExitCode := true;
  1329. { restore registers, FPU first, GPR next }
  1330. reference_reset_base(href, NR_STACK_POINTER_REG, -tcgsize2size[OS_FLOAT]);
  1331. if (fprcount > 0) then
  1332. for regcount := RS_F31 downto firstregfpu do begin
  1333. a_loadfpu_ref_reg(list, OS_FLOAT, href, newreg(R_FPUREGISTER, regcount,
  1334. R_SUBNONE));
  1335. dec(href.offset, tcgsize2size[OS_FLOAT]);
  1336. end;
  1337. if (gprcount > 0) then
  1338. for regcount := RS_R31 downto firstreggpr do begin
  1339. a_load_ref_reg(list, OS_INT, OS_INT, href, newreg(R_INTREGISTER, regcount,
  1340. R_SUBNONE));
  1341. dec(href.offset, tcgsize2size[OS_INT]);
  1342. end;
  1343. { VMX not supported by FPC atm }
  1344. end;
  1345. if (needsExitCode) then begin
  1346. { restore LR (if needed) }
  1347. if (needslinkreg) then begin
  1348. reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
  1349. list.concat(taicpu.op_reg_ref(A_LD, NR_R0, href));
  1350. list.concat(taicpu.op_reg(A_MTLR, NR_R0));
  1351. end;
  1352. { generate return instruction }
  1353. list.concat(taicpu.op_none(A_BLR));
  1354. end;
  1355. end;
  1356. var
  1357. href: treference;
  1358. localsize : aint;
  1359. begin
  1360. calcFirstUsedFPR(firstregfpu, fprcount);
  1361. calcFirstUsedGPR(firstreggpr, gprcount);
  1362. { determine whether we need to restore the link register }
  1363. needslinkreg :=
  1364. ((not (po_assembler in current_procinfo.procdef.procoptions)) and (pi_do_call in current_procinfo.flags)) or
  1365. ((cs_opt_size in aktoptimizerswitches) and ((fprcount > 0) or (gprcount > 0))) or
  1366. ([cs_lineinfo, cs_debuginfo] * aktmoduleswitches <> []);
  1367. { calculate stack frame }
  1368. localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
  1369. gprcount, fprcount);
  1370. { CR register not supported }
  1371. { restore stack pointer }
  1372. if (not nostackframe) and (localsize > 0) then begin
  1373. if (localsize <= high(smallint)) then begin
  1374. list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, localsize));
  1375. end else begin
  1376. reference_reset_base(href, NR_NO, localsize);
  1377. { use R0 for loading the constant (which is definitely > 32k when entering
  1378. this branch)
  1379. Inlined because it must not use temp registers because register allocations
  1380. have already been done
  1381. }
  1382. { Code template:
  1383. lis r0,ofs@highest
  1384. ori r0,ofs@higher
  1385. sldi r0,r0,32
  1386. oris r0,r0,ofs@h
  1387. ori r0,r0,ofs@l
  1388. }
  1389. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
  1390. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
  1391. list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
  1392. list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
  1393. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));
  1394. list.concat(taicpu.op_reg_reg_reg(A_ADD, NR_R1, NR_R1, NR_R0));
  1395. end;
  1396. end;
  1397. restore_standard_registers;
  1398. end;
  1399. procedure tcgppc.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r:
  1400. tregister);
  1401. var
  1402. ref2, tmpref: treference;
  1403. { register used to construct address }
  1404. tempreg : TRegister;
  1405. begin
  1406. ref2 := ref;
  1407. fixref(list, ref2, OS_64);
  1408. { load a symbol }
  1409. if (assigned(ref2.symbol) or (hasLargeOffset(ref2))) then begin
  1410. { add the symbol's value to the base of the reference, and if the }
  1411. { reference doesn't have a base, create one }
  1412. reference_reset(tmpref);
  1413. tmpref.offset := ref2.offset;
  1414. tmpref.symbol := ref2.symbol;
  1415. tmpref.relsymbol := ref2.relsymbol;
  1416. { load 64 bit reference into r. If the reference already has a base register,
  1417. first load the 64 bit value into a temp register, then add it to the result
  1418. register rD }
  1419. if (ref2.base <> NR_NO) then begin
  1420. { already have a base register, so allocate a new one }
  1421. tempreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1422. end else begin
  1423. tempreg := r;
  1424. end;
  1425. { code for loading a reference from a symbol into a register rD }
  1426. (*
  1427. lis rX,SYM@highest
  1428. ori rX,SYM@higher
  1429. sldi rX,rX,32
  1430. oris rX,rX,SYM@h
  1431. ori rX,rX,SYM@l
  1432. *)
  1433. {$IFDEF EXTDEBUG}
  1434. list.concat(tai_comment.create(strpnew('loadaddr_ref_reg ')));
  1435. {$ENDIF EXTDEBUG}
  1436. if (assigned(tmpref.symbol)) then begin
  1437. tmpref.refaddr := addr_highest;
  1438. list.concat(taicpu.op_reg_ref(A_LIS, tempreg, tmpref));
  1439. tmpref.refaddr := addr_higher;
  1440. list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
  1441. list.concat(taicpu.op_reg_reg_const(A_SLDI, tempreg, tempreg, 32));
  1442. tmpref.refaddr := addr_high;
  1443. list.concat(taicpu.op_reg_reg_ref(A_ORIS, tempreg, tempreg, tmpref));
  1444. tmpref.refaddr := addr_low;
  1445. list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
  1446. end else
  1447. a_load_const_reg(list, OS_ADDR, tmpref.offset, tempreg);
  1448. { if there's already a base register, add the temp register contents to
  1449. the base register }
  1450. if (ref2.base <> NR_NO) then begin
  1451. list.concat(taicpu.op_reg_reg_reg(A_ADD, r, tempreg, ref2.base));
  1452. end;
  1453. end else if (ref2.offset <> 0) then begin
  1454. { no symbol, but offset <> 0 }
  1455. if (ref2.base <> NR_NO) then begin
  1456. a_op_const_reg_reg(list, OP_ADD, OS_64, ref2.offset, ref2.base, r)
  1457. { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never
  1458. occurs, so now only ref.offset has to be loaded }
  1459. end else begin
  1460. a_load_const_reg(list, OS_64, ref2.offset, r);
  1461. end;
  1462. end else if (ref2.index <> NR_NO) then begin
  1463. list.concat(taicpu.op_reg_reg_reg(A_ADD, r, ref2.base, ref2.index))
  1464. end else if (ref2.base <> NR_NO) and
  1465. (r <> ref2.base) then begin
  1466. a_load_reg_reg(list, OS_ADDR, OS_ADDR, ref2.base, r)
  1467. //list.concat(taicpu.op_reg_reg(A_MR, ref2.base, r));
  1468. end else begin
  1469. list.concat(taicpu.op_reg_const(A_LI, r, 0));
  1470. end;
  1471. end;
  1472. { ************* concatcopy ************ }
  1473. const
  1474. maxmoveunit = 8;
  1475. procedure tcgppc.g_concatcopy(list: TAsmList; const source, dest: treference;
  1476. len: aint);
  1477. var
  1478. countreg, tempreg: TRegister;
  1479. src, dst: TReference;
  1480. lab: tasmlabel;
  1481. count, count2: longint;
  1482. size: tcgsize;
  1483. begin
  1484. {$IFDEF extdebug}
  1485. if len > high(aint) then
  1486. internalerror(2002072704);
  1487. list.concat(tai_comment.create(strpnew('g_concatcopy1 ' + inttostr(len) + ' bytes left ')));
  1488. {$ENDIF extdebug}
  1489. { if the references are equal, exit, there is no need to copy anything }
  1490. if (references_equal(source, dest)) then
  1491. exit;
  1492. { make sure short loads are handled as optimally as possible;
  1493. note that the data here never overlaps, so we can do a forward
  1494. copy at all times.
  1495. NOTE: maybe use some scratch registers to pair load/store instructions
  1496. }
  1497. if (len <= maxmoveunit) then begin
  1498. src := source; dst := dest;
  1499. {$IFDEF extdebug}
  1500. list.concat(tai_comment.create(strpnew('g_concatcopy3 ' + inttostr(src.offset) + ' ' + inttostr(dst.offset))));
  1501. {$ENDIF extdebug}
  1502. while (len <> 0) do begin
  1503. if (len = 8) then begin
  1504. a_load_ref_ref(list, OS_64, OS_64, src, dst);
  1505. dec(len, 8);
  1506. end else if (len >= 4) then begin
  1507. a_load_ref_ref(list, OS_32, OS_32, src, dst);
  1508. inc(src.offset, 4); inc(dst.offset, 4);
  1509. dec(len, 4);
  1510. end else if (len >= 2) then begin
  1511. a_load_ref_ref(list, OS_16, OS_16, src, dst);
  1512. inc(src.offset, 2); inc(dst.offset, 2);
  1513. dec(len, 2);
  1514. end else begin
  1515. a_load_ref_ref(list, OS_8, OS_8, src, dst);
  1516. inc(src.offset, 1); inc(dst.offset, 1);
  1517. dec(len, 1);
  1518. end;
  1519. end;
  1520. exit;
  1521. end;
  1522. {$IFDEF extdebug}
  1523. list.concat(tai_comment.create(strpnew('g_concatcopy2 ' + inttostr(len) + ' bytes left ')));
  1524. {$ENDIF extdebug}
  1525. count := len div maxmoveunit;
  1526. reference_reset(src);
  1527. reference_reset(dst);
  1528. { load the address of source into src.base }
  1529. if (count > 4) or
  1530. not issimpleref(source) or
  1531. ((source.index <> NR_NO) and
  1532. ((source.offset + len) > high(smallint))) then begin
  1533. src.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1534. a_loadaddr_ref_reg(list, source, src.base);
  1535. end else begin
  1536. src := source;
  1537. end;
  1538. { load the address of dest into dst.base }
  1539. if (count > 4) or
  1540. not issimpleref(dest) or
  1541. ((dest.index <> NR_NO) and
  1542. ((dest.offset + len) > high(smallint))) then begin
  1543. dst.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1544. a_loadaddr_ref_reg(list, dest, dst.base);
  1545. end else begin
  1546. dst := dest;
  1547. end;
  1548. { generate a loop }
  1549. if count > 4 then begin
  1550. { the offsets are zero after the a_loadaddress_ref_reg and just
  1551. have to be set to 8. I put an Inc there so debugging may be
  1552. easier (should offset be different from zero here, it will be
  1553. easy to notice in the generated assembler }
  1554. inc(dst.offset, 8);
  1555. inc(src.offset, 8);
  1556. list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
  1557. list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
  1558. countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1559. a_load_const_reg(list, OS_64, count, countreg);
  1560. { explicitely allocate F0 since it can be used safely here
  1561. (for holding date that's being copied) }
  1562. a_reg_alloc(list, NR_F0);
  1563. current_asmdata.getjumplabel(lab);
  1564. a_label(list, lab);
  1565. list.concat(taicpu.op_reg_reg_const(A_SUBIC_, countreg, countreg, 1));
  1566. list.concat(taicpu.op_reg_ref(A_LFDU, NR_F0, src));
  1567. list.concat(taicpu.op_reg_ref(A_STFDU, NR_F0, dst));
  1568. a_jmp(list, A_BC, C_NE, 0, lab);
  1569. a_reg_dealloc(list, NR_F0);
  1570. len := len mod 8;
  1571. end;
  1572. count := len div 8;
  1573. { unrolled loop }
  1574. if count > 0 then begin
  1575. a_reg_alloc(list, NR_F0);
  1576. for count2 := 1 to count do begin
  1577. a_loadfpu_ref_reg(list, OS_F64, src, NR_F0);
  1578. a_loadfpu_reg_ref(list, OS_F64, NR_F0, dst);
  1579. inc(src.offset, 8);
  1580. inc(dst.offset, 8);
  1581. end;
  1582. a_reg_dealloc(list, NR_F0);
  1583. len := len mod 8;
  1584. end;
  1585. if (len and 4) <> 0 then begin
  1586. a_reg_alloc(list, NR_R0);
  1587. a_load_ref_reg(list, OS_32, OS_32, src, NR_R0);
  1588. a_load_reg_ref(list, OS_32, OS_32, NR_R0, dst);
  1589. inc(src.offset, 4);
  1590. inc(dst.offset, 4);
  1591. a_reg_dealloc(list, NR_R0);
  1592. end;
  1593. { copy the leftovers }
  1594. if (len and 2) <> 0 then begin
  1595. a_reg_alloc(list, NR_R0);
  1596. a_load_ref_reg(list, OS_16, OS_16, src, NR_R0);
  1597. a_load_reg_ref(list, OS_16, OS_16, NR_R0, dst);
  1598. inc(src.offset, 2);
  1599. inc(dst.offset, 2);
  1600. a_reg_dealloc(list, NR_R0);
  1601. end;
  1602. if (len and 1) <> 0 then begin
  1603. a_reg_alloc(list, NR_R0);
  1604. a_load_ref_reg(list, OS_8, OS_8, src, NR_R0);
  1605. a_load_reg_ref(list, OS_8, OS_8, NR_R0, dst);
  1606. a_reg_dealloc(list, NR_R0);
  1607. end;
  1608. end;
  1609. procedure tcgppc.g_overflowcheck(list: TAsmList; const l: tlocation; def:
  1610. tdef);
  1611. var
  1612. hl: tasmlabel;
  1613. flags : TResFlags;
  1614. begin
  1615. if not (cs_check_overflow in aktlocalswitches) then
  1616. exit;
  1617. current_asmdata.getjumplabel(hl);
  1618. if not ((def.deftype = pointerdef) or
  1619. ((def.deftype = orddef) and
  1620. (torddef(def).typ in [u64bit, u16bit, u32bit, u8bit, uchar,
  1621. bool8bit, bool16bit, bool32bit]))) then
  1622. begin
  1623. { ... instructions setting overflow flag ...
  1624. mfxerf R0
  1625. mtcrf 128, R0
  1626. ble cr0, label }
  1627. list.concat(taicpu.op_reg(A_MFXER, NR_R0));
  1628. list.concat(taicpu.op_const_reg(A_MTCRF, 128, NR_R0));
  1629. flags.cr := RS_CR0;
  1630. flags.flag := F_LE;
  1631. a_jmp_flags(list, flags, hl);
  1632. end else
  1633. a_jmp_cond(list, OC_AE, hl);
  1634. a_call_name(list, 'FPC_OVERFLOW');
  1635. a_label(list, hl);
  1636. end;
  1637. procedure tcgppc.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const
  1638. labelname: string; ioffset: longint);
  1639. procedure loadvmttor11;
  1640. var
  1641. href: treference;
  1642. begin
  1643. reference_reset_base(href, NR_R3, 0);
  1644. cg.a_load_ref_reg(list, OS_ADDR, OS_ADDR, href, NR_R11);
  1645. end;
  1646. procedure op_onr11methodaddr;
  1647. var
  1648. href: treference;
  1649. begin
  1650. if (procdef.extnumber = $FFFF) then
  1651. Internalerror(200006139);
  1652. { call/jmp vmtoffs(%eax) ; method offs }
  1653. reference_reset_base(href, NR_R11,
  1654. procdef._class.vmtmethodoffset(procdef.extnumber));
  1655. if not (hasLargeOffset(href)) then begin
  1656. list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11,
  1657. smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) <
  1658. 0))));
  1659. href.offset := smallint(href.offset and $FFFF);
  1660. end else
  1661. { add support for offsets > 16 bit }
  1662. internalerror(200510201);
  1663. list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
  1664. { the loaded reference is a function descriptor reference, so deref again
  1665. (at ofs 0 there's the real pointer) }
  1666. {$warning ts:TODO: update GOT reference}
  1667. reference_reset_base(href, NR_R11, 0);
  1668. list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
  1669. list.concat(taicpu.op_reg(A_MTCTR, NR_R11));
  1670. list.concat(taicpu.op_none(A_BCTR));
  1671. { NOP needed for the linker...? }
  1672. list.concat(taicpu.op_none(A_NOP));
  1673. end;
  1674. var
  1675. make_global: boolean;
  1676. begin
  1677. if (not (procdef.proctypeoption in [potype_function, potype_procedure])) then
  1678. Internalerror(200006137);
  1679. if not assigned(procdef._class) or
  1680. (procdef.procoptions * [po_classmethod, po_staticmethod,
  1681. po_methodpointer, po_interrupt, po_iocheck] <> []) then
  1682. Internalerror(200006138);
  1683. if procdef.owner.symtabletype <> objectsymtable then
  1684. Internalerror(200109191);
  1685. make_global := false;
  1686. if (not current_module.is_unit) or
  1687. (cs_create_smart in aktmoduleswitches) or
  1688. (procdef.owner.defowner.owner.symtabletype = globalsymtable) then
  1689. make_global := true;
  1690. if make_global then
  1691. List.concat(Tai_symbol.Createname_global(labelname, AT_FUNCTION, 0))
  1692. else
  1693. List.concat(Tai_symbol.Createname(labelname, AT_FUNCTION, 0));
  1694. { set param1 interface to self }
  1695. g_adjust_self_value(list, procdef, ioffset);
  1696. if po_virtualmethod in procdef.procoptions then begin
  1697. loadvmttor11;
  1698. op_onr11methodaddr;
  1699. end else
  1700. {$note ts:todo add GOT change?? - think not needed :) }
  1701. list.concat(taicpu.op_sym(A_B,
  1702. current_asmdata.newasmsymbol('.' + procdef.mangledname, AB_EXTERNAL,
  1703. AT_FUNCTION)));
  1704. List.concat(Tai_symbol_end.Createname(labelname));
  1705. end;
  1706. {***************** This is private property, keep out! :) *****************}
  1707. function tcgppc.issimpleref(const ref: treference): boolean;
  1708. begin
  1709. if (ref.base = NR_NO) and
  1710. (ref.index <> NR_NO) then
  1711. internalerror(200208101);
  1712. result :=
  1713. not (assigned(ref.symbol)) and
  1714. (((ref.index = NR_NO) and
  1715. (ref.offset >= low(smallint)) and
  1716. (ref.offset <= high(smallint))) or
  1717. ((ref.index <> NR_NO) and
  1718. (ref.offset = 0)));
  1719. end;
  1720. function tcgppc.load_got_symbol(list: TAsmList; symbol : string) : tregister;
  1721. var
  1722. l: tasmsymbol;
  1723. ref: treference;
  1724. symname : string;
  1725. begin
  1726. maybe_new_object_file(current_asmdata.asmlists[al_picdata]);
  1727. symname := '_$' + current_asmdata.name + '$got$' + symbol;
  1728. l:=current_asmdata.getasmsymbol(symname);
  1729. if not(assigned(l)) then begin
  1730. l:=current_asmdata.newasmsymbol(symname, AB_COMMON, AT_DATA);
  1731. current_asmdata.asmlists[al_picdata].concat(tai_section.create(sec_toc, '.toc', 8));
  1732. current_asmdata.asmlists[al_picdata].concat(tai_symbol.create_global(l,0));
  1733. current_asmdata.asmlists[al_picdata].concat(tai_directive.create(asd_toc_entry, symbol + '[TC], ' + symbol));
  1734. end;
  1735. reference_reset_symbol(ref,l,0);
  1736. ref.base := NR_R2;
  1737. ref.refaddr := addr_pic;
  1738. result := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1739. {$IFDEF EXTDEBUG}
  1740. list.concat(tai_comment.create(strpnew('loading got reference for ' + symbol)));
  1741. {$ENDIF EXTDEBUG}
  1742. // cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,ref,result);
  1743. list.concat(taicpu.op_reg_ref(A_LD, result, ref));
  1744. end;
  1745. function tcgppc.fixref(list: TAsmList; var ref: treference; const size : TCgsize): boolean;
  1746. var
  1747. tmpreg: tregister;
  1748. name : string;
  1749. begin
  1750. result := false;
  1751. { Avoids recursion. }
  1752. if (ref.refaddr = addr_pic) then exit;
  1753. {$IFDEF EXTDEBUG}
  1754. list.concat(tai_comment.create(strpnew('fixref0 ' + ref2string(ref))));
  1755. {$ENDIF EXTDEBUG}
  1756. { if we have to create PIC, add the symbol to the TOC/GOT }
  1757. if (cs_create_pic in aktmoduleswitches) and (assigned(ref.symbol)) then begin
  1758. tmpreg := load_got_symbol(list, ref.symbol.name);
  1759. if (ref.base = NR_NO) then
  1760. ref.base := tmpreg
  1761. else if (ref.index = NR_NO) then
  1762. ref.index := tmpreg
  1763. else begin
  1764. a_op_reg_reg_reg(list, OP_ADD, OS_ADDR, ref.base, tmpreg, tmpreg);
  1765. ref.base := tmpreg;
  1766. end;
  1767. ref.symbol := nil;
  1768. {$IFDEF EXTDEBUG}
  1769. list.concat(tai_comment.create(strpnew('fixref-pic ' + ref2string(ref))));
  1770. {$ENDIF EXTDEBUG}
  1771. end;
  1772. if (ref.base = NR_NO) then begin
  1773. ref.base := ref.index;
  1774. ref.index := NR_NO;
  1775. end;
  1776. if (ref.base <> NR_NO) and (ref.index <> NR_NO) and
  1777. ((ref.offset <> 0) or assigned(ref.symbol)) then begin
  1778. result := true;
  1779. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1780. a_op_reg_reg_reg(list, OP_ADD, size, ref.base, ref.index, tmpreg);
  1781. ref.base := tmpreg;
  1782. ref.index := NR_NO;
  1783. end;
  1784. if (ref.index <> NR_NO) and (assigned(ref.symbol) or (ref.offset <> 0)) then
  1785. internalerror(2006010506);
  1786. {$IFDEF EXTDEBUG}
  1787. list.concat(tai_comment.create(strpnew('fixref1 ' + ref2string(ref))));
  1788. {$ENDIF EXTDEBUG}
  1789. end;
  1790. procedure tcgppc.a_load_store(list: TAsmList; op: tasmop; reg: tregister;
  1791. ref: treference);
  1792. var
  1793. tmpreg, tmpreg2: tregister;
  1794. tmpref: treference;
  1795. largeOffset: Boolean;
  1796. begin
  1797. { at this point there must not be a combination of values in the ref treference
  1798. which is not possible to directly map to instructions of the PowerPC architecture }
  1799. if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
  1800. internalerror(200310131);
  1801. { if this is a PIC'ed address, handle it and exit }
  1802. if (ref.refaddr = addr_pic) then begin
  1803. if (ref.offset <> 0) then
  1804. internalerror(2006010501);
  1805. if (ref.index <> NR_NO) then
  1806. internalerror(2006010502);
  1807. if (not assigned(ref.symbol)) then
  1808. internalerror(200601050);
  1809. list.concat(taicpu.op_reg_ref(op, reg, ref));
  1810. exit;
  1811. end;
  1812. { for some instructions we need to check that the offset is divisible by at
  1813. least four. If not, add the bytes which are "off" to the base register and
  1814. adjust the offset accordingly }
  1815. case op of
  1816. A_LD, A_LDU, A_STD, A_STDU, A_LWA :
  1817. if ((ref.offset mod 4) <> 0) then begin
  1818. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1819. if (ref.base <> NR_NO) then begin
  1820. a_op_const_reg_reg(list, OP_ADD, OS_ADDR, ref.offset mod 4, ref.base, tmpreg);
  1821. ref.base := tmpreg;
  1822. end else begin
  1823. list.concat(taicpu.op_reg_const(A_LI, tmpreg, ref.offset mod 4));
  1824. ref.base := tmpreg;
  1825. end;
  1826. ref.offset := (ref.offset div 4) * 4;
  1827. end;
  1828. end;
  1829. {$IFDEF EXTDEBUG}
  1830. list.concat(tai_comment.create(strpnew('a_load_store1 ' + BoolToStr(ref.refaddr = addr_pic))));
  1831. {$ENDIF EXTDEBUG}
  1832. { if we have to load/store from a symbol or large addresses, use a temporary register
  1833. containing the address }
  1834. if (assigned(ref.symbol) or (hasLargeOffset(ref))) then begin
  1835. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1836. if (hasLargeOffset(ref) and (ref.base = NR_NO)) then begin
  1837. ref.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1838. a_load_const_reg(list, OS_ADDR, ref.offset, ref.base);
  1839. ref.offset := 0;
  1840. end;
  1841. reference_reset(tmpref);
  1842. tmpref.symbol := ref.symbol;
  1843. tmpref.relsymbol := ref.relsymbol;
  1844. tmpref.offset := ref.offset;
  1845. if (ref.base <> NR_NO) then begin
  1846. { As long as the TOC isn't working we try to achieve highest speed (in this
  1847. case by allowing instructions execute in parallel) as possible at the cost
  1848. of using another temporary register. So the code template when there is
  1849. a base register and an offset is the following:
  1850. lis rT1, SYM+offs@highest
  1851. ori rT1, rT1, SYM+offs@higher
  1852. lis rT2, SYM+offs@hi
  1853. ori rT2, SYM+offs@lo
  1854. rldimi rT2, rT1, 32
  1855. <op>X reg, base, rT2
  1856. }
  1857. tmpreg2 := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1858. if (assigned(tmpref.symbol)) then begin
  1859. tmpref.refaddr := addr_highest;
  1860. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
  1861. tmpref.refaddr := addr_higher;
  1862. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
  1863. tmpref.refaddr := addr_high;
  1864. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg2, tmpref));
  1865. tmpref.refaddr := addr_low;
  1866. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg2, tmpreg2, tmpref));
  1867. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, tmpreg2, tmpreg, 32, 0));
  1868. end else
  1869. a_load_const_reg(list, OS_ADDR, tmpref.offset, tmpreg2);
  1870. reference_reset(tmpref);
  1871. tmpref.base := ref.base;
  1872. tmpref.index := tmpreg2;
  1873. case op of
  1874. { the code generator doesn't generate update instructions anyway, so
  1875. error out on those instructions }
  1876. A_LBZ : op := A_LBZX;
  1877. A_LHZ : op := A_LHZX;
  1878. A_LWZ : op := A_LWZX;
  1879. A_LD : op := A_LDX;
  1880. A_LHA : op := A_LHAX;
  1881. A_LWA : op := A_LWAX;
  1882. A_LFS : op := A_LFSX;
  1883. A_LFD : op := A_LFDX;
  1884. A_STB : op := A_STBX;
  1885. A_STH : op := A_STHX;
  1886. A_STW : op := A_STWX;
  1887. A_STD : op := A_STDX;
  1888. A_STFS : op := A_STFSX;
  1889. A_STFD : op := A_STFDX;
  1890. else
  1891. { unknown load/store opcode }
  1892. internalerror(2005101302);
  1893. end;
  1894. list.concat(taicpu.op_reg_ref(op, reg, tmpref));
  1895. end else begin
  1896. { when accessing value from a reference without a base register, use the
  1897. following code template:
  1898. lis rT,SYM+offs@highesta
  1899. ori rT,SYM+offs@highera
  1900. sldi rT,rT,32
  1901. oris rT,rT,SYM+offs@ha
  1902. ld rD,SYM+offs@l(rT)
  1903. }
  1904. tmpref.refaddr := addr_highesta;
  1905. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
  1906. tmpref.refaddr := addr_highera;
  1907. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
  1908. list.concat(taicpu.op_reg_reg_const(A_SLDI, tmpreg, tmpreg, 32));
  1909. tmpref.refaddr := addr_higha;
  1910. list.concat(taicpu.op_reg_reg_ref(A_ORIS, tmpreg, tmpreg, tmpref));
  1911. tmpref.base := tmpreg;
  1912. tmpref.refaddr := addr_low;
  1913. list.concat(taicpu.op_reg_ref(op, reg, tmpref));
  1914. end;
  1915. end else begin
  1916. list.concat(taicpu.op_reg_ref(op, reg, ref));
  1917. end;
  1918. end;
  1919. procedure tcgppc.a_jmp(list: TAsmList; op: tasmop; c: tasmcondflag;
  1920. crval: longint; l: tasmlabel);
  1921. var
  1922. p: taicpu;
  1923. begin
  1924. p := taicpu.op_sym(op, current_asmdata.newasmsymbol(l.name, AB_EXTERNAL,
  1925. AT_LABEL));
  1926. if op <> A_B then
  1927. create_cond_norm(c, crval, p.condition);
  1928. p.is_jmp := true;
  1929. list.concat(p)
  1930. end;
  1931. function tcgppc.hasLargeOffset(const ref : TReference) : Boolean; {$ifdef ver2_0}inline;{$endif}
  1932. begin
  1933. { this rather strange calculation is required because offsets of TReferences are unsigned }
  1934. result := aword(ref.offset-low(smallint)) > high(smallint)-low(smallint);
  1935. end;
  1936. procedure tcgppc.loadConstantPIC(list : TAsmList; size : TCGSize; a : aint; reg : TRegister);
  1937. var
  1938. l: tasmsymbol;
  1939. ref: treference;
  1940. symname : string;
  1941. begin
  1942. maybe_new_object_file(current_asmdata.asmlists[al_picdata]);
  1943. symname := '_$' + current_asmdata.name + '$toc$' + hexstr(a, sizeof(a)*2);
  1944. l:=current_asmdata.getasmsymbol(symname);
  1945. if not(assigned(l)) then begin
  1946. l:=current_asmdata.newasmsymbol(symname,AB_GLOBAL, AT_DATA);
  1947. current_asmdata.asmlists[al_picdata].concat(tai_section.create(sec_toc, '.toc', 8));
  1948. current_asmdata.asmlists[al_picdata].concat(tai_symbol.create_global(l,0));
  1949. current_asmdata.asmlists[al_picdata].concat(tai_directive.create(asd_toc_entry, symname + '[TC], ' + inttostr(a)));
  1950. end;
  1951. reference_reset_symbol(ref,l,0);
  1952. ref.base := NR_R2;
  1953. ref.refaddr := addr_pic;
  1954. {$IFDEF EXTDEBUG}
  1955. list.concat(tai_comment.create(strpnew('loading value from TOC reference for ' + symname)));
  1956. {$ENDIF EXTDEBUG}
  1957. cg.a_load_ref_reg(list, OS_INT, OS_INT, ref, reg);
  1958. end;
  1959. begin
  1960. cg := tcgppc.create;
  1961. end.