cgcpu.pas 70 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. This unit implements the code generator for the PowerPC
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cgcpu;
  18. {$I fpcdefs.inc}
  19. interface
  20. uses
  21. globtype, symtype, symdef,
  22. cgbase, cgobj,
  23. aasmbase, aasmcpu, aasmtai,
  24. cpubase, cpuinfo, cgutils, rgcpu,
  25. parabase;
  26. type
  27. tcgppc = class(tcg)
  28. procedure init_register_allocators; override;
  29. procedure done_register_allocators; override;
  30. { passing parameters, per default the parameter is pushed }
  31. { nr gives the number of the parameter (enumerated from }
  32. { left to right), this allows to move the parameter to }
  33. { register, if the cpu supports register calling }
  34. { conventions }
  35. procedure a_param_const(list: taasmoutput; size: tcgsize; a: aint; const
  36. paraloc: tcgpara); override;
  37. procedure a_param_ref(list: taasmoutput; size: tcgsize; const r: treference;
  38. const paraloc: tcgpara); override;
  39. procedure a_paramaddr_ref(list: taasmoutput; const r: treference; const
  40. paraloc: tcgpara); override;
  41. procedure a_call_name(list: taasmoutput; const s: string); override;
  42. procedure a_call_reg(list: taasmoutput; reg: tregister); override;
  43. procedure a_op_const_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; a:
  44. aint; reg: TRegister); override;
  45. procedure a_op_reg_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; src,
  46. dst: TRegister); override;
  47. procedure a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
  48. size: tcgsize; a: aint; src, dst: tregister); override;
  49. procedure a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
  50. size: tcgsize; src1, src2, dst: tregister); override;
  51. { move instructions }
  52. procedure a_load_const_reg(list: taasmoutput; size: tcgsize; a: aint; reg:
  53. tregister); override;
  54. { stores the contents of register reg to the memory location described by
  55. ref }
  56. procedure a_load_reg_ref(list: taasmoutput; fromsize, tosize: tcgsize; reg:
  57. tregister; const ref: treference); override;
  58. { loads the memory pointed to by ref into register reg }
  59. procedure a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize; const
  60. Ref: treference; reg: tregister); override;
  61. procedure a_load_reg_reg(list: taasmoutput; fromsize, tosize: tcgsize; reg1,
  62. reg2: tregister); override;
  63. { fpu move instructions }
  64. procedure a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2:
  65. tregister); override;
  66. procedure a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref:
  67. treference; reg: tregister); override;
  68. procedure a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
  69. tregister; const ref: treference); override;
  70. { comparison operations }
  71. procedure a_cmp_const_reg_label(list: taasmoutput; size: tcgsize; cmp_op:
  72. topcmp; a: aint; reg: tregister;
  73. l: tasmlabel); override;
  74. procedure a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize; cmp_op:
  75. topcmp; reg1, reg2: tregister; l: tasmlabel); override;
  76. procedure a_jmp_name(list: taasmoutput; const s: string); override;
  77. procedure a_jmp_always(list: taasmoutput; l: tasmlabel); override;
  78. procedure a_jmp_flags(list: taasmoutput; const f: TResFlags; l: tasmlabel);
  79. override;
  80. procedure g_flags2reg(list: taasmoutput; size: TCgSize; const f: TResFlags;
  81. reg: TRegister); override;
  82. procedure g_proc_entry(list: taasmoutput; localsize: longint; nostackframe:
  83. boolean); override;
  84. procedure g_proc_exit(list: taasmoutput; parasize: longint; nostackframe:
  85. boolean); override;
  86. procedure g_save_standard_registers(list: Taasmoutput); override;
  87. procedure g_restore_standard_registers(list: Taasmoutput); override;
  88. procedure a_loadaddr_ref_reg(list: taasmoutput; const ref: treference; r:
  89. tregister); override;
  90. procedure g_concatcopy(list: taasmoutput; const source, dest: treference;
  91. len: aint); override;
  92. procedure g_overflowcheck(list: taasmoutput; const l: tlocation; def: tdef);
  93. override;
  94. procedure a_jmp_cond(list: taasmoutput; cond: TOpCmp; l: tasmlabel);
  95. procedure g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
  96. labelname: string; ioffset: longint); override;
  97. private
  98. { Make sure ref is a valid reference for the PowerPC and sets the }
  99. { base to the value of the index if (base = R_NO). }
  100. { Returns true if the reference contained a base, index and an }
  101. { offset or symbol, in which case the base will have been changed }
  102. { to a tempreg (which has to be freed by the caller) containing }
  103. { the sum of part of the original reference }
  104. function fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
  105. { returns whether a reference can be used immediately in a powerpc }
  106. { instruction }
  107. function issimpleref(const ref: treference): boolean;
  108. { contains the common code of a_load_reg_ref and a_load_ref_reg }
  109. procedure a_load_store(list: taasmoutput; op: tasmop; reg: tregister;
  110. ref: treference);
  111. { creates the correct branch instruction for a given combination }
  112. { of asmcondflags and destination addressing mode }
  113. procedure a_jmp(list: taasmoutput; op: tasmop;
  114. c: tasmcondflag; crval: longint; l: tasmlabel);
  115. { returns the lowest numbered FP register in use, and the number of used FP registers
  116. for the current procedure }
  117. procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
  118. { returns the lowest numbered GP register in use, and the number of used GP registers
  119. for the current procedure }
  120. procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
  121. { returns true if the offset of the given reference can not be represented by a 16 bit
  122. immediate as required by some PowerPC instructions }
  123. function hasLargeOffset(const ref : TReference) : Boolean; inline;
  124. procedure a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
  125. end;
  126. const
  127. TShiftOpCG2AsmOpConst : array[boolean, OP_SAR..OP_SHR] of TAsmOp = (
  128. (A_SRAWI, A_SLWI, A_SRWI), (A_SRADI, A_SLDI, A_SRDI)
  129. );
  130. TOpCmp2AsmCond: array[topcmp] of TAsmCondFlag = (C_NONE, C_EQ, C_GT,
  131. C_LT, C_GE, C_LE, C_NE, C_LE, C_LT, C_GE, C_GT);
  132. implementation
  133. uses
  134. sysutils,
  135. globals, verbose, systems, cutils,
  136. symconst, symsym, fmodule,
  137. rgobj, tgobj, cpupi, procinfo, paramgr;
  138. { helper function which calculate "magic" values for replacement of unsigned
  139. division by constant operation by multiplication. See the PowerPC compiler
  140. developer manual for more information }
  141. procedure getmagic_unsignedN(const N : byte; const d : aWord;
  142. out magic_m : aWord; out magic_add : boolean; out magic_shift : byte);
  143. var
  144. p : aInt;
  145. nc, delta, q1, r1, q2, r2, two_N_minus_1 : aWord;
  146. begin
  147. assert(d > 0);
  148. two_N_minus_1 := aWord(1) shl (N-1);
  149. magic_add := false;
  150. nc := - 1 - (-d) mod d;
  151. p := N-1; { initialize p }
  152. q1 := two_N_minus_1 div nc; { initialize q1 = 2p/nc }
  153. r1 := two_N_minus_1 - q1*nc; { initialize r1 = rem(2p,nc) }
  154. q2 := (two_N_minus_1-1) div d; { initialize q2 = (2p-1)/d }
  155. r2 := (two_N_minus_1-1) - q2*d; { initialize r2 = rem((2p-1),d) }
  156. repeat
  157. inc(p);
  158. if (r1 >= (nc - r1)) then begin
  159. q1 := 2 * q1 + 1; { update q1 }
  160. r1 := 2*r1 - nc; { update r1 }
  161. end else begin
  162. q1 := 2*q1; { update q1 }
  163. r1 := 2*r1; { update r1 }
  164. end;
  165. if ((r2 + 1) >= (d - r2)) then begin
  166. if (q2 >= (two_N_minus_1-1)) then
  167. magic_add := true;
  168. q2 := 2*q2 + 1; { update q2 }
  169. r2 := 2*r2 + 1 - d; { update r2 }
  170. end else begin
  171. if (q2 >= two_N_minus_1) then
  172. magic_add := true;
  173. q2 := 2*q2; { update q2 }
  174. r2 := 2*r2 + 1; { update r2 }
  175. end;
  176. delta := d - 1 - r2;
  177. until not ((p < (2*N)) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
  178. magic_m := q2 + 1; { resulting magic number }
  179. magic_shift := p - N; { resulting shift }
  180. end;
  181. { helper function which calculate "magic" values for replacement of signed
  182. division by constant operation by multiplication. See the PowerPC compiler
  183. developer manual for more information }
  184. procedure getmagic_signedN(const N : byte; const d : aInt;
  185. out magic_m : aInt; out magic_s : aInt);
  186. var
  187. p : aInt;
  188. ad, anc, delta, q1, r1, q2, r2, t : aWord;
  189. two_N_minus_1 : aWord;
  190. begin
  191. assert((d < -1) or (d > 1));
  192. two_N_minus_1 := aWord(1) shl (N-1);
  193. ad := abs(d);
  194. t := two_N_minus_1 + (aWord(d) shr (N-1));
  195. anc := t - 1 - t mod ad; { absolute value of nc }
  196. p := (N-1); { initialize p }
  197. q1 := two_N_minus_1 div anc; { initialize q1 = 2p/abs(nc) }
  198. r1 := two_N_minus_1 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
  199. q2 := two_N_minus_1 div ad; { initialize q2 = 2p/abs(d) }
  200. r2 := two_N_minus_1 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
  201. repeat
  202. inc(p);
  203. q1 := 2*q1; { update q1 = 2p/abs(nc) }
  204. r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
  205. if (r1 >= anc) then begin { must be unsigned comparison }
  206. inc(q1);
  207. dec(r1, anc);
  208. end;
  209. q2 := 2*q2; { update q2 = 2p/abs(d) }
  210. r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
  211. if (r2 >= ad) then begin { must be unsigned comparison }
  212. inc(q2);
  213. dec(r2, ad);
  214. end;
  215. delta := ad - r2;
  216. until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
  217. magic_m := q2 + 1;
  218. if (d < 0) then begin
  219. magic_m := -magic_m; { resulting magic number }
  220. end;
  221. magic_s := p - N; { resulting shift }
  222. end;
  223. { finds positive and negative powers of two of the given value, returning the
  224. power and whether it's a negative power or not in addition to the actual result
  225. of the function }
  226. function ispowerof2(value : aInt; out power : byte; out neg : boolean) : boolean;
  227. var
  228. i : longint;
  229. hl : aInt;
  230. begin
  231. neg := false;
  232. { also try to find negative power of two's by negating if the
  233. value is negative. low(aInt) is special because it can not be
  234. negated. Simply return the appropriate values for it }
  235. if (value < 0) then begin
  236. neg := true;
  237. if (value = low(aInt)) then begin
  238. power := sizeof(aInt)*8-1;
  239. result := true;
  240. exit;
  241. end;
  242. value := -value;
  243. end;
  244. if ((value and (value-1)) <> 0) then begin
  245. result := false;
  246. exit;
  247. end;
  248. hl := 1;
  249. for i := 0 to (sizeof(aInt)*8-1) do begin
  250. if (hl = value) then begin
  251. result := true;
  252. power := i;
  253. exit;
  254. end;
  255. hl := hl shl 1;
  256. end;
  257. end;
  258. procedure tcgppc.init_register_allocators;
  259. begin
  260. inherited init_register_allocators;
  261. rg[R_INTREGISTER] := trgcpu.create(R_INTREGISTER, R_SUBWHOLE,
  262. [RS_R3, RS_R4, RS_R5, RS_R6, RS_R7, RS_R8,
  263. RS_R9, RS_R10, RS_R11, RS_R12, RS_R31, RS_R30, RS_R29,
  264. RS_R28, RS_R27, RS_R26, RS_R25, RS_R24, RS_R23, RS_R22,
  265. RS_R21, RS_R20, RS_R19, RS_R18, RS_R17, RS_R16, RS_R15,
  266. RS_R14, RS_R13], first_int_imreg, []);
  267. rg[R_FPUREGISTER] := trgcpu.create(R_FPUREGISTER, R_SUBNONE,
  268. [RS_F0, RS_F1, RS_F2, RS_F3, RS_F4, RS_F5, RS_F6, RS_F7, RS_F8, RS_F9,
  269. RS_F10, RS_F11, RS_F12, RS_F13, RS_F31, RS_F30, RS_F29, RS_F28, RS_F27,
  270. RS_F26, RS_F25, RS_F24, RS_F23, RS_F22, RS_F21, RS_F20, RS_F19, RS_F18,
  271. RS_F17, RS_F16, RS_F15, RS_F14], first_fpu_imreg, []);
  272. {$WARNING FIX ME}
  273. rg[R_MMREGISTER] := trgcpu.create(R_MMREGISTER, R_SUBNONE,
  274. [RS_M0, RS_M1, RS_M2], first_mm_imreg, []);
  275. end;
  276. procedure tcgppc.done_register_allocators;
  277. begin
  278. rg[R_INTREGISTER].free;
  279. rg[R_FPUREGISTER].free;
  280. rg[R_MMREGISTER].free;
  281. inherited done_register_allocators;
  282. end;
  283. procedure tcgppc.a_param_const(list: taasmoutput; size: tcgsize; a: aint; const
  284. paraloc: tcgpara);
  285. var
  286. ref: treference;
  287. begin
  288. paraloc.check_simple_location;
  289. case paraloc.location^.loc of
  290. LOC_REGISTER, LOC_CREGISTER:
  291. a_load_const_reg(list, size, a, paraloc.location^.register);
  292. LOC_REFERENCE:
  293. begin
  294. reference_reset(ref);
  295. ref.base := paraloc.location^.reference.index;
  296. ref.offset := paraloc.location^.reference.offset;
  297. a_load_const_ref(list, size, a, ref);
  298. end;
  299. else
  300. internalerror(2002081101);
  301. end;
  302. end;
  303. procedure tcgppc.a_param_ref(list: taasmoutput; size: tcgsize; const r:
  304. treference; const paraloc: tcgpara);
  305. var
  306. tmpref, ref: treference;
  307. location: pcgparalocation;
  308. sizeleft: aint;
  309. adjusttail : boolean;
  310. begin
  311. location := paraloc.location;
  312. tmpref := r;
  313. sizeleft := paraloc.intsize;
  314. adjusttail := false;
  315. while assigned(location) do begin
  316. case location^.loc of
  317. LOC_REGISTER, LOC_CREGISTER:
  318. begin
  319. if (size <> OS_NO) then
  320. a_load_ref_reg(list, size, location^.size, tmpref,
  321. location^.register)
  322. else
  323. {$IFDEF extdebug}
  324. list.concat(tai_comment.create(strpnew('a_param_ref with OS_NO')));
  325. {$ENDIF extdebug}
  326. { load non-integral sized memory location into register. This
  327. memory location be 1-sizeleft byte sized.
  328. Always assume that this memory area is properly aligned, eg. start
  329. loading the larger quantities for "odd" quantities first }
  330. case sizeleft of
  331. 1,2,4,8 :
  332. a_load_ref_reg(list, int_cgsize(sizeleft), location^.size, tmpref,
  333. location^.register);
  334. 3 : begin
  335. a_reg_alloc(list, NR_R12);
  336. a_load_ref_reg(list, OS_16, location^.size, tmpref,
  337. NR_R12);
  338. inc(tmpref.offset, tcgsize2size[OS_16]);
  339. a_load_ref_reg(list, OS_8, location^.size, tmpref,
  340. location^.register);
  341. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 40));
  342. a_reg_dealloc(list, NR_R12);
  343. end;
  344. 5 : begin
  345. a_reg_alloc(list, NR_R12);
  346. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  347. inc(tmpref.offset, tcgsize2size[OS_32]);
  348. a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
  349. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 24));
  350. a_reg_dealloc(list, NR_R12);
  351. end;
  352. 6 : begin
  353. a_reg_alloc(list, NR_R12);
  354. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  355. inc(tmpref.offset, tcgsize2size[OS_32]);
  356. a_load_ref_reg(list, OS_16, location^.size, tmpref, location^.register);
  357. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 16, 16));
  358. a_reg_dealloc(list, NR_R12);
  359. end;
  360. 7 : begin
  361. a_reg_alloc(list, NR_R12);
  362. a_reg_alloc(list, NR_R0);
  363. a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
  364. inc(tmpref.offset, tcgsize2size[OS_32]);
  365. a_load_ref_reg(list, OS_16, location^.size, tmpref, NR_R0);
  366. inc(tmpref.offset, tcgsize2size[OS_16]);
  367. a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
  368. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, NR_R0, NR_R12, 16, 16));
  369. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R0, 8, 8));
  370. a_reg_dealloc(list, NR_R0);
  371. a_reg_dealloc(list, NR_R12);
  372. end;
  373. else
  374. { still > 8 bytes to load, so load data single register now }
  375. a_load_ref_reg(list, location^.size, location^.size, tmpref,
  376. location^.register);
  377. { the block is > 8 bytes, so we have to store any bytes not
  378. a multiple of the register size beginning with the MSB }
  379. adjusttail := true;
  380. end;
  381. (*
  382. { Comment this in (for gcc compat) and be prepared for a whole bunch of errors :/ }
  383. if (adjusttail) and (sizeleft < tcgsize2size[OS_INT]) then
  384. a_op_const_reg(list, OP_SHL, OS_INT,
  385. (tcgsize2size[OS_INT] - sizeleft) * tcgsize2size[OS_INT],
  386. location^.register);
  387. *)
  388. end;
  389. LOC_REFERENCE:
  390. begin
  391. reference_reset_base(ref, location^.reference.index,
  392. location^.reference.offset);
  393. g_concatcopy(list, tmpref, ref, sizeleft);
  394. if assigned(location^.next) then
  395. internalerror(2005010710);
  396. end;
  397. LOC_FPUREGISTER, LOC_CFPUREGISTER:
  398. case location^.size of
  399. OS_F32, OS_F64:
  400. a_loadfpu_ref_reg(list, location^.size, tmpref, location^.register);
  401. else
  402. internalerror(2002072801);
  403. end;
  404. LOC_VOID:
  405. { nothing to do }
  406. ;
  407. else
  408. internalerror(2002081103);
  409. end;
  410. inc(tmpref.offset, tcgsize2size[location^.size]);
  411. dec(sizeleft, tcgsize2size[location^.size]);
  412. location := location^.next;
  413. end;
  414. end;
  415. procedure tcgppc.a_paramaddr_ref(list: taasmoutput; const r: treference; const
  416. paraloc: tcgpara);
  417. var
  418. ref: treference;
  419. tmpreg: tregister;
  420. begin
  421. paraloc.check_simple_location;
  422. case paraloc.location^.loc of
  423. LOC_REGISTER, LOC_CREGISTER:
  424. a_loadaddr_ref_reg(list, r, paraloc.location^.register);
  425. LOC_REFERENCE:
  426. begin
  427. reference_reset(ref);
  428. ref.base := paraloc.location^.reference.index;
  429. ref.offset := paraloc.location^.reference.offset;
  430. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  431. a_loadaddr_ref_reg(list, r, tmpreg);
  432. a_load_reg_ref(list, OS_ADDR, OS_ADDR, tmpreg, ref);
  433. end;
  434. else
  435. internalerror(2002080701);
  436. end;
  437. end;
  438. { calling a procedure by name }
  439. procedure tcgppc.a_call_name(list: taasmoutput; const s: string);
  440. begin
  441. a_call_name_direct(list, s, true, true);
  442. end;
  443. procedure tcgppc.a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
  444. begin
  445. if (prependDot) then
  446. s := '.' + s;
  447. list.concat(taicpu.op_sym(A_BL, objectlibrary.newasmsymbol(s, AB_EXTERNAL,
  448. AT_FUNCTION)));
  449. if (addNOP) then
  450. list.concat(taicpu.op_none(A_NOP));
  451. { the compiler does not properly set this flag anymore in pass 1, and
  452. for now we only need it after pass 2 (I hope) (JM) }
  453. include(current_procinfo.flags, pi_do_call);
  454. end;
  455. { calling a procedure by address }
  456. procedure tcgppc.a_call_reg(list: taasmoutput; reg: tregister);
  457. var
  458. tmpref: treference;
  459. begin
  460. if (not (cs_littlesize in aktglobalswitches)) then begin
  461. { load actual function entry (reg contains the reference to the function descriptor)
  462. into R0 }
  463. reference_reset_base(tmpref, reg, 0);
  464. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R0);
  465. { save TOC pointer in stackframe }
  466. reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
  467. a_load_reg_ref(list, OS_ADDR, OS_ADDR, NR_RTOC, tmpref);
  468. { move actual function pointer to CTR register }
  469. list.concat(taicpu.op_reg(A_MTCTR, NR_R0));
  470. { load new TOC pointer from function descriptor into RTOC register }
  471. reference_reset_base(tmpref, reg, tcgsize2size[OS_ADDR]);
  472. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
  473. { load new environment pointer from function descriptor into R11 register }
  474. reference_reset_base(tmpref, reg, 2*tcgsize2size[OS_ADDR]);
  475. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R11);
  476. { call function }
  477. list.concat(taicpu.op_none(A_BCTRL));
  478. end else begin
  479. { call ptrgl helper routine which expects the pointer to the function descriptor
  480. in R11 }
  481. a_load_reg_reg(list, OS_ADDR, OS_ADDR, reg, NR_R11);
  482. a_call_name_direct(list, '.ptrgl', false, false);
  483. end;
  484. { we need to load the old RTOC from stackframe because we changed it}
  485. reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
  486. a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
  487. include(current_procinfo.flags, pi_do_call);
  488. end;
  489. {********************** load instructions ********************}
  490. procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
  491. reg: TRegister);
  492. { loads a 32 bit constant into the given register, using an optimal instruction sequence.
  493. This is either LIS, LI or LI+ADDIS.
  494. Returns true if during these operations the upper 32 bits were filled with 1 bits (e.g.
  495. sign extension was performed) }
  496. function load32bitconstant(list : taasmoutput; size : TCGSize; a : longint;
  497. reg : TRegister) : boolean;
  498. var
  499. is_half_signed : byte;
  500. begin
  501. { if the lower 16 bits are zero, do a single LIS }
  502. if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
  503. list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(hi(a))));
  504. load32bitconstant := longint(a) < 0;
  505. end else begin
  506. is_half_signed := ord(smallint(lo(a)) < 0);
  507. list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a and $ffff)));
  508. if smallint(hi(a) + is_half_signed) <> 0 then begin
  509. list.concat(taicpu.op_reg_reg_const(A_ADDIS, reg, reg, smallint(hi(a) + is_half_signed)));
  510. end;
  511. load32bitconstant := (smallint(a) < 0) or (a < 0);
  512. end;
  513. end;
  514. { R0-safe version of the above (ADDIS doesn't work the same way with R0 as base), without
  515. the return value. Unused until further testing shows that it is not really necessary;
  516. loading the upper 32 bits of a value is now done using R12, which does not require
  517. special treatment }
  518. procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
  519. reg : TRegister);
  520. begin
  521. { only 16 bit constant? (-2^15 <= a <= +2^15-1) }
  522. if (a >= low(smallint)) and (a <= high(smallint)) then begin
  523. list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a)));
  524. end else begin
  525. { check if we have to start with LI or LIS, load as 32 bit constant }
  526. if ((a and $FFFF) <> 0) then begin
  527. list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16)));
  528. list.concat(taicpu.op_reg_reg_const(A_ORI, reg, reg, word(a)));
  529. end else begin
  530. list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16)));
  531. end;
  532. end;
  533. end;
  534. var
  535. extendssign : boolean;
  536. {$IFDEF EXTDEBUG}
  537. astring : string;
  538. {$ENDIF EXTDEBUG}
  539. begin
  540. {$IFDEF EXTDEBUG}
  541. astring := 'a_load_const reg ' + inttostr(hi(a)) + ' ' + inttostr(lo(a)) + ' ' + inttostr(ord(size)) + ' ' + inttostr(tcgsize2size[size]);
  542. list.concat(tai_comment.create(strpnew(astring)));
  543. {$ENDIF EXTDEBUG}
  544. if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
  545. internalerror(2002090902);
  546. if (lo(a) = 0) and (hi(a) <> 0) then begin
  547. { load only upper 32 bits, and shift }
  548. load32bitconstant(list, size, hi(a), reg);
  549. list.concat(taicpu.op_reg_reg_const(A_SLDI, reg, reg, 32));
  550. end else begin
  551. { load lower 32 bits }
  552. extendssign := load32bitconstant(list, size, lo(a), reg);
  553. if (extendssign) and (hi(a) = 0) then
  554. { if upper 32 bits are zero, but loading the lower 32 bit resulted in automatic
  555. sign extension, clear those bits }
  556. a_load_reg_reg(list, OS_32, OS_64, reg, reg)
  557. else if (not
  558. ((extendssign and (longint(hi(a)) = -1)) or
  559. ((not extendssign) and (hi(a)=0)))
  560. ) then begin
  561. { only load the upper 32 bits, if the automatic sign extension is not okay,
  562. that is, _not_ if
  563. - loading the lower 32 bits resulted in -1 in the upper 32 bits, and the upper
  564. 32 bits should contain -1
  565. - loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
  566. 32 bits should contain 0 }
  567. load32bitconstant(list, size, hi(a), NR_R12);
  568. { combine both registers }
  569. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R12, 32, 0));
  570. end;
  571. end;
  572. end;
  573. procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize;
  574. reg: tregister; const ref: treference);
  575. const
  576. StoreInstr: array[OS_8..OS_64, boolean, boolean] of TAsmOp =
  577. { indexed? updating?}
  578. (((A_STB, A_STBU), (A_STBX, A_STBUX)),
  579. ((A_STH, A_STHU), (A_STHX, A_STHUX)),
  580. ((A_STW, A_STWU), (A_STWX, A_STWUX)),
  581. ((A_STD, A_STDU), (A_STDX, A_STDUX))
  582. );
  583. var
  584. op: TAsmOp;
  585. ref2: TReference;
  586. begin
  587. ref2 := ref;
  588. fixref(list, ref2, tosize);
  589. if tosize in [OS_S8..OS_S64] then
  590. { storing is the same for signed and unsigned values }
  591. tosize := tcgsize(ord(tosize) - (ord(OS_S8) - ord(OS_8)));
  592. op := storeinstr[tcgsize2unsigned[tosize], ref2.index <> NR_NO, false];
  593. a_load_store(list, op, reg, ref2);
  594. end;
  595. procedure tcgppc.a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize;
  596. const ref: treference; reg: tregister);
  597. const
  598. LoadInstr: array[OS_8..OS_S64, boolean, boolean] of TAsmOp =
  599. { indexed? updating? }
  600. (((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
  601. ((A_LHZ, A_LHZU), (A_LHZX, A_LHZUX)),
  602. ((A_LWZ, A_LWZU), (A_LWZX, A_LWZUX)),
  603. ((A_LD, A_LDU), (A_LDX, A_LDUX)),
  604. { 128bit stuff too }
  605. ((A_NONE, A_NONE), (A_NONE, A_NONE)),
  606. { there's no load-byte-with-sign-extend :( }
  607. ((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
  608. ((A_LHA, A_LHAU), (A_LHAX, A_LHAUX)),
  609. { there's no load-word-arithmetic-indexed with update, simulate it in code :( }
  610. ((A_LWA, A_NOP), (A_LWAX, A_LWAUX)),
  611. ((A_LD, A_LDU), (A_LDX, A_LDUX))
  612. );
  613. var
  614. op: tasmop;
  615. ref2: treference;
  616. begin
  617. if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
  618. internalerror(2002090902);
  619. ref2 := ref;
  620. fixref(list, ref2, tosize);
  621. { the caller is expected to have adjusted the reference already
  622. in this case }
  623. if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
  624. fromsize := tosize;
  625. op := loadinstr[fromsize, ref2.index <> NR_NO, false];
  626. { there is no LWAU instruction, simulate using ADDI and LWA }
  627. if (op = A_NOP) then begin
  628. list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
  629. ref2.offset := 0;
  630. op := A_LWA;
  631. end;
  632. a_load_store(list, op, reg, ref2);
  633. { sign extend shortint if necessary, since there is no
  634. load instruction that does that automatically (JM) }
  635. if fromsize = OS_S8 then
  636. list.concat(taicpu.op_reg_reg(A_EXTSB, reg, reg));
  637. end;
  638. procedure tcgppc.a_load_reg_reg(list: taasmoutput; fromsize, tosize: tcgsize;
  639. reg1, reg2: tregister);
  640. const
  641. movemap : array[OS_8..OS_S128, OS_8..OS_S128] of tasmop = (
  642. { to -> OS_8 OS_16 OS_32 OS_64 OS_128 OS_S8 OS_S16 OS_S32 OS_S64 OS_S128 }
  643. { from }
  644. { OS_8 } (A_MR, A_RLDICL, A_RLDICL, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  645. { OS_16 } (A_RLDICL, A_MR, A_RLDICL, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  646. { OS_32 } (A_RLDICL, A_RLDICL, A_MR, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  647. { OS_64 } (A_RLDICL, A_RLDICL, A_RLDICL, A_MR, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ),
  648. { OS_128 } (A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NOP ),
  649. { OS_S8 } (A_EXTSB, A_EXTSB, A_EXTSB, A_EXTSB, A_NONE, A_MR, A_EXTSB, A_EXTSB, A_EXTSB, A_NOP ),
  650. { OS_S16 } (A_RLDICL, A_EXTSH, A_EXTSH, A_EXTSH, A_NONE, A_EXTSB, A_MR, A_EXTSH, A_EXTSH, A_NOP ),
  651. { OS_S32 } (A_RLDICL, A_RLDICL, A_EXTSW, A_EXTSW, A_NONE, A_EXTSB, A_EXTSH, A_MR, A_EXTSW, A_NOP ),
  652. { OS_S64 } (A_RLDICL, A_RLDICL, A_RLDICL, A_MR, A_NONE, A_EXTSB, A_EXTSH, A_EXTSW, A_MR, A_NOP ),
  653. { OS_S128 } (A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NOP )
  654. );
  655. var
  656. instr: taicpu;
  657. op : tasmop;
  658. begin
  659. op := movemap[fromsize, tosize];
  660. case op of
  661. A_MR, A_EXTSB, A_EXTSH, A_EXTSW : instr := taicpu.op_reg_reg(op, reg2, reg1);
  662. A_RLDICL : instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[fromsize])*8);
  663. else
  664. internalerror(2002090901);
  665. end;
  666. list.concat(instr);
  667. rg[R_INTREGISTER].add_move_instruction(instr);
  668. end;
  669. procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize;
  670. reg1, reg2: tregister);
  671. var
  672. instr: taicpu;
  673. begin
  674. instr := taicpu.op_reg_reg(A_FMR, reg2, reg1);
  675. list.concat(instr);
  676. rg[R_FPUREGISTER].add_move_instruction(instr);
  677. end;
  678. procedure tcgppc.a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize;
  679. const ref: treference; reg: tregister);
  680. const
  681. FpuLoadInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
  682. { indexed? updating?}
  683. (((A_LFS, A_LFSU), (A_LFSX, A_LFSUX)),
  684. ((A_LFD, A_LFDU), (A_LFDX, A_LFDUX)));
  685. var
  686. op: tasmop;
  687. ref2: treference;
  688. begin
  689. { several functions call this procedure with OS_32 or OS_64
  690. so this makes life easier (FK) }
  691. case size of
  692. OS_32, OS_F32:
  693. size := OS_F32;
  694. OS_64, OS_F64, OS_C64:
  695. size := OS_F64;
  696. else
  697. internalerror(200201121);
  698. end;
  699. ref2 := ref;
  700. fixref(list, ref2, size);
  701. op := fpuloadinstr[size, ref2.index <> NR_NO, false];
  702. a_load_store(list, op, reg, ref2);
  703. end;
  704. procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
  705. tregister; const ref: treference);
  706. const
  707. FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
  708. { indexed? updating? }
  709. (((A_STFS, A_STFSU), (A_STFSX, A_STFSUX)),
  710. ((A_STFD, A_STFDU), (A_STFDX, A_STFDUX)));
  711. var
  712. op: tasmop;
  713. ref2: treference;
  714. begin
  715. if not (size in [OS_F32, OS_F64]) then
  716. internalerror(200201122);
  717. ref2 := ref;
  718. fixref(list, ref2, size);
  719. op := fpustoreinstr[size, ref2.index <> NR_NO, false];
  720. a_load_store(list, op, reg, ref2);
  721. end;
  722. procedure tcgppc.a_op_const_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; a:
  723. aint; reg: TRegister);
  724. begin
  725. a_op_const_reg_reg(list, op, size, a, reg, reg);
  726. end;
  727. procedure tcgppc.a_op_reg_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; src,
  728. dst: TRegister);
  729. begin
  730. a_op_reg_reg_reg(list, op, size, src, dst, dst);
  731. end;
  732. procedure tcgppc.a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
  733. size: tcgsize; a: aint; src, dst: tregister);
  734. var
  735. useReg : boolean;
  736. procedure do_lo_hi(loOp, hiOp : TAsmOp);
  737. begin
  738. { Optimization for logical ops (excluding AND), trying to do this as efficiently
  739. as possible by only generating code for the affected halfwords. Note that all
  740. the instructions handled here must have "X op 0 = X" for every halfword. }
  741. usereg := false;
  742. if (aword(a) > high(dword)) then begin
  743. usereg := true;
  744. end else begin
  745. if (word(a) <> 0) then begin
  746. list.concat(taicpu.op_reg_reg_const(loOp, dst, src, word(a)));
  747. if (word(a shr 16) <> 0) then
  748. list.concat(taicpu.op_reg_reg_const(hiOp, dst, dst, word(a shr 16)));
  749. end else if (word(a shr 16) <> 0) then
  750. list.concat(taicpu.op_reg_reg_const(hiOp, dst, src, word(a shr 16)));
  751. end;
  752. end;
  753. procedure do_lo_hi_and;
  754. begin
  755. { optimization logical and with immediate: only use "andi." for 16 bit
  756. ands, otherwise use register method. Doing this for 32 bit constants
  757. would not give any advantage to the register method (via useReg := true),
  758. requiring a scratch register and three instructions. }
  759. usereg := false;
  760. if (aword(a) > high(word)) then
  761. usereg := true
  762. else
  763. list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
  764. end;
  765. procedure do_constant_div(list : taasmoutput; size : TCgSize; a : aint; src, dst : TRegister;
  766. signed : boolean);
  767. const
  768. negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
  769. var
  770. magic, shift : int64;
  771. u_magic : qword;
  772. u_shift : byte;
  773. u_add : boolean;
  774. power : byte;
  775. isNegPower : boolean;
  776. divreg : tregister;
  777. begin
  778. if (a = 0) then begin
  779. internalerror(2005061701);
  780. end else if (a = 1) then begin
  781. cg.a_load_reg_reg(exprasmlist, OS_INT, OS_INT, src, dst);
  782. end else if (a = -1) then begin
  783. { note: only in the signed case possible..., may overflow }
  784. exprasmlist.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], dst, src));
  785. end else if (ispowerof2(a, power, isNegPower)) then begin
  786. if (signed) then begin
  787. { From "The PowerPC Compiler Writer's Guide", pg. 52ff }
  788. cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, power,
  789. src, dst);
  790. exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
  791. if (isNegPower) then
  792. exprasmlist.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
  793. end else begin
  794. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, src, dst)
  795. end;
  796. end else begin
  797. { replace division by multiplication, both implementations }
  798. { from "The PowerPC Compiler Writer's Guide" pg. 53ff }
  799. divreg := cg.getintregister(exprasmlist, OS_INT);
  800. if (signed) then begin
  801. getmagic_signedN(sizeof(aInt)*8, a, magic, shift);
  802. { load magic value }
  803. cg.a_load_const_reg(exprasmlist, OS_INT, magic, divreg);
  804. { multiply }
  805. exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHD, dst, src, divreg));
  806. { add/subtract numerator }
  807. if (a > 0) and (magic < 0) then begin
  808. cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, src, dst, dst);
  809. end else if (a < 0) and (magic > 0) then begin
  810. cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, src, dst, dst);
  811. end;
  812. { shift shift places to the right (arithmetic) }
  813. cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, shift, dst, dst);
  814. { extract and add sign bit }
  815. if (a >= 0) then begin
  816. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, src, divreg);
  817. end else begin
  818. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, dst, divreg);
  819. end;
  820. cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, dst, divreg, dst);
  821. end else begin
  822. getmagic_unsignedN(sizeof(aWord)*8, a, u_magic, u_add, u_shift);
  823. { load magic in divreg }
  824. cg.a_load_const_reg(exprasmlist, OS_INT, u_magic, divreg);
  825. exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHDU, dst, src, divreg));
  826. if (u_add) then begin
  827. cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, dst, src, divreg);
  828. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 1, divreg, divreg);
  829. cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, divreg, dst, divreg);
  830. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift-1, divreg, dst);
  831. end else begin
  832. cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift, dst, dst);
  833. end;
  834. end;
  835. end;
  836. end;
  837. var
  838. scratchreg: tregister;
  839. shift : byte;
  840. shiftmask : longint;
  841. isneg : boolean;
  842. begin
  843. { subtraction is the same as addition with negative constant }
  844. if op = OP_SUB then begin
  845. a_op_const_reg_reg(list, OP_ADD, size, -a, src, dst);
  846. exit;
  847. end;
  848. { This case includes some peephole optimizations for the various operations,
  849. (e.g. AND, OR, XOR, ..) - can't this be done at some higher level,
  850. independent of architecture? }
  851. { assume that we do not need a scratch register for the operation }
  852. useReg := false;
  853. case (op) of
  854. OP_DIV, OP_IDIV:
  855. if (cs_slowoptimize in aktglobalswitches) then
  856. do_constant_div(list, size, a, src, dst, op = OP_IDIV)
  857. else
  858. usereg := true;
  859. OP_IMUL, OP_MUL:
  860. { idea: factorize constant multiplicands and use adds/shifts with few factors;
  861. however, even a 64 bit multiply is already quite fast on PPC64 }
  862. if (a = 0) then
  863. a_load_const_reg(list, size, 0, dst)
  864. else if (a = -1) then
  865. list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
  866. else if (a = 1) then
  867. a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
  868. else if ispowerof2(a, shift, isneg) then begin
  869. list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift));
  870. if (isneg) then
  871. exprasmlist.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
  872. end else if (a >= low(smallint)) and (a <= high(smallint)) then
  873. list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
  874. smallint(a)))
  875. else
  876. usereg := true;
  877. OP_ADD:
  878. if (a = 0) then
  879. a_load_reg_reg(list, size, size, src, dst)
  880. else if (a >= low(smallint)) and (a <= high(smallint)) then
  881. list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a)))
  882. else
  883. useReg := true;
  884. OP_OR:
  885. if (a = 0) then
  886. a_load_reg_reg(list, size, size, src, dst)
  887. else if (a = -1) then
  888. a_load_const_reg(list, size, -1, dst)
  889. else
  890. do_lo_hi(A_ORI, A_ORIS);
  891. OP_AND:
  892. if (a = 0) then
  893. a_load_const_reg(list, size, 0, dst)
  894. else if (a = -1) then
  895. a_load_reg_reg(list, size, size, src, dst)
  896. else
  897. do_lo_hi_and;
  898. OP_XOR:
  899. if (a = 0) then
  900. a_load_reg_reg(list, size, size, src, dst)
  901. else if (a = -1) then
  902. list.concat(taicpu.op_reg_reg(A_NOT, dst, src))
  903. else
  904. do_lo_hi(A_XORI, A_XORIS);
  905. OP_SHL, OP_SHR, OP_SAR:
  906. begin
  907. if (size in [OS_64, OS_S64]) then
  908. shift := 6
  909. else
  910. shift := 5;
  911. shiftmask := (1 shl shift)-1;
  912. if (a and shiftmask) <> 0 then
  913. list.concat(taicpu.op_reg_reg_const(
  914. TShiftOpCG2AsmOpConst[size in [OS_64, OS_S64], op], dst, src, a and shiftmask))
  915. else
  916. a_load_reg_reg(list, size, size, src, dst);
  917. if ((a shr shift) <> 0) then
  918. internalError(68991);
  919. end
  920. else
  921. internalerror(200109091);
  922. end;
  923. { if all else failed, load the constant in a register and then
  924. perform the operation }
  925. if (useReg) then begin
  926. scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  927. a_load_const_reg(list, size, a, scratchreg);
  928. a_op_reg_reg_reg(list, op, size, scratchreg, src, dst);
  929. end;
  930. end;
  931. procedure tcgppc.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
  932. size: tcgsize; src1, src2, dst: tregister);
  933. const
  934. op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
  935. (A_NONE, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
  936. A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR);
  937. op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
  938. (A_NONE, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
  939. A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
  940. begin
  941. case op of
  942. OP_NEG, OP_NOT:
  943. begin
  944. list.concat(taicpu.op_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src1));
  945. if (op = OP_NOT) and not (size in [OS_64, OS_S64]) then
  946. { zero/sign extend result again, fromsize is not important here }
  947. a_load_reg_reg(list, OS_S64, size, dst, dst)
  948. end;
  949. else
  950. if (size in [OS_64, OS_S64]) then begin
  951. list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2,
  952. src1));
  953. end else begin
  954. list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2,
  955. src1));
  956. end;
  957. end;
  958. end;
  959. {*************** compare instructructions ****************}
  960. procedure tcgppc.a_cmp_const_reg_label(list: taasmoutput; size: tcgsize;
  961. cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel);
  962. var
  963. scratch_register: TRegister;
  964. signed: boolean;
  965. begin
  966. signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE];
  967. { in the following case, we generate more efficient code when }
  968. { signed is true }
  969. if (cmp_op in [OC_EQ, OC_NE]) and
  970. (aword(a) > $FFFF) then
  971. signed := true;
  972. if signed then
  973. if (a >= low(smallint)) and (a <= high(smallint)) then
  974. list.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR0, reg, a))
  975. else begin
  976. scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  977. a_load_const_reg(list, OS_64, a, scratch_register);
  978. list.concat(taicpu.op_reg_reg_reg(A_CMPD, NR_CR0, reg, scratch_register));
  979. end
  980. else if (aword(a) <= $FFFF) then
  981. list.concat(taicpu.op_reg_reg_const(A_CMPLDI, NR_CR0, reg, aword(a)))
  982. else begin
  983. scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  984. a_load_const_reg(list, OS_64, a, scratch_register);
  985. list.concat(taicpu.op_reg_reg_reg(A_CMPLD, NR_CR0, reg,
  986. scratch_register));
  987. end;
  988. a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
  989. end;
  990. procedure tcgppc.a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize;
  991. cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
  992. var
  993. op: tasmop;
  994. begin
  995. if cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE] then
  996. if (size in [OS_64, OS_S64]) then
  997. op := A_CMPD
  998. else
  999. op := A_CMPW
  1000. else
  1001. if (size in [OS_64, OS_S64]) then
  1002. op := A_CMPLD
  1003. else
  1004. op := A_CMPLW;
  1005. list.concat(taicpu.op_reg_reg_reg(op, NR_CR0, reg2, reg1));
  1006. a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
  1007. end;
  1008. procedure tcgppc.a_jmp_cond(list: taasmoutput; cond: TOpCmp; l: tasmlabel);
  1009. begin
  1010. a_jmp(list, A_BC, TOpCmp2AsmCond[cond], 0, l);
  1011. end;
  1012. procedure tcgppc.a_jmp_name(list: taasmoutput; const s: string);
  1013. var
  1014. p: taicpu;
  1015. begin
  1016. p := taicpu.op_sym(A_B, objectlibrary.newasmsymbol(s, AB_EXTERNAL,
  1017. AT_LABEL));
  1018. p.is_jmp := true;
  1019. list.concat(p)
  1020. end;
  1021. procedure tcgppc.a_jmp_always(list: taasmoutput; l: tasmlabel);
  1022. begin
  1023. a_jmp(list, A_B, C_None, 0, l);
  1024. end;
  1025. procedure tcgppc.a_jmp_flags(list: taasmoutput; const f: TResFlags; l:
  1026. tasmlabel);
  1027. var
  1028. c: tasmcond;
  1029. begin
  1030. c := flags_to_cond(f);
  1031. a_jmp(list, A_BC, c.cond, c.cr - RS_CR0, l);
  1032. end;
  1033. procedure tcgppc.g_flags2reg(list: taasmoutput; size: TCgSize; const f:
  1034. TResFlags; reg: TRegister);
  1035. var
  1036. testbit: byte;
  1037. bitvalue: boolean;
  1038. begin
  1039. { get the bit to extract from the conditional register + its requested value (0 or 1) }
  1040. testbit := ((f.cr - RS_CR0) * 4);
  1041. case f.flag of
  1042. F_EQ, F_NE:
  1043. begin
  1044. inc(testbit, 2);
  1045. bitvalue := f.flag = F_EQ;
  1046. end;
  1047. F_LT, F_GE:
  1048. begin
  1049. bitvalue := f.flag = F_LT;
  1050. end;
  1051. F_GT, F_LE:
  1052. begin
  1053. inc(testbit);
  1054. bitvalue := f.flag = F_GT;
  1055. end;
  1056. else
  1057. internalerror(200112261);
  1058. end;
  1059. { load the conditional register in the destination reg }
  1060. list.concat(taicpu.op_reg(A_MFCR, reg));
  1061. { we will move the bit that has to be tested to bit 0 by rotating left }
  1062. testbit := (testbit + 1) and 31;
  1063. { extract bit }
  1064. list.concat(taicpu.op_reg_reg_const_const_const(
  1065. A_RLWINM,reg,reg,testbit,31,31));
  1066. { if we need the inverse, xor with 1 }
  1067. if not bitvalue then
  1068. list.concat(taicpu.op_reg_reg_const(A_XORI, reg, reg, 1));
  1069. end;
  1070. { *********** entry/exit code and address loading ************ }
  1071. procedure tcgppc.g_save_standard_registers(list: Taasmoutput);
  1072. begin
  1073. { this work is done in g_proc_entry; additionally it is not safe
  1074. to use it because it is called at some weird time }
  1075. end;
  1076. procedure tcgppc.g_restore_standard_registers(list: Taasmoutput);
  1077. begin
  1078. { this work is done in g_proc_exit; mainly because it is not safe to
  1079. put the register restore code here because it is called at some weird time }
  1080. end;
  1081. procedure tcgppc.calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint);
  1082. var
  1083. reg : TSuperRegister;
  1084. begin
  1085. fprcount := 0;
  1086. firstfpr := RS_F31;
  1087. if not (po_assembler in current_procinfo.procdef.procoptions) then
  1088. for reg := RS_F14 to RS_F31 do
  1089. if reg in rg[R_FPUREGISTER].used_in_proc then begin
  1090. fprcount := ord(RS_F31)-ord(reg)+1;
  1091. firstfpr := reg;
  1092. break;
  1093. end;
  1094. end;
  1095. procedure tcgppc.calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
  1096. var
  1097. reg : TSuperRegister;
  1098. begin
  1099. gprcount := 0;
  1100. firstgpr := RS_R31;
  1101. if not (po_assembler in current_procinfo.procdef.procoptions) then
  1102. for reg := RS_R14 to RS_R31 do
  1103. if reg in rg[R_INTREGISTER].used_in_proc then begin
  1104. gprcount := ord(RS_R31)-ord(reg)+1;
  1105. firstgpr := reg;
  1106. break;
  1107. end;
  1108. end;
  1109. { Generates the entry code of a procedure/function.
  1110. This procedure may be called before, as well as after g_return_from_proc
  1111. is called. localsize is the sum of the size necessary for local variables
  1112. and the maximum possible combined size of ALL the parameters of a procedure
  1113. called by the current one
  1114. IMPORTANT: registers are not to be allocated through the register
  1115. allocator here, because the register colouring has already occured !!
  1116. }
  1117. procedure tcgppc.g_proc_entry(list: taasmoutput; localsize: longint;
  1118. nostackframe: boolean);
  1119. var
  1120. firstregfpu, firstreggpr: TSuperRegister;
  1121. needslinkreg: boolean;
  1122. fprcount, gprcount : aint;
  1123. { Save standard registers, both FPR and GPR; does not support VMX/Altivec }
  1124. procedure save_standard_registers;
  1125. var
  1126. regcount : TSuperRegister;
  1127. href : TReference;
  1128. mayNeedLRStore : boolean;
  1129. begin
  1130. { there are two ways to do this: manually, by generating a few "std" instructions,
  1131. or via the restore helper functions. The latter are selected by the -Og switch,
  1132. i.e. "optimize for size" }
  1133. if (cs_littlesize in aktglobalswitches) then begin
  1134. mayNeedLRStore := false;
  1135. if ((fprcount > 0) and (gprcount > 0)) then begin
  1136. a_op_const_reg_reg(list, OP_SUB, OS_INT, 8 * fprcount, NR_R1, NR_R12);
  1137. a_call_name_direct(list, '_savegpr1_' + intToStr(32-gprcount), false, false);
  1138. a_call_name_direct(list, '_savefpr_' + intToStr(32-fprcount), false, false);
  1139. end else if (gprcount > 0) then
  1140. a_call_name_direct(list, '_savegpr0_' + intToStr(32-gprcount), false, false)
  1141. else if (fprcount > 0) then
  1142. a_call_name_direct(list, '_savefpr_' + intToStr(32-fprcount), false, false)
  1143. else
  1144. mayNeedLRStore := true;
  1145. end else begin
  1146. { save registers, FPU first, then GPR }
  1147. reference_reset_base(href, NR_STACK_POINTER_REG, -8);
  1148. if (fprcount > 0) then
  1149. for regcount := RS_F31 downto firstregfpu do begin
  1150. a_loadfpu_reg_ref(list, OS_FLOAT, newreg(R_FPUREGISTER, regcount,
  1151. R_SUBNONE), href);
  1152. dec(href.offset, tcgsize2size[OS_FLOAT]);
  1153. end;
  1154. if (gprcount > 0) then
  1155. for regcount := RS_R31 downto firstreggpr do begin
  1156. a_load_reg_ref(list, OS_INT, OS_INT, newreg(R_INTREGISTER, regcount,
  1157. R_SUBNONE), href);
  1158. dec(href.offset, tcgsize2size[OS_INT]);
  1159. end;
  1160. { VMX registers not supported by FPC atm }
  1161. { in this branch we may always need to store LR ourselves}
  1162. mayNeedLRStore := true;
  1163. end;
  1164. { we may need to store R0 (=LR) ourselves }
  1165. if (mayNeedLRStore) and (needslinkreg) then begin
  1166. reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
  1167. list.concat(taicpu.op_reg_ref(A_STD, NR_R0, href));
  1168. end;
  1169. end;
  1170. var
  1171. href: treference;
  1172. begin
  1173. calcFirstUsedFPR(firstregfpu, fprcount);
  1174. calcFirstUsedGPR(firstreggpr, gprcount);
  1175. { calculate real stack frame size }
  1176. localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
  1177. gprcount, fprcount);
  1178. { determine whether we need to save the link register }
  1179. needslinkreg :=
  1180. ((not (po_assembler in current_procinfo.procdef.procoptions)) and (pi_do_call in current_procinfo.flags)) or
  1181. ((cs_littlesize in aktglobalswitches) and ((fprcount > 0) or (gprcount > 0)));
  1182. a_reg_alloc(list, NR_STACK_POINTER_REG);
  1183. a_reg_alloc(list, NR_R0);
  1184. { move link register to r0 }
  1185. if (needslinkreg) then
  1186. list.concat(taicpu.op_reg(A_MFLR, NR_R0));
  1187. save_standard_registers;
  1188. { save old stack frame pointer }
  1189. if (localsize > 0) then begin
  1190. a_reg_alloc(list, NR_OLD_STACK_POINTER_REG);
  1191. list.concat(taicpu.op_reg_reg(A_MR, NR_OLD_STACK_POINTER_REG, NR_STACK_POINTER_REG));
  1192. end;
  1193. { create stack frame }
  1194. if (not nostackframe) and (localsize > 0) then begin
  1195. if (localsize <= high(smallint)) then begin
  1196. reference_reset_base(href, NR_STACK_POINTER_REG, -localsize);
  1197. a_load_store(list, A_STDU, NR_STACK_POINTER_REG, href);
  1198. end else begin
  1199. reference_reset_base(href, NR_NO, -localsize);
  1200. { Use R0 for loading the constant (which is definitely > 32k when entering
  1201. this branch).
  1202. Inlined at this position because it must not use temp registers because
  1203. register allocations have already been done }
  1204. { Code template:
  1205. lis r0,ofs@highest
  1206. ori r0,r0,ofs@higher
  1207. sldi r0,r0,32
  1208. oris r0,r0,ofs@h
  1209. ori r0,r0,ofs@l
  1210. }
  1211. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
  1212. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
  1213. list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
  1214. list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
  1215. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));
  1216. list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
  1217. end;
  1218. end;
  1219. { CR register not used by FPC atm }
  1220. { keep R1 allocated??? }
  1221. a_reg_dealloc(list, NR_R0);
  1222. end;
  1223. { Generates the exit code for a method.
  1224. This procedure may be called before, as well as after g_stackframe_entry
  1225. is called.
  1226. IMPORTANT: registers are not to be allocated through the register
  1227. allocator here, because the register colouring has already occured !!
  1228. }
  1229. procedure tcgppc.g_proc_exit(list: taasmoutput; parasize: longint; nostackframe:
  1230. boolean);
  1231. var
  1232. firstregfpu, firstreggpr: TSuperRegister;
  1233. needslinkreg : boolean;
  1234. fprcount, gprcount: aint;
  1235. { Restore standard registers, both FPR and GPR; does not support VMX/Altivec }
  1236. procedure restore_standard_registers;
  1237. var
  1238. { flag indicating whether we need to manually add the exit code (e.g. blr instruction)
  1239. or not }
  1240. needsExitCode : Boolean;
  1241. href : treference;
  1242. regcount : TSuperRegister;
  1243. begin
  1244. { there are two ways to do this: manually, by generating a few "ld" instructions,
  1245. or via the restore helper functions. The latter are selected by the -Og switch,
  1246. i.e. "optimize for size" }
  1247. if (cs_littlesize in aktglobalswitches) then begin
  1248. needsExitCode := false;
  1249. if ((fprcount > 0) and (gprcount > 0)) then begin
  1250. a_op_const_reg_reg(list, OP_SUB, OS_INT, 8 * fprcount, NR_R1, NR_R12);
  1251. a_call_name_direct(list, '_restgpr1_' + intToStr(32-gprcount), false, false);
  1252. a_jmp_name(list, '_restfpr_' + intToStr(32-fprcount));
  1253. end else if (gprcount > 0) then
  1254. a_jmp_name(list, '_restgpr0_' + intToStr(32-gprcount))
  1255. else if (fprcount > 0) then
  1256. a_jmp_name(list, '_restfpr_' + intToStr(32-fprcount))
  1257. else
  1258. needsExitCode := true;
  1259. end else begin
  1260. needsExitCode := true;
  1261. { restore registers, FPU first, GPR next }
  1262. reference_reset_base(href, NR_STACK_POINTER_REG, -tcgsize2size[OS_FLOAT]);
  1263. if (fprcount > 0) then
  1264. for regcount := RS_F31 downto firstregfpu do begin
  1265. a_loadfpu_ref_reg(list, OS_FLOAT, href, newreg(R_FPUREGISTER, regcount,
  1266. R_SUBNONE));
  1267. dec(href.offset, tcgsize2size[OS_FLOAT]);
  1268. end;
  1269. if (gprcount > 0) then
  1270. for regcount := RS_R31 downto firstreggpr do begin
  1271. a_load_ref_reg(list, OS_INT, OS_INT, href, newreg(R_INTREGISTER, regcount,
  1272. R_SUBNONE));
  1273. dec(href.offset, tcgsize2size[OS_INT]);
  1274. end;
  1275. { VMX not supported by FPC atm }
  1276. end;
  1277. if (needsExitCode) then begin
  1278. { restore LR (if needed) }
  1279. if (needslinkreg) then begin
  1280. reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
  1281. list.concat(taicpu.op_reg_ref(A_LD, NR_R0, href));
  1282. list.concat(taicpu.op_reg(A_MTLR, NR_R0));
  1283. end;
  1284. { generate return instruction }
  1285. list.concat(taicpu.op_none(A_BLR));
  1286. end;
  1287. end;
  1288. var
  1289. href: treference;
  1290. localsize : aint;
  1291. begin
  1292. calcFirstUsedFPR(firstregfpu, fprcount);
  1293. calcFirstUsedGPR(firstreggpr, gprcount);
  1294. { determine whether we need to restore the link register }
  1295. needslinkreg :=
  1296. ((not (po_assembler in current_procinfo.procdef.procoptions)) and (pi_do_call in current_procinfo.flags)) or
  1297. ((cs_littlesize in aktglobalswitches) and ((fprcount > 0) or (gprcount > 0)));
  1298. { calculate stack frame }
  1299. localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
  1300. gprcount, fprcount);
  1301. { CR register not supported }
  1302. { restore stack pointer }
  1303. if (not nostackframe) and (localsize > 0) then begin
  1304. if (localsize <= high(smallint)) then begin
  1305. list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, localsize));
  1306. end else begin
  1307. reference_reset_base(href, NR_NO, localsize);
  1308. { use R0 for loading the constant (which is definitely > 32k when entering
  1309. this branch)
  1310. Inlined because it must not use temp registers because register allocations
  1311. have already been done
  1312. }
  1313. { Code template:
  1314. lis r0,ofs@highest
  1315. ori r0,ofs@higher
  1316. sldi r0,r0,32
  1317. oris r0,r0,ofs@h
  1318. ori r0,r0,ofs@l
  1319. }
  1320. list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48)));
  1321. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32)));
  1322. list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32));
  1323. list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16)));
  1324. list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset)));
  1325. list.concat(taicpu.op_reg_reg_reg(A_ADD, NR_R1, NR_R1, NR_R0));
  1326. end;
  1327. end;
  1328. restore_standard_registers;
  1329. end;
  1330. procedure tcgppc.a_loadaddr_ref_reg(list: taasmoutput; const ref: treference; r:
  1331. tregister);
  1332. var
  1333. ref2, tmpref: treference;
  1334. { register used to construct address }
  1335. tempreg : TRegister;
  1336. begin
  1337. ref2 := ref;
  1338. fixref(list, ref2, OS_64);
  1339. { load a symbol }
  1340. if assigned(ref2.symbol) or (hasLargeOffset(ref2)) then begin
  1341. { add the symbol's value to the base of the reference, and if the }
  1342. { reference doesn't have a base, create one }
  1343. reference_reset(tmpref);
  1344. tmpref.offset := ref2.offset;
  1345. tmpref.symbol := ref2.symbol;
  1346. tmpref.relsymbol := ref2.relsymbol;
  1347. { load 64 bit reference into r. If the reference already has a base register,
  1348. first load the 64 bit value into a temp register, then add it to the result
  1349. register rD }
  1350. if (ref2.base <> NR_NO) then begin
  1351. { already have a base register, so allocate a new one }
  1352. tempreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1353. end else begin
  1354. tempreg := r;
  1355. end;
  1356. { code for loading a reference from a symbol into a register rD }
  1357. (*
  1358. lis rX,SYM@highest
  1359. ori rX,SYM@higher
  1360. sldi rX,rX,32
  1361. oris rX,rX,SYM@h
  1362. ori rX,rX,SYM@l
  1363. *)
  1364. tmpref.refaddr := addr_highest;
  1365. list.concat(taicpu.op_reg_ref(A_LIS, tempreg, tmpref));
  1366. tmpref.refaddr := addr_higher;
  1367. list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
  1368. list.concat(taicpu.op_reg_reg_const(A_SLDI, tempreg, tempreg, 32));
  1369. tmpref.refaddr := addr_high;
  1370. list.concat(taicpu.op_reg_reg_ref(A_ORIS, tempreg, tempreg, tmpref));
  1371. tmpref.refaddr := addr_low;
  1372. list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
  1373. { if there's already a base register, add the temp register contents to
  1374. the base register }
  1375. if (ref2.base <> NR_NO) then begin
  1376. list.concat(taicpu.op_reg_reg_reg(A_ADD, r, tempreg, ref2.base));
  1377. end;
  1378. end else if ref2.offset <> 0 then begin
  1379. { no symbol, but offset <> 0 }
  1380. if ref2.base <> NR_NO then begin
  1381. a_op_const_reg_reg(list, OP_ADD, OS_64, ref2.offset, ref2.base, r)
  1382. { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never
  1383. occurs, so now only ref.offset has to be loaded }
  1384. end else begin
  1385. a_load_const_reg(list, OS_64, ref2.offset, r)
  1386. end;
  1387. end else if ref.index <> NR_NO then
  1388. list.concat(taicpu.op_reg_reg_reg(A_ADD, r, ref2.base, ref2.index))
  1389. else if (ref2.base <> NR_NO) and
  1390. (r <> ref2.base) then
  1391. a_load_reg_reg(list, OS_ADDR, OS_ADDR, ref2.base, r)
  1392. else begin
  1393. list.concat(taicpu.op_reg_const(A_LI, r, 0));
  1394. end;
  1395. end;
  1396. { ************* concatcopy ************ }
  1397. const
  1398. maxmoveunit = 8;
  1399. procedure tcgppc.g_concatcopy(list: taasmoutput; const source, dest: treference;
  1400. len: aint);
  1401. var
  1402. countreg, tempreg: TRegister;
  1403. src, dst: TReference;
  1404. lab: tasmlabel;
  1405. count, count2: longint;
  1406. size: tcgsize;
  1407. begin
  1408. {$IFDEF extdebug}
  1409. if len > high(aint) then
  1410. internalerror(2002072704);
  1411. list.concat(tai_comment.create(strpnew('g_concatcopy')));
  1412. {$ENDIF extdebug}
  1413. { make sure short loads are handled as optimally as possible;
  1414. note that the data here never overlaps, so we can do a forward
  1415. copy at all times.
  1416. NOTE: maybe use some scratch registers to pair load/store instructions
  1417. }
  1418. if (len <= maxmoveunit) then begin
  1419. src := source; dst := dest;
  1420. while (len <> 0) do begin
  1421. if (len = 8) then begin
  1422. a_load_ref_ref(list, OS_64, OS_64, src, dst);
  1423. dec(len, 8);
  1424. end else if (len >= 4) then begin
  1425. a_load_ref_ref(list, OS_32, OS_32, src, dst);
  1426. inc(src.offset, 4); inc(dst.offset, 4);
  1427. dec(len, 4);
  1428. end else if (len >= 2) then begin
  1429. a_load_ref_ref(list, OS_16, OS_16, src, dst);
  1430. inc(src.offset, 2); inc(dst.offset, 2);
  1431. dec(len, 2);
  1432. end else begin
  1433. a_load_ref_ref(list, OS_8, OS_8, src, dst);
  1434. inc(src.offset, 1); inc(dst.offset, 1);
  1435. dec(len, 1);
  1436. end;
  1437. end;
  1438. exit;
  1439. end;
  1440. count := len div maxmoveunit;
  1441. reference_reset(src);
  1442. reference_reset(dst);
  1443. { load the address of source into src.base }
  1444. if (count > 4) or
  1445. not issimpleref(source) or
  1446. ((source.index <> NR_NO) and
  1447. ((source.offset + len) > high(smallint))) then begin
  1448. src.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1449. a_loadaddr_ref_reg(list, source, src.base);
  1450. end else begin
  1451. src := source;
  1452. end;
  1453. { load the address of dest into dst.base }
  1454. if (count > 4) or
  1455. not issimpleref(dest) or
  1456. ((dest.index <> NR_NO) and
  1457. ((dest.offset + len) > high(smallint))) then begin
  1458. dst.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1459. a_loadaddr_ref_reg(list, dest, dst.base);
  1460. end else begin
  1461. dst := dest;
  1462. end;
  1463. { generate a loop }
  1464. if count > 4 then begin
  1465. { the offsets are zero after the a_loadaddress_ref_reg and just
  1466. have to be set to 8. I put an Inc there so debugging may be
  1467. easier (should offset be different from zero here, it will be
  1468. easy to notice in the generated assembler }
  1469. inc(dst.offset, 8);
  1470. inc(src.offset, 8);
  1471. list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
  1472. list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
  1473. countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1474. a_load_const_reg(list, OS_64, count, countreg);
  1475. { explicitely allocate F0 since it can be used safely here
  1476. (for holding date that's being copied) }
  1477. a_reg_alloc(list, NR_F0);
  1478. objectlibrary.getjumplabel(lab);
  1479. a_label(list, lab);
  1480. list.concat(taicpu.op_reg_reg_const(A_SUBIC_, countreg, countreg, 1));
  1481. list.concat(taicpu.op_reg_ref(A_LFDU, NR_F0, src));
  1482. list.concat(taicpu.op_reg_ref(A_STFDU, NR_F0, dst));
  1483. a_jmp(list, A_BC, C_NE, 0, lab);
  1484. a_reg_dealloc(list, NR_F0);
  1485. len := len mod 8;
  1486. end;
  1487. count := len div 8;
  1488. { unrolled loop }
  1489. if count > 0 then begin
  1490. a_reg_alloc(list, NR_F0);
  1491. for count2 := 1 to count do begin
  1492. a_loadfpu_ref_reg(list, OS_F64, src, NR_F0);
  1493. a_loadfpu_reg_ref(list, OS_F64, NR_F0, dst);
  1494. inc(src.offset, 8);
  1495. inc(dst.offset, 8);
  1496. end;
  1497. a_reg_dealloc(list, NR_F0);
  1498. len := len mod 8;
  1499. end;
  1500. if (len and 4) <> 0 then begin
  1501. a_reg_alloc(list, NR_R0);
  1502. a_load_ref_reg(list, OS_32, OS_32, src, NR_R0);
  1503. a_load_reg_ref(list, OS_32, OS_32, NR_R0, dst);
  1504. inc(src.offset, 4);
  1505. inc(dst.offset, 4);
  1506. a_reg_dealloc(list, NR_R0);
  1507. end;
  1508. { copy the leftovers }
  1509. if (len and 2) <> 0 then begin
  1510. a_reg_alloc(list, NR_R0);
  1511. a_load_ref_reg(list, OS_16, OS_16, src, NR_R0);
  1512. a_load_reg_ref(list, OS_16, OS_16, NR_R0, dst);
  1513. inc(src.offset, 2);
  1514. inc(dst.offset, 2);
  1515. a_reg_dealloc(list, NR_R0);
  1516. end;
  1517. if (len and 1) <> 0 then begin
  1518. a_reg_alloc(list, NR_R0);
  1519. a_load_ref_reg(list, OS_8, OS_8, src, NR_R0);
  1520. a_load_reg_ref(list, OS_8, OS_8, NR_R0, dst);
  1521. a_reg_dealloc(list, NR_R0);
  1522. end;
  1523. end;
  1524. procedure tcgppc.g_overflowcheck(list: taasmoutput; const l: tlocation; def:
  1525. tdef);
  1526. var
  1527. hl: tasmlabel;
  1528. flags : TResFlags;
  1529. begin
  1530. if not (cs_check_overflow in aktlocalswitches) then
  1531. exit;
  1532. objectlibrary.getjumplabel(hl);
  1533. if not ((def.deftype = pointerdef) or
  1534. ((def.deftype = orddef) and
  1535. (torddef(def).typ in [u64bit, u16bit, u32bit, u8bit, uchar,
  1536. bool8bit, bool16bit, bool32bit]))) then
  1537. begin
  1538. { ... instructions setting overflow flag ...
  1539. mfxerf R0
  1540. mtcrf 128, R0
  1541. ble cr0, label }
  1542. list.concat(taicpu.op_reg(A_MFXER, NR_R0));
  1543. list.concat(taicpu.op_const_reg(A_MTCRF, 128, NR_R0));
  1544. flags.cr := RS_CR0;
  1545. flags.flag := F_LE;
  1546. a_jmp_flags(list, flags, hl);
  1547. end else
  1548. a_jmp_cond(list, OC_AE, hl);
  1549. a_call_name(list, 'FPC_OVERFLOW');
  1550. a_label(list, hl);
  1551. end;
  1552. procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
  1553. labelname: string; ioffset: longint);
  1554. procedure loadvmttor11;
  1555. var
  1556. href: treference;
  1557. begin
  1558. reference_reset_base(href, NR_R3, 0);
  1559. cg.a_load_ref_reg(list, OS_ADDR, OS_ADDR, href, NR_R11);
  1560. end;
  1561. procedure op_onr11methodaddr;
  1562. var
  1563. href: treference;
  1564. begin
  1565. if (procdef.extnumber = $FFFF) then
  1566. Internalerror(200006139);
  1567. { call/jmp vmtoffs(%eax) ; method offs }
  1568. reference_reset_base(href, NR_R11,
  1569. procdef._class.vmtmethodoffset(procdef.extnumber));
  1570. if not (hasLargeOffset(href)) then begin
  1571. list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11,
  1572. smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) <
  1573. 0))));
  1574. href.offset := smallint(href.offset and $FFFF);
  1575. end else
  1576. { add support for offsets > 16 bit }
  1577. internalerror(200510201);
  1578. list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
  1579. { the loaded reference is a function descriptor reference, so deref again
  1580. (at ofs 0 there's the real pointer) }
  1581. {$warning ts:TODO: update GOT reference}
  1582. reference_reset_base(href, NR_R11, 0);
  1583. list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
  1584. list.concat(taicpu.op_reg(A_MTCTR, NR_R11));
  1585. list.concat(taicpu.op_none(A_BCTR));
  1586. { NOP needed for the linker...? }
  1587. list.concat(taicpu.op_none(A_NOP));
  1588. end;
  1589. var
  1590. make_global: boolean;
  1591. begin
  1592. if (not (procdef.proctypeoption in [potype_function, potype_procedure])) then
  1593. Internalerror(200006137);
  1594. if not assigned(procdef._class) or
  1595. (procdef.procoptions * [po_classmethod, po_staticmethod,
  1596. po_methodpointer, po_interrupt, po_iocheck] <> []) then
  1597. Internalerror(200006138);
  1598. if procdef.owner.symtabletype <> objectsymtable then
  1599. Internalerror(200109191);
  1600. make_global := false;
  1601. if (not current_module.is_unit) or
  1602. (cs_create_smart in aktmoduleswitches) or
  1603. (procdef.owner.defowner.owner.symtabletype = globalsymtable) then
  1604. make_global := true;
  1605. if make_global then
  1606. List.concat(Tai_symbol.Createname_global(labelname, AT_FUNCTION, 0))
  1607. else
  1608. List.concat(Tai_symbol.Createname(labelname, AT_FUNCTION, 0));
  1609. { set param1 interface to self }
  1610. g_adjust_self_value(list, procdef, ioffset);
  1611. if po_virtualmethod in procdef.procoptions then begin
  1612. loadvmttor11;
  1613. op_onr11methodaddr;
  1614. end else
  1615. {$note ts:todo add GOT change?? - think not needed :) }
  1616. list.concat(taicpu.op_sym(A_B,
  1617. objectlibrary.newasmsymbol('.' + procdef.mangledname, AB_EXTERNAL,
  1618. AT_FUNCTION)));
  1619. List.concat(Tai_symbol_end.Createname(labelname));
  1620. end;
  1621. {***************** This is private property, keep out! :) *****************}
  1622. function tcgppc.issimpleref(const ref: treference): boolean;
  1623. begin
  1624. if (ref.base = NR_NO) and
  1625. (ref.index <> NR_NO) then
  1626. internalerror(200208101);
  1627. result :=
  1628. not (assigned(ref.symbol)) and
  1629. (((ref.index = NR_NO) and
  1630. (ref.offset >= low(smallint)) and
  1631. (ref.offset <= high(smallint))) or
  1632. ((ref.index <> NR_NO) and
  1633. (ref.offset = 0)));
  1634. end;
  1635. function tcgppc.fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
  1636. var
  1637. tmpreg: tregister;
  1638. needsAlign : boolean;
  1639. begin
  1640. result := false;
  1641. needsAlign := size in [OS_S32, OS_64, OS_S64];
  1642. if (ref.base = NR_NO) then begin
  1643. ref.base := ref.index;
  1644. ref.index := NR_NO;
  1645. end;
  1646. if (ref.base <> NR_NO) and (ref.index <> NR_NO) and
  1647. ((ref.offset <> 0) or assigned(ref.symbol)) then begin
  1648. result := true;
  1649. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1650. a_op_reg_reg_reg(list, OP_ADD, size, ref.base, ref.index, tmpreg);
  1651. ref.index := NR_NO;
  1652. ref.base := tmpreg;
  1653. end;
  1654. end;
  1655. procedure tcgppc.a_load_store(list: taasmoutput; op: tasmop; reg: tregister;
  1656. ref: treference);
  1657. var
  1658. tmpreg, tmpreg2: tregister;
  1659. tmpref: treference;
  1660. largeOffset: Boolean;
  1661. begin
  1662. { at this point there must not be a combination of values in the ref treference
  1663. which is not possible to directly map to instructions of the PowerPC architecture }
  1664. if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
  1665. internalerror(200310131);
  1666. { for some instructions we need to check that the offset is divisible by at
  1667. least four. If not, add the bytes which are "off" to the base register and
  1668. adjust the offset accordingly }
  1669. case op of
  1670. A_LD, A_LDU, A_STD, A_STDU, A_LWA :
  1671. if ((ref.offset mod 4) <> 0) then begin
  1672. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1673. if (ref.base <> NR_NO) then begin
  1674. a_op_const_reg_reg(list, OP_ADD, OS_ADDR, ref.offset mod 4, ref.base, tmpreg);
  1675. ref.base := tmpreg;
  1676. end else begin
  1677. list.concat(taicpu.op_reg_const(A_LI, tmpreg, ref.offset mod 4));
  1678. ref.base := tmpreg;
  1679. end;
  1680. ref.offset := (ref.offset div 4) * 4;
  1681. end;
  1682. end;
  1683. { if we have to load/store from a symbol or large addresses, use a temporary register
  1684. containing the address }
  1685. if assigned(ref.symbol) or (hasLargeOffset(ref)) then begin
  1686. tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1687. if (hasLargeOffset(ref) and (ref.base = NR_NO)) then begin
  1688. ref.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1689. a_load_const_reg(list, OS_ADDR, ref.offset, ref.base);
  1690. ref.offset := 0;
  1691. end;
  1692. reference_reset(tmpref);
  1693. tmpref.symbol := ref.symbol;
  1694. tmpref.relsymbol := ref.relsymbol;
  1695. tmpref.offset := ref.offset;
  1696. if (ref.base <> NR_NO) then begin
  1697. { As long as the TOC isn't working we try to achieve highest speed (in this
  1698. case by allowing instructions execute in parallel) as possible at the cost
  1699. of using another temporary register. So the code template when there is
  1700. a base register and an offset is the following:
  1701. lis rT1, SYM+offs@highest
  1702. ori rT1, rT1, SYM+offs@higher
  1703. lis rT2, SYM+offs@hi
  1704. ori rT2, SYM+offs@lo
  1705. rldimi rT2, rT1, 32
  1706. <op>X reg, base, rT2
  1707. }
  1708. tmpreg2 := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
  1709. tmpref.refaddr := addr_highest;
  1710. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
  1711. tmpref.refaddr := addr_higher;
  1712. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
  1713. tmpref.refaddr := addr_high;
  1714. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg2, tmpref));
  1715. tmpref.refaddr := addr_low;
  1716. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg2, tmpreg2, tmpref));
  1717. list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, tmpreg2, tmpreg, 32, 0));
  1718. reference_reset(tmpref);
  1719. tmpref.base := ref.base;
  1720. tmpref.index := tmpreg2;
  1721. case op of
  1722. { the code generator doesn't generate update instructions anyway, so
  1723. error out on those instructions }
  1724. A_LBZ : op := A_LBZX;
  1725. A_LHZ : op := A_LHZX;
  1726. A_LWZ : op := A_LWZX;
  1727. A_LD : op := A_LDX;
  1728. A_LHA : op := A_LHAX;
  1729. A_LWA : op := A_LWAX;
  1730. A_LFS : op := A_LFSX;
  1731. A_LFD : op := A_LFDX;
  1732. A_STB : op := A_STBX;
  1733. A_STH : op := A_STHX;
  1734. A_STW : op := A_STWX;
  1735. A_STD : op := A_STDX;
  1736. A_STFS : op := A_STFSX;
  1737. A_STFD : op := A_STFDX;
  1738. else
  1739. { unknown load/store opcode }
  1740. internalerror(2005101302);
  1741. end;
  1742. list.concat(taicpu.op_reg_ref(op, reg, tmpref));
  1743. end else begin
  1744. { when accessing value from a reference without a base register, use the
  1745. following code template:
  1746. lis rT,SYM+offs@highesta
  1747. ori rT,SYM+offs@highera
  1748. sldi rT,rT,32
  1749. oris rT,rT,SYM+offs@ha
  1750. ld rD,SYM+offs@l(rT)
  1751. }
  1752. tmpref.refaddr := addr_highesta;
  1753. list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
  1754. tmpref.refaddr := addr_highera;
  1755. list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
  1756. list.concat(taicpu.op_reg_reg_const(A_SLDI, tmpreg, tmpreg, 32));
  1757. tmpref.refaddr := addr_higha;
  1758. list.concat(taicpu.op_reg_reg_ref(A_ORIS, tmpreg, tmpreg, tmpref));
  1759. tmpref.base := tmpreg;
  1760. tmpref.refaddr := addr_low;
  1761. list.concat(taicpu.op_reg_ref(op, reg, tmpref));
  1762. end;
  1763. end else begin
  1764. list.concat(taicpu.op_reg_ref(op, reg, ref));
  1765. end;
  1766. end;
  1767. procedure tcgppc.a_jmp(list: taasmoutput; op: tasmop; c: tasmcondflag;
  1768. crval: longint; l: tasmlabel);
  1769. var
  1770. p: taicpu;
  1771. begin
  1772. p := taicpu.op_sym(op, objectlibrary.newasmsymbol(l.name, AB_EXTERNAL,
  1773. AT_LABEL));
  1774. if op <> A_B then
  1775. create_cond_norm(c, crval, p.condition);
  1776. p.is_jmp := true;
  1777. list.concat(p)
  1778. end;
  1779. function tcgppc.hasLargeOffset(const ref : TReference) : Boolean;
  1780. begin
  1781. { this rather strange calculation is required because offsets of TReferences are unsigned }
  1782. result := aword(ref.offset-low(smallint)) > high(smallint)-low(smallint);
  1783. end;
  1784. begin
  1785. cg := tcgppc.create;
  1786. end.